using System.Diagnostics;
using MarketAlly.AIPlugin.Context.Configuration;
using Microsoft.Extensions.Logging;

namespace MarketAlly.AIPlugin.Context.Monitoring
{
    /// <summary>
    /// Provides health checks for context storage and operations.
    /// </summary>
    /// <remarks>
    /// When <c>Monitoring.EnableHealthChecks</c> is set in the configuration, a timer runs
    /// <see cref="CheckHealthAsync"/> periodically; the most recent result is cached and
    /// available through <see cref="GetLastHealthStatus"/>.
    /// </remarks>
    public class HealthCheckService : IDisposable
    {
        /// <summary>Managed-heap size (in MB) at or above which the memory check reports unhealthy.</summary>
        private const int MemoryThresholdMb = 500;

        /// <summary>Disk usage (in percent) at or above which the disk check reports unhealthy.</summary>
        private const double DiskUsageThresholdPercent = 90.0;

        private const double BytesPerMegabyte = 1024.0 * 1024.0;
        private const double BytesPerGigabyte = 1024.0 * 1024.0 * 1024.0;

        private readonly ContextConfiguration _configuration;
        private readonly ILogger _logger;

        // Null when periodic health checks are disabled in configuration.
        private readonly Timer? _healthCheckTimer;

        private readonly object _healthLock = new();
        private HealthStatus _lastHealthStatus;

        /// <summary>
        /// Raised when the overall health flips between healthy and unhealthy.
        /// The event argument is a <see cref="HealthStatusChangedEventArgs"/>.
        /// </summary>
        public event EventHandler? HealthStatusChanged;

        /// <summary>
        /// Creates the service and, if enabled in <paramref name="configuration"/>,
        /// starts the periodic health-check timer (first run is immediate).
        /// </summary>
        /// <param name="configuration">Context configuration supplying storage path and monitoring settings.</param>
        /// <param name="logger">Logger used for check results and failures.</param>
        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
        public HealthCheckService(ContextConfiguration configuration, ILogger logger)
        {
            _configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
            _logger = logger ?? throw new ArgumentNullException(nameof(logger));

            // Assume healthy until the first check says otherwise.
            _lastHealthStatus = new HealthStatus
            {
                IsHealthy = true,
                CheckTime = DateTime.UtcNow
            };

            // The timer is created last so every other field is initialized
            // before the first callback can run.
            if (_configuration.Monitoring.EnableHealthChecks)
            {
                var interval = TimeSpan.FromSeconds(_configuration.Monitoring.HealthCheckIntervalSeconds);
                _healthCheckTimer = new Timer(PerformHealthCheck, null, TimeSpan.Zero, interval);
            }
        }

        /// <summary>
        /// Performs a comprehensive health check (storage, memory, disk space,
        /// file-system permissions, configuration, operations), caches the result and
        /// raises <see cref="HealthStatusChanged"/> if the overall status flipped.
        /// </summary>
        /// <param name="cancellationToken">Token passed to the file I/O performed by the checks.</param>
        /// <returns>
        /// The aggregated status. Failures are reported through the returned object
        /// (<see cref="HealthStatus.IsHealthy"/> is <c>false</c>) rather than thrown.
        /// </returns>
        public async Task<HealthStatus> CheckHealthAsync(CancellationToken cancellationToken = default)
        {
            var stopwatch = Stopwatch.StartNew();
            var healthStatus = new HealthStatus
            {
                CheckTime = DateTime.UtcNow,
                IsHealthy = true,
                Details = new List<HealthCheckDetail>()
            };

            try
            {
                // Each check appends exactly one detail entry and handles its own exceptions.
                await CheckStorageHealthAsync(healthStatus, cancellationToken);
                CheckMemoryHealth(healthStatus);
                CheckDiskSpace(healthStatus);
                await CheckFileSystemPermissionsAsync(healthStatus, cancellationToken);
                CheckConfigurationHealth(healthStatus);
                CheckOperationalHealth(healthStatus);

                stopwatch.Stop();
                healthStatus.CheckDurationMs = stopwatch.ElapsedMilliseconds;

                // Overall health is the conjunction of all component results.
                healthStatus.IsHealthy = healthStatus.Details.All(d => d.IsHealthy);

                PublishStatus(healthStatus);

                _logger.LogInformation(
                    "Health check completed in {Duration}ms - Status: {Status}",
                    stopwatch.ElapsedMilliseconds,
                    healthStatus.IsHealthy ? "Healthy" : "Unhealthy");

                return healthStatus;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Health check failed with exception");
                stopwatch.Stop();

                var failedStatus = new HealthStatus
                {
                    CheckTime = DateTime.UtcNow,
                    CheckDurationMs = stopwatch.ElapsedMilliseconds,
                    IsHealthy = false,
                    Error = ex.Message,
                    Details = new List<HealthCheckDetail>
                    {
                        new HealthCheckDetail
                        {
                            Component = "HealthCheck",
                            IsHealthy = false,
                            Message = $"Health check failed: {ex.Message}",
                            CheckTime = DateTime.UtcNow
                        }
                    }
                };

                // Cache the failure too, so GetLastHealthStatus does not keep
                // reporting a stale result from before the check broke.
                PublishStatus(failedStatus);
                return failedStatus;
            }
        }

        /// <summary>
        /// Gets the last known health status without performing a new check.
        /// </summary>
        /// <returns>The status cached by the most recent <see cref="CheckHealthAsync"/> run.</returns>
        public HealthStatus GetLastHealthStatus()
        {
            lock (_healthLock)
            {
                return _lastHealthStatus;
            }
        }

        /// <summary>
        /// Stores <paramref name="healthStatus"/> as the cached status and raises
        /// <see cref="HealthStatusChanged"/> when the healthy flag differs from the previous one.
        /// </summary>
        private void PublishStatus(HealthStatus healthStatus)
        {
            bool wasHealthy;
            lock (_healthLock)
            {
                wasHealthy = _lastHealthStatus.IsHealthy;
                _lastHealthStatus = healthStatus;
            }

            if (wasHealthy == healthStatus.IsHealthy)
            {
                return;
            }

            // Raised outside the lock: subscriber code is arbitrary and must not
            // run while we hold _healthLock.
            try
            {
                HealthStatusChanged?.Invoke(this, new HealthStatusChangedEventArgs
                {
                    PreviousStatus = wasHealthy,
                    CurrentStatus = healthStatus.IsHealthy,
                    Details = healthStatus
                });
            }
            catch (Exception ex)
            {
                // A faulty subscriber must not turn a completed check into a failed one.
                _logger.LogError(ex, "HealthStatusChanged subscriber threw an exception");
            }
        }

        /// <summary>
        /// Checks if context storage is accessible and functional by writing, reading
        /// back and deleting a temporary file under <c>.health-check</c> in the storage path.
        /// </summary>
        private async Task CheckStorageHealthAsync(HealthStatus healthStatus, CancellationToken cancellationToken)
        {
            var detail = new HealthCheckDetail { Component = "Storage", CheckTime = DateTime.UtcNow };

            try
            {
                var testStoragePath = Path.Combine(_configuration.StoragePath, ".health-check");

                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory(testStoragePath);

                var testFilePath = Path.Combine(testStoragePath, $"health-{Guid.NewGuid():N}.tmp");
                var testContent = $"Health check at {DateTime.UtcNow:O}";

                try
                {
                    await File.WriteAllTextAsync(testFilePath, testContent, cancellationToken);

                    var readContent = await File.ReadAllTextAsync(testFilePath, cancellationToken);
                    if (readContent != testContent)
                    {
                        throw new InvalidOperationException("Read content doesn't match written content");
                    }

                    // Deletion is part of the probe: a failure here makes the check unhealthy.
                    File.Delete(testFilePath);
                }
                catch
                {
                    // Do not leave probe files behind when a step failed.
                    TryDeleteFile(testFilePath);
                    throw;
                }

                detail.IsHealthy = true;
                detail.Message = "Storage is accessible and functional";
            }
            catch (Exception ex)
            {
                RecordFailure(detail, "Storage check", ex);
            }

            healthStatus.Details.Add(detail);
        }

        /// <summary>
        /// Checks current managed memory usage against <see cref="MemoryThresholdMb"/>.
        /// </summary>
        private void CheckMemoryHealth(HealthStatus healthStatus)
        {
            var detail = new HealthCheckDetail { Component = "Memory", CheckTime = DateTime.UtcNow };

            try
            {
                // false: report the current estimate without forcing a collection.
                var memoryUsage = GC.GetTotalMemory(false);
                var memoryUsageMB = memoryUsage / BytesPerMegabyte;

                detail.IsHealthy = memoryUsageMB < MemoryThresholdMb;
                detail.Message = $"Memory usage: {memoryUsageMB:F1} MB";
                if (!detail.IsHealthy)
                {
                    detail.Message += $" (exceeds threshold of {MemoryThresholdMb} MB)";
                }

                detail.Metadata = new Dictionary<string, object>
                {
                    ["memory_bytes"] = memoryUsage,
                    ["memory_mb"] = memoryUsageMB,
                    ["threshold_mb"] = MemoryThresholdMb
                };
            }
            catch (Exception ex)
            {
                RecordFailure(detail, "Memory check", ex);
            }

            healthStatus.Details.Add(detail);
        }

        /// <summary>
        /// Checks disk usage of the drive that hosts the storage path against
        /// <see cref="DiskUsageThresholdPercent"/>.
        /// </summary>
        private void CheckDiskSpace(HealthStatus healthStatus)
        {
            var detail = new HealthCheckDetail { Component = "DiskSpace", CheckTime = DateTime.UtcNow };

            try
            {
                var storagePath = Path.GetFullPath(_configuration.StoragePath);
                var root = Path.GetPathRoot(storagePath);
                if (string.IsNullOrEmpty(root))
                {
                    throw new InvalidOperationException($"Cannot determine the drive root of '{storagePath}'");
                }

                var driveInfo = new DriveInfo(root);
                var totalBytes = driveInfo.TotalSize;
                if (totalBytes <= 0)
                {
                    // Avoids a NaN percentage from dividing by zero below.
                    throw new InvalidOperationException($"Drive '{root}' reports no capacity");
                }

                var availableSpaceGB = driveInfo.AvailableFreeSpace / BytesPerGigabyte;
                var totalSpaceGB = totalBytes / BytesPerGigabyte;
                var usedPercentage = ((totalSpaceGB - availableSpaceGB) / totalSpaceGB) * 100;

                detail.IsHealthy = usedPercentage < DiskUsageThresholdPercent;
                detail.Message = $"Disk usage: {usedPercentage:F1}% ({availableSpaceGB:F1} GB available)";
                if (!detail.IsHealthy)
                {
                    detail.Message += $" (exceeds threshold of {DiskUsageThresholdPercent}%)";
                }

                detail.Metadata = new Dictionary<string, object>
                {
                    ["available_space_gb"] = availableSpaceGB,
                    ["total_space_gb"] = totalSpaceGB,
                    ["used_percentage"] = usedPercentage,
                    ["threshold_percentage"] = DiskUsageThresholdPercent
                };
            }
            catch (Exception ex)
            {
                RecordFailure(detail, "Disk space check", ex);
            }

            healthStatus.Details.Add(detail);
        }

        /// <summary>
        /// Checks file system permissions by creating a scratch directory in the storage
        /// path, then writing, reading and deleting a file inside it and removing the directory.
        /// </summary>
        private async Task CheckFileSystemPermissionsAsync(HealthStatus healthStatus, CancellationToken cancellationToken)
        {
            var detail = new HealthCheckDetail { Component = "Permissions", CheckTime = DateTime.UtcNow };

            try
            {
                var testDir = Path.Combine(_configuration.StoragePath, $".perm-test-{Guid.NewGuid():N}");

                try
                {
                    // Create directory -> create/write file -> read file -> delete both.
                    Directory.CreateDirectory(testDir);

                    var testFile = Path.Combine(testDir, "test.txt");
                    await File.WriteAllTextAsync(testFile, "permission test", cancellationToken);
                    _ = await File.ReadAllTextAsync(testFile, cancellationToken);

                    File.Delete(testFile);
                    Directory.Delete(testDir);
                }
                catch
                {
                    // Do not leave the scratch directory behind when a step failed.
                    TryDeleteDirectory(testDir);
                    throw;
                }

                detail.IsHealthy = true;
                detail.Message = "File system permissions are correct";
            }
            catch (Exception ex)
            {
                RecordFailure(detail, "Permission check", ex);
            }

            healthStatus.Details.Add(detail);
        }

        /// <summary>
        /// Validates configuration settings (storage path, retention, performance, search).
        /// </summary>
        private void CheckConfigurationHealth(HealthStatus healthStatus)
        {
            var detail = new HealthCheckDetail { Component = "Configuration", CheckTime = DateTime.UtcNow };

            try
            {
                var issues = new List<string>();

                if (string.IsNullOrEmpty(_configuration.StoragePath))
                {
                    issues.Add("Storage path is not configured");
                }

                if (_configuration.Retention.RetentionDays <= 0)
                {
                    issues.Add("Invalid retention days setting");
                }

                if (_configuration.Retention.MaxEntriesPerFile <= 0)
                {
                    issues.Add("Invalid max entries per file setting");
                }

                if (_configuration.Performance.MaxConcurrentOperations <= 0)
                {
                    issues.Add("Invalid max concurrent operations setting");
                }

                if (_configuration.Search.EnableSemanticSearch &&
                    string.IsNullOrEmpty(_configuration.Search.OpenAIApiKey))
                {
                    issues.Add("Semantic search enabled but API key not configured");
                }

                detail.IsHealthy = issues.Count == 0;
                detail.Message = detail.IsHealthy
                    ? "Configuration is valid"
                    : $"Configuration issues: {string.Join(", ", issues)}";

                if (issues.Count > 0)
                {
                    detail.Metadata = new Dictionary<string, object> { ["issues"] = issues };
                }
            }
            catch (Exception ex)
            {
                RecordFailure(detail, "Configuration check", ex);
            }

            healthStatus.Details.Add(detail);
        }

        /// <summary>
        /// Checks for operational issues. Currently a placeholder that always reports healthy.
        /// </summary>
        private void CheckOperationalHealth(HealthStatus healthStatus)
        {
            // In a full implementation, you might check for:
            // - Long-running operations
            // - Failed operations count
            // - Queue sizes
            // - Cache hit ratios
            healthStatus.Details.Add(new HealthCheckDetail
            {
                Component = "Operations",
                CheckTime = DateTime.UtcNow,
                IsHealthy = true,
                Message = "No operational issues detected"
            });
        }

        /// <summary>
        /// Marks <paramref name="detail"/> as failed using the message of <paramref name="ex"/>.
        /// </summary>
        private static void RecordFailure(HealthCheckDetail detail, string checkName, Exception ex)
        {
            detail.IsHealthy = false;
            detail.Message = $"{checkName} failed: {ex.Message}";
            detail.Error = ex.Message;
        }

        /// <summary>Best-effort removal of a probe file; failures are logged, never thrown.</summary>
        private void TryDeleteFile(string path)
        {
            try
            {
                File.Delete(path);
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Could not remove health-check probe file {Path}", path);
            }
        }

        /// <summary>Best-effort removal of a probe directory and its contents; failures are logged, never thrown.</summary>
        private void TryDeleteDirectory(string path)
        {
            try
            {
                if (Directory.Exists(path))
                {
                    Directory.Delete(path, recursive: true);
                }
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Could not remove health-check probe directory {Path}", path);
            }
        }

        /// <summary>
        /// Timer callback for periodic health checks.
        /// </summary>
        /// <remarks>
        /// <c>async void</c> is required by the <see cref="TimerCallback"/> signature, so every
        /// exception is caught here; nothing may escape onto the thread pool.
        /// </remarks>
        private async void PerformHealthCheck(object? state)
        {
            try
            {
                await CheckHealthAsync();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Periodic health check failed");
            }
        }

        /// <summary>
        /// Stops the periodic health-check timer, if one was started.
        /// </summary>
        public void Dispose()
        {
            _healthCheckTimer?.Dispose();
            GC.SuppressFinalize(this);
        }
    }

    /// <summary>
    /// Overall health status.
    /// </summary>
    public class HealthStatus
    {
        /// <summary>UTC time at which the check started (or failed).</summary>
        public DateTime CheckTime { get; set; }

        /// <summary>Wall-clock duration of the check in milliseconds.</summary>
        public long CheckDurationMs { get; set; }

        /// <summary><c>true</c> when every component check reported healthy.</summary>
        public bool IsHealthy { get; set; }

        /// <summary>Exception message when the check itself failed; otherwise <c>null</c>.</summary>
        public string? Error { get; set; }

        /// <summary>Per-component results.</summary>
        public List<HealthCheckDetail> Details { get; set; } = new();
    }

    /// <summary>
    /// Health check detail for a specific component.
    /// </summary>
    public class HealthCheckDetail
    {
        /// <summary>Name of the checked component (e.g. "Storage", "Memory").</summary>
        public string Component { get; set; } = "";

        /// <summary>UTC time at which this component check started.</summary>
        public DateTime CheckTime { get; set; }

        /// <summary>Result of this component check.</summary>
        public bool IsHealthy { get; set; }

        /// <summary>Human-readable result description.</summary>
        public string Message { get; set; } = "";

        /// <summary>Exception message when the check threw; otherwise <c>null</c>.</summary>
        public string? Error { get; set; }

        /// <summary>Optional measurements collected by the check, keyed by metric name.</summary>
        public Dictionary<string, object>? Metadata { get; set; }
    }

    /// <summary>
    /// Event args for health status changes.
    /// </summary>
    public class HealthStatusChangedEventArgs : EventArgs
    {
        /// <summary>Healthy flag before the change.</summary>
        public bool PreviousStatus { get; set; }

        /// <summary>Healthy flag after the change.</summary>
        public bool CurrentStatus { get; set; }

        /// <summary>The full status that triggered the change.</summary>
        public HealthStatus Details { get; set; } = new();
    }
}