using Microsoft.Extensions.Logging;
using MarketAlly.AIPlugin.Context.Configuration;
using System.Diagnostics;
namespace MarketAlly.AIPlugin.Context.Monitoring
{
/// <summary>
/// Provides health checks for context storage and operations.
/// </summary>
/// <remarks>
/// When <c>Monitoring.EnableHealthChecks</c> is set in the configuration, a timer runs
/// <see cref="CheckHealthAsync"/> immediately and then once per
/// <c>Monitoring.HealthCheckIntervalSeconds</c>. Dispose the service to stop that timer.
/// </remarks>
public class HealthCheckService : IDisposable
{
    // Managed-heap usage at or above this many megabytes marks the "Memory" component unhealthy.
    private const int MemoryThresholdMB = 500;

    // Disk usage at or above this percentage marks the "DiskSpace" component unhealthy.
    private const double DiskUsageThresholdPercent = 90.0;

    private readonly ContextConfiguration _configuration;
    private readonly ILogger _logger;

    // Null when periodic health checks are disabled in configuration.
    private readonly Timer? _healthCheckTimer;

    // Most recent result produced by a completed check; guarded by _healthLock.
    private HealthStatus _lastHealthStatus;
    private readonly object _healthLock = new();

    /// <summary>
    /// Raised when the overall healthy/unhealthy state differs from the previously cached one.
    /// </summary>
    public event EventHandler<HealthStatusChangedEventArgs>? HealthStatusChanged;

    /// <summary>
    /// Creates the service and, if enabled in configuration, starts the periodic health check timer.
    /// </summary>
    /// <param name="configuration">Context configuration (storage path, monitoring and validation settings).</param>
    /// <param name="logger">Logger used for check results and failures.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="configuration"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public HealthCheckService(ContextConfiguration configuration, ILogger logger)
    {
        _configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));

        // Start out optimistic until the first real check has completed.
        _lastHealthStatus = new HealthStatus { IsHealthy = true, CheckTime = DateTime.UtcNow };

        if (_configuration.Monitoring.EnableHealthChecks)
        {
            var interval = TimeSpan.FromSeconds(_configuration.Monitoring.HealthCheckIntervalSeconds);

            // Due time of zero: the first check is queued immediately.
            _healthCheckTimer = new Timer(PerformHealthCheck, null, TimeSpan.Zero, interval);
        }
    }

    /// <summary>
    /// Performs a comprehensive health check.
    /// </summary>
    /// <param name="cancellationToken">Token passed to the file-based checks.</param>
    /// <returns>
    /// The result of this check. On success it is also cached for <see cref="GetLastHealthStatus"/>.
    /// If the check itself fails (including cancellation) an unhealthy status describing the failure
    /// is returned and the cached status is left untouched.
    /// </returns>
    public async Task<HealthStatus> CheckHealthAsync(CancellationToken cancellationToken = default)
    {
        var stopwatch = Stopwatch.StartNew();
        var healthStatus = new HealthStatus
        {
            CheckTime = DateTime.UtcNow,
            IsHealthy = true,
            Details = new List<HealthCheckDetail>()
        };

        try
        {
            // Check storage accessibility
            await CheckStorageHealthAsync(healthStatus, cancellationToken);

            // Check memory usage
            CheckMemoryHealth(healthStatus);

            // Check disk space
            CheckDiskSpace(healthStatus);

            // Check file system permissions
            await CheckFileSystemPermissionsAsync(healthStatus, cancellationToken);

            // Check configuration validity
            CheckConfigurationHealth(healthStatus);

            // Check for stuck operations (if we had a way to track them)
            CheckOperationalHealth(healthStatus);

            stopwatch.Stop();
            healthStatus.CheckDurationMs = stopwatch.ElapsedMilliseconds;

            // Overall health requires every component to be healthy.
            healthStatus.IsHealthy = healthStatus.Details.All(d => d.IsHealthy);

            // Swap the cached status under the lock, but raise the event after releasing it
            // so subscriber code never runs while the lock is held.
            bool wasHealthy;
            lock (_healthLock)
            {
                wasHealthy = _lastHealthStatus.IsHealthy;
                _lastHealthStatus = healthStatus;
            }

            if (wasHealthy != healthStatus.IsHealthy)
            {
                HealthStatusChanged?.Invoke(this, new HealthStatusChangedEventArgs
                {
                    PreviousStatus = wasHealthy,
                    CurrentStatus = healthStatus.IsHealthy,
                    Details = healthStatus
                });
            }

            _logger.LogInformation("Health check completed in {Duration}ms - Status: {Status}",
                stopwatch.ElapsedMilliseconds, healthStatus.IsHealthy ? "Healthy" : "Unhealthy");

            return healthStatus;
        }
        catch (Exception ex)
        {
            // Reached for unexpected failures, a throwing event subscriber, or cancellation.
            _logger.LogError(ex, "Health check failed with exception");
            stopwatch.Stop();

            return new HealthStatus
            {
                CheckTime = DateTime.UtcNow,
                CheckDurationMs = stopwatch.ElapsedMilliseconds,
                IsHealthy = false,
                Error = ex.Message,
                Details = new List<HealthCheckDetail>
                {
                    new HealthCheckDetail
                    {
                        Component = "HealthCheck",
                        IsHealthy = false,
                        Message = $"Health check failed: {ex.Message}",
                        CheckTime = DateTime.UtcNow
                    }
                }
            };
        }
    }

    /// <summary>
    /// Gets the last known health status without performing a new check.
    /// </summary>
    /// <returns>
    /// The most recently cached status; before any check completes this is the healthy
    /// placeholder created by the constructor.
    /// </returns>
    public HealthStatus GetLastHealthStatus()
    {
        lock (_healthLock)
        {
            return _lastHealthStatus;
        }
    }

    /// <summary>
    /// Checks if context storage is accessible and functional by writing, reading back and
    /// deleting a temporary file under <c>.health-check</c> in the storage path.
    /// </summary>
    private async Task CheckStorageHealthAsync(HealthStatus healthStatus, CancellationToken cancellationToken)
    {
        var detail = new HealthCheckDetail
        {
            Component = "Storage",
            CheckTime = DateTime.UtcNow
        };

        // Non-null while a probe file may still exist on disk and needs cleanup.
        string? testFilePath = null;

        try
        {
            var testStoragePath = Path.Combine(_configuration.StoragePath, ".health-check");

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(testStoragePath);

            // Test write operation
            testFilePath = Path.Combine(testStoragePath, $"health-{Guid.NewGuid():N}.tmp");
            var testContent = $"Health check at {DateTime.UtcNow:O}";
            await File.WriteAllTextAsync(testFilePath, testContent, cancellationToken);

            // Test read operation
            var readContent = await File.ReadAllTextAsync(testFilePath, cancellationToken);
            if (readContent != testContent)
            {
                throw new IOException("Read content doesn't match written content");
            }

            // Deleting is part of the probe: a failure here is reported as a storage problem.
            File.Delete(testFilePath);
            testFilePath = null;

            detail.IsHealthy = true;
            detail.Message = "Storage is accessible and functional";
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation is not a storage failure; it propagates to the caller instead.
            detail.IsHealthy = false;
            detail.Message = $"Storage check failed: {ex.Message}";
            detail.Error = ex.Message;
        }
        finally
        {
            // Do not leave probe files behind when the check failed part-way.
            TryDeleteFile(testFilePath);
        }

        healthStatus.Details.Add(detail);
    }

    /// <summary>
    /// Checks current managed memory usage against <see cref="MemoryThresholdMB"/>.
    /// </summary>
    private void CheckMemoryHealth(HealthStatus healthStatus)
    {
        var detail = new HealthCheckDetail
        {
            Component = "Memory",
            CheckTime = DateTime.UtcNow
        };

        try
        {
            // false: read the current figure without forcing a garbage collection first.
            var memoryUsage = GC.GetTotalMemory(false);
            var memoryUsageMB = memoryUsage / (1024.0 * 1024.0);

            detail.IsHealthy = memoryUsageMB < MemoryThresholdMB;
            detail.Message = $"Memory usage: {memoryUsageMB:F1} MB";
            if (!detail.IsHealthy)
            {
                detail.Message += $" (exceeds threshold of {MemoryThresholdMB} MB)";
            }

            detail.Metadata = new Dictionary<string, object>
            {
                ["memory_bytes"] = memoryUsage,
                ["memory_mb"] = memoryUsageMB,
                ["threshold_mb"] = MemoryThresholdMB
            };
        }
        catch (Exception ex)
        {
            detail.IsHealthy = false;
            detail.Message = $"Memory check failed: {ex.Message}";
            detail.Error = ex.Message;
        }

        healthStatus.Details.Add(detail);
    }

    /// <summary>
    /// Checks available disk space on the drive that holds the storage path against
    /// <see cref="DiskUsageThresholdPercent"/>.
    /// </summary>
    private void CheckDiskSpace(HealthStatus healthStatus)
    {
        var detail = new HealthCheckDetail
        {
            Component = "DiskSpace",
            CheckTime = DateTime.UtcNow
        };

        try
        {
            var storagePath = Path.GetFullPath(_configuration.StoragePath);
            var driveInfo = new DriveInfo(Path.GetPathRoot(storagePath)!);

            const double BytesPerGB = 1024.0 * 1024.0 * 1024.0;
            var availableSpaceGB = driveInfo.AvailableFreeSpace / BytesPerGB;
            var totalSpaceGB = driveInfo.TotalSize / BytesPerGB;
            var usedPercentage = ((totalSpaceGB - availableSpaceGB) / totalSpaceGB) * 100;

            detail.IsHealthy = usedPercentage < DiskUsageThresholdPercent;
            detail.Message = $"Disk usage: {usedPercentage:F1}% ({availableSpaceGB:F1} GB available)";
            if (!detail.IsHealthy)
            {
                detail.Message += $" (exceeds threshold of {DiskUsageThresholdPercent}%)";
            }

            detail.Metadata = new Dictionary<string, object>
            {
                ["available_space_gb"] = availableSpaceGB,
                ["total_space_gb"] = totalSpaceGB,
                ["used_percentage"] = usedPercentage,
                ["threshold_percentage"] = DiskUsageThresholdPercent
            };
        }
        catch (Exception ex)
        {
            detail.IsHealthy = false;
            detail.Message = $"Disk space check failed: {ex.Message}";
            detail.Error = ex.Message;
        }

        healthStatus.Details.Add(detail);
    }

    /// <summary>
    /// Checks file system permissions by creating a directory and a file under the storage
    /// path, reading the file back, and deleting both.
    /// </summary>
    private async Task CheckFileSystemPermissionsAsync(HealthStatus healthStatus, CancellationToken cancellationToken)
    {
        var detail = new HealthCheckDetail
        {
            Component = "Permissions",
            CheckTime = DateTime.UtcNow
        };

        // Non-null while the probe directory may still exist on disk and needs cleanup.
        string? testDir = null;

        try
        {
            var storagePath = _configuration.StoragePath;

            // Check if we can create directories
            testDir = Path.Combine(storagePath, $".perm-test-{Guid.NewGuid():N}");
            Directory.CreateDirectory(testDir);

            // Check if we can create and write files
            var testFile = Path.Combine(testDir, "test.txt");
            await File.WriteAllTextAsync(testFile, "permission test", cancellationToken);

            // Check if we can read files (only the ability to read matters, not the content)
            _ = await File.ReadAllTextAsync(testFile, cancellationToken);

            // Check if we can delete files and directories
            File.Delete(testFile);
            Directory.Delete(testDir);
            testDir = null;

            detail.IsHealthy = true;
            detail.Message = "File system permissions are correct";
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation is not a permission failure; it propagates to the caller instead.
            detail.IsHealthy = false;
            detail.Message = $"Permission check failed: {ex.Message}";
            detail.Error = ex.Message;
        }
        finally
        {
            // Do not leave probe directories behind when the check failed part-way.
            TryDeleteDirectory(testDir);
        }

        healthStatus.Details.Add(detail);
    }

    /// <summary>
    /// Validates configuration settings and reports every problem found.
    /// </summary>
    private void CheckConfigurationHealth(HealthStatus healthStatus)
    {
        var detail = new HealthCheckDetail
        {
            Component = "Configuration",
            CheckTime = DateTime.UtcNow
        };

        try
        {
            var issues = new List<string>();

            // Check storage path
            if (string.IsNullOrEmpty(_configuration.StoragePath))
            {
                issues.Add("Storage path is not configured");
            }

            // Check retention settings
            if (_configuration.Retention.RetentionDays <= 0)
            {
                issues.Add("Invalid retention days setting");
            }

            if (_configuration.Retention.MaxEntriesPerFile <= 0)
            {
                issues.Add("Invalid max entries per file setting");
            }

            // Check performance settings
            if (_configuration.Performance.MaxConcurrentOperations <= 0)
            {
                issues.Add("Invalid max concurrent operations setting");
            }

            // Check search settings
            if (_configuration.Search.EnableSemanticSearch && string.IsNullOrEmpty(_configuration.Search.OpenAIApiKey))
            {
                issues.Add("Semantic search enabled but API key not configured");
            }

            detail.IsHealthy = issues.Count == 0;
            detail.Message = detail.IsHealthy ? "Configuration is valid" : $"Configuration issues: {string.Join(", ", issues)}";
            if (issues.Count > 0)
            {
                detail.Metadata = new Dictionary<string, object> { ["issues"] = issues };
            }
        }
        catch (Exception ex)
        {
            // For example a null configuration section.
            detail.IsHealthy = false;
            detail.Message = $"Configuration check failed: {ex.Message}";
            detail.Error = ex.Message;
        }

        healthStatus.Details.Add(detail);
    }

    /// <summary>
    /// Checks for operational issues. Currently a placeholder that always reports healthy.
    /// </summary>
    private void CheckOperationalHealth(HealthStatus healthStatus)
    {
        // In a full implementation, you might check for:
        // - Long-running operations
        // - Failed operations count
        // - Queue sizes
        // - Cache hit ratios
        // etc.
        healthStatus.Details.Add(new HealthCheckDetail
        {
            Component = "Operations",
            CheckTime = DateTime.UtcNow,
            IsHealthy = true,
            Message = "No operational issues detected"
        });
    }

    /// <summary>
    /// Timer callback for periodic health checks.
    /// </summary>
    /// <remarks>
    /// <c>async void</c> is required by the <see cref="TimerCallback"/> signature; every
    /// exception is caught here so that none can escape onto the thread pool.
    /// </remarks>
    private async void PerformHealthCheck(object? state)
    {
        try
        {
            await CheckHealthAsync();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Periodic health check failed");
        }
    }

    /// <summary>
    /// Best-effort removal of a probe file; failures are ignored because the primary
    /// check result has already been recorded.
    /// </summary>
    private static void TryDeleteFile(string? path)
    {
        if (path is null)
        {
            return;
        }

        try
        {
            File.Delete(path);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Intentionally ignored: cleanup only.
        }
    }

    /// <summary>
    /// Best-effort removal of a probe directory and its contents; failures are ignored
    /// because the primary check result has already been recorded.
    /// </summary>
    private static void TryDeleteDirectory(string? path)
    {
        if (path is null)
        {
            return;
        }

        try
        {
            if (Directory.Exists(path))
            {
                Directory.Delete(path, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Intentionally ignored: cleanup only.
        }
    }

    /// <summary>
    /// Stops the periodic health check timer, if one was started.
    /// </summary>
    public void Dispose()
    {
        _healthCheckTimer?.Dispose();
        GC.SuppressFinalize(this);
    }
}
/// <summary>
/// Overall health status
/// </summary>
public class HealthStatus
{
    /// <summary>Time at which the check was started or the status was created.</summary>
    public DateTime CheckTime { get; set; }

    /// <summary>How long the check took, in milliseconds.</summary>
    public long CheckDurationMs { get; set; }

    /// <summary>Overall result of the check.</summary>
    public bool IsHealthy { get; set; }

    /// <summary>Error message when the check itself failed; otherwise <c>null</c>.</summary>
    public string? Error { get; set; }

    /// <summary>Per-component results that make up this status.</summary>
    public List<HealthCheckDetail> Details { get; set; } = new();
}
/// <summary>
/// Health check detail for a specific component
/// </summary>
public class HealthCheckDetail
{
    /// <summary>Name of the component that was checked (for example "Storage").</summary>
    public string Component { get; set; } = "";

    /// <summary>Time at which this component check was started.</summary>
    public DateTime CheckTime { get; set; }

    /// <summary>Whether the component passed its check.</summary>
    public bool IsHealthy { get; set; }

    /// <summary>Human-readable summary of the check result.</summary>
    public string Message { get; set; } = "";

    /// <summary>Error message when the check failed with an exception; otherwise <c>null</c>.</summary>
    public string? Error { get; set; }

    /// <summary>
    /// Optional measurements or findings for the component, keyed by name. Values are of
    /// mixed types (numbers, lists), hence <see cref="object"/>.
    /// </summary>
    public Dictionary<string, object>? Metadata { get; set; }
}
/// <summary>
/// Event args for health status changes
/// </summary>
public class HealthStatusChangedEventArgs : EventArgs
{
    /// <summary>Overall health flag before the change.</summary>
    public bool PreviousStatus { get; set; }

    /// <summary>Overall health flag after the change.</summary>
    public bool CurrentStatus { get; set; }

    /// <summary>Full health status that accompanies the change.</summary>
    public HealthStatus Details { get; set; } = new HealthStatus();
}
}