// MarketAlly.AIPlugin.Extensions/MarketAlly.AIPlugin.Learning/Services/LLMContextService.cs
//
// Repository listing metadata: 604 lines, 23 KiB, C#, Executable File

using MarketAlly.AIPlugin.Learning.Configuration;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using RefactorIQ.Core.Models;
using RefactorIQ.Services.Interfaces;
using System.Collections.Concurrent;
using System.Text;
namespace MarketAlly.AIPlugin.Learning.Services
{
/// <summary>
/// Service for preparing optimized context for LLM consumption
/// </summary>
public interface ILLMContextService
{
/// <summary>
/// Prepares an <see cref="LLMContext"/> for a free-text query.
/// </summary>
/// <param name="query">The query the context is built for.</param>
/// <param name="maxTokens">Upper bound on the estimated token count of the returned code chunks (default 8000).</param>
Task<LLMContext> PrepareContextAsync(string query, int maxTokens = 8000);
/// <summary>
/// Prepares an <see cref="LLMContext"/> centred on one source file.
/// </summary>
/// <param name="filePath">Path of the file to analyse.</param>
/// <param name="query">The query recorded on the returned context.</param>
/// <param name="maxTokens">Upper bound on the estimated token count of the returned code chunks (default 8000).</param>
Task<LLMContext> PrepareCodeAnalysisContextAsync(string filePath, string query, int maxTokens = 8000);
/// <summary>
/// Gets dependency information rooted at a symbol.
/// </summary>
/// <param name="symbolName">Name of the root symbol.</param>
/// <param name="maxDepth">Maximum depth recorded on the returned context (default 3).</param>
Task<DependencyContext> GetDependencyContextAsync(string symbolName, int maxDepth = 3);
/// <summary>
/// Analyses the potential impact of changing one line of a file.
/// </summary>
/// <param name="filePath">Path of the file containing the change.</param>
/// <param name="lineNumber">Line number of the change within <paramref name="filePath"/>.</param>
Task<ChangeImpactContext> AnalyzeChangeImpactAsync(string filePath, int lineNumber);
/// <summary>
/// Gets the code relationships recorded for a symbol.
/// </summary>
/// <param name="symbolName">Name of the symbol to look up.</param>
Task<CodeRelationshipContext> GetCodeRelationshipsAsync(string symbolName);
}
public class LLMContextService : ILLMContextService
{
// Client used for semantic search and for type/command lookups.
private readonly IRefactorIQClient _refactorIQClient;
// The AI section of the learning configuration (search toggles and thresholds).
private readonly AIConfiguration _config;
private readonly ILogger<LLMContextService> _logger;
// Contexts already produced by PrepareContextAsync. Entries are never evicted.
private readonly ConcurrentDictionary<string, LLMContext> _contextCache;

/// <summary>
/// Creates the service from its injected dependencies.
/// </summary>
/// <param name="refactorIQClient">Client used for semantic search and type/command lookups.</param>
/// <param name="options">Learning configuration; only its <c>AI</c> section is retained.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public LLMContextService(
    IRefactorIQClient refactorIQClient,
    IOptions<LearningConfiguration> options,
    ILogger<LLMContextService> logger)
{
    // Fail at construction time instead of with a NullReferenceException on first use.
    _refactorIQClient = refactorIQClient ?? throw new ArgumentNullException(nameof(refactorIQClient));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    if (options is null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    _config = options.Value.AI;
    _contextCache = new ConcurrentDictionary<string, LLMContext>();
}
/// <summary>
/// Builds (or returns from cache) the LLM context for a free-text query.
/// </summary>
/// <param name="query">The query the context is built for.</param>
/// <param name="maxTokens">Token budget applied to the collected code chunks.</param>
/// <returns>
/// The prepared context. Results are cached per (query, maxTokens) pair, so repeated calls
/// return the same <see cref="LLMContext"/> instance.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is <c>null</c>.</exception>
public async Task<LLMContext> PrepareContextAsync(string query, int maxTokens = 8000)
{
    if (query is null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    try
    {
        _logger.LogInformation("Preparing LLM context for query: {Query}", query);

        // Key on the full query text. A key built from query.GetHashCode() lets two different
        // queries with colliding hash codes share one entry and return the wrong context.
        var cacheKey = $"context_{maxTokens}_{query}";
        if (_contextCache.TryGetValue(cacheKey, out var cachedContext))
        {
            _logger.LogDebug("Returning cached context for query: {Query}", query);
            return cachedContext;
        }

        var context = new LLMContext
        {
            Query = query,
            MaxTokens = maxTokens,
            GeneratedAt = DateTime.UtcNow
        };

        // 1. Smart chunking: Break code into semantically coherent pieces
        var relevantChunks = await GetRelevantCodeChunksAsync(query, maxTokens);
        context.CodeChunks = relevantChunks;

        // 2. Dependency tracking: Find related code that should be included
        var dependencies = await GetDependencyInformationAsync(query);
        context.Dependencies = dependencies;

        // 3. Code relationship mapping: Find all relationships
        var relationships = await GetCodeRelationshipMappingAsync(query);
        context.Relationships = relationships;

        // 4. Calculate token usage and optimize
        context = OptimizeContextForTokens(context, maxTokens);

        // Cache the result; if a concurrent call already stored one, that entry is kept.
        _contextCache.TryAdd(cacheKey, context);

        _logger.LogInformation("Generated LLM context with {ChunkCount} chunks, {TokenCount} estimated tokens",
            context.CodeChunks.Count, context.EstimatedTokens);
        return context;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to prepare LLM context for query: {Query}", query);
        throw;
    }
}
/// <summary>
/// Builds an LLM context centred on one source file: the file itself as the primary chunk,
/// followed by related files, trimmed to the token budget.
/// </summary>
/// <param name="filePath">Path of the file to analyse; it is read and parsed as C#.</param>
/// <param name="query">The query recorded on the returned context.</param>
/// <param name="maxTokens">Token budget; related files get at most half of it.</param>
/// <returns>The prepared context.</returns>
public async Task<LLMContext> PrepareCodeAnalysisContextAsync(string filePath, string query, int maxTokens = 8000)
{
    try
    {
        _logger.LogInformation("Preparing code analysis context for file: {FilePath}", filePath);

        var analysisContext = new LLMContext
        {
            Query = query,
            MaxTokens = maxTokens,
            GeneratedAt = DateTime.UtcNow,
            PrimaryFile = filePath
        };

        // Read and parse the target file.
        var fileText = await File.ReadAllTextAsync(filePath);
        var parsedTree = CSharpSyntaxTree.ParseText(fileText);
        var treeRoot = await parsedTree.GetRootAsync();

        // Gather what the primary chunk needs, then assemble it.
        var declaredSymbols = ExtractSymbolsFromSyntaxTree(treeRoot);
        var importedNamespaces = await GetFileDependenciesAsync(filePath);
        var primaryChunk = new CodeChunk
        {
            FilePath = filePath,
            Content = fileText,
            Type = CodeChunkType.PrimaryFile,
            Symbols = declaredSymbols,
            Dependencies = importedNamespaces,
            EstimatedTokens = EstimateTokenCount(fileText)
        };
        analysisContext.CodeChunks.Add(primaryChunk);

        // Related files may use up to half of the budget.
        var neighbourChunks = await GetRelatedFilesAsync(filePath, maxTokens / 2);
        analysisContext.CodeChunks.AddRange(neighbourChunks);

        // Trim to the token limit.
        return OptimizeContextForTokens(analysisContext, maxTokens);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to prepare code analysis context for file: {FilePath}", filePath);
        throw;
    }
}
/// <summary>
/// Looks up the type that is named <paramref name="symbolName"/>, or that has a member with
/// that name (case-insensitive), and builds a dependency context for it.
/// </summary>
/// <param name="symbolName">Type or member name to search for.</param>
/// <param name="maxDepth">Maximum depth recorded on the context and passed to the tree builder.</param>
/// <returns>
/// The dependency context. Its dependency list is left empty when the type lookup fails
/// or no type matches.
/// </returns>
public async Task<DependencyContext> GetDependencyContextAsync(string symbolName, int maxDepth = 3)
{
    try
    {
        _logger.LogInformation("Getting dependency context for symbol: {SymbolName}", symbolName);

        var dependencyContext = new DependencyContext
        {
            RootSymbol = symbolName,
            MaxDepth = maxDepth,
            Dependencies = new List<DependencyInfo>()
        };

        // Ask RefactorIQ for every known type.
        var typesResponse = await _refactorIQClient.GetTypesAsync();
        if (typesResponse.IsSuccess && typesResponse.Data != null)
        {
            // The first type matching by its own name or by one of its members wins.
            var matchedType = typesResponse.Data.FirstOrDefault(candidate =>
                candidate.Name.Equals(symbolName, StringComparison.OrdinalIgnoreCase) ||
                candidate.Members.Any(member => member.Name.Equals(symbolName, StringComparison.OrdinalIgnoreCase)));

            if (matchedType != null)
            {
                await BuildDependencyTreeAsync(dependencyContext, matchedType, 0, maxDepth, new HashSet<string>());
            }
        }
        else
        {
            _logger.LogWarning("Failed to get types from RefactorIQ for dependency analysis");
        }

        return dependencyContext;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to get dependency context for symbol: {SymbolName}", symbolName);
        throw;
    }
}
/// <summary>
/// Estimates the impact of changing a single line of a file: classifies the line, looks for
/// other files that mention its symbols, and derives a risk level.
/// </summary>
/// <param name="filePath">Path of the file containing the change; it is read from disk.</param>
/// <param name="lineNumber">1-based line number of the change.</param>
/// <returns>
/// The impact context. When <paramref name="lineNumber"/> is outside the file, the context
/// is returned with no change type, no affected files and risk level "Low".
/// </returns>
public async Task<ChangeImpactContext> AnalyzeChangeImpactAsync(string filePath, int lineNumber)
{
    try
    {
        _logger.LogInformation("Analyzing change impact for {FilePath}:{LineNumber}", filePath, lineNumber);

        var context = new ChangeImpactContext
        {
            TargetFile = filePath,
            TargetLine = lineNumber,
            PotentiallyAffectedFiles = new List<string>(),
            RiskLevel = "Low"
        };

        // Parse the file to understand what's at the target line
        var sourceCode = await File.ReadAllTextAsync(filePath);
        var lines = sourceCode.Split('\n');

        if (lineNumber > 0 && lineNumber <= lines.Length)
        {
            // Splitting on '\n' leaves a trailing '\r' on files with CRLF line endings.
            // Strip it so it does not become part of the last symbol used for matching.
            var targetLine = lines[lineNumber - 1].TrimEnd('\r');

            // Analyze what kind of change this might be
            context.ChangeType = AnalyzeChangeType(targetLine);

            // Find potentially affected files
            context.PotentiallyAffectedFiles = await FindPotentiallyAffectedFilesAsync(filePath, targetLine);

            // Calculate risk level
            context.RiskLevel = CalculateRiskLevel(context.ChangeType, context.PotentiallyAffectedFiles.Count);
        }

        return context;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to analyze change impact for {FilePath}:{LineNumber}", filePath, lineNumber);
        throw;
    }
}
/// <summary>
/// Collects relationship information for a symbol. Currently only <c>Callees</c> is filled:
/// it receives the name of every RefactorIQ command whose name contains
/// <paramref name="symbolName"/> (case-insensitive).
/// </summary>
/// <param name="symbolName">Symbol name to search for.</param>
/// <returns>The relationship context; all lists are empty when the command lookup fails.</returns>
public async Task<CodeRelationshipContext> GetCodeRelationshipsAsync(string symbolName)
{
    try
    {
        _logger.LogInformation("Getting code relationships for symbol: {SymbolName}", symbolName);

        var relationships = new CodeRelationshipContext
        {
            TargetSymbol = symbolName,
            Callers = new List<string>(),
            Callees = new List<string>(),
            Inheritors = new List<string>(),
            Implementers = new List<string>()
        };

        // Use RefactorIQ to get command/method information.
        var commandsResponse = await _refactorIQClient.GetCommandsAsync();
        if (!commandsResponse.IsSuccess || commandsResponse.Data == null)
        {
            return relationships;
        }

        // Simplified name matching; a real implementation would use call graph analysis.
        var matchingNames = commandsResponse.Data
            .Select(command => command.Name)
            .Where(name => name.Contains(symbolName, StringComparison.OrdinalIgnoreCase));
        relationships.Callees.AddRange(matchingNames);

        return relationships;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to get code relationships for symbol: {SymbolName}", symbolName);
        throw;
    }
}
/// <summary>
/// Runs a semantic search for the query and turns each sufficiently similar hit into a code chunk.
/// </summary>
/// <param name="query">Search text.</param>
/// <param name="maxTokens">Not used here; the budget is enforced later by the caller.</param>
/// <returns>
/// The chunks created from hits scoring at least the configured minimum similarity.
/// Empty when semantic search is disabled or the search fails.
/// </returns>
private async Task<List<CodeChunk>> GetRelevantCodeChunksAsync(string query, int maxTokens)
{
    var collected = new List<CodeChunk>();

    // Semantic search is the only source of chunks.
    if (!_config.EnableSemanticSearch)
    {
        return collected;
    }

    var searchResponse = await _refactorIQClient.SearchSimilarAsync(query, null, _config.MaxSearchResults);
    if (!searchResponse.IsSuccess || searchResponse.Data == null)
    {
        return collected;
    }

    foreach (var hit in searchResponse.Data)
    {
        // Skip hits below the configured similarity threshold.
        if (!(hit.Score >= _config.MinSimilarityScore))
        {
            continue;
        }

        var piece = await CreateChunkFromSearchResult(hit);
        if (piece != null)
        {
            collected.Add(piece);
        }
    }

    return collected;
}
/// <summary>
/// Creates a code chunk containing the lines around a search hit: from 10 lines before
/// <c>LineStart</c> to 20 lines after it, clamped to the file.
/// </summary>
/// <param name="searchResult">The search hit; its file is read from disk.</param>
/// <returns>
/// The chunk, or <c>null</c> when the file does not exist, the hit points past the end of the
/// file, or reading fails (failures are logged as warnings).
/// </returns>
private async Task<CodeChunk?> CreateChunkFromSearchResult(VectorSearchResult searchResult)
{
    try
    {
        if (!File.Exists(searchResult.FilePath))
        {
            return null;
        }

        var content = await File.ReadAllTextAsync(searchResult.FilePath);
        var lines = content.Split('\n');

        // Extract context around the target line. Both bounds are inclusive indices into `lines`.
        // NOTE(review): LineStart is used directly as an index here - confirm whether
        // RefactorIQ reports it 0- or 1-based.
        var startLine = Math.Max(0, searchResult.LineStart - 10);
        var endLine = Math.Min(lines.Length - 1, searchResult.LineStart + 20);

        // A stale index can point past the end of the file; report it instead of relying
        // on the range operator throwing.
        if (startLine > endLine)
        {
            _logger.LogWarning("Search result line {LineStart} is outside of {FilePath}; skipping chunk",
                searchResult.LineStart, searchResult.FilePath);
            return null;
        }

        // The end of a C# range is exclusive, so add one to include the line at endLine.
        var chunkContent = string.Join("\n", lines[startLine..(endLine + 1)]);

        return new CodeChunk
        {
            FilePath = searchResult.FilePath,
            Content = chunkContent,
            Type = CodeChunkType.RelevantSection,
            LineStart = startLine,
            LineEnd = endLine,
            RelevanceScore = searchResult.Score,
            EstimatedTokens = EstimateTokenCount(chunkContent)
        };
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to create chunk from search result for {FilePath}", searchResult.FilePath);
        return null;
    }
}
/// <summary>
/// Lists the distinct names of the classes, methods and properties declared under
/// <paramref name="root"/>, in that order.
/// </summary>
/// <param name="root">Root node of the syntax tree to scan.</param>
/// <returns>The distinct declaration names.</returns>
private List<string> ExtractSymbolsFromSyntaxTree(SyntaxNode root)
{
    var classNames = root.DescendantNodes()
        .OfType<ClassDeclarationSyntax>()
        .Select(declaration => declaration.Identifier.Text);

    var methodNames = root.DescendantNodes()
        .OfType<MethodDeclarationSyntax>()
        .Select(declaration => declaration.Identifier.Text);

    var propertyNames = root.DescendantNodes()
        .OfType<PropertyDeclarationSyntax>()
        .Select(declaration => declaration.Identifier.Text);

    return classNames
        .Concat(methodNames)
        .Concat(propertyNames)
        .Distinct()
        .ToList();
}
/// <summary>
/// Reads a C# file and returns the names from its <c>using</c> directives.
/// </summary>
/// <param name="filePath">Path of the file to read and parse.</param>
/// <returns>
/// The non-empty directive names. Best effort: any failure is logged as a warning and an
/// empty list is returned.
/// </returns>
private async Task<List<string>> GetFileDependenciesAsync(string filePath)
{
    try
    {
        var content = await File.ReadAllTextAsync(filePath);
        var syntaxTree = CSharpSyntaxTree.ParseText(content);
        var root = await syntaxTree.GetRootAsync();
        var usingDirectives = root.DescendantNodes().OfType<UsingDirectiveSyntax>();
        return usingDirectives
            .Select(u => u.Name?.ToString() ?? "")
            .Where(n => !string.IsNullOrEmpty(n))
            .ToList();
    }
    catch (Exception ex)
    {
        // Still best effort, but no longer silent: an unreadable file would otherwise be
        // indistinguishable from a file with no using directives.
        _logger.LogWarning(ex, "Failed to read using directives from {FilePath}", filePath);
        return new List<string>();
    }
}
/// <summary>
/// Finds files near <paramref name="primaryFile"/> whose names match its first five
/// <c>using</c> directives and returns them as whole-file chunks within a token budget.
/// </summary>
/// <param name="primaryFile">File whose dependencies drive the search; its directory is searched recursively.</param>
/// <param name="maxTokens">Total estimated tokens the returned chunks may use.</param>
/// <returns>
/// The related-file chunks, each file at most once. Best effort: failures are logged as a
/// warning and whatever was collected so far is returned.
/// </returns>
private async Task<List<CodeChunk>> GetRelatedFilesAsync(string primaryFile, int maxTokens)
{
    var relatedChunks = new List<CodeChunk>();
    var remainingTokens = maxTokens;

    try
    {
        var dependencies = await GetFileDependenciesAsync(primaryFile);
        if (dependencies.Count == 0 || remainingTokens <= 0)
        {
            return relatedChunks;
        }

        var projectDirectory = Path.GetDirectoryName(primaryFile) ?? "";

        // The directory listing does not depend on the dependency, so enumerate it once.
        var candidateFiles = Directory.GetFiles(projectDirectory, "*.cs", SearchOption.AllDirectories);

        // Files already considered. The budget only shrinks, so a file that was added, or
        // that was too large once, never needs to be read again for a later dependency.
        var consideredFiles = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        foreach (var dependency in dependencies.Take(5)) // Limit to top 5 dependencies
        {
            if (remainingTokens <= 0) break;

            // Match on the last namespace segment, e.g. "Models" for "RefactorIQ.Core.Models".
            var simpleName = dependency.Split('.').Last();
            var relatedFiles = candidateFiles
                .Where(f => Path.GetFileNameWithoutExtension(f).Contains(simpleName, StringComparison.OrdinalIgnoreCase))
                .Take(2);

            foreach (var relatedFile in relatedFiles)
            {
                if (remainingTokens <= 0) break;
                if (relatedFile.Equals(primaryFile, StringComparison.OrdinalIgnoreCase)) continue;
                if (!consideredFiles.Add(relatedFile)) continue;

                var content = await File.ReadAllTextAsync(relatedFile);
                var tokenCount = EstimateTokenCount(content);

                if (tokenCount <= remainingTokens)
                {
                    relatedChunks.Add(new CodeChunk
                    {
                        FilePath = relatedFile,
                        Content = content,
                        Type = CodeChunkType.RelatedFile,
                        EstimatedTokens = tokenCount
                    });
                    remainingTokens -= tokenCount;
                }
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to get related files for {PrimaryFile}", primaryFile);
    }

    return relatedChunks;
}
/// <summary>
/// Placeholder for building the dependency tree of <paramref name="targetType"/> from
/// RefactorIQ data. It currently does nothing and completes immediately, leaving
/// <paramref name="context"/> untouched.
/// </summary>
private Task BuildDependencyTreeAsync(DependencyContext context, object targetType, int currentDepth, int maxDepth, HashSet<string> visited)
{
    // Simplified version: no traversal is performed yet.
    return Task.CompletedTask;
}
/// <summary>
/// Classifies a source line with simple text heuristics.
/// </summary>
/// <param name="line">The source line; surrounding whitespace is ignored.</param>
/// <returns>
/// "TypeDeclaration" for a public class, interface, struct, record or enum declaration
/// (with or without extra modifiers such as <c>sealed</c> or <c>static</c>);
/// "PublicMember" for a line containing "public" and either "(" or "=&gt;";
/// "PrivateMember" for a line containing "private" or "internal";
/// "UsingDirective" for a line containing "using"; otherwise "CodeChange".
/// The checks run in that order and the first match wins.
/// </returns>
private string AnalyzeChangeType(string line)
{
    line = line.Trim();

    if (line.Contains("public class") || line.Contains("public interface") || DeclaresPublicType(line))
        return "TypeDeclaration";

    if (line.Contains("public") && (line.Contains("(") || line.Contains("=>")))
        return "PublicMember";

    if (line.Contains("private") || line.Contains("internal"))
        return "PrivateMember";

    if (line.Contains("using"))
        return "UsingDirective";

    return "CodeChange";
}

// Returns true when a "public" token is followed by a type keyword before any parameter list,
// which covers forms like "public sealed class", "public struct", "public record" or "public enum".
private static bool DeclaresPublicType(string line)
{
    // Only look before the first '(' so a generic constraint such as
    // "public T Get<T>() where T : class" is not mistaken for a type declaration.
    var parenIndex = line.IndexOf('(');
    var header = parenIndex >= 0 ? line.Substring(0, parenIndex) : line;
    var tokens = header.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);

    var publicIndex = Array.IndexOf(tokens, "public");
    if (publicIndex < 0)
    {
        return false;
    }

    for (var i = publicIndex + 1; i < tokens.Length; i++)
    {
        switch (tokens[i])
        {
            case "class":
            case "interface":
            case "struct":
            case "record":
            case "enum":
                return true;
        }
    }

    return false;
}
/// <summary>
/// Looks for other C# files that mention any symbol appearing on the changed line.
/// </summary>
/// <param name="filePath">The changed file; its directory is searched recursively and the file itself is skipped.</param>
/// <param name="targetLine">The changed line, from which candidate symbols are extracted.</param>
/// <returns>
/// Paths of matching files among the first 10 files found. Best effort: failures are logged
/// as a warning and whatever was collected so far is returned.
/// </returns>
private async Task<List<string>> FindPotentiallyAffectedFilesAsync(string filePath, string targetLine)
{
    var affectedFiles = new List<string>();

    try
    {
        // The symbols depend only on the target line, so extract them once, not per file.
        var symbols = ExtractPotentialSymbols(targetLine);
        if (symbols.Count == 0)
        {
            // Nothing to match against; avoid scanning and reading files for no result.
            return affectedFiles;
        }

        // Simple implementation - in practice, this would use sophisticated dependency analysis
        var projectDirectory = Path.GetDirectoryName(filePath) ?? "";
        var allFiles = Directory.GetFiles(projectDirectory, "*.cs", SearchOption.AllDirectories);

        foreach (var file in allFiles.Take(10)) // Limit for performance
        {
            if (file.Equals(filePath, StringComparison.OrdinalIgnoreCase)) continue;

            var content = await File.ReadAllTextAsync(file);

            // Look for potential references (simplified plain-text containment)
            if (symbols.Any(symbol => content.Contains(symbol)))
            {
                affectedFiles.Add(file);
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to find potentially affected files for {FilePath}", filePath);
    }

    return affectedFiles;
}
// Characters that separate identifiers on a source line.
private static readonly char[] SymbolDelimiters =
{
    ' ', '\t', '\r', '\n', '(', ')', '{', '}', '[', ']', '<', '>', ';', ',', '.', ':',
    '=', '+', '-', '*', '/', '%', '!', '&', '|', '^', '~', '?', '"', '\'', '@'
};

// C# keywords (longer than two characters) that say nothing about which symbol a line refers to.
private static readonly HashSet<string> NonSymbolKeywords = new HashSet<string>(StringComparer.Ordinal)
{
    "abstract", "async", "await", "base", "bool", "break", "byte", "case", "catch", "char",
    "checked", "class", "const", "continue", "decimal", "default", "delegate", "double", "else",
    "enum", "event", "explicit", "extern", "false", "finally", "fixed", "float", "for", "foreach",
    "get", "goto", "implicit", "init", "int", "interface", "internal", "lock", "long", "nameof",
    "namespace", "new", "null", "object", "operator", "out", "override", "params", "partial",
    "private", "protected", "public", "readonly", "record", "ref", "return", "sbyte", "sealed",
    "set", "short", "sizeof", "stackalloc", "static", "string", "struct", "switch", "this",
    "throw", "true", "try", "typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using",
    "var", "virtual", "void", "volatile", "where", "while", "yield"
};

/// <summary>
/// Extracts the identifiers on a source line that may name a method, type or variable.
/// </summary>
/// <param name="line">The source line to scan.</param>
/// <returns>
/// The distinct identifier-like words longer than two characters, in order of first
/// appearance, excluding C# keywords. Keywords are excluded because a word such as "public"
/// occurs in almost every file and would make every file look affected.
/// </returns>
private List<string> ExtractPotentialSymbols(string line)
{
    return line
        .Split(SymbolDelimiters, StringSplitOptions.RemoveEmptyEntries)
        .Where(word => word.Length > 2
            && (char.IsLetter(word[0]) || word[0] == '_')
            && !NonSymbolKeywords.Contains(word))
        .Distinct()
        .ToList();
}
/// <summary>
/// Maps a change type and the number of potentially affected files to a risk level.
/// </summary>
/// <param name="changeType">Change classification, e.g. "TypeDeclaration" or "PublicMember".</param>
/// <param name="affectedFileCount">Number of files that may be affected by the change.</param>
/// <returns>"High", "Medium" or "Low".</returns>
private string CalculateRiskLevel(string changeType, int affectedFileCount)
{
    const string high = "High";
    const string medium = "Medium";
    const string low = "Low";

    switch (changeType)
    {
        case "TypeDeclaration":
            return affectedFileCount > 5 ? high : medium;

        case "PublicMember":
            return affectedFileCount > 3 ? high : medium;

        // Non-public members and using directives are always low risk.
        case "PrivateMember":
        case "UsingDirective":
            return low;

        default:
            return affectedFileCount > 2 ? medium : low;
    }
}
/// <summary>
/// Recomputes the context's estimated token total and, when it exceeds the budget, keeps only
/// the chunks that fit.
/// </summary>
/// <param name="context">The context to trim; it is modified in place.</param>
/// <param name="maxTokens">Token budget for the kept chunks.</param>
/// <returns>The same <paramref name="context"/> instance.</returns>
private LLMContext OptimizeContextForTokens(LLMContext context, int maxTokens)
{
    context.EstimatedTokens = context.CodeChunks.Sum(chunk => chunk.EstimatedTokens);

    if (context.EstimatedTokens > maxTokens)
    {
        // Highest relevance first; among equal scores the primary file comes first.
        var ranked =
            from chunk in context.CodeChunks
            orderby chunk.RelevanceScore descending, (chunk.Type == CodeChunkType.PrimaryFile ? 0 : 1)
            select chunk;

        var keptChunks = new List<CodeChunk>();
        var usedTokens = 0;

        foreach (var candidate in ranked)
        {
            // Greedy fill: skip a chunk that does not fit, but keep trying smaller ones after it.
            var projectedTokens = usedTokens + candidate.EstimatedTokens;
            if (projectedTokens > maxTokens)
            {
                continue;
            }

            keptChunks.Add(candidate);
            usedTokens = projectedTokens;
        }

        context.CodeChunks = keptChunks;
        context.EstimatedTokens = usedTokens;
    }

    return context;
}
/// <summary>
/// Roughly estimates how many tokens a piece of code occupies, assuming about four
/// characters per token.
/// </summary>
/// <param name="text">The text to measure.</param>
/// <returns>The character count divided by four, rounded down.</returns>
private int EstimateTokenCount(string text)
{
    const int charactersPerToken = 4;
    var characterCount = text.Length;
    return characterCount / charactersPerToken;
}
/// <summary>
/// Placeholder for dependency lookup by query; it currently always yields an empty list.
/// </summary>
/// <param name="query">The query text (not used yet).</param>
/// <returns>A completed task holding a new empty list.</returns>
private Task<List<DependencyInfo>> GetDependencyInformationAsync(string query)
{
    // Simplified implementation: nothing is looked up.
    var noDependencies = new List<DependencyInfo>();
    return Task.FromResult(noDependencies);
}
/// <summary>
/// Placeholder for relationship mapping by query; it currently always yields an empty list.
/// </summary>
/// <param name="query">The query text (not used yet).</param>
/// <returns>A completed task holding a new empty list.</returns>
private Task<List<CodeRelationship>> GetCodeRelationshipMappingAsync(string query)
{
    // Simplified implementation: nothing is mapped.
    var noRelationships = new List<CodeRelationship>();
    return Task.FromResult(noRelationships);
}
}
// Supporting classes for LLM context
/// <summary>
/// The context assembled for one LLM request: the query, its token budget, and the code
/// chunks, dependencies and relationships gathered for it.
/// </summary>
public class LLMContext
{
    /// <summary>The query this context was prepared for.</summary>
    public string Query { get; set; } = "";

    /// <summary>The token budget requested by the caller.</summary>
    public int MaxTokens { get; set; }

    /// <summary>Sum of the estimated tokens of the chunks in <see cref="CodeChunks"/>.</summary>
    public int EstimatedTokens { get; set; }

    /// <summary>When the context was created; the service sets it from <c>DateTime.UtcNow</c>.</summary>
    public DateTime GeneratedAt { get; set; }

    /// <summary>Path of the file under analysis, when the context is centred on a file.</summary>
    public string? PrimaryFile { get; set; }

    /// <summary>The code excerpts and files included in the context.</summary>
    public List<CodeChunk> CodeChunks { get; set; } = new List<CodeChunk>();

    /// <summary>Dependency information gathered for the query.</summary>
    public List<DependencyInfo> Dependencies { get; set; } = new List<DependencyInfo>();

    /// <summary>Code relationships gathered for the query.</summary>
    public List<CodeRelationship> Relationships { get; set; } = new List<CodeRelationship>();
}
/// <summary>
/// A piece of source code (a whole file or an excerpt) included in an LLM context.
/// </summary>
public class CodeChunk
{
    /// <summary>Path of the file the content came from.</summary>
    public string FilePath { get; set; } = "";

    /// <summary>The source text of the chunk.</summary>
    public string Content { get; set; } = "";

    /// <summary>What role the chunk plays in the context.</summary>
    public CodeChunkType Type { get; set; }

    /// <summary>For excerpts, the index of the first line within the file's lines; 0 when not set.</summary>
    public int LineStart { get; set; }

    /// <summary>For excerpts, the index of the last line within the file's lines; 0 when not set.</summary>
    public int LineEnd { get; set; }

    /// <summary>Relevance used to rank chunks when trimming to the budget; defaults to 1.0.</summary>
    public float RelevanceScore { get; set; } = 1.0f;

    /// <summary>Estimated number of tokens in <see cref="Content"/>.</summary>
    public int EstimatedTokens { get; set; }

    /// <summary>Names of symbols declared in the chunk, when extracted.</summary>
    public List<string> Symbols { get; set; } = new List<string>();

    /// <summary>Names from the file's using directives, when extracted.</summary>
    public List<string> Dependencies { get; set; } = new List<string>();
}
/// <summary>
/// The role a <c>CodeChunk</c> plays within an LLM context.
/// </summary>
public enum CodeChunkType
{
    /// <summary>The whole file under analysis.</summary>
    PrimaryFile = 0,

    /// <summary>A whole file related to the primary file.</summary>
    RelatedFile = 1,

    /// <summary>An excerpt around a search hit.</summary>
    RelevantSection = 2,

    /// <summary>A file included as a dependency.</summary>
    DependencyFile = 3
}
/// <summary>
/// Result of a dependency lookup rooted at one symbol.
/// </summary>
public class DependencyContext
{
    /// <summary>The symbol the lookup started from.</summary>
    public string RootSymbol { get; set; } = "";

    /// <summary>The maximum depth requested for the lookup.</summary>
    public int MaxDepth { get; set; }

    /// <summary>The dependencies found.</summary>
    public List<DependencyInfo> Dependencies { get; set; } = new List<DependencyInfo>();
}
/// <summary>
/// One dependency entry within a dependency lookup result.
/// </summary>
public class DependencyInfo
{
    /// <summary>Name of the dependency.</summary>
    public string Name { get; set; } = "";

    /// <summary>Kind of the dependency, as free text.</summary>
    public string Type { get; set; } = "";

    /// <summary>Depth at which the dependency was found.</summary>
    public int Depth { get; set; }

    /// <summary>Path of the file associated with the dependency.</summary>
    public string FilePath { get; set; } = "";
}
/// <summary>
/// Result of analysing the impact of changing one line of a file.
/// </summary>
public class ChangeImpactContext
{
    /// <summary>Path of the file containing the change.</summary>
    public string TargetFile { get; set; } = "";

    /// <summary>Line number of the change within <see cref="TargetFile"/>.</summary>
    public int TargetLine { get; set; }

    /// <summary>Classification of the changed line; empty when the line was not analysed.</summary>
    public string ChangeType { get; set; } = "";

    /// <summary>Paths of files that may be affected by the change.</summary>
    public List<string> PotentiallyAffectedFiles { get; set; } = new List<string>();

    /// <summary>Risk level of the change, as text.</summary>
    public string RiskLevel { get; set; } = "";
}
/// <summary>
/// Relationships recorded for one symbol.
/// </summary>
public class CodeRelationshipContext
{
    /// <summary>The symbol the relationships refer to.</summary>
    public string TargetSymbol { get; set; } = "";

    /// <summary>Names recorded as callers of the symbol.</summary>
    public List<string> Callers { get; set; } = new List<string>();

    /// <summary>Names recorded as callees of the symbol.</summary>
    public List<string> Callees { get; set; } = new List<string>();

    /// <summary>Names recorded as inheritors of the symbol.</summary>
    public List<string> Inheritors { get; set; } = new List<string>();

    /// <summary>Names recorded as implementers of the symbol.</summary>
    public List<string> Implementers { get; set; } = new List<string>();
}
/// <summary>
/// A directed relationship between two code elements.
/// </summary>
public class CodeRelationship
{
    /// <summary>The element the relationship starts at.</summary>
    public string From { get; set; } = "";

    /// <summary>The element the relationship points to.</summary>
    public string To { get; set; } = "";

    /// <summary>Kind of the relationship, as free text.</summary>
    public string Type { get; set; } = "";
}
}