604 lines
23 KiB
C#
Executable File
604 lines
23 KiB
C#
Executable File
using MarketAlly.AIPlugin.Learning.Configuration;
|
|
using Microsoft.CodeAnalysis;
|
|
using Microsoft.CodeAnalysis.CSharp;
|
|
using Microsoft.CodeAnalysis.CSharp.Syntax;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using RefactorIQ.Core.Models;
|
|
using RefactorIQ.Services.Interfaces;
|
|
using System.Collections.Concurrent;
|
|
using System.Text;
|
|
|
|
namespace MarketAlly.AIPlugin.Learning.Services
|
|
{
|
|
/// <summary>
/// Contract for a service that assembles token-budgeted context objects
/// (code chunks, dependency data, relationship data) for consumption by an LLM.
/// </summary>
public interface ILLMContextService
{
    /// <summary>Prepares a context for a free-form query.</summary>
    /// <param name="query">The query the context is built for.</param>
    /// <param name="maxTokens">Token budget for the returned context.</param>
    /// <returns>The prepared context.</returns>
    Task<LLMContext> PrepareContextAsync(string query, int maxTokens = 8000);

    /// <summary>Prepares a context centered on a single source file.</summary>
    /// <param name="filePath">Path of the file to analyze.</param>
    /// <param name="query">The query the context is built for.</param>
    /// <param name="maxTokens">Token budget for the returned context.</param>
    /// <returns>The prepared context.</returns>
    Task<LLMContext> PrepareCodeAnalysisContextAsync(string filePath, string query, int maxTokens = 8000);

    /// <summary>Collects dependency information for a symbol.</summary>
    /// <param name="symbolName">Name of the symbol to start from.</param>
    /// <param name="maxDepth">Maximum dependency depth to explore.</param>
    /// <returns>The dependency context for the symbol.</returns>
    Task<DependencyContext> GetDependencyContextAsync(string symbolName, int maxDepth = 3);

    /// <summary>Estimates the impact of changing one line of a file.</summary>
    /// <param name="filePath">Path of the file being changed.</param>
    /// <param name="lineNumber">Line number within the file.</param>
    /// <returns>The change-impact assessment.</returns>
    Task<ChangeImpactContext> AnalyzeChangeImpactAsync(string filePath, int lineNumber);

    /// <summary>Collects relationship information (callers, callees, ...) for a symbol.</summary>
    /// <param name="symbolName">Name of the symbol to look up.</param>
    /// <returns>The relationship context for the symbol.</returns>
    Task<CodeRelationshipContext> GetCodeRelationshipsAsync(string symbolName);
}
|
|
|
|
public class LLMContextService : ILLMContextService
|
|
{
|
|
private readonly IRefactorIQClient _refactorIQClient;
|
|
private readonly AIConfiguration _config;
|
|
private readonly ILogger<LLMContextService> _logger;
|
|
private readonly ConcurrentDictionary<string, LLMContext> _contextCache;
|
|
|
|
/// <summary>
/// Creates the service and its (initially empty) in-memory context cache.
/// </summary>
/// <param name="refactorIQClient">Client used to query RefactorIQ for types, commands and similarity search.</param>
/// <param name="options">Learning configuration; only its <c>AI</c> section is retained.</param>
/// <param name="logger">Logger for diagnostics.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public LLMContextService(
    IRefactorIQClient refactorIQClient,
    IOptions<LearningConfiguration> options,
    ILogger<LLMContextService> logger)
{
    // Fail fast with a descriptive exception instead of a late NullReferenceException
    // the first time one of the dependencies is used.
    _refactorIQClient = refactorIQClient ?? throw new ArgumentNullException(nameof(refactorIQClient));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _config = (options ?? throw new ArgumentNullException(nameof(options))).Value.AI;
    _contextCache = new ConcurrentDictionary<string, LLMContext>();
}
|
|
|
|
/// <summary>
/// Builds an LLM context for a free-form query, or returns the cached one for the same
/// query and token budget.
/// </summary>
/// <remarks>
/// The cache key embeds the full query text. Keying on <c>query.GetHashCode()</c> would let
/// two different queries whose hash codes collide be served each other's context.
/// The cache is never evicted, and a cached instance is shared between callers.
/// </remarks>
/// <param name="query">The query the context is built for.</param>
/// <param name="maxTokens">Upper bound applied to the summed chunk token estimates.</param>
/// <returns>The prepared (possibly cached) context.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is <c>null</c>.</exception>
public async Task<LLMContext> PrepareContextAsync(string query, int maxTokens = 8000)
{
    if (query is null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    try
    {
        _logger.LogInformation("Preparing LLM context for query: {Query}", query);

        // Key on the query text itself so distinct queries can never alias.
        var cacheKey = $"context_{maxTokens}_{query}";
        if (_contextCache.TryGetValue(cacheKey, out var cachedContext))
        {
            _logger.LogDebug("Returning cached context for query: {Query}", query);
            return cachedContext;
        }

        var context = new LLMContext
        {
            Query = query,
            MaxTokens = maxTokens,
            GeneratedAt = DateTime.UtcNow
        };

        // 1. Code chunks relevant to the query.
        context.CodeChunks = await GetRelevantCodeChunksAsync(query, maxTokens);

        // 2. Dependency information.
        context.Dependencies = await GetDependencyInformationAsync(query);

        // 3. Code relationship mapping.
        context.Relationships = await GetCodeRelationshipMappingAsync(query);

        // 4. Compute token usage and trim to the budget.
        context = OptimizeContextForTokens(context, maxTokens);

        // If a concurrent call already cached this key, its entry is kept.
        _contextCache.TryAdd(cacheKey, context);

        _logger.LogInformation("Generated LLM context with {ChunkCount} chunks, {TokenCount} estimated tokens",
            context.CodeChunks.Count, context.EstimatedTokens);

        return context;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to prepare LLM context for query: {Query}", query);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Builds an LLM context whose first chunk is the complete text of <paramref name="filePath"/>,
/// followed by related files, then trimmed to the token budget.
/// </summary>
/// <param name="filePath">Path of the C# file to analyze; it is read from disk and parsed.</param>
/// <param name="query">The query stored on the returned context.</param>
/// <param name="maxTokens">Token budget; half of it is offered to related files.</param>
/// <returns>The prepared context.</returns>
public async Task<LLMContext> PrepareCodeAnalysisContextAsync(string filePath, string query, int maxTokens = 8000)
{
    try
    {
        _logger.LogInformation("Preparing code analysis context for file: {FilePath}", filePath);

        var result = new LLMContext
        {
            Query = query,
            MaxTokens = maxTokens,
            GeneratedAt = DateTime.UtcNow,
            PrimaryFile = filePath
        };

        // Load and parse the target file.
        var fileText = await File.ReadAllTextAsync(filePath);
        var parsedTree = CSharpSyntaxTree.ParseText(fileText);
        var treeRoot = await parsedTree.GetRootAsync();

        // Gather the pieces of the primary chunk.
        var declaredSymbols = ExtractSymbolsFromSyntaxTree(treeRoot);
        var fileDependencies = await GetFileDependenciesAsync(filePath);
        var fileTokens = EstimateTokenCount(fileText);

        result.CodeChunks.Add(new CodeChunk
        {
            FilePath = filePath,
            Content = fileText,
            Type = CodeChunkType.PrimaryFile,
            Symbols = declaredSymbols,
            Dependencies = fileDependencies,
            EstimatedTokens = fileTokens
        });

        // Related files may use up to half of the budget.
        var relatedChunks = await GetRelatedFilesAsync(filePath, maxTokens / 2);
        result.CodeChunks.AddRange(relatedChunks);

        // Trim to the overall budget.
        return OptimizeContextForTokens(result, maxTokens);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to prepare code analysis context for file: {FilePath}", filePath);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Looks up <paramref name="symbolName"/> among the types reported by RefactorIQ and, when a
/// type or one of its members matches (case-insensitively), builds its dependency tree.
/// </summary>
/// <param name="symbolName">Type or member name to search for.</param>
/// <param name="maxDepth">Depth limit passed to the tree builder and stored on the result.</param>
/// <returns>
/// The dependency context; its dependency list stays empty when the type lookup fails
/// or no type matches.
/// </returns>
public async Task<DependencyContext> GetDependencyContextAsync(string symbolName, int maxDepth = 3)
{
    try
    {
        _logger.LogInformation("Getting dependency context for symbol: {SymbolName}", symbolName);

        var result = new DependencyContext
        {
            RootSymbol = symbolName,
            MaxDepth = maxDepth,
            Dependencies = new List<DependencyInfo>()
        };

        var typesResponse = await _refactorIQClient.GetTypesAsync();
        if (typesResponse.IsSuccess && typesResponse.Data != null)
        {
            // First type whose own name, or any member name, equals the symbol.
            var matchingType = typesResponse.Data
                .Where(t =>
                    t.Name.Equals(symbolName, StringComparison.OrdinalIgnoreCase) ||
                    t.Members.Any(m => m.Name.Equals(symbolName, StringComparison.OrdinalIgnoreCase)))
                .FirstOrDefault();

            if (matchingType != null)
            {
                await BuildDependencyTreeAsync(result, matchingType, 0, maxDepth, new HashSet<string>());
            }
        }
        else
        {
            _logger.LogWarning("Failed to get types from RefactorIQ for dependency analysis");
        }

        return result;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to get dependency context for symbol: {SymbolName}", symbolName);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Estimates the impact of changing one line of a file: classifies the line, looks for other
/// files that mention tokens from it, and derives a risk level.
/// </summary>
/// <param name="filePath">Path of the file being changed; it is read from disk.</param>
/// <param name="lineNumber">1-based line number within the file.</param>
/// <returns>
/// The impact assessment. When <paramref name="lineNumber"/> is outside the file, the result
/// keeps its defaults (no affected files, risk level "Low", empty change type).
/// </returns>
public async Task<ChangeImpactContext> AnalyzeChangeImpactAsync(string filePath, int lineNumber)
{
    try
    {
        _logger.LogInformation("Analyzing change impact for {FilePath}:{LineNumber}", filePath, lineNumber);

        var context = new ChangeImpactContext
        {
            TargetFile = filePath,
            TargetLine = lineNumber,
            PotentiallyAffectedFiles = new List<string>(),
            RiskLevel = "Low"
        };

        var sourceCode = await File.ReadAllTextAsync(filePath);
        var lines = sourceCode.Split('\n');

        if (lineNumber > 0 && lineNumber <= lines.Length)
        {
            // Splitting on '\n' leaves a trailing '\r' on files with CRLF line endings.
            // Strip it so it does not end up inside the symbols extracted from the line.
            var targetLine = lines[lineNumber - 1].TrimEnd('\r');

            context.ChangeType = AnalyzeChangeType(targetLine);
            context.PotentiallyAffectedFiles = await FindPotentiallyAffectedFilesAsync(filePath, targetLine);
            context.RiskLevel = CalculateRiskLevel(context.ChangeType, context.PotentiallyAffectedFiles.Count);
        }

        return context;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to analyze change impact for {FilePath}:{LineNumber}", filePath, lineNumber);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Collects relationship data for <paramref name="symbolName"/> from the commands reported
/// by RefactorIQ.
/// </summary>
/// <remarks>
/// Only <c>Callees</c> is populated: every command whose name contains the symbol name
/// (case-insensitively) is added. The other lists are returned empty.
/// </remarks>
/// <param name="symbolName">Symbol name to search for inside command names.</param>
/// <returns>The relationship context for the symbol.</returns>
public async Task<CodeRelationshipContext> GetCodeRelationshipsAsync(string symbolName)
{
    try
    {
        _logger.LogInformation("Getting code relationships for symbol: {SymbolName}", symbolName);

        var relationships = new CodeRelationshipContext
        {
            TargetSymbol = symbolName,
            Callers = new List<string>(),
            Callees = new List<string>(),
            Inheritors = new List<string>(),
            Implementers = new List<string>()
        };

        var commandsResponse = await _refactorIQClient.GetCommandsAsync();
        if (!commandsResponse.IsSuccess || commandsResponse.Data == null)
        {
            return relationships;
        }

        foreach (var candidate in commandsResponse.Data)
        {
            var isMatch = candidate.Name.Contains(symbolName, StringComparison.OrdinalIgnoreCase);
            if (!isMatch)
            {
                continue;
            }

            // Simplified name matching; no call-graph analysis is performed here.
            relationships.Callees.Add(candidate.Name);
        }

        return relationships;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to get code relationships for symbol: {SymbolName}", symbolName);
        throw;
    }
}
|
|
|
|
/// <summary>
/// Runs a similarity search for <paramref name="query"/> and converts every hit that meets
/// the configured minimum score into a code chunk.
/// </summary>
/// <param name="query">Text passed to the similarity search.</param>
/// <param name="maxTokens">Not used by this method.</param>
/// <returns>
/// The chunks that could be created; empty when semantic search is disabled or the search fails.
/// </returns>
private async Task<List<CodeChunk>> GetRelevantCodeChunksAsync(string query, int maxTokens)
{
    var collected = new List<CodeChunk>();

    if (!_config.EnableSemanticSearch)
    {
        return collected;
    }

    var response = await _refactorIQClient.SearchSimilarAsync(query, null, _config.MaxSearchResults);
    if (!response.IsSuccess || response.Data == null)
    {
        return collected;
    }

    foreach (var hit in response.Data)
    {
        if (hit.Score >= _config.MinSimilarityScore)
        {
            var chunk = await CreateChunkFromSearchResult(hit);
            if (chunk != null)
            {
                collected.Add(chunk);
            }
        }
    }

    return collected;
}
|
|
|
|
/// <summary>
/// Reads the file referenced by a search hit and cuts out a window of lines around the hit:
/// up to 10 lines before <c>LineStart</c> and up to 20 lines after it.
/// </summary>
/// <remarks>
/// <c>LineStart</c> is used directly as an index into the file's lines.
/// NOTE(review): confirm whether RefactorIQ reports it zero- or one-based.
/// </remarks>
/// <param name="searchResult">The search hit to expand into a chunk.</param>
/// <returns>
/// The chunk, or <c>null</c> when the file does not exist, the window starts beyond the end
/// of the file, or reading fails.
/// </returns>
private async Task<CodeChunk?> CreateChunkFromSearchResult(VectorSearchResult searchResult)
{
    try
    {
        if (!File.Exists(searchResult.FilePath))
        {
            return null;
        }

        var content = await File.ReadAllTextAsync(searchResult.FilePath);
        var lines = content.Split('\n');
        var lastIndex = lines.Length - 1;

        var startLine = Math.Max(0, searchResult.LineStart - 10);
        if (startLine > lastIndex)
        {
            // The hit points past the end of the file (e.g. stale index): nothing to extract.
            _logger.LogWarning("Search result line {LineStart} is beyond the end of {FilePath}",
                searchResult.LineStart, searchResult.FilePath);
            return null;
        }

        // endLine is an inclusive index, clamped to the file and never before startLine.
        var endLine = Math.Max(startLine, Math.Min(lastIndex, searchResult.LineStart + 20));

        // A range's upper bound is exclusive, so add one to include endLine itself.
        var chunkContent = string.Join("\n", lines[startLine..(endLine + 1)]);

        return new CodeChunk
        {
            FilePath = searchResult.FilePath,
            Content = chunkContent,
            Type = CodeChunkType.RelevantSection,
            LineStart = startLine,
            LineEnd = endLine,
            RelevanceScore = searchResult.Score,
            EstimatedTokens = EstimateTokenCount(chunkContent)
        };
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to create chunk from search result for {FilePath}", searchResult.FilePath);
        return null;
    }
}
|
|
|
|
/// <summary>
/// Lists the identifiers of all class, method and property declarations below
/// <paramref name="root"/>.
/// </summary>
/// <param name="root">Root node of the syntax tree to scan.</param>
/// <returns>
/// Distinct names: class names first, then method names, then property names.
/// </returns>
private List<string> ExtractSymbolsFromSyntaxTree(SyntaxNode root)
{
    var classNames = root.DescendantNodes()
        .OfType<ClassDeclarationSyntax>()
        .Select(declaration => declaration.Identifier.Text);

    var methodNames = root.DescendantNodes()
        .OfType<MethodDeclarationSyntax>()
        .Select(declaration => declaration.Identifier.Text);

    var propertyNames = root.DescendantNodes()
        .OfType<PropertyDeclarationSyntax>()
        .Select(declaration => declaration.Identifier.Text);

    return classNames
        .Concat(methodNames)
        .Concat(propertyNames)
        .Distinct()
        .ToList();
}
|
|
|
|
/// <summary>
/// Parses a C# file and returns the namespaces named by its <c>using</c> directives.
/// </summary>
/// <param name="filePath">Path of the file to read and parse.</param>
/// <returns>
/// The non-empty directive names in document order; an empty list when the file cannot be
/// read or parsed (the failure is logged as a warning, not rethrown).
/// </returns>
private async Task<List<string>> GetFileDependenciesAsync(string filePath)
{
    try
    {
        var content = await File.ReadAllTextAsync(filePath);
        var syntaxTree = CSharpSyntaxTree.ParseText(content);
        var root = await syntaxTree.GetRootAsync();

        return root.DescendantNodes()
            .OfType<UsingDirectiveSyntax>()
            .Select(u => u.Name?.ToString() ?? "")
            .Where(n => !string.IsNullOrEmpty(n))
            .ToList();
    }
    catch (Exception ex)
    {
        // Best effort: callers treat "no dependencies" as a valid answer, but the failure
        // should still be visible in the logs instead of vanishing silently.
        _logger.LogWarning(ex, "Failed to read using directives from {FilePath}", filePath);
        return new List<string>();
    }
}
|
|
|
|
/// <summary>
/// Finds files that look related to <paramref name="primaryFile"/> and returns them as chunks
/// while their estimated token counts fit into <paramref name="maxTokens"/>.
/// </summary>
/// <remarks>
/// For the first five using-directive names of the primary file, up to two <c>*.cs</c> files
/// under the primary file's directory (searched recursively) are considered when their file
/// name contains the last segment of the directive name. The directory is scanned once, and a
/// file is considered at most once even if it matches several directives.
/// Failures are logged as warnings; the chunks collected so far are returned.
/// </remarks>
/// <param name="primaryFile">File whose using directives drive the search; never returned itself.</param>
/// <param name="maxTokens">Token budget shared by all returned chunks.</param>
/// <returns>The related-file chunks that fit into the budget.</returns>
private async Task<List<CodeChunk>> GetRelatedFilesAsync(string primaryFile, int maxTokens)
{
    var relatedChunks = new List<CodeChunk>();
    var remainingTokens = maxTokens;

    try
    {
        var dependencies = await GetFileDependenciesAsync(primaryFile);
        if (remainingTokens <= 0 || dependencies.Count == 0)
        {
            return relatedChunks;
        }

        var projectDirectory = Path.GetDirectoryName(primaryFile) ?? "";

        // The candidate list does not depend on the dependency, so enumerate the tree once
        // instead of once per dependency.
        var candidateFiles = Directory.GetFiles(projectDirectory, "*.cs", SearchOption.AllDirectories);

        // Files already handled. Seeded with the primary file so it is never returned, and
        // used to keep one file from being added (or re-read) for several dependencies.
        var handledFiles = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { primaryFile };

        foreach (var dependency in dependencies.Take(5)) // Limit to top 5 dependencies
        {
            if (remainingTokens <= 0)
            {
                break;
            }

            var simpleName = dependency.Split('.').Last();
            var matchingFiles = candidateFiles
                .Where(f => Path.GetFileNameWithoutExtension(f).Contains(simpleName, StringComparison.OrdinalIgnoreCase))
                .Take(2);

            foreach (var relatedFile in matchingFiles)
            {
                if (remainingTokens <= 0)
                {
                    break;
                }

                if (!handledFiles.Add(relatedFile))
                {
                    continue;
                }

                var content = await File.ReadAllTextAsync(relatedFile);
                var tokenCount = EstimateTokenCount(content);

                if (tokenCount <= remainingTokens)
                {
                    relatedChunks.Add(new CodeChunk
                    {
                        FilePath = relatedFile,
                        Content = content,
                        Type = CodeChunkType.RelatedFile,
                        EstimatedTokens = tokenCount
                    });
                    remainingTokens -= tokenCount;
                }
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to get related files for {PrimaryFile}", primaryFile);
    }

    return relatedChunks;
}
|
|
|
|
/// <summary>
/// Placeholder for building a dependency tree from RefactorIQ data.
/// Currently does nothing and completes immediately; no argument is read or modified.
/// </summary>
/// <param name="context">Context that a real implementation would populate.</param>
/// <param name="targetType">Type to start from.</param>
/// <param name="currentDepth">Depth of <paramref name="targetType"/> in the tree.</param>
/// <param name="maxDepth">Depth limit.</param>
/// <param name="visited">Names already visited.</param>
/// <returns>An already-completed task.</returns>
private Task BuildDependencyTreeAsync(DependencyContext context, object targetType, int currentDepth, int maxDepth, HashSet<string> visited)
{
    // Simplified version: nothing to do yet.
    return Task.CompletedTask;
}
|
|
|
|
/// <summary>
/// Classifies a source line by plain substring checks, evaluated in this order:
/// type declaration, public member, private/internal member, using directive.
/// </summary>
/// <param name="line">The source line; surrounding whitespace is ignored.</param>
/// <returns>
/// "TypeDeclaration", "PublicMember", "PrivateMember", "UsingDirective", or "CodeChange"
/// when no check matches.
/// </returns>
private string AnalyzeChangeType(string line)
{
    var text = line.Trim();

    bool Has(string fragment) => text.Contains(fragment);

    if (Has("public class") || Has("public interface"))
    {
        return "TypeDeclaration";
    }

    var looksInvocable = Has("(") || Has("=>");
    if (Has("public") && looksInvocable)
    {
        return "PublicMember";
    }

    if (Has("private") || Has("internal"))
    {
        return "PrivateMember";
    }

    return Has("using") ? "UsingDirective" : "CodeChange";
}
|
|
|
|
/// <summary>
/// Looks for other <c>*.cs</c> files that mention any token of <paramref name="targetLine"/>.
/// </summary>
/// <remarks>
/// Only the first 10 files found under the directory of <paramref name="filePath"/> (searched
/// recursively) are examined, and matching is a plain substring test, so the result is a
/// rough heuristic. Failures are logged as warnings; the files found so far are returned.
/// </remarks>
/// <param name="filePath">File being changed; it is never reported as affected.</param>
/// <param name="targetLine">Line whose tokens are searched for.</param>
/// <returns>Paths of the examined files that contain at least one token.</returns>
private async Task<List<string>> FindPotentiallyAffectedFilesAsync(string filePath, string targetLine)
{
    var affectedFiles = new List<string>();

    try
    {
        // The tokens depend only on the target line, so extract them once rather than
        // once per examined file.
        var symbols = ExtractPotentialSymbols(targetLine);
        if (symbols.Count == 0)
        {
            // Nothing to search for: no file can match.
            return affectedFiles;
        }

        var projectDirectory = Path.GetDirectoryName(filePath) ?? "";
        var allFiles = Directory.GetFiles(projectDirectory, "*.cs", SearchOption.AllDirectories);

        foreach (var file in allFiles.Take(10)) // Limit for performance
        {
            if (file.Equals(filePath, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var content = await File.ReadAllTextAsync(file);

            if (symbols.Any(symbol => content.Contains(symbol)))
            {
                affectedFiles.Add(file);
            }
        }
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex, "Failed to find potentially affected files for {FilePath}", filePath);
    }

    return affectedFiles;
}
|
|
|
|
/// <summary>
/// Splits a source line on spaces and common punctuation and keeps the tokens that are
/// longer than two characters and not all whitespace.
/// </summary>
/// <param name="line">The source line to tokenize.</param>
/// <returns>The retained tokens in their original order (duplicates are kept).</returns>
private List<string> ExtractPotentialSymbols(string line)
{
    char[] separators = { ' ', '(', ')', '{', '}', ';', ',', '.' };
    var tokens = new List<string>();

    foreach (var piece in line.Split(separators))
    {
        if (piece.Length <= 2 || string.IsNullOrWhiteSpace(piece))
        {
            continue;
        }

        tokens.Add(piece);
    }

    return tokens;
}
|
|
|
|
/// <summary>
/// Maps a change type and the number of potentially affected files to a risk label.
/// </summary>
/// <param name="changeType">Change classification, e.g. "TypeDeclaration" or "PublicMember".</param>
/// <param name="affectedFileCount">Number of files that may be affected.</param>
/// <returns>"High", "Medium" or "Low".</returns>
private string CalculateRiskLevel(string changeType, int affectedFileCount)
{
    switch (changeType)
    {
        case "TypeDeclaration":
            return affectedFileCount > 5 ? "High" : "Medium";

        case "PublicMember":
            return affectedFileCount > 3 ? "High" : "Medium";

        case "PrivateMember":
        case "UsingDirective":
            return "Low";

        default:
            return affectedFileCount > 2 ? "Medium" : "Low";
    }
}
|
|
|
|
/// <summary>
/// Recomputes the context's estimated token total and, when it exceeds
/// <paramref name="maxTokens"/>, keeps only as many chunks as fit.
/// </summary>
/// <remarks>
/// Chunks are visited by descending relevance score (primary-file chunks first among equal
/// scores). A chunk is kept when it still fits into the remaining budget; a chunk that does
/// not fit is skipped, and later, smaller chunks may still be kept.
/// </remarks>
/// <param name="context">Context to trim; it is modified in place.</param>
/// <param name="maxTokens">Token budget.</param>
/// <returns>The same <paramref name="context"/> instance.</returns>
private LLMContext OptimizeContextForTokens(LLMContext context, int maxTokens)
{
    context.EstimatedTokens = context.CodeChunks.Sum(c => c.EstimatedTokens);

    var exceedsBudget = context.EstimatedTokens > maxTokens;
    if (!exceedsBudget)
    {
        return context;
    }

    // Most relevant first; the primary file wins ties.
    var ranked =
        from chunk in context.CodeChunks
        orderby chunk.RelevanceScore descending, (chunk.Type == CodeChunkType.PrimaryFile ? 0 : 1)
        select chunk;

    var kept = new List<CodeChunk>();
    var usedTokens = 0;

    foreach (var candidate in ranked.ToList())
    {
        if (usedTokens + candidate.EstimatedTokens > maxTokens)
        {
            continue;
        }

        kept.Add(candidate);
        usedTokens += candidate.EstimatedTokens;
    }

    context.CodeChunks = kept;
    context.EstimatedTokens = usedTokens;

    return context;
}
|
|
|
|
/// <summary>
/// Roughly estimates the number of tokens in <paramref name="text"/> by assuming about
/// four characters per token.
/// </summary>
/// <param name="text">Text to measure.</param>
/// <returns>The text length divided by four, rounded down.</returns>
private int EstimateTokenCount(string text)
{
    const int charactersPerToken = 4;
    var characterCount = text.Length;
    return characterCount / charactersPerToken;
}
|
|
|
|
/// <summary>
/// Placeholder for dependency lookup: always yields an empty list.
/// </summary>
/// <remarks>
/// Declared without <c>async</c> because nothing is awaited; this avoids compiler warning
/// CS1998 and an unnecessary state machine while keeping the awaitable signature.
/// </remarks>
/// <param name="query">Not used by the current implementation.</param>
/// <returns>A completed task holding a new, empty list.</returns>
private Task<List<DependencyInfo>> GetDependencyInformationAsync(string query)
{
    // Simplified implementation
    return Task.FromResult(new List<DependencyInfo>());
}
|
|
|
|
/// <summary>
/// Placeholder for relationship mapping: always yields an empty list.
/// </summary>
/// <remarks>
/// Declared without <c>async</c> because nothing is awaited; this avoids compiler warning
/// CS1998 and an unnecessary state machine while keeping the awaitable signature.
/// </remarks>
/// <param name="query">Not used by the current implementation.</param>
/// <returns>A completed task holding a new, empty list.</returns>
private Task<List<CodeRelationship>> GetCodeRelationshipMappingAsync(string query)
{
    // Simplified implementation
    return Task.FromResult(new List<CodeRelationship>());
}
|
|
}
|
|
|
|
// Supporting classes for LLM context
|
|
/// <summary>
/// A bundle of code chunks, dependency data and relationship data prepared for an LLM.
/// </summary>
public class LLMContext
{
    /// <summary>The query this context was prepared for.</summary>
    public string Query { get; set; } = string.Empty;

    /// <summary>The token budget requested for this context.</summary>
    public int MaxTokens { get; set; }

    /// <summary>Estimated token count of the chunks in <see cref="CodeChunks"/>.</summary>
    public int EstimatedTokens { get; set; }

    /// <summary>When the context was generated.</summary>
    public DateTime GeneratedAt { get; set; }

    /// <summary>Path of the file the context is centered on, if any.</summary>
    public string? PrimaryFile { get; set; }

    /// <summary>The code chunks included in the context.</summary>
    public List<CodeChunk> CodeChunks { get; set; } = new List<CodeChunk>();

    /// <summary>Dependency information attached to the context.</summary>
    public List<DependencyInfo> Dependencies { get; set; } = new List<DependencyInfo>();

    /// <summary>Code relationships attached to the context.</summary>
    public List<CodeRelationship> Relationships { get; set; } = new List<CodeRelationship>();
}
|
|
|
|
/// <summary>
/// A piece of source text (a whole file or an excerpt) together with its metadata.
/// </summary>
public class CodeChunk
{
    /// <summary>Path of the file the text comes from.</summary>
    public string FilePath { get; set; } = string.Empty;

    /// <summary>The source text of the chunk.</summary>
    public string Content { get; set; } = string.Empty;

    /// <summary>What role the chunk plays in the context.</summary>
    public CodeChunkType Type { get; set; }

    /// <summary>First line of the excerpt within the file; 0 when not set.</summary>
    public int LineStart { get; set; }

    /// <summary>Last line of the excerpt within the file; 0 when not set.</summary>
    public int LineEnd { get; set; }

    /// <summary>Relevance of the chunk; defaults to 1.0.</summary>
    public float RelevanceScore { get; set; } = 1.0f;

    /// <summary>Estimated token count of <see cref="Content"/>.</summary>
    public int EstimatedTokens { get; set; }

    /// <summary>Names of symbols found in the chunk.</summary>
    public List<string> Symbols { get; set; } = new List<string>();

    /// <summary>Names of dependencies of the chunk.</summary>
    public List<string> Dependencies { get; set; } = new List<string>();
}
|
|
|
|
/// <summary>
/// The role a <c>CodeChunk</c> plays within a context.
/// </summary>
public enum CodeChunkType
{
    /// <summary>The file the context is centered on.</summary>
    PrimaryFile,

    /// <summary>A file related to the primary file.</summary>
    RelatedFile,

    /// <summary>An excerpt of a file rather than the whole file.</summary>
    RelevantSection,

    /// <summary>A file included as a dependency.</summary>
    DependencyFile
}
|
|
|
|
/// <summary>
/// Result of a dependency lookup for a single symbol.
/// </summary>
public class DependencyContext
{
    /// <summary>The symbol the lookup started from.</summary>
    public string RootSymbol { get; set; } = string.Empty;

    /// <summary>The depth limit requested for the lookup.</summary>
    public int MaxDepth { get; set; }

    /// <summary>The dependencies found for the symbol.</summary>
    public List<DependencyInfo> Dependencies { get; set; } = new List<DependencyInfo>();
}
|
|
|
|
/// <summary>
/// Describes one dependency.
/// </summary>
public class DependencyInfo
{
    /// <summary>Name of the dependency.</summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>Kind of the dependency, as free-form text.</summary>
    public string Type { get; set; } = string.Empty;

    /// <summary>Depth at which the dependency was found.</summary>
    public int Depth { get; set; }

    /// <summary>Path of the file that declares the dependency.</summary>
    public string FilePath { get; set; } = string.Empty;
}
|
|
|
|
/// <summary>
/// Result of analyzing the impact of changing one line of a file.
/// </summary>
public class ChangeImpactContext
{
    /// <summary>Path of the file being changed.</summary>
    public string TargetFile { get; set; } = string.Empty;

    /// <summary>Line number of the change within <see cref="TargetFile"/>.</summary>
    public int TargetLine { get; set; }

    /// <summary>Classification of the change, as free-form text.</summary>
    public string ChangeType { get; set; } = string.Empty;

    /// <summary>Paths of files that may be affected by the change.</summary>
    public List<string> PotentiallyAffectedFiles { get; set; } = new List<string>();

    /// <summary>Risk label of the change, as free-form text.</summary>
    public string RiskLevel { get; set; } = string.Empty;
}
|
|
|
|
/// <summary>
/// Relationship data collected for a single symbol.
/// </summary>
public class CodeRelationshipContext
{
    /// <summary>The symbol the relationships refer to.</summary>
    public string TargetSymbol { get; set; } = string.Empty;

    /// <summary>Names recorded as callers of the symbol.</summary>
    public List<string> Callers { get; set; } = new List<string>();

    /// <summary>Names recorded as callees of the symbol.</summary>
    public List<string> Callees { get; set; } = new List<string>();

    /// <summary>Names recorded as inheritors of the symbol.</summary>
    public List<string> Inheritors { get; set; } = new List<string>();

    /// <summary>Names recorded as implementers of the symbol.</summary>
    public List<string> Implementers { get; set; } = new List<string>();
}
|
|
|
|
/// <summary>
/// A directed relationship between two code elements.
/// </summary>
public class CodeRelationship
{
    /// <summary>The element the relationship starts at.</summary>
    public string From { get; set; } = string.Empty;

    /// <summary>The element the relationship points to.</summary>
    public string To { get; set; } = string.Empty;

    /// <summary>Kind of the relationship, as free-form text.</summary>
    public string Type { get; set; } = string.Empty;
}
|
|
} |