MarketAlly.AIPlugin.Extensions/MarketAlly.AIPlugin.Learning/Services/MultiLanguageMethodExtracto...

633 lines
28 KiB
C#
Executable File

using Microsoft.Extensions.Logging;
using System.Text.RegularExpressions;
using System.Text.Json;
using MarketAlly.AIPlugin.Learning.Models;
namespace MarketAlly.AIPlugin.Learning.Services
{
/// <summary>
/// Multi-language method extraction service for non-C# languages
/// Provides basic method extraction for JavaScript, TypeScript, Python, Java, etc.
/// </summary>
public interface IMultiLanguageMethodExtractor
{
/// <summary>
/// Extracts methods from the source files of one language found under a project directory.
/// </summary>
/// <param name="projectPath">Root directory of the project to scan.</param>
/// <param name="language">Language key; see <see cref="GetSupportedLanguages"/> for the accepted values.</param>
/// <returns>The methods extracted from the project's files for that language.</returns>
Task<List<MethodExtraction>> ExtractMethodsFromProjectAsync(string projectPath, string language);
/// <summary>
/// Extracts methods from a single source file.
/// </summary>
/// <param name="filePath">Path of the file to read.</param>
/// <param name="language">Language key; see <see cref="GetSupportedLanguages"/> for the accepted values.</param>
/// <returns>The methods extracted from the file.</returns>
Task<List<MethodExtraction>> ExtractMethodsFromFileAsync(string filePath, string language);
/// <summary>
/// Analyzes a project directory and reports its structure as a <see cref="ProjectAnalysisResult"/>.
/// </summary>
/// <param name="projectPath">Root directory of the project to analyze.</param>
/// <returns>The analysis result for the project.</returns>
Task<ProjectAnalysisResult> AnalyzeProjectStructureAsync(string projectPath);
/// <summary>
/// Lists the language keys this service can handle.
/// </summary>
/// <returns>The supported language keys.</returns>
List<string> GetSupportedLanguages();
}
/// <summary>
/// Dispatches method extraction and project analysis to a per-language
/// <see cref="LanguageExtractor"/> selected by language key.
/// </summary>
public class MultiLanguageMethodExtractor : IMultiLanguageMethodExtractor
{
    private readonly ILogger<MultiLanguageMethodExtractor> _logger;
    private readonly Dictionary<string, LanguageExtractor> _extractors;

    /// <summary>
    /// Creates the service and registers one extractor per supported language key.
    /// </summary>
    /// <param name="logger">Logger used by this service and handed to every language extractor.</param>
    public MultiLanguageMethodExtractor(ILogger<MultiLanguageMethodExtractor> logger)
    {
        _logger = logger;

        // Keys are compared ordinally and case-insensitively so that e.g. "TypeScript" resolves
        // identically under every culture; a culture-sensitive ToLower() does not (tr-TR maps
        // 'I' to a dotless 'ı', so "TYPESCRIPT" would never match "typescript").
        _extractors = new Dictionary<string, LanguageExtractor>(StringComparer.OrdinalIgnoreCase)
        {
            ["javascript"] = new JavaScriptExtractor(logger),
            ["typescript"] = new TypeScriptExtractor(logger),
            ["python"] = new PythonExtractor(logger),
            ["java"] = new JavaExtractor(logger),
            ["php"] = new PhpExtractor(logger),
            ["ruby"] = new RubyExtractor(logger),
            ["go"] = new GoExtractor(logger)
        };
    }

    /// <summary>
    /// Lists the registered language keys.
    /// </summary>
    /// <returns>A new list containing every registered language key.</returns>
    public List<string> GetSupportedLanguages()
    {
        return _extractors.Keys.ToList();
    }

    /// <summary>
    /// Extracts methods from every relevant file of <paramref name="language"/> under
    /// <paramref name="projectPath"/>.
    /// </summary>
    /// <param name="projectPath">Root directory of the project to scan.</param>
    /// <param name="language">Language key (case-insensitive).</param>
    /// <returns>
    /// All extracted methods; an empty list when the language is null, blank or not registered.
    /// </returns>
    /// <remarks>
    /// Failures while enumerating the project's files are logged and rethrown; failures on an
    /// individual file are logged and that file is skipped.
    /// </remarks>
    public async Task<List<MethodExtraction>> ExtractMethodsFromProjectAsync(string projectPath, string language)
    {
        try
        {
            _logger.LogInformation("Extracting methods from project: {ProjectPath} for language: {Language}", projectPath, language);

            // A null/blank key is treated like any other unknown language instead of crashing the lookup.
            if (string.IsNullOrWhiteSpace(language) || !_extractors.TryGetValue(language, out var extractor))
            {
                _logger.LogWarning("Unsupported language: {Language}", language);
                return new List<MethodExtraction>();
            }

            var files = extractor.GetRelevantFiles(projectPath);
            var allMethods = new List<MethodExtraction>();

            foreach (var file in files)
            {
                try
                {
                    var methods = await ExtractMethodsFromFileAsync(file, language).ConfigureAwait(false);
                    allMethods.AddRange(methods);
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Failed to extract methods from file: {FilePath}", file);
                }
            }

            _logger.LogInformation("Extracted {Count} methods from {FileCount} files in project: {ProjectPath}",
                allMethods.Count, files.Count, projectPath);
            return allMethods;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error extracting methods from project: {ProjectPath} for language: {Language}", projectPath, language);
            throw;
        }
    }

    /// <summary>
    /// Reads <paramref name="filePath"/> and extracts its methods with the extractor registered
    /// for <paramref name="language"/>.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <param name="language">Language key (case-insensitive).</param>
    /// <returns>
    /// The extracted methods; an empty list when the file does not exist, the language is null,
    /// blank or not registered, or reading/extraction fails (the failure is logged, not thrown).
    /// </returns>
    public async Task<List<MethodExtraction>> ExtractMethodsFromFileAsync(string filePath, string language)
    {
        try
        {
            if (!File.Exists(filePath))
            {
                _logger.LogWarning("File not found: {FilePath}", filePath);
                return new List<MethodExtraction>();
            }

            if (string.IsNullOrWhiteSpace(language) || !_extractors.TryGetValue(language, out var extractor))
            {
                _logger.LogWarning("Unsupported language: {Language}", language);
                return new List<MethodExtraction>();
            }

            var content = await File.ReadAllTextAsync(filePath).ConfigureAwait(false);
            var methods = extractor.ExtractMethods(content, filePath);
            _logger.LogDebug("Extracted {Count} methods from file: {FilePath}", methods.Count, filePath);
            return methods;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error extracting methods from file: {FilePath} for language: {Language}", filePath, language);
            return new List<MethodExtraction>();
        }
    }

    /// <summary>
    /// Counts the relevant files of every registered language under <paramref name="projectPath"/>
    /// and collects the frameworks each extractor detects.
    /// </summary>
    /// <param name="projectPath">Root directory of the project to analyze.</param>
    /// <returns>
    /// Per-language file counts and file names, the detected frameworks (one entry per
    /// name/language pair) and the language with the most files as primary language
    /// (null when no relevant file was found).
    /// </returns>
    /// <remarks>Any failure is logged and rethrown.</remarks>
    public async Task<ProjectAnalysisResult> AnalyzeProjectStructureAsync(string projectPath)
    {
        try
        {
            _logger.LogInformation("Analyzing project structure: {ProjectPath}", projectPath);

            var result = new ProjectAnalysisResult
            {
                ProjectPath = projectPath,
                Languages = new Dictionary<string, int>(),
                Files = new Dictionary<string, List<string>>(),
                Frameworks = new List<DetectedFramework>(),
                AnalyzedAt = DateTime.UtcNow
            };

            foreach (var (language, extractor) in _extractors)
            {
                var files = extractor.GetRelevantFiles(projectPath);
                if (files.Any())
                {
                    result.Languages[language] = files.Count;
                    result.Files[language] = files.Select(Path.GetFileName).ToList();
                    var frameworks = extractor.DetectFrameworks(projectPath, files);
                    result.Frameworks.AddRange(frameworks);
                }
            }

            result.PrimaryLanguage = result.Languages.OrderByDescending(l => l.Value).FirstOrDefault().Key;

            // Several extractors can report the same framework (the TypeScript extractor reuses the
            // JavaScript detection), and each report is a distinct object, so de-duplicate on the
            // identifying values rather than on object identity. The first report of each pair wins.
            result.Frameworks = result.Frameworks
                .GroupBy(f => (f.Name, f.Language))
                .Select(g => g.First())
                .ToList();

            _logger.LogInformation("Project analysis complete: {ProjectPath} - Primary language: {PrimaryLanguage}, {FileCount} files across {LanguageCount} languages",
                projectPath, result.PrimaryLanguage, result.Languages.Values.Sum(), result.Languages.Count);
            return result;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error analyzing project structure: {ProjectPath}", projectPath);
            throw;
        }
    }
}
/// <summary>
/// Base class for language-specific extractors. Each subclass supplies file discovery,
/// method extraction and framework detection for one language.
/// </summary>
public abstract partial class LanguageExtractor
{
    /// <summary>Logger available to derived extractors.</summary>
    protected readonly ILogger _logger;

    /// <summary>Stores the logger for use by the derived extractor.</summary>
    /// <param name="logger">Logger to keep.</param>
    protected LanguageExtractor(ILogger logger) => _logger = logger;

    /// <summary>Returns the source files of this language found under <paramref name="projectPath"/>.</summary>
    public abstract List<string> GetRelevantFiles(string projectPath);

    /// <summary>Extracts the methods found in <paramref name="content"/>, the text of <paramref name="filePath"/>.</summary>
    public abstract List<MethodExtraction> ExtractMethods(string content, string filePath);

    /// <summary>Reports the frameworks detected for the project at <paramref name="projectPath"/>.</summary>
    public abstract List<DetectedFramework> DetectFrameworks(string projectPath, List<string> files);

    /// <summary>Returns the language name this extractor stamps on its results.</summary>
    protected abstract string GetLanguageName();
}
/// <summary>
/// Regex-based extractor for JavaScript/Node.js sources (<c>*.js</c>).
/// Extraction is heuristic: it recognizes function declarations, arrow functions assigned to
/// a variable, and <c>name(params) {</c> method definitions.
/// </summary>
public class JavaScriptExtractor : LanguageExtractor
{
    // Directory names that hold dependencies or build output rather than project source.
    private static readonly HashSet<string> ExcludedDirectories = new() { "node_modules", "dist", "build" };

    // Words that may be followed by "(...) {" without naming a method.
    private static readonly HashSet<string> NonMethodKeywords = new()
    {
        "if", "for", "while", "switch", "catch", "with", "function"
    };

    // Function declarations: function name(params) { }
    private static readonly Regex FunctionPattern =
        new(@"function\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(([^)]*)\)\s*\{");

    // Arrow functions: const name = (params) => { }
    private static readonly Regex ArrowPattern =
        new(@"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*\(([^)]*)\)\s*=>");

    // Method definitions in classes/objects: methodName(params) { }
    private static readonly Regex MethodPattern =
        new(@"([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(([^)]*)\)\s*\{");

    // package.json markers, in reporting order. A framework is reported when any marker occurs.
    private static readonly (string[] Markers, string Name, string Language, string Documentation, string[] Usages)[] KnownFrameworks =
    {
        (new[] { "\"react\"" }, "React", "JavaScript",
            "A JavaScript library for building user interfaces, maintained by Facebook",
            new[] { "Component-based UI", "Single Page Applications", "State management" }),
        (new[] { "\"vue\"" }, "Vue.js", "JavaScript",
            "Progressive JavaScript framework for building user interfaces",
            new[] { "Progressive web apps", "Component composition", "Reactive data binding" }),
        // Angular 2+ is published as "@angular/core"; the bare "angular" package is AngularJS 1.x.
        (new[] { "\"angular\"", "\"@angular/core\"" }, "Angular", "TypeScript",
            "TypeScript-based web application framework led by Google",
            new[] { "Enterprise applications", "TypeScript development", "Dependency injection" }),
        (new[] { "\"express\"" }, "Express.js", "JavaScript",
            "Fast, unopinionated, minimalist web framework for Node.js",
            new[] { "REST APIs", "Web servers", "Middleware patterns" }),
        (new[] { "\"next\"" }, "Next.js", "JavaScript",
            "React framework for production with hybrid static & server rendering",
            new[] { "Server-side rendering", "Static site generation", "Full-stack React apps" }),
        (new[] { "\"nuxt\"" }, "Nuxt.js", "JavaScript",
            "Intuitive Vue framework for web applications",
            new[] { "Universal Vue applications", "Static generation", "Server-side rendering" }),
    };

    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger used for diagnostics.</param>
    public JavaScriptExtractor(ILogger logger) : base(logger) { }

    /// <inheritdoc/>
    protected override string GetLanguageName() => "javascript";

    /// <summary>
    /// Finds every <c>*.js</c> file under <paramref name="projectPath"/>, skipping files that
    /// live inside a <c>node_modules</c>, <c>dist</c> or <c>build</c> directory.
    /// </summary>
    /// <param name="projectPath">Root directory to search recursively.</param>
    /// <returns>Paths of the matching files.</returns>
    public override List<string> GetRelevantFiles(string projectPath)
    {
        return Directory.GetFiles(projectPath, "*.js", SearchOption.AllDirectories)
            .Where(f => !IsInExcludedDirectory(projectPath, f))
            .ToList();
    }

    /// <summary>
    /// Extracts function declarations, variable-assigned arrow functions and method definitions
    /// from <paramref name="content"/>.
    /// </summary>
    /// <param name="content">Source text to scan.</param>
    /// <param name="filePath">Path recorded on every result.</param>
    /// <returns>
    /// One entry per recognized definition, typed <c>function</c>, <c>arrow_function</c> or
    /// <c>method</c>. Control-flow statements are not reported, and a function declaration is
    /// reported once (as <c>function</c>), not a second time as a method.
    /// </returns>
    public override List<MethodExtraction> ExtractMethods(string content, string filePath)
    {
        var methods = new List<MethodExtraction>();

        // Offsets of declared function names. The method pattern re-matches "name(params) {"
        // starting at the NAME, not at the "function" keyword, so de-duplication has to compare
        // name offsets.
        var declaredNameOffsets = new HashSet<int>();

        foreach (Match match in FunctionPattern.Matches(content))
        {
            declaredNameOffsets.Add(match.Groups[1].Index);
            methods.Add(CreateExtraction(match, content, filePath, "function"));
        }

        foreach (Match match in ArrowPattern.Matches(content))
        {
            methods.Add(CreateExtraction(match, content, filePath, "arrow_function"));
        }

        foreach (Match match in MethodPattern.Matches(content))
        {
            var name = match.Groups[1];

            // "if (x) {", "for (...) {", anonymous "function (x) {" and the like are not methods.
            if (NonMethodKeywords.Contains(name.Value))
            {
                continue;
            }

            // Already reported as a function declaration.
            if (declaredNameOffsets.Contains(name.Index))
            {
                continue;
            }

            methods.Add(CreateExtraction(match, content, filePath, "method"));
        }

        return methods;
    }

    /// <summary>
    /// Detects well-known frameworks by looking for their package names in the project's
    /// top-level <c>package.json</c>.
    /// </summary>
    /// <param name="projectPath">Project root expected to contain <c>package.json</c>.</param>
    /// <param name="files">Not used by this extractor.</param>
    /// <returns>The detected frameworks; empty when the file is missing or cannot be read.</returns>
    public override List<DetectedFramework> DetectFrameworks(string projectPath, List<string> files)
    {
        var frameworks = new List<DetectedFramework>();

        // Check package.json for framework dependencies
        var packageJsonPath = Path.Combine(projectPath, "package.json");
        if (!File.Exists(packageJsonPath))
        {
            return frameworks;
        }

        try
        {
            var packageJson = File.ReadAllText(packageJsonPath);
            foreach (var framework in KnownFrameworks)
            {
                if (framework.Markers.Any(marker => packageJson.Contains(marker)))
                {
                    frameworks.Add(CreateFrameworkInfo(framework.Name, framework.Language, "package.json",
                        packageJsonPath, framework.Documentation, framework.Usages));
                }
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse package.json: {PackageJsonPath}", packageJsonPath);
        }

        return frameworks;
    }

    /// <summary>
    /// Splits a raw parameter list on commas and trims each entry.
    /// </summary>
    /// <param name="paramString">Text between the parentheses of a definition.</param>
    /// <returns>The non-empty, trimmed parameter entries.</returns>
    protected List<string> ParseJavaScriptParameters(string paramString)
    {
        if (string.IsNullOrWhiteSpace(paramString))
            return new List<string>();

        return paramString.Split(',')
            .Select(p => p.Trim())
            .Where(p => !string.IsNullOrEmpty(p))
            .ToList();
    }

    // Builds the result for one regex match (group 1 = name, group 2 = raw parameter list).
    private MethodExtraction CreateExtraction(Match match, string content, string filePath, string type)
    {
        return new MethodExtraction
        {
            Name = match.Groups[1].Value,
            Parameters = ParseJavaScriptParameters(match.Groups[2].Value),
            FilePath = filePath,
            Language = GetLanguageName(),
            LineNumber = GetLineNumber(content, match.Index),
            Signature = match.Value,
            Type = type
        };
    }

    // Builds a framework record for a single detection source file.
    private DetectedFramework CreateFrameworkInfo(string name, string language, string detectionMethod, string detectedFile, string documentation, string[] usages)
    {
        return new DetectedFramework
        {
            Name = name,
            Language = language,
            DetectionMethod = detectionMethod,
            DetectedFiles = new List<string> { detectedFile },
            Documentation = documentation,
            CommonUsages = new List<string>(usages)
        };
    }

    // True when a directory BETWEEN the project root and the file is an excluded one. Only whole
    // path segments below the root are compared, so "builder.js", "src/distance/" or a project
    // checked out under ".../build/app" are not excluded by accident.
    private static bool IsInExcludedDirectory(string projectPath, string filePath)
    {
        var relativeDirectory = Path.GetDirectoryName(Path.GetRelativePath(projectPath, filePath));
        if (string.IsNullOrEmpty(relativeDirectory))
        {
            return false;
        }

        return relativeDirectory
            .Split(new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar }, StringSplitOptions.RemoveEmptyEntries)
            .Any(segment => ExcludedDirectories.Contains(segment));
    }
}
/// <summary>
/// TypeScript extractor (<c>*.ts</c>, <c>*.tsx</c>). Extends the JavaScript extraction with
/// body-less method signatures of the form <c>name(params): ReturnType;</c>.
/// </summary>
public class TypeScriptExtractor : JavaScriptExtractor
{
    // Directory names that hold dependencies or build output rather than project source.
    private static readonly HashSet<string> ExcludedDirectories = new() { "node_modules", "dist", "build" };

    // name(params): ReturnType;
    // The return type must stay on one line and may contain only single-line "{ ... }" groups
    // (object type literals). This keeps a method IMPLEMENTATION such as
    // "foo(): string {" + newline + "return x;" from being reported as an interface signature
    // whose return type swallows part of the body.
    private static readonly Regex InterfaceMethodPattern =
        new(@"([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(([^)]*)\)\s*:\s*((?:[^;{}\r\n]|\{[^{}\r\n]*\})+);");

    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger used for diagnostics.</param>
    public TypeScriptExtractor(ILogger logger) : base(logger) { }

    /// <inheritdoc/>
    protected override string GetLanguageName() => "typescript";

    /// <summary>
    /// Finds every <c>*.ts</c> and <c>*.tsx</c> file under <paramref name="projectPath"/>,
    /// skipping files that live inside a <c>node_modules</c>, <c>dist</c> or <c>build</c> directory.
    /// </summary>
    /// <param name="projectPath">Root directory to search recursively.</param>
    /// <returns>Paths of the matching files, <c>*.ts</c> first.</returns>
    public override List<string> GetRelevantFiles(string projectPath)
    {
        return Directory.GetFiles(projectPath, "*.ts", SearchOption.AllDirectories)
            .Concat(Directory.GetFiles(projectPath, "*.tsx", SearchOption.AllDirectories))
            .Where(f => !IsInExcludedDirectory(projectPath, f))
            .ToList();
    }

    /// <summary>
    /// Runs the JavaScript extraction and then appends TypeScript method signatures
    /// (<c>name(params): ReturnType;</c>) typed <c>interface_method</c>.
    /// </summary>
    /// <param name="content">Source text to scan.</param>
    /// <param name="filePath">Path recorded on every result.</param>
    /// <returns>The JavaScript-style results followed by the signature results.</returns>
    public override List<MethodExtraction> ExtractMethods(string content, string filePath)
    {
        var methods = base.ExtractMethods(content, filePath);

        foreach (Match match in InterfaceMethodPattern.Matches(content))
        {
            methods.Add(new MethodExtraction
            {
                Name = match.Groups[1].Value,
                Parameters = ParseJavaScriptParameters(match.Groups[2].Value),
                ReturnType = match.Groups[3].Value.Trim(),
                FilePath = filePath,
                Language = GetLanguageName(),
                LineNumber = GetLineNumber(content, match.Index),
                Signature = match.Value,
                Type = "interface_method"
            });
        }

        return methods;
    }

    // True when a directory BETWEEN the project root and the file is an excluded one. Only whole
    // path segments below the root are compared, so "rebuild.ts", "src/distance/" or a project
    // checked out under ".../build/app" are not excluded by accident.
    private static bool IsInExcludedDirectory(string projectPath, string filePath)
    {
        var relativeDirectory = Path.GetDirectoryName(Path.GetRelativePath(projectPath, filePath));
        if (string.IsNullOrEmpty(relativeDirectory))
        {
            return false;
        }

        return relativeDirectory
            .Split(new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar }, StringSplitOptions.RemoveEmptyEntries)
            .Any(segment => ExcludedDirectories.Contains(segment));
    }
}
/// <summary>
/// Regex-based extractor for Python sources (<c>*.py</c>). Recognizes <c>def name(params):</c>
/// definitions, with or without a <c>-&gt; ReturnType</c> annotation.
/// </summary>
public class PythonExtractor : LanguageExtractor
{
    // Directory names that hold caches or virtual environments rather than project source.
    private static readonly HashSet<string> ExcludedDirectories = new() { "__pycache__", ".venv", "venv" };

    // def name(params):   or   def name(params) -> ReturnType:
    private static readonly Regex DefPattern =
        new(@"\bdef\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]*)\)\s*(?:->\s*[^:\r\n]+?\s*)?:");

    // requirements.txt markers, in reporting order.
    private static readonly (string Marker, string Name, string Documentation, string[] Usages)[] KnownFrameworks =
    {
        ("django", "Django",
            "High-level Python web framework for rapid development",
            new[] { "Web applications", "Admin interfaces", "ORM operations" }),
        ("flask", "Flask",
            "Lightweight WSGI web application framework for Python",
            new[] { "Microservices", "REST APIs", "Lightweight web apps" }),
        ("fastapi", "FastAPI",
            "Modern, fast web framework for building APIs with Python",
            new[] { "High-performance APIs", "Async operations", "API documentation" }),
        ("numpy", "NumPy",
            "Fundamental package for scientific computing with Python",
            new[] { "Scientific computing", "Array operations", "Mathematical functions" }),
        ("pandas", "Pandas",
            "Data analysis and manipulation library for Python",
            new[] { "Data analysis", "CSV processing", "Data manipulation" }),
    };

    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger used for diagnostics.</param>
    public PythonExtractor(ILogger logger) : base(logger) { }

    /// <inheritdoc/>
    protected override string GetLanguageName() => "python";

    /// <summary>
    /// Finds every <c>*.py</c> file under <paramref name="projectPath"/>, skipping files that
    /// live inside a <c>__pycache__</c>, <c>.venv</c> or <c>venv</c> directory.
    /// </summary>
    /// <param name="projectPath">Root directory to search recursively.</param>
    /// <returns>Paths of the matching files.</returns>
    public override List<string> GetRelevantFiles(string projectPath)
    {
        return Directory.GetFiles(projectPath, "*.py", SearchOption.AllDirectories)
            .Where(f => !IsInExcludedDirectory(projectPath, f))
            .ToList();
    }

    /// <summary>
    /// Extracts every <c>def</c> definition from <paramref name="content"/>.
    /// </summary>
    /// <param name="content">Source text to scan.</param>
    /// <param name="filePath">Path recorded on every result.</param>
    /// <returns>One entry, typed <c>function</c>, per definition found.</returns>
    public override List<MethodExtraction> ExtractMethods(string content, string filePath)
    {
        var methods = new List<MethodExtraction>();

        foreach (Match match in DefPattern.Matches(content))
        {
            methods.Add(new MethodExtraction
            {
                Name = match.Groups[1].Value,
                Parameters = ParsePythonParameters(match.Groups[2].Value),
                FilePath = filePath,
                Language = GetLanguageName(),
                LineNumber = GetLineNumber(content, match.Index),
                Signature = match.Value,
                Type = "function"
            });
        }

        return methods;
    }

    /// <summary>
    /// Detects well-known packages by looking for their names in the project's top-level
    /// <c>requirements.txt</c>.
    /// </summary>
    /// <param name="projectPath">Project root expected to contain <c>requirements.txt</c>.</param>
    /// <param name="files">Not used by this extractor.</param>
    /// <returns>The detected frameworks; empty when the file is missing or cannot be read.</returns>
    public override List<DetectedFramework> DetectFrameworks(string projectPath, List<string> files)
    {
        var frameworks = new List<DetectedFramework>();

        // Check requirements.txt for framework dependencies
        var requirementsPath = Path.Combine(projectPath, "requirements.txt");
        if (!File.Exists(requirementsPath))
        {
            return frameworks;
        }

        try
        {
            var requirements = File.ReadAllText(requirementsPath);
            foreach (var framework in KnownFrameworks)
            {
                // Package names are case-insensitive and are commonly written "Django", "Flask".
                if (requirements.IndexOf(framework.Marker, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    frameworks.Add(CreateFrameworkInfo(framework.Name, "Python", "requirements.txt",
                        requirementsPath, framework.Documentation, framework.Usages));
                }
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse requirements.txt: {RequirementsPath}", requirementsPath);
        }

        return frameworks;
    }

    // Builds a framework record for a single detection source file.
    private DetectedFramework CreateFrameworkInfo(string name, string language, string detectionMethod, string detectedFile, string documentation, string[] usages)
    {
        return new DetectedFramework
        {
            Name = name,
            Language = language,
            DetectionMethod = detectionMethod,
            DetectedFiles = new List<string> { detectedFile },
            Documentation = documentation,
            CommonUsages = new List<string>(usages)
        };
    }

    // Reduces a raw parameter list to parameter names: default values and type hints are dropped,
    // and the bare "*" / "/" separators (which are not parameters) are skipped.
    private List<string> ParsePythonParameters(string paramString)
    {
        if (string.IsNullOrWhiteSpace(paramString))
            return new List<string>();

        return SplitTopLevel(paramString)
            .Select(p => p.Trim().Split('=')[0].Split(':')[0].Trim()) // Handle default values and type hints
            .Where(p => !string.IsNullOrEmpty(p) && p != "*" && p != "/")
            .ToList();
    }

    // Splits on commas that are not nested inside brackets, so a hint such as "Dict[str, int]"
    // stays attached to its parameter instead of producing a bogus "int]" entry.
    private static IEnumerable<string> SplitTopLevel(string text)
    {
        var depth = 0;
        var start = 0;

        for (var i = 0; i < text.Length; i++)
        {
            switch (text[i])
            {
                case '[':
                case '(':
                case '{':
                    depth++;
                    break;
                case ']':
                case ')':
                case '}':
                    if (depth > 0)
                    {
                        depth--;
                    }
                    break;
                case ',' when depth == 0:
                    yield return text.Substring(start, i - start);
                    start = i + 1;
                    break;
            }
        }

        yield return text.Substring(start);
    }

    // True when a directory BETWEEN the project root and the file is an excluded one. Only whole
    // path segments below the root are compared, so a file or project path that merely contains
    // "venv" is not excluded by accident.
    private static bool IsInExcludedDirectory(string projectPath, string filePath)
    {
        var relativeDirectory = Path.GetDirectoryName(Path.GetRelativePath(projectPath, filePath));
        if (string.IsNullOrEmpty(relativeDirectory))
        {
            return false;
        }

        return relativeDirectory
            .Split(new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar }, StringSplitOptions.RemoveEmptyEntries)
            .Any(segment => ExcludedDirectories.Contains(segment));
    }
}
/// <summary>
/// Regex-based extractor for Java sources (<c>*.java</c>). Recognizes method and constructor
/// definitions that have a body; body-less declarations (interface/abstract methods) are not
/// reported.
/// </summary>
public class JavaExtractor : LanguageExtractor
{
    // Directory names that hold build output rather than project source.
    private static readonly HashSet<string> ExcludedDirectories = new() { "target", "build" };

    // Modifiers that may precede a definition. When one of them lands in the return-type slot,
    // the match is a constructor ("public Foo(...) {").
    private static readonly HashSet<string> Modifiers = new()
    {
        "public", "private", "protected", "static", "final", "abstract",
        "synchronized", "native", "strictfp", "default"
    };

    // Words that can precede "Name(...) {" without being a return type ("new Foo() {", "else if (...) {").
    private static readonly HashSet<string> NonTypeKeywords = new()
    {
        "new", "return", "else", "throw", "case", "do", "assert"
    };

    // Words that can be followed by "(...) {" without being a method name.
    private static readonly HashSet<string> NonMethodKeywords = new()
    {
        "if", "for", "while", "switch", "catch", "synchronized", "try", "new", "return"
    };

    private static readonly char[] Whitespace = { ' ', '\t', '\r', '\n' };

    // [modifiers] [<type params>] returnType methodName(params) [throws X, Y] {
    // The pattern starts on a non-whitespace character, so Match.Index is the first character of
    // the definition itself and the reported line number is the definition's own line.
    private static readonly Regex MethodPattern = new(
        @"(?<![\w$])" +
        @"(?:(?:public|private|protected|static|final|abstract|synchronized|native|strictfp|default)\s+)*" +
        @"(?:<[^<>(){};]+>\s*)?" +
        @"([\w$.]+(?:<[^(){};=]*>)?(?:\s*\[\s*\])*)\s+" +
        @"([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(([^)]*)\)\s*" +
        @"(?:throws\s+[\w$.]+(?:\s*,\s*[\w$.]+)*\s*)?\{");

    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger used for diagnostics.</param>
    public JavaExtractor(ILogger logger) : base(logger) { }

    /// <inheritdoc/>
    protected override string GetLanguageName() => "java";

    /// <summary>
    /// Finds every <c>*.java</c> file under <paramref name="projectPath"/>, skipping files that
    /// live inside a <c>target</c> or <c>build</c> directory.
    /// </summary>
    /// <param name="projectPath">Root directory to search recursively.</param>
    /// <returns>Paths of the matching files.</returns>
    public override List<string> GetRelevantFiles(string projectPath)
    {
        return Directory.GetFiles(projectPath, "*.java", SearchOption.AllDirectories)
            .Where(f => !IsInExcludedDirectory(projectPath, f))
            .ToList();
    }

    /// <summary>
    /// Extracts method and constructor definitions from <paramref name="content"/>.
    /// </summary>
    /// <param name="content">Source text to scan.</param>
    /// <param name="filePath">Path recorded on every result.</param>
    /// <returns>
    /// One entry per definition. Methods are typed <c>method</c> and carry their return type
    /// (generic and array types included); constructors that have at least one modifier are typed
    /// <c>constructor</c> with an empty return type. Control-flow statements and anonymous-class
    /// instantiations are not reported.
    /// </returns>
    public override List<MethodExtraction> ExtractMethods(string content, string filePath)
    {
        var methods = new List<MethodExtraction>();

        foreach (Match match in MethodPattern.Matches(content))
        {
            var returnType = match.Groups[1].Value;
            var name = match.Groups[2].Value;

            // "else if (x) {", "new Runnable() {" and similar statements fit the textual shape
            // of a definition but are not one.
            if (NonTypeKeywords.Contains(returnType) || NonMethodKeywords.Contains(name))
            {
                continue;
            }

            // "public Foo(...) {": the word in the return-type slot is really a modifier.
            var isConstructor = Modifiers.Contains(returnType);

            methods.Add(new MethodExtraction
            {
                Name = name,
                ReturnType = isConstructor ? string.Empty : returnType,
                Parameters = ParseJavaParameters(match.Groups[3].Value),
                FilePath = filePath,
                Language = GetLanguageName(),
                LineNumber = GetLineNumber(content, match.Index),
                Signature = match.Value,
                Type = isConstructor ? "constructor" : "method"
            });
        }

        return methods;
    }

    /// <summary>
    /// Detects well-known frameworks by looking for their artifact names in the project's
    /// top-level <c>pom.xml</c>.
    /// </summary>
    /// <param name="projectPath">Project root expected to contain <c>pom.xml</c>.</param>
    /// <param name="files">Not used by this extractor.</param>
    /// <returns>The detected frameworks; empty when the file is missing or cannot be read.</returns>
    public override List<DetectedFramework> DetectFrameworks(string projectPath, List<string> files)
    {
        var frameworks = new List<DetectedFramework>();

        // Check pom.xml for Maven dependencies
        var pomPath = Path.Combine(projectPath, "pom.xml");
        if (!File.Exists(pomPath))
        {
            return frameworks;
        }

        try
        {
            var pom = File.ReadAllText(pomPath);

            if (pom.Contains("spring-boot"))
                frameworks.Add(CreateFrameworkInfo("Spring Boot", "Java", "pom.xml", pomPath,
                    "Java-based framework for creating production-ready applications",
                    new[] { "Microservices", "Enterprise applications", "Auto-configuration" }));

            if (pom.Contains("spring-framework"))
                frameworks.Add(CreateFrameworkInfo("Spring Framework", "Java", "pom.xml", pomPath,
                    "Comprehensive programming and configuration model for Java",
                    new[] { "Dependency injection", "AOP programming", "Enterprise integration" }));

            if (pom.Contains("hibernate"))
                frameworks.Add(CreateFrameworkInfo("Hibernate", "Java", "pom.xml", pomPath,
                    "Object-relational mapping framework for Java",
                    new[] { "ORM mapping", "Database abstraction", "JPA implementation" }));
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse pom.xml: {PomPath}", pomPath);
        }

        return frameworks;
    }

    // Builds a framework record for a single detection source file.
    private DetectedFramework CreateFrameworkInfo(string name, string language, string detectionMethod, string detectedFile, string documentation, string[] usages)
    {
        return new DetectedFramework
        {
            Name = name,
            Language = language,
            DetectionMethod = detectionMethod,
            DetectedFiles = new List<string> { detectedFile },
            Documentation = documentation,
            CommonUsages = new List<string>(usages)
        };
    }

    // Reduces a raw parameter list to parameter names (the last whitespace-separated word of
    // each parameter, e.g. "final Map<String, Integer> counts" -> "counts").
    private List<string> ParseJavaParameters(string paramString)
    {
        if (string.IsNullOrWhiteSpace(paramString))
            return new List<string>();

        return SplitTopLevel(paramString)
            .Select(p => p.Split(Whitespace, StringSplitOptions.RemoveEmptyEntries).LastOrDefault())
            .Where(p => !string.IsNullOrEmpty(p))
            .ToList();
    }

    // Splits on commas that are not nested inside <>, [] or (), so "Map<String, Integer> m"
    // stays one parameter.
    private static IEnumerable<string> SplitTopLevel(string text)
    {
        var depth = 0;
        var start = 0;

        for (var i = 0; i < text.Length; i++)
        {
            switch (text[i])
            {
                case '<':
                case '[':
                case '(':
                    depth++;
                    break;
                case '>':
                case ']':
                case ')':
                    if (depth > 0)
                    {
                        depth--;
                    }
                    break;
                case ',' when depth == 0:
                    yield return text.Substring(start, i - start);
                    start = i + 1;
                    break;
            }
        }

        yield return text.Substring(start);
    }

    // True when a directory BETWEEN the project root and the file is an excluded one. Only whole
    // path segments below the root are compared, so "TargetFactory.java", "src/builders/" or a
    // project checked out under ".../build/app" are not excluded by accident.
    private static bool IsInExcludedDirectory(string projectPath, string filePath)
    {
        var relativeDirectory = Path.GetDirectoryName(Path.GetRelativePath(projectPath, filePath));
        if (string.IsNullOrEmpty(relativeDirectory))
        {
            return false;
        }

        return relativeDirectory
            .Split(new[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar }, StringSplitOptions.RemoveEmptyEntries)
            .Any(segment => ExcludedDirectories.Contains(segment));
    }
}
// Placeholder extractors for other languages

/// <summary>
/// Placeholder PHP extractor: it locates <c>*.php</c> files but extracts no methods and
/// detects no frameworks.
/// </summary>
public class PhpExtractor : LanguageExtractor
{
    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger passed to the base class.</param>
    public PhpExtractor(ILogger logger)
        : base(logger)
    {
    }

    /// <inheritdoc/>
    protected override string GetLanguageName()
    {
        return "php";
    }

    /// <summary>Finds every <c>*.php</c> file under <paramref name="projectPath"/>, recursively.</summary>
    public override List<string> GetRelevantFiles(string projectPath)
    {
        var phpFiles = Directory.GetFiles(projectPath, "*.php", SearchOption.AllDirectories);
        return phpFiles.ToList();
    }

    /// <summary>Not implemented for PHP; always returns an empty list.</summary>
    public override List<MethodExtraction> ExtractMethods(string content, string filePath)
    {
        return new List<MethodExtraction>();
    }

    /// <summary>Not implemented for PHP; always returns an empty list.</summary>
    public override List<DetectedFramework> DetectFrameworks(string projectPath, List<string> files)
    {
        return new List<DetectedFramework>();
    }
}
/// <summary>
/// Placeholder Ruby extractor: it locates <c>*.rb</c> files but extracts no methods and
/// detects no frameworks.
/// </summary>
public class RubyExtractor : LanguageExtractor
{
    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger passed to the base class.</param>
    public RubyExtractor(ILogger logger)
        : base(logger)
    {
    }

    /// <inheritdoc/>
    protected override string GetLanguageName()
    {
        return "ruby";
    }

    /// <summary>Finds every <c>*.rb</c> file under <paramref name="projectPath"/>, recursively.</summary>
    public override List<string> GetRelevantFiles(string projectPath)
    {
        var rubyFiles = Directory.GetFiles(projectPath, "*.rb", SearchOption.AllDirectories);
        return rubyFiles.ToList();
    }

    /// <summary>Not implemented for Ruby; always returns an empty list.</summary>
    public override List<MethodExtraction> ExtractMethods(string content, string filePath)
    {
        return new List<MethodExtraction>();
    }

    /// <summary>Not implemented for Ruby; always returns an empty list.</summary>
    public override List<DetectedFramework> DetectFrameworks(string projectPath, List<string> files)
    {
        return new List<DetectedFramework>();
    }
}
/// <summary>
/// Placeholder Go extractor: it locates <c>*.go</c> files but extracts no methods and
/// detects no frameworks.
/// </summary>
public class GoExtractor : LanguageExtractor
{
    /// <summary>Creates the extractor.</summary>
    /// <param name="logger">Logger passed to the base class.</param>
    public GoExtractor(ILogger logger)
        : base(logger)
    {
    }

    /// <inheritdoc/>
    protected override string GetLanguageName()
    {
        return "go";
    }

    /// <summary>Finds every <c>*.go</c> file under <paramref name="projectPath"/>, recursively.</summary>
    public override List<string> GetRelevantFiles(string projectPath)
    {
        var goFiles = Directory.GetFiles(projectPath, "*.go", SearchOption.AllDirectories);
        return goFiles.ToList();
    }

    /// <summary>Not implemented for Go; always returns an empty list.</summary>
    public override List<MethodExtraction> ExtractMethods(string content, string filePath)
    {
        return new List<MethodExtraction>();
    }

    /// <summary>Not implemented for Go; always returns an empty list.</summary>
    public override List<DetectedFramework> DetectFrameworks(string projectPath, List<string> files)
    {
        return new List<DetectedFramework>();
    }
}
/// <summary>
/// Helper methods shared by all extractors.
/// </summary>
public static class ExtractionHelpers
{
    /// <summary>
    /// Converts a character offset into a 1-based line number by counting the line feeds
    /// that precede it.
    /// </summary>
    /// <param name="content">Text the offset refers to.</param>
    /// <param name="index">Character offset, from 0 to <c>content.Length</c> inclusive.</param>
    /// <returns>1 plus the number of <c>'\n'</c> characters before <paramref name="index"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="index"/> is negative or greater than the length of <paramref name="content"/>.
    /// </exception>
    public static int GetLineNumber(string content, int index)
    {
        if (content is null)
        {
            throw new ArgumentNullException(nameof(content));
        }

        if (index < 0 || index > content.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(index), index,
                "Index must lie between 0 and the length of the content.");
        }

        // Count in place: this is called once per regex match, and copying the prefix into a new
        // string each time makes the total work quadratic in the file size.
        var line = 1;
        for (var i = 0; i < index; i++)
        {
            if (content[i] == '\n')
            {
                line++;
            }
        }

        return line;
    }
}
/// <summary>
/// Part of <see cref="LanguageExtractor"/> that gives derived extractors line-number calculation.
/// </summary>
public abstract partial class LanguageExtractor
{
    /// <summary>
    /// Returns the line number of the character at <paramref name="index"/> in
    /// <paramref name="content"/>, as computed by <see cref="ExtractionHelpers.GetLineNumber"/>.
    /// </summary>
    protected int GetLineNumber(string content, int index) =>
        ExtractionHelpers.GetLineNumber(content, index);
}
}