using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using MarketAlly.AIPlugin.Context.Configuration;
using System.Text;

namespace MarketAlly.AIPlugin.Context.Search
{
    /// <summary>
    /// Enhanced search engine with semantic search and fuzzy matching capabilities
    /// </summary>
    public class EnhancedSearchEngine
    {
        // Delimiters used to break a query into individual terms.
        private static readonly Regex TermDelimiterRegex = new(@"[\s,;.!?]+", RegexOptions.Compiled);

        // Captures the text between a pair of double quotes.
        private static readonly Regex QuotedPhraseRegex = new(@"""([^""]+)""", RegexOptions.Compiled);

        // Built once; the set is only ever read after initialization.
        private static readonly HashSet<string> StopWords = new(StringComparer.Ordinal)
        {
            "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
            "is", "are", "was", "were", "be", "been", "have", "has", "had", "do", "does", "did",
            "will", "would", "could", "should", "may", "might", "can", "this", "that", "these", "those",
            "a", "an", "as", "if", "then", "than", "when", "where", "why", "how", "what", "who", "which"
        };

        private readonly ContextConfiguration _configuration;
        private readonly ILogger _logger;
        private readonly SemanticSearchEnhancer? _semanticSearch;
        private readonly FuzzyMatcher _fuzzyMatcher;

        /// <summary>
        /// Creates a search engine bound to the given configuration and logger.
        /// </summary>
        /// <param name="configuration">Search settings (thresholds, feature flags, result limit).</param>
        /// <param name="logger">Logger used for diagnostic output.</param>
        /// <param name="semanticSearch">Optional semantic scorer; when null the semantic component is skipped.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="configuration"/> or <paramref name="logger"/> is null.
        /// </exception>
        public EnhancedSearchEngine(
            ContextConfiguration configuration,
            ILogger logger,
            SemanticSearchEnhancer? semanticSearch = null)
        {
            _configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
            _logger = logger ?? throw new ArgumentNullException(nameof(logger));
            _semanticSearch = semanticSearch;
            _fuzzyMatcher = new FuzzyMatcher(configuration.Search.FuzzyMatchingThreshold);
        }

        /// <summary>
        /// Performs enhanced search with semantic understanding and fuzzy matching
        /// </summary>
        /// <param name="query">The user's search text.</param>
        /// <param name="entries">Candidate entries to score; enumerated exactly once.</param>
        /// <param name="cancellationToken">
        /// When cancellation is requested the scan stops early and the entries scored so far are returned.
        /// </param>
        /// <returns>
        /// The top-ranked matches (at most <c>Search.MaxSearchResults</c>), ordered by total relevance and
        /// then by timestamp, newest first.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="query"/> or <paramref name="entries"/> is null.
        /// </exception>
        public async Task<EnhancedSearchResults> SearchAsync(
            string query,
            IEnumerable<StoredContextEntry> entries,
            CancellationToken cancellationToken = default)
        {
            if (query is null)
            {
                throw new ArgumentNullException(nameof(query));
            }

            if (entries is null)
            {
                throw new ArgumentNullException(nameof(entries));
            }

            // Monotonic timer: unaffected by wall-clock adjustments during the search.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // Materialize once so a lazy source is not enumerated twice (count for the log + the scan).
            var entryList = entries as IReadOnlyCollection<StoredContextEntry> ?? entries.ToList();
            var queryTerms = ExtractSearchTerms(query);
            var matches = new List<EnhancedSearchResult>();

            _logger.LogDebug("Starting enhanced search for query: {Query} with {EntryCount} entries",
                query, entryList.Count);

            foreach (var entry in entryList)
            {
                // Best-effort cancellation: stop scanning and return what has been scored so far.
                if (cancellationToken.IsCancellationRequested)
                {
                    break;
                }

                var candidate = await AnalyzeEntryAsync(query, queryTerms, entry, cancellationToken);
                if (candidate.TotalRelevance > 0)
                {
                    matches.Add(candidate);
                }
            }

            // Capture the match count before the result list is truncated.
            var totalFound = matches.Count;

            // Sort by relevance score, newest first on ties, then apply the configured limit.
            var results = matches
                .OrderByDescending(r => r.TotalRelevance)
                .ThenByDescending(r => r.Entry.Timestamp)
                .Take(_configuration.Search.MaxSearchResults)
                .ToList();

            stopwatch.Stop();
            var searchDuration = stopwatch.Elapsed;

            _logger.LogInformation("Enhanced search completed in {Duration}ms, found {ResultCount} relevant entries",
                searchDuration.TotalMilliseconds, totalFound);

            return new EnhancedSearchResults
            {
                Query = query,
                Results = results,
                TotalFound = totalFound,
                SearchDuration = searchDuration,
                UsedSemanticSearch = _semanticSearch != null && _configuration.Search.EnableSemanticSearch,
                UsedFuzzyMatching = _configuration.Search.EnableFuzzyMatching
            };
        }

        /// <summary>
        /// Analyzes a single context entry for relevance to the search query
        /// </summary>
        /// <param name="query">The raw search text.</param>
        /// <param name="queryTerms">Terms extracted from <paramref name="query"/>.</param>
        /// <param name="entry">The entry being scored.</param>
        /// <param name="cancellationToken">Forwarded to the semantic scorer.</param>
        /// <returns>The entry together with its per-component scores and combined relevance.</returns>
        private async Task<EnhancedSearchResult> AnalyzeEntryAsync(
            string query,
            List<string> queryTerms,
            StoredContextEntry entry,
            CancellationToken cancellationToken)
        {
            var result = new EnhancedSearchResult
            {
                Entry = entry,
                MatchedTerms = new List<string>(),
                RelevanceScores = new RelevanceBreakdown()
            };
            var scores = result.RelevanceScores;

            // 1. Exact keyword matching
            scores.KeywordRelevance = CalculateKeywordRelevance(queryTerms, entry, result.MatchedTerms);

            // 2. Fuzzy matching (if enabled)
            if (_configuration.Search.EnableFuzzyMatching)
            {
                scores.FuzzyRelevance = CalculateFuzzyRelevance(query, entry, result.MatchedTerms);
            }

            // 3. Semantic similarity (if enabled and available)
            if (_configuration.Search.EnableSemanticSearch && _semanticSearch != null)
            {
                try
                {
                    scores.SemanticRelevance = await _semanticSearch.CalculateSemanticSimilarityAsync(
                        query, $"{entry.Summary} {entry.Content}", cancellationToken);
                }
                catch (Exception ex)
                {
                    // Deliberately best-effort: a failing semantic scorer must not fail the whole search.
                    // The semantic component simply stays at its default of 0 for this entry.
                    _logger.LogWarning(ex, "Failed to calculate semantic similarity for entry {EntryId}", entry.Id);
                }
            }

            // 4. Context-specific scoring
            scores.ContextRelevance = CalculateContextRelevance(query, entry);

            // 5. Recency boost
            scores.RecencyBoost = CalculateRecencyBoost(entry.Timestamp);

            // Calculate total relevance
            result.TotalRelevance = CalculateTotalRelevance(scores);

            return result;
        }

        /// <summary>
        /// Calculates keyword-based relevance score
        /// </summary>
        /// <param name="queryTerms">Terms to look for (case-insensitive substring match).</param>
        /// <param name="entry">Entry whose summary, tags and content are inspected.</param>
        /// <param name="matchedTerms">Receives each term that matched at least one field, without duplicates.</param>
        /// <returns>Sum of per-term scores: summary 3.0, tags 2.5, content 1.0; terms longer than 5 chars get x1.2.</returns>
        private double CalculateKeywordRelevance(
            List<string> queryTerms,
            StoredContextEntry entry,
            List<string> matchedTerms)
        {
            // Invariant lowercasing keeps matching independent of the current thread culture.
            var summaryLower = entry.Summary.ToLowerInvariant();
            var contentLower = entry.Content.ToLowerInvariant();
            var tagsLower = entry.Tags.Select(t => t.ToLowerInvariant()).ToList();

            var relevance = 0.0;

            foreach (var term in queryTerms)
            {
                var termLower = term.ToLowerInvariant();
                var termRelevance = 0.0;

                // Summary matches (highest weight)
                if (summaryLower.Contains(termLower))
                {
                    termRelevance += 3.0;
                }

                // Tag matches (high weight)
                if (tagsLower.Any(tag => tag.Contains(termLower)))
                {
                    termRelevance += 2.5;
                }

                // Content matches (medium weight)
                if (contentLower.Contains(termLower))
                {
                    termRelevance += 1.0;
                }

                // Any positive score means at least one field matched; record the term once.
                if (termRelevance > 0 && !matchedTerms.Contains(term))
                {
                    matchedTerms.Add(term);
                }

                // Boost for longer terms (more specific)
                if (term.Length > 5)
                {
                    termRelevance *= 1.2;
                }

                relevance += termRelevance;
            }

            return relevance;
        }

        /// <summary>
        /// Calculates fuzzy matching relevance score
        /// </summary>
        /// <param name="query">The raw search text compared against each field.</param>
        /// <param name="entry">Entry whose summary, tags and leading content words are compared.</param>
        /// <param name="matchedTerms">Not modified by this method; kept for signature parity with keyword scoring.</param>
        /// <returns>Weighted sum of similarity scores that exceed the configured fuzzy threshold.</returns>
        private double CalculateFuzzyRelevance(string query, StoredContextEntry entry, List<string> matchedTerms)
        {
            var threshold = _configuration.Search.FuzzyMatchingThreshold;
            var relevance = 0.0;

            // Fuzzy match against summary
            var summaryScore = _fuzzyMatcher.CalculateSimilarity(query, entry.Summary);
            if (summaryScore > threshold)
            {
                relevance += summaryScore * 2.0; // High weight for summary matches
            }

            // Fuzzy match against tags
            foreach (var tag in entry.Tags)
            {
                var tagScore = _fuzzyMatcher.CalculateSimilarity(query, tag);
                if (tagScore > threshold)
                {
                    relevance += tagScore * 1.5; // Medium-high weight for tag matches
                }
            }

            // Fuzzy match against content (but limit to prevent overwhelming)
            var contentWords = entry.Content
                .Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .Take(50); // Only check first 50 words

            foreach (var word in contentWords)
            {
                var wordScore = _fuzzyMatcher.CalculateSimilarity(query, word);
                if (wordScore > threshold)
                {
                    relevance += wordScore * 0.5; // Lower weight for content word matches
                }
            }

            return relevance;
        }

        /// <summary>
        /// Calculates context-specific relevance based on entry type and priority
        /// </summary>
        /// <param name="query">Currently unused by the scoring.</param>
        /// <param name="entry">Entry whose <c>Priority</c> and <c>Type</c> drive the score.</param>
        /// <returns>Priority score plus type score; unknown values score 1.0 each.</returns>
        private double CalculateContextRelevance(string query, StoredContextEntry entry)
        {
            // Priority-based scoring (invariant lowercasing so e.g. "HIGH" matches under any culture)
            var priorityScore = entry.Priority.ToLowerInvariant() switch
            {
                "critical" => 2.0,
                "high" => 1.5,
                "medium" => 1.0,
                "low" => 0.5,
                _ => 1.0
            };

            // Type-based scoring
            var typeScore = entry.Type.ToLowerInvariant() switch
            {
                "decision" => 1.5,      // Decisions are important
                "milestone" => 1.3,     // Milestones are significant
                "insight" => 1.2,       // Insights are valuable
                "codechange" => 1.0,    // Code changes are relevant
                "conversation" => 0.8,  // Conversations are less structured
                _ => 1.0
            };

            return priorityScore + typeScore;
        }

        /// <summary>
        /// Calculates recency boost based on entry timestamp
        /// </summary>
        /// <param name="timestamp">Entry timestamp; compared against <see cref="DateTime.UtcNow"/>.</param>
        /// <returns>A multiplier-style score between 0.6 (older than 90 days) and 1.5 (within one day).</returns>
        private double CalculateRecencyBoost(DateTime timestamp)
        {
            var daysSinceCreated = (DateTime.UtcNow - timestamp).TotalDays;

            // Recent entries get a boost
            return daysSinceCreated switch
            {
                <= 1 => 1.5,   // Last 24 hours
                <= 7 => 1.2,   // Last week
                <= 30 => 1.0,  // Last month
                <= 90 => 0.8,  // Last quarter
                _ => 0.6       // Older than 3 months
            };
        }

        /// <summary>
        /// Calculates total relevance score from all components
        /// </summary>
        /// <param name="scores">Per-component scores for one entry.</param>
        /// <returns>
        /// 0 when none of the keyword, fuzzy or semantic components is positive; otherwise the weighted sum
        /// of all five components.
        /// </returns>
        private double CalculateTotalRelevance(RelevanceBreakdown scores)
        {
            // Context and recency are always positive, so on their own they would make every entry
            // look relevant. They only count as boosts once the entry actually matched the query.
            var hasMatchSignal = scores.KeywordRelevance > 0
                || scores.FuzzyRelevance > 0
                || scores.SemanticRelevance > 0;

            if (!hasMatchSignal)
            {
                return 0.0;
            }

            var total = scores.KeywordRelevance * 0.4;   // 40% weight
            total += scores.FuzzyRelevance * 0.2;        // 20% weight
            total += scores.SemanticRelevance * 0.25;    // 25% weight
            total += scores.ContextRelevance * 0.1;      // 10% weight
            total += scores.RecencyBoost * 0.05;         // 5% weight

            return total;
        }

        /// <summary>
        /// Extracts meaningful terms from the search query
        /// </summary>
        /// <param name="query">The raw search text.</param>
        /// <returns>
        /// Distinct lower-cased terms: individual words (longer than 2 chars, not stop words), quoted
        /// phrases, and the whole trimmed query when it is longer than 5 chars.
        /// </returns>
        private List<string> ExtractSearchTerms(string query)
        {
            var queryLower = query.ToLowerInvariant();
            var terms = new List<string>();

            // Split by common delimiters and clean up
            var rawTerms = TermDelimiterRegex.Split(queryLower)
                .Select(t => t.Trim('"'))        // Drop quote characters left over from quoted phrases
                .Where(t => t.Length > 2)        // Ignore very short terms
                .Where(t => !IsStopWord(t));
            terms.AddRange(rawTerms);

            // Also add quoted phrases
            var quotedPhrases = QuotedPhraseRegex.Matches(query)
                .Cast<Match>()
                .Select(m => m.Groups[1].Value.ToLowerInvariant())
                .Where(p => p.Length > 2);
            terms.AddRange(quotedPhrases);

            // Add the full query for exact phrase matching (if long enough);
            // surrounding whitespace is removed so it cannot defeat the substring match.
            var fullPhrase = queryLower.Trim();
            if (fullPhrase.Length > 5)
            {
                terms.Add(fullPhrase);
            }

            return terms.Distinct().ToList();
        }

        /// <summary>
        /// Checks if a word is a common stop word
        /// </summary>
        /// <param name="word">Lower-cased word to test.</param>
        /// <returns>True when the word is in the stop-word list.</returns>
        private static bool IsStopWord(string word)
        {
            return StopWords.Contains(word);
        }
    }

    /// <summary>
    /// Enhanced search results with detailed scoring information
    /// </summary>
    public class EnhancedSearchResults
    {
        /// <summary>The query text that produced these results.</summary>
        public string Query { get; set; } = "";

        /// <summary>Ranked results, limited to the configured maximum.</summary>
        public List<EnhancedSearchResult> Results { get; set; } = new();

        /// <summary>Number of relevant entries found before the result limit was applied.</summary>
        public int TotalFound { get; set; }

        /// <summary>Elapsed time of the search.</summary>
        public TimeSpan SearchDuration { get; set; }

        /// <summary>True when semantic search was enabled and a semantic scorer was available.</summary>
        public bool UsedSemanticSearch { get; set; }

        /// <summary>True when fuzzy matching was enabled in configuration.</summary>
        public bool UsedFuzzyMatching { get; set; }
    }

    /// <summary>
    /// Individual search result with detailed relevance scoring
    /// </summary>
    public class EnhancedSearchResult
    {
        /// <summary>The entry this result refers to.</summary>
        public StoredContextEntry Entry { get; set; } = new();

        /// <summary>Query terms that matched the entry's summary, tags or content.</summary>
        public List<string> MatchedTerms { get; set; } = new();

        /// <summary>Per-component scores that make up <see cref="TotalRelevance"/>.</summary>
        public RelevanceBreakdown RelevanceScores { get; set; } = new();

        /// <summary>Combined weighted relevance; 0 means the entry did not match the query.</summary>
        public double TotalRelevance { get; set; }
    }

    /// <summary>
    /// Breakdown of relevance scoring components
    /// </summary>
    public class RelevanceBreakdown
    {
        /// <summary>Score from exact (substring) keyword matches.</summary>
        public double KeywordRelevance { get; set; }

        /// <summary>Score from fuzzy similarity matches.</summary>
        public double FuzzyRelevance { get; set; }

        /// <summary>Score from the semantic scorer, when one is used.</summary>
        public double SemanticRelevance { get; set; }

        /// <summary>Score derived from the entry's priority and type.</summary>
        public double ContextRelevance { get; set; }

        /// <summary>Score derived from the entry's age.</summary>
        public double RecencyBoost { get; set; }
    }
}