using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using MarketAlly.AIPlugin.Context.Configuration;
using System.Text;
namespace MarketAlly.AIPlugin.Context.Search
{
/// <summary>
/// Enhanced search engine with semantic search and fuzzy matching capabilities.
/// </summary>
public class EnhancedSearchEngine
{
// Search settings (feature toggles, fuzzy threshold, result cap) read during every search.
private readonly ContextConfiguration _configuration;
private readonly ILogger _logger;
// Optional: semantic scoring is skipped when no enhancer was supplied.
private readonly SemanticSearchEnhancer? _semanticSearch;
private readonly FuzzyMatcher _fuzzyMatcher;

/// <summary>
/// Creates a search engine bound to the given configuration and logger.
/// </summary>
/// <param name="configuration">Source of the search settings; its fuzzy matching threshold is also passed to the internal <see cref="FuzzyMatcher"/>.</param>
/// <param name="logger">Receives debug, information and warning messages emitted while searching.</param>
/// <param name="semanticSearch">Optional semantic similarity provider; when null, semantic scoring is never attempted.</param>
/// <exception cref="ArgumentNullException"><paramref name="configuration"/> or <paramref name="logger"/> is null.</exception>
public EnhancedSearchEngine(
    ContextConfiguration configuration,
    ILogger logger,
    SemanticSearchEnhancer? semanticSearch = null)
{
    // Fail fast with the offending parameter name instead of a NullReferenceException later on.
    _configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _semanticSearch = semanticSearch;
    _fuzzyMatcher = new FuzzyMatcher(_configuration.Search.FuzzyMatchingThreshold);
}
/// <summary>
/// Scores every entry against the query and returns the best matches, most relevant first.
/// </summary>
/// <remarks>
/// An entry is kept only when at least one query-dependent score (keyword, fuzzy or semantic)
/// is positive. The context and recency scores are positive for every entry, so on their own
/// they only influence ranking and must not make an unrelated entry count as a match.
/// </remarks>
/// <param name="query">Free-text search query.</param>
/// <param name="entries">Candidate entries; the sequence is enumerated once.</param>
/// <param name="cancellationToken">When signalled, the scoring loop stops and the entries scored so far are ranked and returned.</param>
/// <returns>The ranked results (capped at the configured maximum), the elapsed time and which optional features were active.</returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> or <paramref name="entries"/> is null.</exception>
public async Task<EnhancedSearchResults> SearchAsync(
    string query,
    IEnumerable<StoredContextEntry> entries,
    CancellationToken cancellationToken = default)
{
    if (query is null)
    {
        throw new ArgumentNullException(nameof(query));
    }

    if (entries is null)
    {
        throw new ArgumentNullException(nameof(entries));
    }

    // Stopwatch is monotonic; subtracting wall-clock DateTime values can go wrong when the system clock is adjusted.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Materialize once so a lazy sequence is not enumerated twice (once for the count, once for the scan).
    var candidates = entries as IReadOnlyCollection<StoredContextEntry> ?? entries.ToList();
    var queryTerms = ExtractSearchTerms(query);

    _logger.LogDebug("Starting enhanced search for query: {Query} with {EntryCount} entries",
        query, candidates.Count);

    var matches = new List<EnhancedSearchResult>();
    foreach (var entry in candidates)
    {
        // Cancellation ends the scan quietly; whatever has been scored so far is still returned.
        if (cancellationToken.IsCancellationRequested)
        {
            break;
        }

        var searchResult = await AnalyzeEntryAsync(query, queryTerms, entry, cancellationToken);

        var scores = searchResult.RelevanceScores;
        var matchesQuery = scores.KeywordRelevance > 0
            || scores.FuzzyRelevance > 0
            || scores.SemanticRelevance > 0;

        if (matchesQuery && searchResult.TotalRelevance > 0)
        {
            matches.Add(searchResult);
        }
    }

    // Highest relevance first; newer entries win ties.
    var results = matches
        .OrderByDescending(r => r.TotalRelevance)
        .ThenByDescending(r => r.Entry.Timestamp)
        .Take(_configuration.Search.MaxSearchResults)
        .ToList();

    stopwatch.Stop();
    var searchDuration = stopwatch.Elapsed;

    _logger.LogInformation("Enhanced search completed in {Duration}ms, found {ResultCount} relevant entries",
        searchDuration.TotalMilliseconds, results.Count);

    return new EnhancedSearchResults
    {
        Query = query,
        Results = results,
        TotalFound = results.Count,
        SearchDuration = searchDuration,
        UsedSemanticSearch = _semanticSearch != null && _configuration.Search.EnableSemanticSearch,
        UsedFuzzyMatching = _configuration.Search.EnableFuzzyMatching
    };
}
/// <summary>
/// Scores a single context entry against the search query.
/// </summary>
/// <remarks>
/// Keyword, context and recency scores are always computed. Fuzzy and semantic scores are
/// computed only when enabled in the configuration (semantic also requires an enhancer).
/// A failing semantic calculation is logged and leaves the semantic score at its default of 0.
/// </remarks>
/// <param name="query">The raw query text, used for fuzzy, semantic and context scoring.</param>
/// <param name="queryTerms">Terms extracted from the query, used for keyword scoring.</param>
/// <param name="entry">The entry being scored.</param>
/// <param name="cancellationToken">Forwarded to the semantic similarity calculation.</param>
/// <returns>The entry together with its matched terms, per-component scores and total relevance.</returns>
private async Task<EnhancedSearchResult> AnalyzeEntryAsync(
    string query,
    List<string> queryTerms,
    StoredContextEntry entry,
    CancellationToken cancellationToken)
{
    var matchedTerms = new List<string>();
    var scores = new RelevanceBreakdown();

    // 1. Exact keyword matching (also records which terms matched).
    scores.KeywordRelevance = CalculateKeywordRelevance(queryTerms, entry, matchedTerms);

    // 2. Fuzzy matching (if enabled).
    if (_configuration.Search.EnableFuzzyMatching)
    {
        scores.FuzzyRelevance = CalculateFuzzyRelevance(query, entry, matchedTerms);
    }

    // 3. Semantic similarity (if enabled and available).
    if (_configuration.Search.EnableSemanticSearch && _semanticSearch is not null)
    {
        try
        {
            scores.SemanticRelevance = await _semanticSearch.CalculateSemanticSimilarityAsync(
                query, $"{entry.Summary} {entry.Content}", cancellationToken);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // A requested cancellation is not a scoring failure, so it is not reported as a warning.
            // The entry simply keeps a semantic score of 0, exactly as for any other failure.
            _logger.LogDebug("Semantic similarity calculation cancelled for entry {EntryId}", entry.Id);
        }
        catch (Exception ex)
        {
            // Best effort: the remaining score components still produce a usable result.
            _logger.LogWarning(ex, "Failed to calculate semantic similarity for entry {EntryId}", entry.Id);
        }
    }

    // 4. Context-specific scoring and 5. recency boost.
    scores.ContextRelevance = CalculateContextRelevance(query, entry);
    scores.RecencyBoost = CalculateRecencyBoost(entry.Timestamp);

    return new EnhancedSearchResult
    {
        Entry = entry,
        MatchedTerms = matchedTerms,
        RelevanceScores = scores,
        TotalRelevance = CalculateTotalRelevance(scores)
    };
}
/// <summary>
/// Calculates the keyword relevance of an entry by looking for each query term in its
/// summary, tags and content.
/// </summary>
/// <remarks>
/// Per term: a summary hit adds 3.0, a tag hit adds 2.5 and a content hit adds 1.0; the
/// term's subtotal is multiplied by 1.2 when the term is longer than five characters.
/// Matching is a case-insensitive ordinal substring test, so it does not depend on the
/// current culture and does not allocate lowered copies of the entry text.
/// </remarks>
/// <param name="queryTerms">Terms to look for.</param>
/// <param name="entry">Entry whose summary, tags and content are searched.</param>
/// <param name="matchedTerms">Receives every term that matched at least once (no duplicates are added).</param>
/// <returns>The sum of all per-term scores; 0 when nothing matched.</returns>
private double CalculateKeywordRelevance(List<string> queryTerms, StoredContextEntry entry, List<string> matchedTerms)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;
    var relevance = 0.0;

    foreach (var term in queryTerms)
    {
        var termRelevance = 0.0;

        // Summary matches (highest weight)
        if (entry.Summary.Contains(term, IgnoreCase))
        {
            termRelevance += 3.0;
        }

        // Tag matches (high weight)
        if (entry.Tags.Any(tag => tag.Contains(term, IgnoreCase)))
        {
            termRelevance += 2.5;
        }

        // Content matches (medium weight)
        if (entry.Content.Contains(term, IgnoreCase))
        {
            termRelevance += 1.0;
        }

        if (termRelevance <= 0)
        {
            continue; // Term not found anywhere in this entry.
        }

        if (!matchedTerms.Contains(term))
        {
            matchedTerms.Add(term);
        }

        // Boost for longer terms (more specific)
        if (term.Length > 5)
        {
            termRelevance *= 1.2;
        }

        relevance += termRelevance;
    }

    return relevance;
}
/// <summary>
/// Calculates a fuzzy matching relevance score by comparing the whole query with the
/// entry's summary, each tag and the first 50 words of its content.
/// </summary>
/// <remarks>
/// A similarity only counts when it exceeds the configured fuzzy matching threshold.
/// Counted similarities are weighted 2.0 (summary), 1.5 (tag) and 0.5 (content word).
/// Content is split on any white space, so words separated by newlines or tabs are
/// compared individually rather than as one fused token.
/// </remarks>
/// <param name="query">The raw query text.</param>
/// <param name="entry">Entry to compare against.</param>
/// <param name="matchedTerms">Not modified by this method.</param>
/// <returns>The weighted sum of all similarities above the threshold; 0 when there are none.</returns>
private double CalculateFuzzyRelevance(string query, StoredContextEntry entry, List<string> matchedTerms)
{
    var threshold = _configuration.Search.FuzzyMatchingThreshold;
    var relevance = 0.0;

    // Fuzzy match against summary
    var summaryScore = _fuzzyMatcher.CalculateSimilarity(query, entry.Summary);
    if (summaryScore > threshold)
    {
        relevance += summaryScore * 2.0; // High weight for summary matches
    }

    // Fuzzy match against tags
    foreach (var tag in entry.Tags)
    {
        var tagScore = _fuzzyMatcher.CalculateSimilarity(query, tag);
        if (tagScore > threshold)
        {
            relevance += tagScore * 1.5; // Medium-high weight for tag matches
        }
    }

    // Fuzzy match against content, limited to the first 50 words to bound the cost per entry.
    // A null separator array makes Split treat every white-space character as a delimiter.
    var contentWords = entry.Content
        .Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries)
        .Take(50);

    foreach (var word in contentWords)
    {
        var wordScore = _fuzzyMatcher.CalculateSimilarity(query, word);
        if (wordScore > threshold)
        {
            relevance += wordScore * 0.5; // Lower weight for content word matches
        }
    }

    return relevance;
}
/// <summary>
/// Calculates a query-independent relevance score from the entry's priority and type.
/// </summary>
/// <remarks>
/// Priority and type are compared case-insensitively using the invariant culture, so the
/// result does not change with the current culture (for example Turkish casing of "I").
/// Unknown or missing values score 1.0 each.
/// </remarks>
/// <param name="query">Not used by this method.</param>
/// <param name="entry">Entry whose priority and type are scored.</param>
/// <returns>The priority score plus the type score.</returns>
private double CalculateContextRelevance(string query, StoredContextEntry entry)
{
    // Priority-based scoring
    var priorityScore = entry.Priority?.ToLowerInvariant() switch
    {
        "critical" => 2.0,
        "high" => 1.5,
        "medium" => 1.0,
        "low" => 0.5,
        _ => 1.0
    };

    // Type-based scoring
    var typeScore = entry.Type?.ToLowerInvariant() switch
    {
        "decision" => 1.5,     // Decisions are important
        "milestone" => 1.3,    // Milestones are significant
        "insight" => 1.2,      // Insights are valuable
        "codechange" => 1.0,   // Code changes are relevant
        "conversation" => 0.8, // Conversations are less structured
        _ => 1.0
    };

    return priorityScore + typeScore;
}
/// <summary>
/// Calculates a recency boost from the age of an entry.
/// </summary>
/// <remarks>
/// Age is measured against the current UTC time. A timestamp whose kind is
/// <see cref="DateTimeKind.Local"/> is converted to UTC first; UTC and unspecified
/// timestamps are used as they are. Timestamps in the future fall into the first band.
/// </remarks>
/// <param name="timestamp">When the entry was created.</param>
/// <returns>1.5 for up to one day old, 1.2 up to 7 days, 1.0 up to 30 days, 0.8 up to 90 days, otherwise 0.6.</returns>
private double CalculateRecencyBoost(DateTime timestamp)
{
    // Subtracting a local time from UtcNow would skew the age by the machine's UTC offset.
    var utcTimestamp = timestamp.Kind == DateTimeKind.Local
        ? timestamp.ToUniversalTime()
        : timestamp;

    var ageInDays = (DateTime.UtcNow - utcTimestamp).TotalDays;

    // Recent entries get a boost
    return ageInDays switch
    {
        <= 1 => 1.5,  // Last 24 hours
        <= 7 => 1.2,  // Last week
        <= 30 => 1.0, // Last month
        <= 90 => 0.8, // Last quarter
        _ => 0.6      // Older than 3 months
    };
}
/// <summary>
/// Combines the individual score components into one weighted relevance value.
/// </summary>
/// <param name="scores">The per-component scores to combine.</param>
/// <returns>The weighted sum of the keyword, fuzzy, semantic, context and recency scores.</returns>
private double CalculateTotalRelevance(RelevanceBreakdown scores)
{
    const double KeywordWeight = 0.4;   // 40%
    const double FuzzyWeight = 0.2;     // 20%
    const double SemanticWeight = 0.25; // 25%
    const double ContextWeight = 0.1;   // 10%
    const double RecencyWeight = 0.05;  // 5%

    // Terms are added left to right, in the same order as the component list above.
    return scores.KeywordRelevance * KeywordWeight
        + scores.FuzzyRelevance * FuzzyWeight
        + scores.SemanticRelevance * SemanticWeight
        + scores.ContextRelevance * ContextWeight
        + scores.RecencyBoost * RecencyWeight;
}
/// <summary>
/// Extracts the terms to search for from a query.
/// </summary>
/// <remarks>
/// The result contains, in this order and without duplicates: the individual words
/// (longer than two characters, not stop words), every double-quoted phrase longer than two
/// characters, and the whole trimmed query when it is longer than five characters.
/// All terms are lowercased with the invariant culture. Double quotes act as word
/// delimiters, so a quoted phrase never leaks quote characters into the word terms.
/// </remarks>
/// <param name="query">The raw query text.</param>
/// <returns>The extracted terms; empty when the query is null or blank.</returns>
private List<string> ExtractSearchTerms(string query)
{
    if (string.IsNullOrWhiteSpace(query))
    {
        return new List<string>();
    }

    var normalized = query.Trim().ToLowerInvariant();

    // Split by common delimiters (white space, punctuation, double quotes) and clean up.
    var words = Regex.Split(normalized, @"[\s,;.!?""]+")
        .Where(t => t.Length > 2)     // Ignore very short terms
        .Where(t => !IsStopWord(t));

    // Also add quoted phrases as single terms.
    var quotedPhrases = Regex.Matches(normalized, @"""([^""]+)""")
        .Cast<Match>()
        .Select(m => m.Groups[1].Value)
        .Where(p => p.Length > 2);

    var terms = new List<string>();
    terms.AddRange(words);
    terms.AddRange(quotedPhrases);

    // Add the full query for exact phrase matching (if long enough).
    if (normalized.Length > 5)
    {
        terms.Add(normalized);
    }

    return terms.Distinct().ToList();
}
// Built once and shared: the set never changes, so there is no reason to rebuild it per lookup.
private static readonly HashSet<string> StopWords = new HashSet<string>(StringComparer.Ordinal)
{
    "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
    "is", "are", "was", "were", "be", "been", "have", "has", "had", "do", "does", "did",
    "will", "would", "could", "should", "may", "might", "can", "this", "that", "these", "those",
    "a", "an", "as", "if", "then", "than", "when", "where", "why", "how", "what", "who", "which"
};

/// <summary>
/// Checks whether a word is a common stop word.
/// </summary>
/// <param name="word">The word to check; the comparison is case-sensitive, and every stop word in the set is lowercase.</param>
/// <returns>True when the word is in the stop word set; otherwise false.</returns>
private bool IsStopWord(string word)
{
    return StopWords.Contains(word);
}
}
/// <summary>
/// Outcome of one enhanced search: the ranked results plus information about how the search ran.
/// </summary>
public class EnhancedSearchResults
{
    /// <summary>The query text the search was run with.</summary>
    public string Query { get; set; } = "";

    /// <summary>The individual results with their scoring details.</summary>
    public List<EnhancedSearchResult> Results { get; set; } = new();

    /// <summary>Number of entries found.</summary>
    public int TotalFound { get; set; }

    /// <summary>How long the search took.</summary>
    public TimeSpan SearchDuration { get; set; }

    /// <summary>Whether semantic search was used for this search.</summary>
    public bool UsedSemanticSearch { get; set; }

    /// <summary>Whether fuzzy matching was used for this search.</summary>
    public bool UsedFuzzyMatching { get; set; }
}
/// <summary>
/// A single search result: one entry together with its detailed relevance scoring.
/// </summary>
public class EnhancedSearchResult
{
    /// <summary>The context entry this result refers to.</summary>
    public StoredContextEntry Entry { get; set; } = new();

    /// <summary>The query terms that matched this entry.</summary>
    public List<string> MatchedTerms { get; set; } = new();

    /// <summary>The individual score components behind <see cref="TotalRelevance"/>.</summary>
    public RelevanceBreakdown RelevanceScores { get; set; } = new();

    /// <summary>The combined relevance score of the entry.</summary>
    public double TotalRelevance { get; set; }
}
/// <summary>
/// Breakdown of relevance scoring components.
/// Every component defaults to 0 until a value is assigned.
/// </summary>
public class RelevanceBreakdown
{
/// <summary>Score from exact keyword matching.</summary>
public double KeywordRelevance { get; set; }
/// <summary>Score from fuzzy matching.</summary>
public double FuzzyRelevance { get; set; }
/// <summary>Score from semantic similarity.</summary>
public double SemanticRelevance { get; set; }
/// <summary>Score derived from the entry's context, independent of the query text.</summary>
public double ContextRelevance { get; set; }
/// <summary>Boost derived from the entry's age.</summary>
public double RecencyBoost { get; set; }
}
}