using System.Text; namespace MarketAlly.AIPlugin.Context.Search { /// /// Provides fuzzy string matching capabilities for search operations /// public class FuzzyMatcher { private readonly double _threshold; public FuzzyMatcher(double threshold = 0.7) { _threshold = threshold; } /// /// Calculates similarity between two strings using multiple algorithms /// public double CalculateSimilarity(string source, string target) { if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target)) return 0.0; // Normalize strings var normalizedSource = NormalizeString(source); var normalizedTarget = NormalizeString(target); // If strings are identical after normalization, return perfect match if (normalizedSource.Equals(normalizedTarget, StringComparison.OrdinalIgnoreCase)) return 1.0; // Combine multiple similarity algorithms for better accuracy var levenshteinRatio = CalculateLevenshteinRatio(normalizedSource, normalizedTarget); var jaroWinklerScore = CalculateJaroWinkler(normalizedSource, normalizedTarget); var tokenSetRatio = CalculateTokenSetRatio(normalizedSource, normalizedTarget); // Weighted combination of different algorithms var combinedScore = (levenshteinRatio * 0.4) + (jaroWinklerScore * 0.4) + (tokenSetRatio * 0.2); return Math.Min(1.0, combinedScore); } /// /// Checks if two strings are similar based on the configured threshold /// public bool AreSimilar(string source, string target) { return CalculateSimilarity(source, target) >= _threshold; } /// /// Finds the best match for a query in a collection of candidates /// public FuzzyMatchResult FindBestMatch(string query, IEnumerable candidates) { var bestMatch = new FuzzyMatchResult { Query = query }; foreach (var candidate in candidates) { var similarity = CalculateSimilarity(query, candidate); if (similarity > bestMatch.Score) { bestMatch.Match = candidate; bestMatch.Score = similarity; } } bestMatch.IsMatch = bestMatch.Score >= _threshold; return bestMatch; } /// /// Finds all matches above the threshold /// public List FindMatches(string query, IEnumerable candidates) { var matches = new List(); foreach (var candidate in candidates) { var similarity = CalculateSimilarity(query, candidate); if (similarity >= _threshold) { matches.Add(new FuzzyMatchResult { Query = query, Match = candidate, Score = similarity, IsMatch = true }); } } return matches.OrderByDescending(m => m.Score).ToList(); } /// /// Normalizes a string for better matching /// private string NormalizeString(string input) { if (string.IsNullOrEmpty(input)) return string.Empty; return input.ToLowerInvariant() .Trim() .Replace(" ", " ") // Remove double spaces .Replace("-", " ") .Replace("_", " "); } /// /// Calculates Levenshtein distance ratio (1 - distance/max_length) /// private double CalculateLevenshteinRatio(string source, string target) { var distance = CalculateLevenshteinDistance(source, target); var maxLength = Math.Max(source.Length, target.Length); if (maxLength == 0) return 1.0; return 1.0 - (double)distance / maxLength; } /// /// Calculates Levenshtein distance between two strings /// private int CalculateLevenshteinDistance(string source, string target) { if (string.IsNullOrEmpty(source)) return target?.Length ?? 0; if (string.IsNullOrEmpty(target)) return source.Length; var sourceLength = source.Length; var targetLength = target.Length; var matrix = new int[sourceLength + 1, targetLength + 1]; // Initialize first row and column for (int i = 0; i <= sourceLength; i++) matrix[i, 0] = i; for (int j = 0; j <= targetLength; j++) matrix[0, j] = j; // Calculate distances for (int i = 1; i <= sourceLength; i++) { for (int j = 1; j <= targetLength; j++) { var cost = source[i - 1] == target[j - 1] ? 0 : 1; matrix[i, j] = Math.Min( Math.Min(matrix[i - 1, j] + 1, matrix[i, j - 1] + 1), matrix[i - 1, j - 1] + cost); } } return matrix[sourceLength, targetLength]; } /// /// Calculates Jaro-Winkler similarity /// private double CalculateJaroWinkler(string source, string target) { var jaroScore = CalculateJaro(source, target); if (jaroScore < 0.7) // Jaro-Winkler only applies prefix scaling if Jaro > 0.7 return jaroScore; // Calculate common prefix (up to 4 characters) var prefix = 0; var minLength = Math.Min(source.Length, Math.Min(target.Length, 4)); for (int i = 0; i < minLength; i++) { if (source[i] == target[i]) prefix++; else break; } return jaroScore + (0.1 * prefix * (1 - jaroScore)); } /// /// Calculates Jaro similarity /// private double CalculateJaro(string source, string target) { if (source.Length == 0 && target.Length == 0) return 1.0; if (source.Length == 0 || target.Length == 0) return 0.0; var matchWindow = Math.Max(source.Length, target.Length) / 2 - 1; if (matchWindow < 0) matchWindow = 0; var sourceMatches = new bool[source.Length]; var targetMatches = new bool[target.Length]; var matches = 0; var transpositions = 0; // Find matches for (int i = 0; i < source.Length; i++) { var start = Math.Max(0, i - matchWindow); var end = Math.Min(i + matchWindow + 1, target.Length); for (int j = start; j < end; j++) { if (targetMatches[j] || source[i] != target[j]) continue; sourceMatches[i] = true; targetMatches[j] = true; matches++; break; } } if (matches == 0) return 0.0; // Count transpositions var k = 0; for (int i = 0; i < source.Length; i++) { if (!sourceMatches[i]) continue; while (!targetMatches[k]) k++; if (source[i] != target[k]) transpositions++; k++; } return (matches / (double)source.Length + matches / (double)target.Length + (matches - transpositions / 2.0) / matches) / 3.0; } /// /// Calculates token set ratio for better handling of word order differences /// private double CalculateTokenSetRatio(string source, string target) { var sourceTokens = new HashSet(source.Split(' ', StringSplitOptions.RemoveEmptyEntries)); var targetTokens = new HashSet(target.Split(' ', StringSplitOptions.RemoveEmptyEntries)); if (sourceTokens.Count == 0 && targetTokens.Count == 0) return 1.0; if (sourceTokens.Count == 0 || targetTokens.Count == 0) return 0.0; var intersection = sourceTokens.Intersect(targetTokens).Count(); var union = sourceTokens.Union(targetTokens).Count(); return (double)intersection / union; } } /// /// Result of a fuzzy matching operation /// public class FuzzyMatchResult { public string Query { get; set; } = ""; public string Match { get; set; } = ""; public double Score { get; set; } public bool IsMatch { get; set; } } }