// MarketAlly.AIPlugin.Extensions/MarketAlly.AIPlugin.Refacto.../Security/InputSanitizer.cs
//
// 431 lines
// 15 KiB
// C#
// Executable File

using System;
using System.Collections.Generic;
using System.Linq;
using System.Security;
using System.Text;
using System.Text.RegularExpressions;
namespace MarketAlly.AIPlugin.Refactoring.Security
{
/// <summary>
/// Static helpers that validate or neutralize untrusted text before it is used as a
/// file name, web output, SQL fragment, shell argument, path, e-mail address, number,
/// free-form content, or identifier.
/// </summary>
/// <remarks>
/// These helpers are pattern based. For SQL and process invocation, parameterized
/// queries and argument lists remain the primary defence; the checks here are an
/// additional filter only.
/// </remarks>
public static class InputSanitizer
{
    // Length limits applied by the individual sanitizers.
    private const int MaxFileNameLength = 200;
    private const int MaxEmailLength = 254; // RFC 5321 limit
    private const int MaxIdentifierLength = 50;

    // How many layers of percent-encoding SanitizePath peels off while looking for traversal.
    private const int MaxPathDecodePasses = 3;

    // Compiled regex patterns, built once for the lifetime of the process.
    private static readonly Regex UnsafeCharacters =
        new(@"[<>:""|?*\x00-\x1f]", RegexOptions.Compiled);

    private static readonly Regex HtmlTags =
        new(@"<[^>]*>", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    private static readonly Regex ScriptTags =
        new(@"<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

    private static readonly Regex SqlInjectionPatterns =
        new(@"(\b(select|insert|update|delete|drop|create|alter|exec|execute|sp_|xp_)\b)|(')|(--)|(\/\*)|(\*\/)",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

    private static readonly Regex CommandInjectionPatterns =
        new(@"[;&|`$(){}[\]\\]|(&&)|(\|\|)|(>>)|(<<)", RegexOptions.Compiled);

    private static readonly Regex PathTraversalPatterns =
        new(@"(\.\.[\\/])|(%2e%2e[\\/])|(%252e%252e[\\/])",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

    private static readonly Regex EmailPattern =
        new(@"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", RegexOptions.Compiled);

    // Control characters except tab (\x09), line feed (\x0A) and carriage return (\x0D).
    private static readonly Regex ControlCharacters =
        new(@"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", RegexOptions.Compiled);

    private static readonly Regex NonIdentifierCharacters =
        new(@"[^a-zA-Z0-9_]", RegexOptions.Compiled);

    private static readonly Regex RepeatedUnderscores =
        new(@"_{2,}", RegexOptions.Compiled);

    // Matches a complete "<...>" run, or a lone '<' that has no closing '>' after it.
    private static readonly Regex TagOrAngleBracket =
        new(@"<[^>]*>|<", RegexOptions.Compiled);

    // Extracts the optional closing slash and the element name from a "<...>" run.
    private static readonly Regex TagName =
        new(@"^<(/?)([a-zA-Z][a-zA-Z0-9]*)(?=[\s/>])", RegexOptions.Compiled);

    // Elements that survive SanitizeForWeb when allowHtml is true (attributes are always dropped).
    private static readonly HashSet<string> SafeHtmlTags = new(StringComparer.OrdinalIgnoreCase)
    {
        "b", "i", "u", "em", "strong", "p", "br", "ul", "ol", "li"
    };

    private static readonly string[] DangerousKeywords = new[]
    {
        "javascript:", "vbscript:", "onload", "onerror", "onclick", "onmouseover",
        "eval", "expression", "url(", "import", "@import", "behavior:",
        "binding:", "-moz-binding", "data:", "filesystem:", "ms-its:"
    };

    private static readonly string[] SqlKeywords = new[]
    {
        "union", "select", "insert", "update", "delete", "drop", "create",
        "alter", "exec", "execute", "sp_", "xp_", "waitfor", "cast",
        "convert", "ascii", "char", "nchar", "varchar", "nvarchar"
    };

    /// <summary>
    /// Sanitizes file names by replacing dangerous characters and limiting the length.
    /// </summary>
    /// <param name="fileName">The file name to sanitize</param>
    /// <returns>
    /// A sanitized file name of at most 200 characters; <c>"default_file"</c> when the input
    /// is null/blank and <c>"sanitized_file"</c> when nothing usable remains.
    /// </returns>
    public static string SanitizeFileName(string fileName)
    {
        if (string.IsNullOrWhiteSpace(fileName))
        {
            return "default_file";
        }

        // Replace unsafe characters and both path separators, then strip leading/trailing dots and spaces.
        var sanitized = UnsafeCharacters.Replace(fileName, "_")
            .Replace('/', '_')
            .Replace('\\', '_')
            .Trim('.', ' ');

        if (string.IsNullOrWhiteSpace(sanitized))
        {
            return "sanitized_file";
        }

        if (sanitized.Length > MaxFileNameLength)
        {
            var extension = System.IO.Path.GetExtension(sanitized);
            if (extension.Length >= MaxFileNameLength)
            {
                // The "extension" alone fills the budget, so there is no room to keep it whole;
                // computing MaxFileNameLength - extension.Length would go to zero or negative.
                // Fall back to a plain prefix cut and drop any dot/space it leaves at the end.
                sanitized = sanitized.Substring(0, MaxFileNameLength).TrimEnd('.', ' ');
            }
            else
            {
                // Shorten the stem only, so the extension survives truncation.
                var stem = System.IO.Path.GetFileNameWithoutExtension(sanitized);
                sanitized = stem.Substring(0, MaxFileNameLength - extension.Length) + extension;
            }
        }

        return sanitized;
    }

    /// <summary>
    /// Sanitizes user input to prevent XSS attacks
    /// </summary>
    /// <param name="input">The input string to sanitize</param>
    /// <param name="allowHtml">
    /// When <c>false</c>, every tag is removed and the remainder is HTML-encoded.
    /// When <c>true</c>, only the elements b, i, u, em, strong, p, br, ul, ol and li are kept,
    /// with all of their attributes removed; other tags are dropped and any other
    /// <c>&lt;</c> is encoded.
    /// </param>
    /// <returns>Sanitized string; empty when <paramref name="input"/> is null or empty</returns>
    public static string SanitizeForWeb(string input, bool allowHtml = false)
    {
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        // Remove complete <script>...</script> elements first so their bodies do not survive as text.
        var sanitized = ScriptTags.Replace(input, string.Empty);

        // Neutralize dangerous keywords wherever they occur (case-insensitive).
        foreach (var keyword in DangerousKeywords)
        {
            sanitized = sanitized.Replace(keyword, "[REMOVED]", StringComparison.OrdinalIgnoreCase);
        }

        if (allowHtml)
        {
            // Whitelist pass: afterwards the only '<' left in the text belongs to a rebuilt safe tag.
            return TagOrAngleBracket.Replace(sanitized, RebuildOrDropTag);
        }

        // Remove all HTML tags, then encode remaining special characters.
        sanitized = HtmlTags.Replace(sanitized, string.Empty);
        return System.Net.WebUtility.HtmlEncode(sanitized);
    }

    // Maps one TagOrAngleBracket match to its safe replacement for the allowHtml whitelist pass.
    private static string RebuildOrDropTag(Match match)
    {
        if (match.Length == 1)
        {
            // A lone '<' with no closing '>': encode it so later markup cannot complete it into a tag.
            return "&lt;";
        }

        var tag = TagName.Match(match.Value);
        if (!tag.Success)
        {
            // Not an element (e.g. "a < b and c > d", comments): keep it, but as inert text.
            return System.Net.WebUtility.HtmlEncode(match.Value);
        }

        var name = tag.Groups[2].Value;
        if (!SafeHtmlTags.Contains(name))
        {
            return string.Empty;
        }

        // Re-emit the bare tag; every attribute (event handlers, style, ...) is discarded.
        return "<" + tag.Groups[1].Value + name.ToLowerInvariant() + ">";
    }

    /// <summary>
    /// Validates input intended for a SQL context and rejects anything that looks like injection.
    /// </summary>
    /// <remarks>
    /// Keywords are matched as case-insensitive substrings, so ordinary words that contain one
    /// (for example "character" contains "char") are rejected as well.
    /// </remarks>
    /// <param name="input">The input string to validate</param>
    /// <returns>The input unchanged when it passes every check; empty for null or empty input</returns>
    /// <exception cref="SecurityException">
    /// The input contains a quote, a comment marker, or a restricted SQL keyword.
    /// </exception>
    public static string SanitizeForSql(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        if (SqlInjectionPatterns.IsMatch(input))
        {
            throw new SecurityException("Input contains potential SQL injection patterns");
        }

        foreach (var keyword in SqlKeywords)
        {
            if (input.Contains(keyword, StringComparison.OrdinalIgnoreCase))
            {
                throw new SecurityException($"Input contains restricted SQL keyword: {keyword}");
            }
        }

        // Any single quote is already rejected by the pattern above, so accepted input
        // contains nothing that would need quote doubling.
        return input;
    }

    /// <summary>
    /// Validates input intended for a command context and rejects shell metacharacters.
    /// </summary>
    /// <param name="input">The input string to validate</param>
    /// <returns>The input unchanged when it passes the check; empty for null or empty input</returns>
    /// <exception cref="SecurityException">
    /// The input contains any of <c>; &amp; | ` $ ( ) { } [ ] \</c> or the sequences
    /// <c>&gt;&gt;</c> / <c>&lt;&lt;</c>.
    /// </exception>
    public static string SanitizeForCommand(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        if (CommandInjectionPatterns.IsMatch(input))
        {
            throw new SecurityException("Input contains potential command injection patterns");
        }

        // Every metacharacter in the pattern causes rejection, so accepted input needs no escaping.
        return input;
    }

    /// <summary>
    /// Sanitizes paths to prevent path traversal attacks
    /// </summary>
    /// <param name="path">The path string to sanitize</param>
    /// <returns>
    /// The path with remaining <c>..</c>, <c>./</c> and <c>.\</c> sequences replaced and
    /// backslashes normalized to forward slashes; empty for null or empty input
    /// </returns>
    /// <exception cref="SecurityException">
    /// The path, either as given or after percent-decoding, contains a parent-directory step.
    /// </exception>
    public static string SanitizePath(string path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return string.Empty;
        }

        if (ContainsTraversal(path))
        {
            throw new SecurityException("Path contains potential traversal patterns");
        }

        // Neutralize leftover dot components, then normalize path separators.
        return path
            .Replace("..", "_")
            .Replace("./", "_/")
            .Replace(".\\", "_\\")
            .Replace('\\', '/');
    }

    // Checks the path and up to MaxPathDecodePasses percent-decoded forms of it, so that
    // encoded separators such as "..%2f" or "%2e%2e%5c" are caught as well.
    private static bool ContainsTraversal(string path)
    {
        var candidate = path;
        for (var pass = 0; ; pass++)
        {
            if (PathTraversalPatterns.IsMatch(candidate))
            {
                return true;
            }

            if (pass == MaxPathDecodePasses)
            {
                return false;
            }

            var decoded = Uri.UnescapeDataString(candidate);
            if (string.Equals(decoded, candidate, StringComparison.Ordinal))
            {
                // Nothing left to decode.
                return false;
            }

            candidate = decoded;
        }
    }

    /// <summary>
    /// Validates and sanitizes email addresses
    /// </summary>
    /// <param name="email">The email address to validate</param>
    /// <returns>The trimmed, lower-cased address, or an empty string if it is invalid</returns>
    public static string SanitizeEmail(string email)
    {
        if (string.IsNullOrWhiteSpace(email))
        {
            return string.Empty;
        }

        var trimmed = email.Trim();

        // Cheap length check first; over-long addresses are invalid regardless of shape.
        if (trimmed.Length > MaxEmailLength || !EmailPattern.IsMatch(trimmed))
        {
            return string.Empty;
        }

        return trimmed.ToLowerInvariant();
    }

    /// <summary>
    /// Sanitizes numeric input
    /// </summary>
    /// <param name="input">The input string containing numeric data</param>
    /// <param name="allowDecimals">Whether to allow a decimal point (only the first one is kept)</param>
    /// <param name="allowNegative">
    /// Whether to allow a negative sign; the result is negative only when a minus sign
    /// precedes every digit and decimal point that is kept
    /// </param>
    /// <returns>
    /// The digits of the input with at most one leading minus sign and one decimal point;
    /// <c>"0"</c> when the input is null/empty or contains no digits
    /// </returns>
    public static string SanitizeNumeric(string input, bool allowDecimals = false, bool allowNegative = false)
    {
        if (string.IsNullOrEmpty(input))
        {
            return "0";
        }

        var kept = new StringBuilder(input.Length);
        var isNegative = false;
        var hasDecimalPoint = false;
        var hasDigit = false;

        foreach (var ch in input)
        {
            if (ch >= '0' && ch <= '9')
            {
                kept.Append(ch);
                hasDigit = true;
            }
            else if (allowDecimals && ch == '.')
            {
                // Keep only the first decimal point.
                if (!hasDecimalPoint)
                {
                    kept.Append(ch);
                    hasDecimalPoint = true;
                }
            }
            else if (allowNegative && ch == '-' && kept.Length == 0)
            {
                // Only a minus sign in front of the number makes it negative;
                // minus signs in the middle or at the end are discarded.
                isNegative = true;
            }
        }

        if (!hasDigit)
        {
            // A bare "-" or "." is not a number.
            return "0";
        }

        return isNegative ? "-" + kept : kept.ToString();
    }

    /// <summary>
    /// Removes or neutralizes potentially dangerous content from text
    /// </summary>
    /// <param name="content">The content to sanitize</param>
    /// <param name="maxLength">Maximum allowed length in UTF-16 code units</param>
    /// <returns>
    /// The content without control characters (tab and newlines are kept), with line endings
    /// normalized to <c>\n</c>, truncated to <paramref name="maxLength"/>
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="maxLength"/> is negative and <paramref name="content"/> is not empty.
    /// </exception>
    public static string SanitizeContent(string content, int maxLength = 10000)
    {
        if (string.IsNullOrEmpty(content))
        {
            return string.Empty;
        }

        if (maxLength < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(maxLength), maxLength, "Maximum length cannot be negative.");
        }

        // Strip control characters (this includes null bytes), then normalize line endings.
        var sanitized = ControlCharacters.Replace(content, string.Empty)
            .Replace("\r\n", "\n")
            .Replace("\r", "\n");

        if (sanitized.Length > maxLength)
        {
            var cut = maxLength;

            // Do not cut between the two halves of a surrogate pair; that would leave
            // a lone high surrogate, which is not valid UTF-16 text.
            if (cut > 0 && char.IsSurrogatePair(sanitized[cut - 1], sanitized[cut]))
            {
                cut--;
            }

            sanitized = sanitized.Substring(0, cut);
        }

        return sanitized;
    }

    /// <summary>
    /// Checks a string against the control-character, SQL, command, path-traversal and
    /// dangerous-keyword filters used by this class.
    /// </summary>
    /// <param name="input">The input to validate</param>
    /// <returns>True if the input is null/empty or passes every filter</returns>
    public static bool IsInputSafe(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return true;
        }

        // Control characters other than tab, line feed and carriage return are never acceptable.
        if (input.Any(c => char.IsControl(c) && c != '\t' && c != '\n' && c != '\r'))
        {
            return false;
        }

        if (SqlInjectionPatterns.IsMatch(input)
            || CommandInjectionPatterns.IsMatch(input)
            || PathTraversalPatterns.IsMatch(input))
        {
            return false;
        }

        return !DangerousKeywords.Any(keyword => input.Contains(keyword, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Creates a safe identifier from user input (for variable names, etc.)
    /// </summary>
    /// <param name="input">The input string</param>
    /// <param name="prefix">
    /// Returned as-is when nothing usable remains, and prepended (with an underscore) when the
    /// sanitized text does not start with a letter or underscore
    /// </param>
    /// <returns>An identifier of at most 50 characters made of ASCII letters, digits and underscores</returns>
    public static string CreateSafeIdentifier(string input, string prefix = "item")
    {
        if (string.IsNullOrWhiteSpace(input))
        {
            return prefix;
        }

        // Replace everything except ASCII letters, digits and underscore.
        var sanitized = NonIdentifierCharacters.Replace(input, "_");

        // Ensure it starts with a letter or underscore.
        if (!char.IsLetter(sanitized[0]) && sanitized[0] != '_')
        {
            sanitized = prefix + "_" + sanitized;
        }

        // Collapse runs of underscores and drop trailing ones.
        sanitized = RepeatedUnderscores.Replace(sanitized, "_").TrimEnd('_');

        if (string.IsNullOrEmpty(sanitized))
        {
            return prefix;
        }

        if (sanitized.Length > MaxIdentifierLength)
        {
            sanitized = sanitized.Substring(0, MaxIdentifierLength).TrimEnd('_');
        }

        return sanitized;
    }
}
/// <summary>
/// String extension methods that forward to the matching <see cref="InputSanitizer"/> helper,
/// so call sites can write <c>value.SanitizePath()</c> instead of the static call.
/// </summary>
public static class SanitizationExtensions
{
    /// <summary>Forwards to <see cref="InputSanitizer.SanitizeFileName(string)"/>.</summary>
    public static string SanitizeFileName(this string input)
        => InputSanitizer.SanitizeFileName(input);

    /// <summary>Forwards to <see cref="InputSanitizer.SanitizeForWeb(string, bool)"/>.</summary>
    public static string SanitizeForWeb(this string input, bool allowHtml = false)
        => InputSanitizer.SanitizeForWeb(input, allowHtml);

    /// <summary>Forwards to <see cref="InputSanitizer.SanitizePath(string)"/>.</summary>
    public static string SanitizePath(this string input)
        => InputSanitizer.SanitizePath(input);

    /// <summary>Forwards to <see cref="InputSanitizer.SanitizeContent(string, int)"/>.</summary>
    public static string SanitizeContent(this string input, int maxLength = 10000)
        => InputSanitizer.SanitizeContent(input, maxLength);

    /// <summary>Forwards to <see cref="InputSanitizer.IsInputSafe(string)"/>.</summary>
    public static bool IsInputSafe(this string input)
        => InputSanitizer.IsInputSafe(input);

    /// <summary>Forwards to <see cref="InputSanitizer.CreateSafeIdentifier(string, string)"/>.</summary>
    public static string ToSafeIdentifier(this string input, string prefix = "item")
        => InputSanitizer.CreateSafeIdentifier(input, prefix);
}
}