#!/usr/bin/env python3
"""
Test complete CSV sanitization for all fields including Date/username.

Standalone script: each ``test_*`` function prints its progress, catches its
own failures, and returns ``True`` on success or ``False`` on failure. Running
the file directly executes all of them and exits with status 1 if any failed.
"""

import csv
import io
import os
import sys
import tempfile
from datetime import datetime

# Add parent directory to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from time_tracker import sanitize_csv_text, sanitize_date_text

# Column layout shared by every CSV written or read in this file.
FIELDNAMES = ['Job', 'TaskName', 'Note', 'Customer', 'Hours', 'Date', 'username', 'Billable', 'Billed']

# (character, description) pairs checked by _assert_dangerous_chars_removed.
_DANGEROUS_CHARS = (
    ('=', 'Equals sign'),
    ('+', 'Plus sign'),
    ('@', 'At sign'),
    ('\t', 'Tab'),
    ('\n', 'Newline'),
    ('\r', 'Carriage return'),
)


def _assert_dangerous_chars_removed(sanitized, field_label, original):
    """Assert that none of the dangerous characters remain in *sanitized*.

    Args:
        sanitized: The value returned by the sanitizer under test.
        field_label: Field name used in the failure message ("date"/"username").
        original: The raw input, echoed in the failure message.

    Raises:
        AssertionError: If any character from ``_DANGEROUS_CHARS`` is present.
    """
    for char, description in _DANGEROUS_CHARS:
        assert char not in sanitized, f"{description} should be removed from {field_label}: {original}"


def test_complete_csv_sanitization() -> bool:
    """Test that all CSV fields are properly sanitized.

    Writes three rows through the sanitizers into an in-memory CSV, reads the
    CSV back, and checks the Job, username and Date values of the first data
    row.

    Returns:
        True if every check passed, False if any exception was raised.
    """
    print("Testing complete CSV field sanitization...")

    # Test data with problematic characters in ALL fields
    test_data = [
        {
            'job': '=SUM(1,2)',
            'task': 'Task with "quotes" and, comma',
            'notes': 'Note @dangerous +formula -attack',
            'customer': 'Customer\nwith\nnewline\r\rcarriage',
            'hours': 2.5,
        },
        {
            'job': 'Excel;Injection',
            'task': 'Task\nwith\ttabs',
            'notes': '@malicious_content',
            'customer': '"Quoted Customer"',
            'hours': 1.75,
        },
        {
            'job': '+formula_attack',
            'task': 'Normal task',
            'notes': 'Simple note here',
            'customer': 'SafeCustomer',
            'hours': 3.0,
        },
    ]

    # Simulate malicious username and date that need sanitization
    malicious_username = '=USER+FORMULA'
    malicious_date = '2024-01-15'  # Format that should be preserved but sanitized

    try:
        # Test complete sanitization
        print("\n1. Testing complete field sanitization:")

        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=FIELDNAMES, quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()

        for row_data in test_data:
            writer.writerow({
                'Job': sanitize_csv_text(row_data['job']),
                'TaskName': sanitize_csv_text(row_data['task']),
                'Note': sanitize_csv_text(row_data['notes']),
                'Customer': sanitize_csv_text(row_data['customer']),
                'Hours': float(row_data['hours']),
                'Date': sanitize_date_text(malicious_date),  # Now sanitized with date function!
                'username': sanitize_csv_text(malicious_username),  # Already was sanitized
                'Billable': True,
                'Billed': False,
            })

        csv_content = output.getvalue()
        print(" ✓ CSV content generated with all fields sanitized")
        print(f" ✓ CSV length: {len(csv_content)} characters")

        # Verify the CSV can be read back correctly.
        # Because fieldnames is passed explicitly, DictReader does not consume
        # the header line: it comes back as rows_read[0], hence the "+ 1".
        output.seek(0)
        reader = csv.DictReader(output, fieldnames=FIELDNAMES)
        rows_read = list(reader)

        assert len(rows_read) == len(test_data) + 1, f"Expected {len(test_data) + 1} rows (including header), got {len(rows_read)}"
        print(" ✓ CSV can be read back correctly")

        # Verify specific field sanitization
        first_data_row = rows_read[1]
        sanitized_job = first_data_row['Job']
        sanitized_username = first_data_row['username']
        sanitized_date = first_data_row['Date']

        # Check job field sanitization
        assert '=' not in sanitized_job, "Job field should not have equals signs"
        assert '+' not in sanitized_job, "Job field should not have plus signs"
        print(f" ✓ Job field sanitized: '{sanitized_job}'")

        # Check username field sanitization
        assert '=' not in sanitized_username, "Username field should not have equals signs"
        assert '+' not in sanitized_username, "Username field should not have plus signs"
        print(f" ✓ Username field sanitized: '{sanitized_username}'")

        # Check date field sanitization
        assert '=' not in sanitized_date, "Date field should not have equals signs"
        assert '+' not in sanitized_date, "Date field should not have plus signs"
        # Date should still parse as valid date format
        assert '-' in sanitized_date, "Date field should preserve hyphens for format"
        datetime.strptime(sanitized_date, '%Y-%m-%d')
        print(f" ✓ Date field sanitized with format preserved: '{sanitized_date}'")

    except Exception as e:
        print(f" ❌ Complete sanitization test failed: {e}")
        return False

    return True


def test_date_username_edge_cases() -> bool:
    """Test edge cases for Date and username field sanitization.

    Feeds awkward inputs (formula prefixes, control characters, padding,
    empty string, None) to the two sanitizers and asserts that no character
    from ``_DANGEROUS_CHARS`` survives.

    Returns:
        True if every check passed, False if any exception was raised.
    """
    print("\n2. Testing Date and username edge cases:")

    # Test date edge cases
    date_edge_cases = [
        '2024-01=15',        # Equals in date
        '2024/01+15',        # Slash and plus in date
        '2024-01@15',        # At sign in date
        '2024-01-15\n2024',  # Newline in date
        '\t2024-01-15',      # Tab in date
        '2024-01-15 ',       # Space after date
        '2024-01-15',        # Normal date
    ]

    # Test username edge cases
    username_edge_cases = [
        '=SUM(1,2)',         # Formula in username
        'user+name',         # Plus in username
        'user@domain.com',   # At sign in username
        'user\nname',        # Newline in username
        '\tuser',            # Tab in username
        ' user ',            # Spaces in username
        '',                  # Empty username
        None,                # None username
    ]

    try:
        # Test date edge cases
        for test_case in date_edge_cases:
            # None is mapped to '' so the sanitizer always receives a string.
            sanitized = sanitize_date_text(test_case if test_case is not None else '')

            # Check for removal of dangerous characters
            _assert_dangerous_chars_removed(sanitized, 'date', test_case)

            # Check that hyphens are preserved for valid dates
            if test_case == '2024-01-15':
                assert '-' in sanitized, f"Hyphens should be preserved in valid date: {test_case}"

        # Test username edge cases
        for test_case in username_edge_cases:
            sanitized = sanitize_csv_text(test_case if test_case is not None else '')

            # Check for removal of dangerous characters
            _assert_dangerous_chars_removed(sanitized, 'username', test_case)

        print(" ✓ All edge cases handled correctly")
        return True

    except Exception as e:
        print(f" ❌ Edge case test failed: {e}")
        return False


def test_rewrite_operation_sanitization() -> bool:
    """Test that the rewrite operation (mark as billed) maintains sanitization.

    Re-applies the sanitizers to a row that is already clean, writes it to an
    in-memory CSV, reads it back, and checks that no '=' appears in the Job,
    username or Date columns.

    Returns:
        True if every check passed, False if any exception was raised.
    """
    print("\n3. Testing rewrite operation sanitization:")

    # Simulate data that was already sanitized in original write
    original_data = [
        {
            'Job': 'SUM(1,2)',  # Already sanitized (equals removed)
            'TaskName': 'Task with quotes',
            'Note': 'Note, with comma',
            'Customer': 'Customer Name',
            'Hours': '2.5',
            'Date': '2024-01-15',  # Should be sanitized
            'username': 'FORMULA(1,2)',  # Already sanitized (equals removed)
            'Billable': 'True',
            'Billed': 'False',
        }
    ]

    # Free-text columns go through sanitize_csv_text; 'Date' goes through
    # sanitize_date_text; everything else is copied unchanged.
    text_fields = ('Job', 'TaskName', 'Note', 'Customer', 'username')

    try:
        # Simulate rewrite operation with sanitization
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=FIELDNAMES, quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()

        # Apply sanitization to rewrite data (as our fix does)
        sanitized_data = []
        for row in original_data:
            sanitized_row = {}
            for field_name, field_value in row.items():
                if not isinstance(field_value, str):
                    # Only strings are sanitized; other types pass through.
                    sanitized_row[field_name] = field_value
                elif field_name in text_fields:
                    sanitized_row[field_name] = sanitize_csv_text(field_value)
                elif field_name == 'Date':
                    sanitized_row[field_name] = sanitize_date_text(field_value)
                else:
                    sanitized_row[field_name] = field_value
            sanitized_data.append(sanitized_row)

        writer.writerows(sanitized_data)

        # Verify it can be read back (header line comes back as rows[0]
        # because fieldnames is passed explicitly).
        output.seek(0)
        reader = csv.DictReader(output, fieldnames=FIELDNAMES)
        rows = list(reader)

        assert len(rows) == 2, f"Expected 2 rows, got {len(rows)}"

        # Verify sanitization persisted
        data_row = rows[1]
        assert '=' not in data_row['Job'], "Job field should remain sanitized"
        assert '=' not in data_row['username'], "Username field should remain sanitized"
        assert '=' not in data_row['Date'], "Date field should be sanitized"

        print(" ✓ Rewrite operation maintains sanitization")
        return True

    except Exception as e:
        print(f" ❌ Rewrite operation test failed: {e}")
        return False


def test_date_format_preservation() -> bool:
    """Test that date format is preserved while sanitizing.

    Valid ``YYYY-MM-DD`` dates must come back unchanged and still parse.
    Dates containing dangerous characters must lose those characters while
    keeping at least one hyphen; whether the result still parses is reported
    but not enforced.

    Returns:
        True if every check passed, False if any exception was raised.
    """
    print("\n4. Testing date format preservation:")

    valid_dates = [
        '2024-01-15',
        '2024-12-31',
        '2023-02-28',
        '2025-03-01',
    ]

    try:
        for date_str in valid_dates:
            sanitized = sanitize_date_text(date_str)
            # Should be unchanged (no dangerous chars)
            assert sanitized == date_str, f"Valid date should be unchanged: {date_str} -> {sanitized}"
            # Should still parse as valid date
            datetime.strptime(sanitized, '%Y-%m-%d')

        print(" ✓ Valid date formats preserved")

        # Test dangerous dates
        dangerous_dates = [
            '2024=01-15',
            '2024-01+15',
            '2024-01@15',
            '2024-01-15\n2024',
        ]

        for dangerous_date in dangerous_dates:
            sanitized = sanitize_date_text(dangerous_date)
            # Dangerous chars should be removed but format should remain valid
            assert '=' not in sanitized, f"Equals should be removed from: {dangerous_date}"
            assert '+' not in sanitized, f"Plus should be removed from: {dangerous_date}"
            assert '@' not in sanitized, f"At should be removed from: {dangerous_date}"
            assert '\n' not in sanitized, f"Newline should be removed from: {dangerous_date}"
            # Should still have hyphens for format
            assert '-' in sanitized, f"Hyphens should be preserved in: {dangerous_date} -> {sanitized}"

            # If still valid format, should parse; a parse failure is only
            # reported, never treated as a test failure.
            try:
                datetime.strptime(sanitized, '%Y-%m-%d')
            except ValueError:
                print(f" ✓ Dangerous date sanitized (format may have changed): '{dangerous_date}' -> '{sanitized}'")
            else:
                print(f" ✓ Dangerous date sanitized but still valid: '{dangerous_date}' -> '{sanitized}'")

        return True

    except Exception as e:
        print(f" ❌ Date format test failed: {e}")
        return False


def main() -> int:
    """Run every test in order and return the process exit status.

    All tests are executed even if an earlier one fails, so the output shows
    every failure in a single run.

    Returns:
        0 if all tests returned True, 1 otherwise (including an unexpected
        exception escaping a test).
    """
    print("🔒 Testing Complete CSV Field Sanitization")
    print("=" * 60)

    tests = (
        test_complete_csv_sanitization,
        test_date_username_edge_cases,
        test_rewrite_operation_sanitization,
        test_date_format_preservation,
    )

    try:
        success = True
        for run_test in tests:
            # Call first, then combine, so a prior failure never skips a test.
            success = run_test() and success

        if success:
            print("\n✅ All complete CSV sanitization tests passed!")
            print("\n🛡️ Complete CSV Security Verified:")
            print("- ALL fields properly sanitized: Job, TaskName, Note, Customer, Hours, Date, username")
            print("- Date field vulnerability fixed (critical)")
            print("- Username field properly sanitized (confirmed)")
            print("- Rewrite operations maintain sanitization")
            print("- Date format preservation for valid dates")
            print("- Edge cases and injection attempts blocked")
            print("\n🎯 Critical security vulnerability completely resolved!")
            return 0

        print("\n❌ Some CSV sanitization tests failed!")
        return 1

    except Exception as e:
        print(f"\n❌ Test suite failed with error: {e}")
        return 1


if __name__ == "__main__":
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under `python -S`).
    sys.exit(main())