"""
Safe cleanup script with backup functionality
This will create a backup before cleaning up debug/test files
"""

import os
import shutil
from pathlib import Path
from datetime import datetime
import zipfile

# Project root that the cleanup operates on; every other path is derived from it.
BASE_DIR = Path("E:/timesheet/timesheet_new")
# Folder (inside the project root) that receives the timestamped backup archives.
BACKUP_DIR = BASE_DIR.joinpath("backup")

# Files that must never be deleted, expressed as forward-slash paths relative
# to the project root. They are grouped by area and merged into one set below.

# Project root
_ROOT_ESSENTIALS = {
    ".gitignore",
    ".env.example",
    "requirements.txt",
    "README.md",  # main README, kept if it exists
}

# Backend: environment files and core modules
_BACKEND_ESSENTIALS = {
    "backend/.env",
    "backend/.env.local",
    "backend/.env.production",
    "backend/main.py",
    "backend/database.py",
    "backend/models.py",
    "backend/schemas.py",
    "backend/config.py",
    "backend/auth.py",
    "backend/crud.py",
    "backend/activity_categorizer.py",
    "backend/activity_categorization_api.py",
    "backend/activitywatch_sync.py",
    "backend/activitywatch_webhook.py",
    "backend/dashboard_api.py",
    "backend/productivity_calculator.py",
    "backend/daily_hours_calculator.py",
    "backend/realistic_hours_calculator.py",
    "backend/__init__.py",
}

# Frontend: package manifests and environment files
_FRONTEND_ESSENTIALS = {
    "frontend/package.json",
    "frontend/package-lock.json",
    "frontend/.env",
    "frontend/.env.example",
    "frontend/.env.local",
    "frontend/.env.production",
}

ESSENTIAL_FILES = _ROOT_ESSENTIALS | _BACKEND_ESSENTIALS | _FRONTEND_ESSENTIALS

# Directories (relative to the project root) that are removed wholesale,
# together with everything inside them.
_ROOT_CLEANUP_DIRS = ["__to_delete__"]
_BACKEND_CLEANUP_DIRS = [
    "backend/debug",
    "backend/__pycache__",
    "backend/scripts",  # drop this entry if important scripts live here
    "backend/sync_scripts",
]
CLEANUP_DIRS = _ROOT_CLEANUP_DIRS + _BACKEND_CLEANUP_DIRS

def create_backup():
    """Archive every file the cleanup is about to remove.

    The archive holds the files reported by ``get_files_to_delete()`` plus every
    file found inside the existing ``CLEANUP_DIRS`` directories, so the backup
    covers the same set of files that the cleanup deletes. Each file is stored
    once, under its path relative to ``BASE_DIR``.

    A file that cannot be archived is reported with a warning and skipped; the
    remaining files are still backed up.

    Returns:
        Path of the zip archive that was written inside ``BACKUP_DIR``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_name = f"cleanup_backup_{timestamp}.zip"
    backup_path = BACKUP_DIR / backup_name

    # Create backup directory
    BACKUP_DIR.mkdir(exist_ok=True)

    print(f"\n📦 Creating backup: {backup_name}")

    # Collect into a set so a file reachable through several routes (matching
    # pattern AND living in a cleanup directory) is archived exactly once.
    to_archive = set(get_files_to_delete())
    for rel_dir in CLEANUP_DIRS:
        dir_path = BASE_DIR / rel_dir
        if dir_path.is_dir():
            to_archive.update(p for p in dir_path.rglob("*") if p.is_file())

    files_backed_up = 0
    with zipfile.ZipFile(backup_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Sorted for a deterministic archive layout.
        for filepath in sorted(to_archive):
            arcname = filepath.relative_to(BASE_DIR)
            try:
                zipf.write(filepath, arcname)
            except (OSError, ValueError) as e:
                # OSError: unreadable/vanished file; ValueError: zipfile rejects
                # modification times before 1980.
                print(f"  ⚠️  Could not backup {filepath}: {e}")
            else:
                files_backed_up += 1

    print(f"  ✅ Backed up {files_backed_up} files to {backup_path}")
    return backup_path

def get_files_to_delete(base_dir=None, essential_files=None):
    """Return the files under the project root that match a cleanup pattern.

    Files located inside a directory whose name contains ``.git``,
    ``node_modules`` or ``venv`` (e.g. ``.git``, ``.github``, ``.venv``) are
    never returned, and neither are files listed in the essential set. Only
    directory names are inspected, so a file such as ``test_venv_setup.py`` is
    still picked up, and the location of the project root itself has no
    influence on the result.

    Args:
        base_dir: Project root to scan. Defaults to the module-level
            ``BASE_DIR``.
        essential_files: Collection of forward-slash paths, relative to the
            root, that must never be returned. Defaults to the module-level
            ``ESSENTIAL_FILES``.

    Returns:
        A sorted list of unique ``Path`` objects.
    """
    root = BASE_DIR if base_dir is None else Path(base_dir)
    protected = ESSENTIAL_FILES if essential_files is None else essential_files

    # Directory-name markers for trees that must be left untouched.
    skip_markers = ('.git', 'node_modules', 'venv')

    # Define delete patterns
    patterns = [
        # Test files
        "*test*.py", "*TEST*.bat", "*test*.sql",

        # Debug files
        "*debug*.py", "*DEBUG*.bat", "*diagnose*",

        # Fix files
        "*fix*.py", "*FIX*.bat", "*fix*.sql", "*fix*.sh",

        # Check files
        "*check*.py", "*CHECK*.bat",

        # Quick/temporary files
        "*quick*.py", "*QUICK*.bat", "*temporary*.py",

        # Simple/sample files
        "*simple*.py", "*sample*.py",

        # Most batch files (keeping essential ones)
        "*.bat",

        # SQL files (usually for fixes/tests)
        "*.sql",

        # Shell scripts (usually for fixes)
        "*.sh",

        # VBS scripts
        "*.vbs",

        # Temporary documentation
        "DO-*.txt", "DO_*.md", "WHERE_*.md",
        "*EMERGENCY*.md", "*ACTION*.txt", "*SOLUTION*",
        "*GUIDE*.txt", "*STATUS*.bat",

        # HTML files in backend (test pages)
        "backend/*.html",

        # Old/backup files
        "*_old.py", "*.backup",
    ]

    # A set removes the duplicates produced by overlapping patterns.
    matches = set()
    for pattern in patterns:
        for filepath in root.rglob(pattern):
            if not filepath.is_file():
                continue

            relative = filepath.relative_to(root)

            # Inspect only the directories between the root and the file, not
            # the file name and not the root's own location.
            parent_dirs = relative.parts[:-1]
            if any(marker in part for part in parent_dirs for marker in skip_markers):
                continue

            # Skip if it's an essential file
            if relative.as_posix() not in protected:
                matches.add(filepath)

    return sorted(matches)

def _archived_names(backup_path):
    """Return the set of member names stored in the zip at *backup_path*."""
    with zipfile.ZipFile(backup_path) as zipf:
        return set(zipf.namelist())


def _is_archived(path, archived):
    """Tell whether *path* (located under BASE_DIR) is in the *archived* name set."""
    return path.relative_to(BASE_DIR).as_posix() in archived


def cleanup_project():
    """Interactively delete debug/test files and directories after a backup.

    Steps: collect the candidates, print a summary, ask for confirmation,
    create the backup archive, then delete. A file is deleted only when the
    archive actually contains it, and a directory is removed only when every
    file still inside it is in the archive; anything else is reported and left
    in place, so nothing is lost when a file could not be backed up.

    Returns:
        None. Progress and results are printed to standard output.
    """
    print("🧹 TIMESHEET PROJECT SAFE CLEANUP")
    print("=" * 80)

    # Get files to delete
    files_to_delete = get_files_to_delete()
    dirs_to_delete = [BASE_DIR / d for d in CLEANUP_DIRS if (BASE_DIR / d).exists()]

    if not files_to_delete and not dirs_to_delete:
        print("✅ No debug/test files found. Project is already clean!")
        return

    # Show what will be deleted
    print(f"\n📊 CLEANUP SUMMARY:")
    print(f"  • Files to delete: {len(files_to_delete)}")
    print(f"  • Directories to delete: {len(dirs_to_delete)}")

    # Calculate size
    total_size = 0
    for filepath in files_to_delete:
        try:
            total_size += filepath.stat().st_size
        except OSError:
            # The file vanished or became unreadable since the scan; it only
            # affects the estimate, so carry on.
            continue

    print(f"  • Space to be freed: {total_size / (1024 * 1024):.2f} MB")

    # Show some examples
    print(f"\n📄 Example files to be deleted:")
    for filepath in files_to_delete[:10]:
        print(f"  • {filepath.relative_to(BASE_DIR)}")
    if len(files_to_delete) > 10:
        print(f"  ... and {len(files_to_delete) - 10} more files")

    # Confirm
    print("\n" + "="*80)
    confirm = input("Do you want to proceed? A backup will be created first. (yes/no): ")

    # Tolerate surrounding whitespace in the answer.
    if confirm.strip().lower() != 'yes':
        print("❌ Cleanup cancelled.")
        return

    # Create backup, then read back what it really contains.
    backup_path = create_backup()
    archived = _archived_names(backup_path)

    # Delete files
    print(f"\n🗑️  Deleting files...")
    deleted_count = 0
    error_count = 0
    skipped_count = 0

    for filepath in files_to_delete:
        if not _is_archived(filepath, archived):
            skipped_count += 1
            print(f"  ⚠️  Skipping {filepath.relative_to(BASE_DIR)}: not in backup")
            continue
        try:
            filepath.unlink()
            deleted_count += 1
        except OSError as e:
            error_count += 1
            print(f"  ❌ Error deleting {filepath.name}: {e}")

    # Delete directories
    for dir_path in dirs_to_delete:
        # Files already deleted above are gone, so only the remainder is checked.
        unsaved = [
            p for p in dir_path.rglob("*")
            if p.is_file() and not _is_archived(p, archived)
        ]
        if unsaved:
            skipped_count += 1
            print(f"  ⚠️  Skipping directory {dir_path.relative_to(BASE_DIR)}: "
                  f"{len(unsaved)} files not in backup")
            continue
        try:
            shutil.rmtree(dir_path)
            print(f"  ✅ Deleted directory: {dir_path.relative_to(BASE_DIR)}")
        except OSError as e:
            print(f"  ❌ Error deleting directory {dir_path}: {e}")

    print(f"\n✅ CLEANUP COMPLETED!")
    print(f"  • Deleted {deleted_count} files")
    if error_count > 0:
        print(f"  • Failed to delete {error_count} files")
    if skipped_count > 0:
        print(f"  • Skipped {skipped_count} items that were missing from the backup")
    print(f"  • Backup saved at: {backup_path}")

    # Show remaining structure
    print(f"\n📁 CLEAN PROJECT STRUCTURE:")
    print("  Root/")
    print("    ├── backend/        (core backend files)")
    print("    ├── frontend/       (React application)")
    print("    ├── backup/         (cleanup backups)")
    print("    ├── .gitignore")
    print("    ├── requirements.txt")
    print("    └── .env.example")

def list_essential_structure():
    """Print an overview of the files that the cleanup leaves in place.

    The backend section lists every ``backend/*.py`` entry of
    ``ESSENTIAL_FILES`` that exists on disk, with its size, so the overview
    always agrees with what the cleanup protects. The configuration section
    lists the env files found in the project root, ``backend`` and
    ``frontend``.

    Returns:
        None. The overview is printed to standard output.
    """
    print("\n✅ ESSENTIAL FILES THAT WILL BE KEPT:")
    print("=" * 80)

    print("\nBackend Core Files:")
    backend_prefix = "backend/"
    # Derived from ESSENTIAL_FILES (sorted, since a set has no order) instead
    # of a second hand-maintained list that can drift out of date.
    backend_modules = sorted(
        entry[len(backend_prefix):]
        for entry in ESSENTIAL_FILES
        if entry.startswith(backend_prefix) and entry.endswith(".py")
    )
    for file in backend_modules:
        filepath = BASE_DIR / "backend" / file
        if filepath.exists():
            size = filepath.stat().st_size / 1024  # KB
            print(f"  • {file:<35} ({size:>6.1f} KB)")

    print("\nFrontend Structure:")
    print("  • src/              (React source code)")
    print("  • public/           (Static assets)")
    print("  • package.json      (Dependencies)")

    print("\nConfiguration Files:")
    for env_file in [".env", ".env.example", ".env.local", ".env.production"]:
        for location in ["", "backend", "frontend"]:
            # An empty location is the project root; pathlib ignores the
            # empty segment when joining.
            filepath = BASE_DIR / location / env_file
            if filepath.exists():
                # Root-level files are shown without a leading slash.
                shown = f"{location}/{env_file}" if location else env_file
                print(f"  • {shown}")

if __name__ == "__main__":
    import traceback

    try:
        # First show what is protected, then hand over to the interactive cleanup.
        list_essential_structure()

        separator = "=" * 80
        print("\n" + separator)

        cleanup_project()
    except KeyboardInterrupt:
        # Ctrl+C at the prompt or during the run: stop without a traceback.
        print("\n\n❌ Cleanup interrupted by user.")
    except Exception as e:
        # Last-resort boundary: report the failure together with its traceback.
        print(f"\n❌ An error occurred: {e}")
        traceback.print_exc()