# Quick Fix: Add this function to your existing activity_categorization_api.py

# Editor/application names that may trail the project segment of a window title.
_APP_NAME_SUFFIXES = (
    "Visual Studio Code",
    "VS Code",
    "Cursor",
    "Notepad++",
    "Sublime Text",
)

# Project names that carry no information and are treated as "not set".
_PLACEHOLDER_PROJECT_NAMES = frozenset({"general", "unknown"})

# Separator between the segments of a window title ("file - project - app").
_TITLE_SEPARATOR = " - "


def _is_placeholder_project(name):
    """Return True when *name* is empty or a generic placeholder."""
    return not name or name.lower() in _PLACEHOLDER_PROJECT_NAMES


def _project_from_window_title(window_title):
    """Return the second " - "-separated segment of *window_title*.

    A trailing application name (see ``_APP_NAME_SUFFIXES``) is stripped from
    that segment. Returns "" when the title has no second segment.
    """
    parts = window_title.split(_TITLE_SEPARATOR)
    if len(parts) < 2:
        return ""

    candidate = parts[1].strip()
    for app in _APP_NAME_SUFFIXES:
        if candidate.endswith(app):
            # Drop only the trailing occurrence; str.replace() would also
            # delete the app name from the middle of a real project name.
            candidate = candidate[: -len(app)].strip()
    return candidate


def group_activities_by_project(activities):
    """Group activities by project name and sum their durations.

    The project name is taken from ``activity["project_name"]``. When that is
    missing, empty, or a placeholder ("general"/"unknown"), it is derived from
    the second " - "-separated segment of ``activity["window_title"]``. If no
    usable name can be found the activity is filed under "No Project".

    Args:
        activities: Iterable of dicts. Keys read: "project_name",
            "window_title", "application_name" and "duration". Durations are
            divided by 3600 to produce hours, i.e. they are treated as
            seconds. Missing or ``None`` values are tolerated; a missing
            duration counts as 0.

    Returns:
        A list with one dict per project, sorted by total duration
        (longest first). Each dict has the keys "window_title",
        "application_name" (from the first activity seen for the project),
        "duration", "duration_hours", "file_count", "subcategory" and
        "confidence".
    """
    project_groups = {}

    for activity in activities:
        # `or ""` also covers keys that are present but explicitly None.
        project_name = (activity.get("project_name") or "").strip()
        window_title = activity.get("window_title") or ""

        if _is_placeholder_project(project_name):
            project_name = _project_from_window_title(window_title)
        if _is_placeholder_project(project_name):
            project_name = "No Project"

        group = project_groups.get(project_name)
        if group is None:
            group = {
                "window_title": f"{project_name}",
                "application_name": activity.get("application_name", ""),
                "duration": 0,
                "duration_hours": 0,
                "file_count": 0,
                "files": set(),
                "subcategory": "development",
                "confidence": 0.9,
            }
            project_groups[project_name] = group

        group["duration"] += activity.get("duration") or 0

        # The first title segment is counted as a file when it contains a dot.
        first_segment, separator, _ = window_title.partition(_TITLE_SEPARATOR)
        file_part = first_segment.strip()
        if separator and "." in file_part:
            group["files"].add(file_part)

    result = []
    for project_name, group in project_groups.items():
        # The set is only a working structure; drop it so the result stays
        # JSON-serializable.
        files = group.pop("files")
        group["file_count"] = len(files)
        group["duration_hours"] = round(group["duration"] / 3600, 2)

        # Show the file count in the title once at least one file is known.
        if group["file_count"] > 0:
            group["window_title"] = f"{project_name} ({group['file_count']} files)"

        result.append(group)

    # Longest-running projects first; ties keep first-seen order.
    return sorted(result, key=lambda x: x["duration"], reverse=True)

# Then modify this part of your code (around line 141):
# Find this section:
#         top_activities_by_category[category] = [
#             {
#                 "window_title": act["window_title"],
#                 ...
#             }
#             for act in sorted_activities
#         ]

# Replace with (keep the indentation of the surrounding code):
        # Productive activities are collapsed into one entry per project;
        # every other category keeps one entry per activity.
        if category == "productive":
            # Group productive activities by project. The helper returns the
            # groups sorted by total duration (longest first), so the slice
            # keeps at most the ten longest-running projects. Grouped entries
            # also carry a "file_count" key that the per-activity entries
            # below do not have.
            # NOTE(review): this passes `activities`, while the branch below
            # iterates `sorted_activities` — confirm `activities` holds only
            # this category's entries.
            grouped_activities = group_activities_by_project(activities)
            top_activities_by_category[category] = grouped_activities[:10]
        else:
            # Keep original behavior for other categories
            # NOTE(review): no [:10] limit is applied here; presumably
            # `sorted_activities` is already truncated upstream — confirm.
            top_activities_by_category[category] = [
                {
                    "window_title": act["window_title"],
                    "application_name": act["application_name"],
                    "duration": act["duration"],
                    # Duration divided by 3600 and rounded to two decimals.
                    "duration_hours": round(act["duration"] / 3600, 2),
                    "subcategory": act["subcategory"],
                    "confidence": act["confidence"]
                }
                for act in sorted_activities
            ]
