# activity_categorizer.py
"""
Four Categories:
1) PRODUCTIVE  → All work-related activities (IDEs, Office, dev tools)
2) BROWSER     → YouTube, Gmail, social, entertainment, shopping, ALL MAIL
3) SERVER      → AWS, GCP, Azure, SSH, Docker, monitoring tools
4) NON-WORK    → Untitled, system lock / idle / AFK / screensaver
"""

import re
from typing import Dict, List, Tuple


class ActivityCategorizer:
    """Classify a window title / application name into an activity category.

    The category is one of the strings ``"productive"``, ``"browser"``,
    ``"server"`` or ``"non-work"``. Classification is a fixed, ordered list
    of case-insensitive rules; the first rule that matches wins.
    """

    # Rule tables used by ``categorize_activity``. They are class-level
    # constants so they are built once instead of on every call.
    _EMAIL_INDICATORS = (
        "inbox", "@gmail", "@yahoo", "@outlook", "@hotmail",
        "@firsteconomy", "first economy mail", "- mail", "webmail",
    )
    # Compared against the whole (stripped, lower-cased) title, not a substring.
    _GENERIC_TITLES = frozenset(
        {"welcome", "home", "start", "open", "loading", "page", "open folder"}
    )
    _OFFICE_TOOLS = (
        "winword", "excel", "powerpnt", "onenote", "msword",
        "snipping tool", "snippingtool", "notepad.exe", "calculator",
    )
    _IDE_INDICATORS = (
        "vscode", "code.exe", "cursor", "pycharm", "intellij",
        "webstorm", "phpstorm", "sublime", "atom", "vim",
        "emacs", "notepad++", "visual studio", "claude code",
        "android studio", "xcode", "rider",
    )
    _CODE_EXTENSIONS = (
        "py", "js", "jsx", "ts", "tsx", "php", "java",
        "cpp", "c", "h", "cs", "rb", "go", "rs", "vue",
        "html", "css", "scss", "sass", "json", "xml", "sql",
        "swift", "kt", "dart", "sh", "yaml", "yml",
    )
    # The extension must end at a non-word character (or end of text).
    # A plain substring test would let ".c" match ".com" and ".go" match
    # ".google", turning every title that contains a domain into "productive".
    _CODE_EXTENSION_RE = re.compile(
        r"\.(?:" + "|".join(_CODE_EXTENSIONS) + r")(?!\w)"
    )
    # Compared against the whole (stripped, lower-cased) title.
    _BLANK_TITLES = frozenset({"untitled", "new tab", "blank", ""})
    _SHOPPING_SITES = (
        "amazon.in", "amazon.com", "flipkart", "myntra",
        "ajio", "meesho", "snapdeal", "titan",
    )
    # Any backslash counts as a path (this also covers "src\\" and
    # "\\components"); the remaining markers are forward-slash variants.
    _FILE_PATH_MARKERS = ("\\", "src/", "/components")
    # A single drive letter followed by exactly one slash ("c:/users").
    # The look-arounds keep URL schemes such as "https://" from being
    # mistaken for a file path.
    _DRIVE_PATH_RE = re.compile(r"(?<![a-z])[a-z]:/(?!/)")

    def __init__(self):
        """Build the per-instance keyword lists used by the keyword rules."""

        # 🟥 NON-WORK
        self.non_work_keywords = [
            "untitled", "new tab", "blank", "empty",
            "lockapp.exe", "lockapp", "lock screen", "sessionlock",
            "windows default lock screen",
            "idle", "idle-time", "afk", "away",
            "not active", "userinactive", "no active window",
            "screensaver", "screen saver", "new incognito tab"
        ]

        # 🟧 BROWSER (non-productive)
        self.browser_keywords = [
            # Shopping
            "amazon.in", "amazon.com", "flipkart", "myntra", "ajio",
            "snapdeal", "meesho", "shopping", "add to cart", "buy online",
            # Email
            "inbox", "compose", "mail", "email",
            "@firsteconomy.com", "@gmail.com", "@yahoo.com",
            "@outlook.com", "@hotmail.com",
            "first economy mail", "gmail", "yahoo mail", "outlook",
            "protonmail", "thunderbird", "webmail",
            # Entertainment
            "youtube", "youtu.be", "netflix", "amazon prime",
            "primevideo", "hotstar", "spotify", "twitch",
            # Social
            "facebook", "instagram", "snapchat", "tiktok",
            "pinterest", "reddit", "twitter", "x.com",
            "whatsapp", "telegram",
            # Search
            "google.com/search", "bing.com/search",
            "duckduckgo", "?q=", "&q=", "search?q=",
            # Extensions
            "awesome screenshot", "screenshot"
        ]

        # 🟦 SERVER
        self.server_keywords = [
            "aws", "ec2", "s3", "lambda", "iam", "cloudwatch",
            "azure", "microsoft azure",
            "gcp", "google cloud", "firebase",
            "digitalocean", "droplet", "linode", "vultr",
            "vercel", "netlify", "cloudflare", "godaddy", "namecheap",
            "jenkins", "github actions", "gitlab ci", "circleci",
            "docker", "kubernetes", "k8s", "pods", "cluster",
            "grafana", "prometheus", "datadog", "new relic", "sentry",
            "ssh", "rdp", "vnc", "teamviewer", "anydesk", "openvpn"
        ]

        # 🟩 PRODUCTIVE
        # NOTE: not consulted by categorize_activity, which uses its own
        # rule tables; kept as an attribute of the instance.
        self.productive_keywords = [
            # Microsoft Office
            "winword", "excel", "powerpnt", "onenote", "msword",
            "microsoft word", "microsoft excel", "microsoft powerpoint",
            # Windows Tools
            "snipping tool", "snippingtool", "notepad", "calculator",
            # IDEs
            "visual studio", "vs code", "vscode", "cursor",
            "intellij", "pycharm", "phpstorm", "webstorm",
            # Code files
            ".py", ".js", ".ts", ".php", ".jsx", ".tsx",
            ".html", ".css", ".json", ".sql",
            # Dev servers
            "localhost", "127.0.0.1",
            ":3000", ":8000", ":5000", ":4200",
            # Dev tools
            "postman", "insomnia",
            "github.com", "gitlab.com", "bitbucket", "stack overflow",
            # AI tools
            "chatgpt", "claude", "bard", "perplexity", "phind",
            # PM tools
            "jira", "notion", "trello", "asana", "confluence", "clickup",
            # Design
            "figma", "adobe xd", "photoshop", "illustrator",
            # Server tools
            "cpanel", "phpmyadmin", "filezilla"
        ]

        # Known projects - ALWAYS productive
        self.known_projects = [
            "radiant_clone", "radiant clone", "timesheet",
            "waaree", "firsteconomy"
        ]

    def categorize_activity(self, window_title: str, app_name: str = "") -> Tuple[str, float]:
        """Return ``(category, confidence)`` for one window.

        Matching is case-insensitive and, unless noted, runs against the
        title and app name joined by a space. Rules are tried in the order
        of the numbered comments below; the first match decides.

        Args:
            window_title: Title of the window. ``None`` is treated as ``""``.
            app_name: Name of the owning application. ``None`` is treated
                as ``""``.

        Returns:
            A tuple of the category string (``"productive"``, ``"browser"``,
            ``"server"`` or ``"non-work"``) and a confidence of 1.0, 0.95
            or 0.90.
        """
        # Missing values would otherwise crash on .lower().
        window_title = window_title or ""
        app_name = app_name or ""

        text = f"{window_title} {app_name}".lower()
        window_lower = window_title.lower().strip()

        # 1. Strong e-mail indicators -> BROWSER (checked before projects so
        #    that e.g. an "@firsteconomy" mailbox is not taken for a project).
        if any(indicator in text for indicator in self._EMAIL_INDICATORS):
            return ("browser", 1.0)

        # 2. Known project names -> PRODUCTIVE
        if any(project in text for project in self.known_projects):
            return ("productive", 1.0)

        # 3. Title is exactly a generic word -> BROWSER (before the IDE rule)
        if window_lower in self._GENERIC_TITLES:
            return ("browser", 1.0)

        # 4. Microsoft Office & Windows tools -> PRODUCTIVE
        if any(tool in text for tool in self._OFFICE_TOOLS):
            return ("productive", 1.0)

        # 5. IDEs / editors -> PRODUCTIVE
        if any(ide in text for ide in self._IDE_INDICATORS):
            return ("productive", 1.0)

        # 6. Source-file extension -> PRODUCTIVE
        if self._CODE_EXTENSION_RE.search(text):
            return ("productive", 1.0)

        # 7. Title is exactly untitled / blank / empty -> NON-WORK
        if window_lower in self._BLANK_TITLES:
            return ("non-work", 1.0)

        # 8. NON-WORK keywords anywhere in the text
        if any(word in text for word in self.non_work_keywords):
            return ("non-work", 1.0)

        # 9. Shopping sites -> BROWSER
        if any(shop in text for shop in self._SHOPPING_SITES):
            return ("browser", 1.0)

        # 10. File Explorer -> NON-WORK
        if "file explorer" in text:
            return ("non-work", 1.0)

        # 11. Loose e-mail markers and browser keywords -> BROWSER, skipped
        #     when the text looks like a file path ("@" or "mail" inside a
        #     path says nothing about browsing).
        is_file_path = (
            any(marker in text for marker in self._FILE_PATH_MARKERS)
            or self._DRIVE_PATH_RE.search(text) is not None
        )
        if not is_file_path:
            if "@" in text or "inbox" in text or " mail " in text:
                return ("browser", 1.0)
            if any(word in text for word in self.browser_keywords):
                return ("browser", 0.95)

        # 12. SERVER keywords
        if any(word in text for word in self.server_keywords):
            return ("server", 0.95)

        # 13. Nothing matched -> PRODUCTIVE with reduced confidence
        return ("productive", 0.90)

    def get_detailed_category(self, window_title: str, app_name: str = "") -> Dict:
        """Categorize one window and attach a finer-grained subcategory.

        Args:
            window_title: Title of the window. ``None`` is treated as ``""``
                for matching.
            app_name: Name of the owning application. ``None`` is treated
                as ``""`` for matching.

        Returns:
            A dict with keys ``category``, ``subcategory``, ``confidence``,
            ``window_title`` and ``app_name``. The last two carry the
            arguments exactly as they were passed in.
        """
        category, confidence = self.categorize_activity(window_title, app_name)
        text = f"{window_title or ''} {app_name or ''}".lower()

        if category == "non-work":
            if "untitled" in text:
                sub = "untitled"
            elif "lock" in text:
                sub = "system-lock"
            elif "idle" in text or "afk" in text:
                sub = "idle"
            else:
                sub = "non-work"
        elif category == "browser":
            if "@" in text or "inbox" in text or "mail" in text:
                sub = "email"
            elif "youtube" in text:
                sub = "entertainment"
            elif "amazon" in text or "flipkart" in text:
                sub = "shopping"
            elif "?q=" in text or "search" in text:
                sub = "search"
            else:
                sub = "general-browsing"
        elif category == "server":
            if "aws" in text:
                sub = "aws"
            elif "azure" in text:
                sub = "azure"
            elif "gcp" in text or "firebase" in text:
                sub = "gcp"
            else:
                sub = "server-tools"
        else:  # PRODUCTIVE
            if "vscode" in text or "code.exe" in text or "cursor" in text:
                sub = "coding"
            elif "winword" in text or "excel" in text:
                sub = "office"
            elif "snipping" in text:
                sub = "tools"
            elif "localhost" in text:
                sub = "dev-server"
            elif "postman" in text:
                sub = "api-testing"
            elif "figma" in text:
                sub = "design"
            else:
                sub = "productive-general"

        return {
            "category": category,
            "subcategory": sub,
            "confidence": confidence,
            "window_title": window_title,
            "app_name": app_name
        }

    def categorize_batch(self, activities: List[Dict]) -> List[Dict]:
        """Categorize every activity dict in ``activities``.

        Each dict is read through its ``window_title`` and
        ``application_name`` keys (missing keys default to ``""``) and is
        updated IN PLACE with the keys returned by
        ``get_detailed_category``.

        Args:
            activities: Activity dicts to annotate.

        Returns:
            A new list holding the same (now updated) dict objects, in
            input order.
        """
        categorized = []
        for activity in activities:
            info = self.get_detailed_category(
                activity.get("window_title", ""),
                activity.get("application_name", "")
            )
            # Deliberately mutates the caller's dict; the returned list
            # shares these objects.
            activity.update(info)
            categorized.append(activity)
        return categorized


if __name__ == "__main__":
    # Manual smoke run: print the category and subcategory assigned to a
    # handful of representative (window title, application) pairs.
    # The trailing comment on each sample is the category the author expects.
    samples = [
        ("Document1 - Microsoft Word", "WINWORD.EXE"),  # productive
        ("Snipping Tool Overlay", "SnippingTool.exe"),  # productive
        ("Online Shopping - Amazon.in", "chrome.exe"),  # browser
        ("Luxury Dealz - ankita@firsteconomy.com - First Economy Mail", "chrome.exe"),  # browser
        ("? [Claude Code] radiant_clone\\src\\Mail.jsx", "Code.exe"),  # productive
        ("Welcome", "chrome.exe"),  # browser
        ("YouTube - Google Chrome", "chrome.exe"),  # browser
    ]
    divider = "-" * 60
    demo = ActivityCategorizer()

    print("Testing categorization:")
    print(divider)
    for sample_title, sample_app in samples:
        result = demo.get_detailed_category(sample_title, sample_app)
        print(f"Title: {sample_title}")
        print(f"App: {sample_app}")
        print(f"→ Category: {result['category']} ({result['subcategory']})")
        print(divider)
