from flask import Flask, render_template, Response
from sonatoki.ilo import Ilo
from sonatoki.Configs import PrefConfig, CorpusConfig
from atproto import FirehoseSubscribeReposClient, parse_subscribe_repos_message
from atproto import CAR, models
import json
import re
import emoji
import queue
import threading
from werkzeug.serving import run_simple
from threading import Lock
# STL
from typing import List, Type, TypedDict

# PDM
from typing_extensions import NotRequired

# LOCAL
from sonatoki.types import Number
from sonatoki.Filters import (
    Or,
    And,
    Len,
    Not,
    Filter,
    PuName,
    Numeric,
    Syllabic,
    NimiUCSUR,
    Alphabetic,
    NimiKuLili,
    NimiKuSuli,
    ProperName,
    Punctuation,
    LongSyllabic,
    Miscellaneous,
    LongAlphabetic,
    LongProperName,
    FalsePosSyllabic,
    NimiLinkuByUsage,
    NimiLinkuObscure,
    NimiLinkuSandbox,
    NimiLinkuUncommon,
    FalsePosAlphabetic,
)
from sonatoki.Scorers import Scorer, Soften, Voting, PassFail, SoftScaling, SoftPassFail
from sonatoki.Cleaners import Cleaner, ConsecutiveDuplicates
from sonatoki.Tokenizers import Tokenizer, WordTokenizerRe
from sonatoki.Preprocessors import (
    RECOMMENDED_PREPROCESSORS,
    URLs,
    Emoji,
    Codeblock,
    Reference,
    Preprocessor,
    AngleBracketObject,
    Emails
)

# Words that are spelled like valid Toki Pona but, per the notes below, show up
# far more often in English (or other languages) than in Toki Pona text.
# NOTE(review): nothing in the visible part of this module reads this set; it
# looks like it was carried over from sonatoki's own configuration -- confirm
# before relying on it or removing it.
__DICT_PHONOMATCHES = {
    # Sandbox words are removed from the CorpusConfig if they appear more
    # frequently in English than in Toki Pona by a factor of at least 3.
    # Every word listed here appears more often in English by a factor of at
    # least 10.
    "aka",  # also known as
    "an",  # article
    "api",  # API
    "i",  # 1st person
    "je",  # 1st person pronoun, french
    "kana",  # japanese script
    "me",  # 1st person singular, english
    "ne",  # "no" in several languages
    "nu",  # "new" in english, "now" in dutch
    "omen",  # ominous
    "se",  # spanish particle, english "see"
    "sole",  # singular, of shoe
    "take",  # acquire, perhaps forcefully or without permission
    "ten",  # 10
    "to",  # to, too
    "u",  # no u
    "we",  # 1st person plural, english
    "wi",  # wii and discussions of syllables
    # Unexplored candidates for removal:
    # "papa",  # father
    # "lo",  # "lo" and "loo"
    # "ewe",  # sheep
    # "pa",  # father- eh?
}


# Flask application that serves the viewer page ('/') and the event stream
# ('/stream').
app = Flask(__name__)

# Shared sonatoki classifier used by process_firehose() to decide whether a
# post is Toki Pona.
ilo = Ilo(
    # Applied to the raw text before tokenizing.
    preprocessors=[URLs, Emails, Emoji],
    cleaners=[ConsecutiveDuplicates],
    # Tokens matching these filters are ignored rather than scored.
    ignoring_filters=[Numeric, Punctuation],
    scoring_filters=[
        Len(Or(NimiLinkuByUsage(30), NimiUCSUR), max=15),
        Len(And(Syllabic, Not(FalsePosSyllabic)), min=3, max=24),
        # NOTE: These are allowed to pass name and alphabetic below, because they *could* be wrong
        Len(ProperName, min=2, max=24),
        Len(And(Alphabetic, Not(FalsePosAlphabetic)), min=3, max=24),
    ],
    scorer=SoftScaling,
    # process_firehose() compares the score against a literal 0.8 as well;
    # keep the two values in sync.
    passing_score=0.8,
)

class JSONExtra(json.JSONEncoder):
    """JSON encoder that falls back to ``repr()`` for unsupported values.

    Pass it as ``cls=JSONExtra`` to ``json.dumps`` so that objects the standard
    encoder cannot serialize are emitted as their ``repr()`` string instead of
    aborting the whole dump.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        The encoder only calls this hook for values it does not natively
        support.  The base implementation signals "unsupported" by raising
        ``TypeError``; only that exception is converted into the ``repr()``
        fallback, so unrelated failures (and ``KeyboardInterrupt`` /
        ``SystemExit``) are no longer swallowed.
        """
        try:
            return super().default(obj)
        except TypeError:
            return repr(obj)

def clean_text(text: str) -> str:
    """Return *text* without emoji or http(s) URLs, trimmed of outer whitespace.

    Emoji are removed first, then every ``http://`` / ``https://`` URL (up to
    the next whitespace character) is deleted; finally leading and trailing
    whitespace is stripped.
    """
    without_emoji = emoji.replace_emoji(text, replace='')
    without_urls = re.sub(r'https?://\S+', '', without_emoji)
    return without_urls.strip()

# Held around every access to ``clients``: broadcast_message() iterates the
# list while generate_sse() appends to and removes from it.
clients_lock = Lock()

# One queue per connected SSE client; generate_sse() registers its queue here
# on start and removes it again when the stream ends.
clients: List[queue.Queue] = []

def broadcast_message(msg: dict):
    """Put *msg* on the queue of every currently connected SSE client.

    ``clients_lock`` is held for the whole fan-out so the client list cannot
    change while it is being iterated.
    """
    with clients_lock:
        for subscriber_queue in clients:
            subscriber_queue.put(msg)

def process_firehose():
    """Start a firehose client that broadcasts posts classified as Toki Pona.

    A ``FirehoseSubscribeReposClient`` is created and started with a handler
    that inspects every commit message, picks out newly created
    ``app.bsky.feed.post`` records, scores their text with ``ilo`` and hands
    the matching posts to ``broadcast_message``.
    """
    firehose = FirehoseSubscribeReposClient()

    def on_message_handler(message):
        """Handle one firehose message; only commit messages are processed."""
        commit = parse_subscribe_repos_message(message)
        if not isinstance(commit, models.ComAtprotoSyncSubscribeRepos.Commit):
            return

        car = CAR.from_bytes(commit.blocks)
        for op in commit.ops:
            # Only newly created records that carry a CID are of interest.
            if op.action != "create" or not op.cid:
                continue

            raw = car.blocks.get(op.cid)
            cooked = models.get_or_create(raw, strict=False)
            if not cooked or cooked.py_type != "app.bsky.feed.post":
                continue

            original_text = raw.get("text", "")
            cleaned_text = clean_text(original_text)

            # Require at least three whitespace-separated words; an empty
            # string splits into zero words, so it is rejected here as well.
            if len(cleaned_text.split()) < 3:
                continue

            scorecard = ilo._is_toki_pona(ilo.preprocess(cleaned_text))
            if not (scorecard["cleaned"] and scorecard["score"] >= 0.8):
                continue

            # The second segment of the op path is used as the post id in the
            # bsky.app URL (presumably the record key).
            post_id = op.path.split("/")[1]
            url = f'https://bsky.app/profile/{commit.repo}/post/{post_id}'
            broadcast_message({'text': original_text, 'url': url})

    firehose.start(on_message_handler)

def generate_sse(heartbeat_seconds: float = 15.0):
    """Yield Server-Sent Events for one connected client.

    A private queue is registered in ``clients`` when iteration starts and is
    removed again when the generator is closed or fails.  Every message put on
    that queue is emitted as one ``data:`` event holding its JSON encoding.

    Args:
        heartbeat_seconds: Longest time to stay silent.  When no message
            arrives within this many seconds an SSE comment line is emitted
            instead.  Browsers ignore comment lines, but writing to the
            connection lets the server notice a client that has gone away
            (so its queue is unregistered) and keeps idle-timeout proxies from
            dropping the stream.  ``None`` waits for messages indefinitely and
            never sends a heartbeat.

    Yields:
        ``"data: <json>\\n\\n"`` for each broadcast message, or
        ``": keepalive\\n\\n"`` after an idle period.
    """
    # Each client gets its own queue.
    q = queue.Queue()
    with clients_lock:
        clients.append(q)
    try:
        while True:
            try:
                message = q.get(timeout=heartbeat_seconds)
            except queue.Empty:
                # Lines starting with ':' are comments in the SSE format.
                yield ": keepalive\n\n"
                continue
            yield f"data: {json.dumps(message)}\n\n"
    finally:
        # Runs on client disconnect (generator close) as well as on errors.
        with clients_lock:
            clients.remove(q)

@app.route('/')
def index():
    """Serve the single-page viewer for the live stream.

    The page opens an ``EventSource`` on ``/stream`` and prepends one card per
    received event, keeping at most 50 cards.  Post text comes from arbitrary
    Bluesky users, so the script builds DOM nodes and assigns ``textContent``
    rather than interpolating the text into ``innerHTML``; markup inside a
    post is therefore displayed literally and never executed.
    """
    return """<!DOCTYPE html>
<html>
<head>
    <title>Toki Pona Live Stream</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .message {
            background: white;
            padding: 15px;
            margin: 10px 0;
            border-radius: 5px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        a {
            color: #0066cc;
            text-decoration: none;
        }
        h1 {
            text-align: center;
        }
    </style>
</head>
<body>
    <h1>Toki Pona Live Stream</h1>
    <div id="messages"></div>

    <script>
        const evtSource = new EventSource("/stream");
        const messages = document.getElementById('messages');
        
        evtSource.onmessage = function(event) {
            const data = JSON.parse(event.data);
            const messageDiv = document.createElement('div');
            messageDiv.className = 'message';

            const textParagraph = document.createElement('p');
            textParagraph.textContent = data.text;

            const link = document.createElement('a');
            link.href = data.url;
            link.target = '_blank';
            link.rel = 'noopener noreferrer';
            link.textContent = 'View on Bluesky';

            messageDiv.appendChild(textParagraph);
            messageDiv.appendChild(link);
            messages.insertBefore(messageDiv, messages.firstChild);
            
            if (messages.children.length > 50) {
                messages.removeChild(messages.lastChild);
            }
        };
    </script>
</body>
</html>"""

@app.route('/stream')
def stream():
    """Return a ``text/event-stream`` response fed by a fresh ``generate_sse()``."""
    event_source = generate_sse()
    return Response(event_source, mimetype='text/event-stream')

if __name__ == '__main__':
    import os

    from werkzeug.serving import is_running_from_reloader

    # The werkzeug debugger lets anyone who can reach it execute Python on
    # this machine, and the server listens on every interface.  Debugger and
    # reloader are therefore opt-in (FLASK_DEBUG=1) instead of always on.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    # With the reloader enabled werkzeug executes this script twice: once in a
    # watcher process and once in the child that actually serves requests.
    # Start the firehose thread only in the serving process so the watcher
    # does not open a second, unused firehose subscription.
    if not debug or is_running_from_reloader():
        threading.Thread(target=process_firehose, daemon=True).start()

    # Use run_simple with threading enabled to allow multiple clients
    run_simple('0.0.0.0', 7860, app, use_reloader=debug, use_debugger=debug, threaded=True)