Create app.py
app.py (ADDED, 270 lines)

import re
import json
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from flask import Flask, jsonify, Response
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime

# Import GitHub JSON DB helpers
from db_twiter import fetch_authenticity_token_and_commit_oid, update_user_json_file, fetch_json_from_github

# Headless-browser rendering service used to fetch profile pages
BROWSE_ENDPOINT = "https://corvo-ai-xx-pg.hf.space/browse"
# twiiit.com redirects to a currently working Nitter instance
TWIIIT_REDIRECT = "https://twiiit.com/get-location"
# Fallback if the redirect lookup fails
DEFAULT_NITTER_DOMAIN = "nitter.net"

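# How the lookup below is expected to behave (an assumption based on how
# twiiit.com is used here, not on its documentation): GET /get-location
# answers with a redirect whose Location header points at a live Nitter
# mirror, e.g. "Location: https://nitter.poast.org/...", from which only
# the netloc ("nitter.poast.org") is kept.
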
def get_current_nitter_domain(timeout=10):
    """Resolve a working Nitter domain via the twiiit.com redirect; fall back to the default."""
    try:
        resp = requests.get(TWIIIT_REDIRECT, allow_redirects=False, timeout=timeout)
        location = resp.headers.get("Location")
        if location:
            domain = urlparse(location).netloc
            if domain:
                return domain
    except requests.RequestException:
        pass
    return DEFAULT_NITTER_DOMAIN

def build_nitter_profile_url(domain: str, username: str) -> str:
    username = username.lstrip("@").strip()
    return f"https://{domain}/{username}"

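# Example (illustrative):
#   build_nitter_profile_url("nitter.net", "@zerohedge")
#   -> "https://nitter.net/zerohedge"
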
def browse(urls, wait_for=350):
    """POST the URLs to the browse service; return its JSON, or the raw text if parsing fails."""
    payload = {"urls": urls, "wait_for": wait_for}
    headers = {"Content-Type": "application/json"}
    resp = requests.post(BROWSE_ENDPOINT, json=payload, headers=headers, timeout=120)
    resp.raise_for_status()
    try:
        return resp.json()
    except ValueError:
        return {"raw": resp.text}

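# The browse service's response shape is inferred from how it is consumed in
# extract_text_from_response() below, not from a published schema; it is
# assumed to look roughly like:
#   {"output": {"results": [{"content": "<rendered page text>"}, ...]}}
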
def strip_markdown_links(text: str) -> str:
    text = re.sub(r'!\[[^\]]*\]\([^)]+\)', '', text)      # drop image links entirely
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)  # keep link text only
    return text

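# Example (illustrative):
#   strip_markdown_links("see [docs](https://x.y) ![chart](https://z/i.png)")
#   -> "see docs " (leftover whitespace is normalized later by clean_text())
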
def html_to_text(s: str) -> str:
    soup = BeautifulSoup(s, "html.parser")
    for br in soup.find_all("br"):
        br.replace_with("\n")
    text = soup.get_text(separator="\n")
    return text

def clean_text(s: str) -> str:
    s = strip_markdown_links(s)
    s = html_to_text(s)
    s = s.replace("\\\\", "\\")        # collapse doubled backslashes
    s = re.sub(r'[ \t]+', ' ', s)      # squeeze runs of spaces/tabs
    s = re.sub(r'\n{3,}', '\n\n', s)   # cap consecutive newlines at two
    s = s.strip()
    return s

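# Example (illustrative):
#   clean_text("[a](https://b)  c<br><br><br>d")
#   -> "a c\n\nd"
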
def extract_text_from_response(data: dict) -> str:
    parts = []
    try:
        results = data["output"]["results"]
    except (KeyError, TypeError):
        return clean_text(str(data))
    for r in results:
        content = r.get("content", "")
        if not content:
            continue
        parts.append(clean_text(content))
    return "\n\n".join(p for p in parts if p)

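# Example with an assumed response shape (illustrative):
#   extract_text_from_response({"output": {"results": [{"content": "Hi<br>there"}]}})
#   -> "Hi\n\nthere"
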
def parse_posts(raw_text: str):
    """Parse cleaned Nitter profile text into a list of {name, handle, time, post} dicts."""
    lines = [ln.strip() for ln in raw_text.splitlines()]
    name_handle_re = re.compile(r'^(.+?)\s+@([A-Za-z0-9_]+)\s*$')
    link_block_re = re.compile(r'^\[\]\(https?://')

    def is_recent_time(s: str) -> bool:
        # Relative timestamps such as "2h", "35m", "10s"
        return bool(re.fullmatch(r'\d+\s*[hmsHMS]', s))

    def strip_trailing_counters(lines_in):
        # Drop trailing engagement counters (reply/retweet/quote/like counts),
        # but only when at least two numeric lines end the block.
        j = len(lines_in) - 1
        count = 0
        while j >= 0 and re.fullmatch(r'\d{1,4}(,\d{3})*', lines_in[j]):
            count += 1
            j -= 1
            if count >= 5:
                break
        if count >= 2:
            return lines_in[:j+1]
        return lines_in

    posts = []
    i = 0
    n = len(lines)

    while i < n:
        # A post starts with a "Display Name @handle" line
        m = name_handle_re.match(lines[i])
        if not m:
            i += 1
            continue

        name = m.group(1).strip()
        handle = '@' + m.group(2).strip()

        # Skip blank lines between the header and the timestamp
        i += 1
        while i < n and lines[i] == '':
            i += 1
        if i >= n:
            break

        # Only keep posts with a recent relative timestamp
        publish_time = lines[i]
        if not is_recent_time(publish_time):
            i += 1
            continue

        i += 1

        # Collect body lines until the next post header or a bare link block
        body_lines = []
        while i < n:
            line = lines[i]
            if name_handle_re.match(line):
                break
            if link_block_re.match(line):
                break
            body_lines.append(line)
            i += 1

        body_lines = strip_trailing_counters([ln for ln in body_lines if ln != ''])
        body = "\n".join(body_lines).strip()

        if body:
            posts.append({
                "name": name,
                "handle": handle,
                "time": publish_time,
                "post": body
            })

        # Skip any trailing bare link blocks
        while i < n and link_block_re.match(lines[i]):
            i += 1

    return posts

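# parse_posts() assumes the cleaned Nitter profile text looks roughly like
# this per post (an illustrative sketch, not a captured sample):
#   Some Name @somehandle
#   2h
#   First line of the post
#   Second line of the post
#   1,024
#   256
#   [](https://nitter.example/somehandle/status/123)
# The trailing numeric counters are stripped (when two or more end the block)
# and the bare link block is skipped.
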
def format_posts(posts):
    sep = "-" * 50
    out = []
    for idx, p in enumerate(posts):
        out.append(f"{p['name']} {p['handle']}")
        out.append(p['time'])
        out.append(p['post'])
        if idx != len(posts) - 1:
            out.append(sep)
    return "\n".join(out).strip()

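# Output from format_posts() looks like (illustrative):
#   Some Name @somehandle
#   2h
#   Post body...
#   --------------------------------------------------
#   Next Name @nexthandle
#   ...
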
def fetch_posts_for_user(domain: str, username: str, wait_for=350):
    url = build_nitter_profile_url(domain, username)
    resp = browse([url], wait_for=wait_for)
    text = extract_text_from_response(resp)
    posts = parse_posts(text)
    return posts

def build_output_text():
    # Accounts to track (17 handles)
    usernames = [
        "zerohedge",
        "lisaabramowicz1",
        "elerianm",
        "jsblokland",
        "AndreasSteno",
        "charliebilello",
        "GameofTrades_",
        "SantiagoAuFund",
        "DylanLeClair_",
        "Ole_S_Hansen",
        "NickTimiraos",
        "federalreserve",
        "POTUS",
        "WhiteHouse",
        "USTreasury",
        "Reuters",
        "BloombergTV",
    ]

    domain = get_current_nitter_domain()

    outputs = []
    for uname in usernames:
        uname_clean = uname.lstrip("@")
        try:
            posts = fetch_posts_for_user(domain, uname_clean, wait_for=350)
            header = f"=== @{uname_clean} ==="
            if posts:
                outputs.append(header)
                outputs.append(format_posts(posts))
            else:
                outputs.append(f"{header}\nNo recent posts found.")
        except Exception as e:
            outputs.append(f"=== @{uname_clean} ===\nError: {e}")

    # Append a timestamp footer to help trace runs
    # (datetime.utcnow() is deprecated on Python 3.12+; kept for the
    # naive-UTC + manual "Z" formatting used here)
    outputs.append("")
    outputs.append(f"Last update: {datetime.utcnow().isoformat()}Z")

    return "\n\n".join(outputs)

def save_to_github_twiter_json(text_output: str):
    # Stored as a JSON object, e.g. {"twiter": "<text here>"}
    # ("twiter" spelling matches the db_twiter module and its stored key)
    payload_obj = {"twiter": text_output}
    new_content = json.dumps(payload_obj, ensure_ascii=False)

    token, commit_oid = fetch_authenticity_token_and_commit_oid()
    if not token or not commit_oid:
        return {"success": False, "message": "Failed to retrieve authenticity token or commit OID."}

    res = update_user_json_file(token, commit_oid, new_content)
    return res

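# The db_twiter helpers are assumed (from their use here and in index() below)
# to return dicts shaped like {"success": bool, "message": str, ...}, with
# fetch_json_from_github() returning {"success": True, "data": {"twiter": "<text>"}}
# on success.
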
# Job that fetches all accounts and saves the result
def run_job():
    try:
        text = build_output_text()
        result = save_to_github_twiter_json(text)
        return result
    except Exception as e:
        return {"success": False, "message": f"Job failed: {e}"}

# Flask app
app = Flask(__name__)

@app.route("/", methods=["GET"])
def index():
    # Return the latest content from GitHub
    data = fetch_json_from_github()
    if data.get("success") and isinstance(data.get("data"), dict):
        tw_text = data["data"].get("twiter", "")
        return Response(tw_text, mimetype="text/plain; charset=utf-8")
    return jsonify(data), 500

@app.route("/run", methods=["POST"])
def run_now():
    res = run_job()
    status = 200 if res.get("success") else 500
    return jsonify(res), status

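# Trigger a fetch-and-save manually (illustrative; adjust host/port to the
# deployment):
#   curl -X POST http://localhost:7860/run
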
def schedule_jobs():
    scheduler = BackgroundScheduler(timezone="UTC")

    # Run at minute 0 and 30 of every hour (UTC)
    scheduler.add_job(run_job, "cron", minute="0,30", id="twiter_fetch_save")

    scheduler.start()
    return scheduler

if __name__ == "__main__":
    # Start scheduler
    schedule_jobs()

    # Optional: run once on startup
    try:
        run_job()
    except Exception:
        pass

    # Run Flask on port 7860 (the standard Hugging Face Spaces port)
    app.run(host="0.0.0.0", port=7860)