import os

from flask import Flask, render_template, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = Flask(__name__)

# Define cache directory (cache_dir is also passed explicitly below,
# since HF_HOME is set after transformers has been imported)
cache_dir = "/app/cache"
os.environ["HF_HOME"] = cache_dir

# Load Myanmarsar-GPT (1.42B params) from Hugging Face
MODEL_NAME = "simbolo-ai/Myanmarsar-GPT"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=cache_dir)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, cache_dir=cache_dir)

# Function to generate chatbot responses
def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(**inputs, max_length=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Serve the HTML page
@app.route("/")
def home():
    return render_template("index.html")

# API route for chatbot responses
@app.route("/chat", methods=["POST"])
def chat():
    try:
        if not request.is_json:
            print("Error: Request is not JSON")
            return jsonify({"error": "Request must be JSON"}), 415

        data = request.get_json()
        user_message = data.get("message", "")

        if not user_message:
            print("Error: No message received")
            return jsonify({"error": "No message provided"}), 400

        print(f"Received message: {user_message}")
        bot_reply = generate_response(user_message)
        print(f"AI response: {bot_reply}")

        return jsonify({"reply": bot_reply})

    except Exception as e:
        print(f"Error processing request: {e}")
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))  # Default to 7860, but use any assigned port
    app.run(host="0.0.0.0", port=port)
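
# Example client call for the /chat endpoint above -- a minimal sketch,
# assuming the server is running locally on the default port 7860
# (the endpoint expects a JSON body with a "message" key and returns
# a JSON body with a "reply" key, per the chat() handler above):
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello"}'
#
# Or from Python, using the third-party requests library:
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "Hello"},
#   )
#   print(resp.json()["reply"])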