# app.py
"""Flask API that serves Hugging Face models through lazily loaded pipelines.

Endpoints:
    GET  /                 API status and the list of configured model ids.
    GET  /api/models       Configured models with their load status.
    POST /api/<model_id>   Run inference with one configured model.
    GET  /health           Health check.
"""

import logging
import os  # Environment variables, e.g. on Hugging Face Spaces

import torch
from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import pipeline

app = Flask(__name__)
CORS(app)  # Enable CORS so the frontend is allowed to call this API

# --- Logging setup ---
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# --- Model configuration ---
# Add 'hf_model_name' when the Hugging Face repository name differs from the
# id used as the key here. Without it, the path defaults to "Lyon28/<id>".
model_info = {
    "Albert-Base-V2": {
        "task": "fill-mask",
        "description": "Lyon28 Albert-Base-V2"
    },
    "GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 GPT-2"
    },
    "Tinny-Llama": {
        "task": "text-generation",
        "description": "Lyon 28 Tinny Llama"
    },
    "Electra-Small": {
        "task": "fill-mask",
        "description": "Lyon28 Small ELECTRA"
    },
    "GPT-2-Tinny": {
        "task": "text-generation",
        "description": "Lyon28 Tiny GPT-2"
    },
    "Bert-Tinny": {
        "task": "fill-mask",
        "description": "Lyon28 Tiny BERT"
    },
    "Distilbert-Base-Uncased": {
        "task": "fill-mask",
        "description": "Lyon28 Distilled BERT"
    },
    "Pythia": {
        "task": "text-generation",
        "description": "Lyon28 Pythia"
    },
    "T5-Small": {
        "task": "text2text-generation",
        "description": "Lyon28 Small T5"
    },
    "GPT-Neo": {
        "task": "text-generation",
        "description": "Lyon28 GPT-Neo"
    },
    "Distil-GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 Distilled GPT-2"
    },
    "training-models": {
        "task": "text-generation",
        "description": "Lyon28 training-models"
    },
    # --- External models ---
    "TinyLLama-NSFW-Chatbot": {
        "task": "text-generation",
        "description": "BilalRahib's TinyLLama NSFW Chatbot",
        "hf_model_name": "bilalRahib/TinyLLama-NSFW-Chatbot"
    },
    "whisper-large-v3": {
        "task": "automatic-speech-recognition",
        "description": "openai whisper-large-v3",
        "hf_model_name": "openai/whisper-large-v3"
    },
    "Nusantara-4b-Indo-Chat": {
        "task": "text-generation",
        "description": "kalisai Nusantara-4b-Indo-Chat",
        "hf_model_name": "kalisai/Nusantara-4b-Indo-Chat"
    },
    "lb-reranker-0.5B-v1.0": {
        "task": "text-generation",
        "description": "lightblue lb-reranker-0.5B-v1.0",
        "hf_model_name": "lightblue/lb-reranker-0.5B-v1.0"
    },
    "harry-potter-gpt2": {
        "task": "text-generation",
        "description": "akahana harry-potter-gpt2",
        "hf_model_name": "akahana/harry-potter-gpt2"
    },
    "Sailor2-1B-Chat": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Chat",
        "hf_model_name": "sail/Sailor2-1B-Chat"
    },
    "gpt2-indo-textgen": {
        "task": "text-generation",
        "description": "anugrahap gpt2-indo-textgen",
        "hf_model_name": "anugrahap/gpt2-indo-textgen"
    },
    # NOTE(review): the name suggests an mT5 (encoder-decoder) checkpoint,
    # which would normally need "text2text-generation" - confirm the task.
    "cendol-mt5-small-inst": {
        "task": "text-generation",
        "description": "indonlp cendol-mt5-small-inst",
        "hf_model_name": "indonlp/cendol-mt5-small-inst"
    },
    "Sailor2-1B-Pre": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Pre",
        "hf_model_name": "sail/Sailor2-1B-Pre"
    },
    "gemini-small": {
        "task": "text-generation",
        "description": "describeai gemini-small",
        "hf_model_name": "describeai/gemini-small"
    }
}

# --- Lazy loading cache: model id -> loaded pipeline ---
models = {}


def get_model_pipeline(model_name):
    """Return the pipeline for ``model_name``, loading it on first use.

    Loaded pipelines are cached in the module-level ``models`` dict, so each
    model is only loaded once per process.

    Args:
        model_name: A key of ``model_info``.

    Returns:
        The pipeline object created for the model.

    Raises:
        ValueError: If ``model_name`` is not configured in ``model_info``.
        RuntimeError: If creating the pipeline fails; the original exception
            is chained as the cause.
    """
    if model_name not in models:
        logger.info(f"Model '{model_name}' belum dimuat. Memuat sekarang...")
        if model_name not in model_info:
            logger.error(f"Informasi model '{model_name}' tidak ditemukan di model_info.")
            raise ValueError(f"Model '{model_name}' tidak dikenal.")

        info = model_info[model_name]
        # Resolved outside the try block so the except handler can always
        # reference it.
        hf_model_path = info.get("hf_model_name", f"Lyon28/{model_name}")
        try:
            models[model_name] = pipeline(
                info["task"],
                model=hf_model_path,
                device="cpu",
                torch_dtype=torch.float32
            )
            logger.info(f"✅ Model '{model_name}' (Path: {hf_model_path}) berhasil dimuat.")
        except Exception as e:
            logger.error(
                f"❌ Gagal memuat model '{model_name}' (Path: {hf_model_path}): {str(e)}",
                exc_info=True
            )
            raise RuntimeError(f"Gagal memuat model: {model_name}. Detail: {str(e)}") from e

    return models[model_name]


def _run_inference(model_pipeline, model_task, prompt, parameters):
    """Call ``model_pipeline`` on ``prompt`` with task-specific parameters.

    For the known tasks only a fixed set of parameters is forwarded, each
    with a default; for any other task ``parameters`` is forwarded as-is.
    """
    if model_task == "text-generation":
        gen_params = {
            "max_new_tokens": parameters.get("max_new_tokens", 150),
            "temperature": parameters.get("temperature", 0.7),
            "do_sample": parameters.get("do_sample", True),
            # Very important for chatbots: return only the newly generated text
            "return_full_text": parameters.get("return_full_text", False),
            "num_return_sequences": parameters.get("num_return_sequences", 1),
            "top_k": parameters.get("top_k", 50),
            "top_p": parameters.get("top_p", 0.95),
            "repetition_penalty": parameters.get("repetition_penalty", 1.2),
        }
        # The full prompt string from the frontend goes straight to the pipeline
        return model_pipeline(prompt, **gen_params)

    if model_task == "fill-mask":
        mask_params = {
            "top_k": parameters.get("top_k", 5)
        }
        # fill-mask expects a plain string, not a complex chat prompt; the
        # frontend has to make sure it does not send one to these models.
        return model_pipeline(prompt, **mask_params)

    if model_task == "text2text-generation":
        t2t_params = {
            "max_new_tokens": parameters.get("max_new_tokens", 150),
            "temperature": parameters.get("temperature", 0.7),
            "do_sample": parameters.get("do_sample", True),
        }
        return model_pipeline(prompt, **t2t_params)

    return model_pipeline(prompt, **parameters)


def _format_output(model_task, result):
    """Convert a raw pipeline ``result`` into the API's response payload."""
    if model_task in ("text-generation", "text2text-generation"):
        if result and 'generated_text' in result[0]:
            return {'text': result[0]['generated_text'].strip()}
        return {'text': "[Tidak ada teks yang dihasilkan atau format tidak sesuai.]"}

    if model_task == "fill-mask":
        return {
            'predictions': [
                {
                    "sequence": p.get('sequence', ''),
                    "score": p.get('score', 0.0),
                    "token_str": p.get('token_str', '')
                }
                for p in result
            ]
        }

    # Other tasks: return the pipeline output unchanged
    return result


# --- API routes ---
@app.route('/')
def home():
    """Root endpoint reporting API status."""
    return jsonify({
        "message": "Flask API untuk Model Hugging Face",
        "status": "online",
        "loaded_models_count": len(models),
        "available_model_configs": list(model_info.keys()),
        "info": "Gunakan /api/models untuk daftar model yang tersedia."
    })


@app.route('/api/models', methods=['GET'])
def list_available_models():
    """Return every configured model together with its load status."""
    available_models_data = [
        {
            "id": name,
            "name": info["description"],
            "task": info["task"],
            # Whether the model has already been loaded via lazy loading
            "status": "loaded" if name in models else "not_loaded",
            "endpoint": f"/api/{name}"
        }
        for name, info in model_info.items()
    ]
    return jsonify({
        "total_configured_models": len(model_info),
        "currently_loaded_models": len(models),
        "models": available_models_data
    })


# The URL rule must declare <model_id>: the view function requires that
# argument, and /api/models advertises each endpoint as "/api/<id>".
@app.route('/api/<model_id>', methods=['POST'])
def predict_with_model(model_id):
    """Main prediction endpoint.

    Expects a JSON object body with:
        inputs: Non-empty pre-formatted prompt string (required).
        parameters: Optional dictionary of inference parameters.

    Returns:
        A JSON response ``{"model", "inputs", "outputs"}`` on success, or a
        JSON error with status 400 (invalid request or ValueError), 404
        (unknown model), 503 (RuntimeError, e.g. model failed to load) or
        500 (any other error).
    """
    logger.info(f"Menerima permintaan untuk model: {model_id}")

    if model_id not in model_info:
        logger.warning(f"Permintaan untuk model tidak dikenal: {model_id}")
        return jsonify({"error": f"Model '{model_id}' tidak dikenal. Lihat /api/models untuk daftar yang tersedia."}), 404

    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so the client gets a clean 400 below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Body permintaan harus berupa objek JSON."}), 400

        # The input is expected to be the full prompt string from the frontend
        full_prompt_string_from_frontend = data.get('inputs', '')
        parameters = data.get('parameters', {})

        if not full_prompt_string_from_frontend:
            return jsonify({"error": "Input 'inputs' (full prompt string) tidak boleh kosong."}), 400
        if not isinstance(full_prompt_string_from_frontend, str):
            return jsonify({"error": "Input 'inputs' harus berupa string."}), 400
        if not isinstance(parameters, dict):
            return jsonify({"error": "'parameters' harus berupa objek JSON (dictionary)."}), 400

        # Load the model only after the request has been validated, so an
        # invalid request never triggers an expensive model load.
        model_pipeline = get_model_pipeline(model_id)
        model_task = model_info[model_id]["task"]

        logger.info(f"Inferensi: Model='{model_id}', Task='{model_task}', Full Prompt='{full_prompt_string_from_frontend[:200]}...', Params='{parameters}'")

        result = _run_inference(
            model_pipeline, model_task, full_prompt_string_from_frontend, parameters
        )
        response_output = _format_output(model_task, result)

        logger.info(f"Inferensi berhasil untuk '{model_id}'. Output singkat: '{str(response_output)[:200]}'")
        return jsonify({"model": model_id, "inputs": full_prompt_string_from_frontend, "outputs": response_output})

    except ValueError as ve:
        logger.error(f"Validasi atau konfigurasi error untuk model '{model_id}': {str(ve)}")
        return jsonify({"error": str(ve), "message": "Kesalahan konfigurasi atau input model."}), 400
    except RuntimeError as runtime_err:
        logger.error(f"Error runtime saat memuat model '{model_id}': {str(runtime_err)}")
        return jsonify({"error": str(runtime_err), "message": "Model gagal dimuat."}), 503
    except Exception as e:
        logger.error(f"Terjadi kesalahan tak terduga saat memprediksi dengan model '{model_id}': {str(e)}", exc_info=True)
        return jsonify({"error": str(e), "message": "Terjadi kesalahan internal server."}), 500


@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint."""
    return jsonify({"status": "healthy", "loaded_models_count": len(models), "message": "API berfungsi normal."})


# --- Run the application ---
if __name__ == '__main__':
    # On Hugging Face Spaces the port is usually 7860.
    # HOST and PORT come from environment variables when set; HOST defaults
    # to 0.0.0.0. debug=False for production.
    app.run(host=os.getenv('HOST', '0.0.0.0'), port=int(os.getenv('PORT', 7860)), debug=False)