"""Flask API serving a collection of Hugging Face models.

Models are loaded lazily on first request, cached in memory, and run on CPU.
"""

from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import pipeline
import logging
import torch
import os

app = Flask(__name__)
CORS(app)

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Registry of model configurations. Entries without an explicit
# "hf_model_name" resolve to the Hugging Face repo "Lyon28/<model-id>"
# (see get_model_pipeline below).
model_info = {
    "Albert-Base-V2": {
        "task": "fill-mask",
        "description": "Lyon28 Albert-Base-V2"
    },
    "GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 GPT-2"
    },
    "Tinny-Llama": {
        "task": "text-generation",
        "description": "Lyon28 Tinny-Llama"
    },
    "Electra-Small": {
        "task": "fill-mask",
        "description": "Lyon28 Small ELECTRA"
    },
    "GPT-2-Tinny": {
        "task": "text-generation",
        "description": "Lyon28 Tiny GPT-2"
    },
    "Bert-Tinny": {
        "task": "fill-mask",
        "description": "Lyon28 Tiny BERT"
    },
    "Distilbert-Base-Uncased": {
        "task": "fill-mask",
        "description": "Lyon28 Distilled BERT"
    },
    "Pythia": {
        "task": "text-generation",
        "description": "Lyon28 Pythia"
    },
    "T5-Small": {
        "task": "text2text-generation",
        "description": "Lyon28 Small T5"
    },
    "GPT-Neo": {
        "task": "text-generation",
        "description": "Lyon28 GPT-Neo"
    },
    "Distil-GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 Distilled GPT-2"
    },
    "training-models": {
        "task": "text-generation",
        "description": "Lyon28 training-models"
    },

    "TinyLLama-NSFW-Chatbot": {
        "task": "text-generation",
        "description": "BilalRahib's TinyLLama NSFW Chatbot",
        "hf_model_name": "bilalRahib/TinyLLama-NSFW-Chatbot"
    },
    "whisper-large-v3": {
        # Note: ASR pipelines expect audio input; the text-only
        # /api/<model_id> endpoint below cannot serve this model as-is.
        "task": "automatic-speech-recognition",
        "description": "openai whisper-large-v3",
        "hf_model_name": "openai/whisper-large-v3"
    },
    "Nusantara-4b-Indo-Chat": {
        "task": "text-generation",
        "description": "kalisai Nusantara-4b-Indo-Chat",
        "hf_model_name": "kalisai/Nusantara-4b-Indo-Chat"
    },
    "lb-reranker-0.5B-v1.0": {
        "task": "text-generation",
        "description": "lightblue lb-reranker-0.5B-v1.0",
        "hf_model_name": "lightblue/lb-reranker-0.5B-v1.0"
    },
    "harry-potter-gpt2": {
        "task": "text-generation",
        "description": "akahana harry-potter-gpt2",
        "hf_model_name": "akahana/harry-potter-gpt2"
    },
    "Sailor2-1B-Chat": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Chat",
        "hf_model_name": "sail/Sailor2-1B-Chat"
    },
    "gpt2-indo-textgen": {
        "task": "text-generation",
        "description": "anugrahap gpt2-indo-textgen",
        "hf_model_name": "anugrahap/gpt2-indo-textgen"
    },
    "cendol-mt5-small-inst": {
        # mT5 is an encoder-decoder model, so it needs the seq2seq pipeline.
        "task": "text2text-generation",
        "description": "indonlp cendol-mt5-small-inst",
        "hf_model_name": "indonlp/cendol-mt5-small-inst"
    },
    "Sailor2-1B-Pre": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Pre",
        "hf_model_name": "sail/Sailor2-1B-Pre"
    },
    "gemini-small": {
        "task": "text-generation",
        "description": "describeai gemini-small",
        "hf_model_name": "describeai/gemini-small"
    }
}

# Cache of already-loaded pipelines, keyed by model id.
models = {}

def get_model_pipeline(model_name):
    """
    Load a model only if it has not been loaded yet (lazy loading).
    Returns the pipeline for the requested model.
    """
    if model_name not in models:
        logger.info(f"Model '{model_name}' not loaded yet. Loading now...")
        if model_name not in model_info:
            logger.error(f"No entry for model '{model_name}' in model_info.")
            raise ValueError(f"Unknown model: '{model_name}'.")

        info = model_info[model_name]
        # Fall back to the Lyon28 namespace when no explicit HF path is set.
        hf_model_path = info.get("hf_model_name", f"Lyon28/{model_name}")
        try:
            models[model_name] = pipeline(
                info["task"],
                model=hf_model_path,
                device="cpu",
                torch_dtype=torch.float32
            )
            logger.info(f"✅ Model '{model_name}' (path: {hf_model_path}) loaded successfully.")
        except Exception as e:
            logger.error(f"❌ Failed to load model '{model_name}' (path: {hf_model_path}): {str(e)}", exc_info=True)
            raise RuntimeError(f"Failed to load model: {model_name}. Details: {str(e)}") from e
    return models[model_name]
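
# Illustrative use of the lazy-loading cache (a sketch, not part of the API):
# the first call downloads and loads the weights, later calls hit the cache.
#   pipe = get_model_pipeline("GPT-2")  # slow: loads the model
#   pipe = get_model_pipeline("GPT-2")  # fast: returned from the cache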


@app.route('/')
def home():
    """Root endpoint reporting API status."""
    return jsonify({
        "message": "Flask API for Hugging Face models",
        "status": "online",
        "loaded_models_count": len(models),
        "available_model_configs": list(model_info.keys()),
        "info": "Use /api/models for the list of available models."
    })


@app.route('/api/models', methods=['GET'])
def list_available_models():
    """Return every configured model, including its load status."""
    available_models_data = [
        {
            "id": name,
            "name": info["description"],
            "task": info["task"],
            "status": "loaded" if name in models else "not_loaded",
            "endpoint": f"/api/{name}"
        }
        for name, info in model_info.items()
    ]
    return jsonify({
        "total_configured_models": len(model_info),
        "currently_loaded_models": len(models),
        "models": available_models_data
    })
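
# Illustrative response shape for GET /api/models (values are examples only):
#   {
#     "total_configured_models": 22,
#     "currently_loaded_models": 0,
#     "models": [
#       {"id": "GPT-2", "name": "Lyon28 GPT-2", "task": "text-generation",
#        "status": "not_loaded", "endpoint": "/api/GPT-2"},
#       ...
#     ]
#   }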


@app.route('/api/<model_id>', methods=['POST'])
def predict_with_model(model_id):
    """
    Main prediction endpoint.
    Accepts 'inputs' (a pre-formatted prompt string) and an optional
    'parameters' dictionary.
    """
    logger.info(f"Received request for model: {model_id}")
    if model_id not in model_info:
        logger.warning(f"Request for unknown model: {model_id}")
        return jsonify({"error": f"Unknown model '{model_id}'. See /api/models for the available list."}), 404

    try:
        model_pipeline = get_model_pipeline(model_id)
        model_task = model_info[model_id]["task"]

        # get_json(silent=True) avoids raising on a missing or invalid JSON body.
        data = request.get_json(silent=True) or {}

        full_prompt_string_from_frontend = data.get('inputs', '')
        parameters = data.get('parameters', {})

        if not full_prompt_string_from_frontend:
            return jsonify({"error": "Input 'inputs' (full prompt string) must not be empty."}), 400

        logger.info(f"Inference: Model='{model_id}', Task='{model_task}', Full Prompt='{full_prompt_string_from_frontend[:200]}...', Params='{parameters}'")

        result = []

        if model_task == "text-generation":
            gen_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
                "return_full_text": parameters.get("return_full_text", False),
                "num_return_sequences": parameters.get("num_return_sequences", 1),
                "top_k": parameters.get("top_k", 50),
                "top_p": parameters.get("top_p", 0.95),
                "repetition_penalty": parameters.get("repetition_penalty", 1.2),
            }
            result = model_pipeline(full_prompt_string_from_frontend, **gen_params)

        elif model_task == "fill-mask":
            mask_params = {
                "top_k": parameters.get("top_k", 5)
            }
            result = model_pipeline(full_prompt_string_from_frontend, **mask_params)

        elif model_task == "text2text-generation":
            t2t_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
            }
            result = model_pipeline(full_prompt_string_from_frontend, **t2t_params)

        else:
            # Fallback for any other task: pass parameters through unchanged.
            result = model_pipeline(full_prompt_string_from_frontend, **parameters)

        # Normalize the pipeline output into a compact response payload.
        response_output = {}
        if model_task in ("text-generation", "text2text-generation"):
            if result and len(result) > 0 and 'generated_text' in result[0]:
                response_output['text'] = result[0]['generated_text'].strip()
            else:
                response_output['text'] = "[No text generated or unexpected output format.]"
        elif model_task == "fill-mask":
            response_output['predictions'] = [
                {"sequence": p.get('sequence', ''), "score": p.get('score', 0.0), "token_str": p.get('token_str', '')}
                for p in result
            ]
        else:
            response_output = result

        logger.info(f"Inference succeeded for '{model_id}'. Output preview: '{str(response_output)[:200]}'")
        return jsonify({"model": model_id, "inputs": full_prompt_string_from_frontend, "outputs": response_output})

    except ValueError as ve:
        logger.error(f"Validation or configuration error for model '{model_id}': {str(ve)}")
        return jsonify({"error": str(ve), "message": "Model configuration or input error."}), 400
    except RuntimeError as rte:
        # Named 'rte' to avoid shadowing the standard 're' module name.
        logger.error(f"Runtime error while loading model '{model_id}': {str(rte)}")
        return jsonify({"error": str(rte), "message": "Model failed to load."}), 503
    except Exception as e:
        logger.error(f"Unexpected error during prediction with model '{model_id}': {str(e)}", exc_info=True)
        return jsonify({"error": str(e), "message": "Internal server error."}), 500


@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint."""
    return jsonify({"status": "healthy", "loaded_models_count": len(models), "message": "API is running normally."})


if __name__ == '__main__':
    # HOST and PORT can be overridden via environment variables.
    app.run(host=os.getenv('HOST', '0.0.0.0'), port=int(os.getenv('PORT', '7860')), debug=False)
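
# One way to launch the server (assumes this module is saved as app.py; the
# filename is an assumption, adjust as needed):
#   HOST=0.0.0.0 PORT=7860 python app.py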