# app.py
from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import pipeline
import logging
import torch
import os  # For reading environment variables, e.g. on Hugging Face Spaces

app = Flask(__name__)
CORS(app)  # Enable CORS so your frontend can call this API from another origin

# --- Setup Logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Model Configuration ---
# Add 'hf_model_name' when the model's name on Hugging Face differs from the ID exposed here.
# If the Hugging Face name matches the ID, 'hf_model_name' is not needed.
model_info = {
    "Albert-Base-V2": {
        "task": "fill-mask", 
        "description": "Lyon28 Albert-Base-V2"
    },
    "GPT-2": {
        "task": "text-generation", 
        "description": "Lyon28 GPT-2"
    },
    "Tinny-Llama": {
        "task": "text-generation",
        "description": "Lyon 28 Tinny Llama"
    },
    "Electra-Small": {
        "task": "fill-mask", 
        "description": "Lyon28 Small ELECTRA"
    },
    "GPT-2-Tinny": {
        "task": "text-generation",
        "description": "Lyon28 Tiny GPT-2"
    },
    "Bert-Tinny": {
        "task": "fill-mask", 
        "description": "Lyon28 Tiny BERT"
    },
    "Distilbert-Base-Uncased": {
        "task": "fill-mask",
        "description": "Lyon28 Distilled BERT"
    },
    "Pythia": {
        "task": "text-generation",
        "description": "Lyon28 Pythia"
    },
    "T5-Small": {
        "task": "text2text-generation", 
        "description": "Lyon28 Small T5"
    },
    "GPT-Neo": {
        "task": "text-generation",
        "description": "Lyon28 GPT-Neo"
    },
    "Distil-GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 Distilled GPT-2"
    },
    "training-models": {
        "task": "text-generation",
        "description": "Lyon28 training-models"
    },
    # --- External models ---
    "TinyLLama-NSFW-Chatbot": {
        "task": "text-generation",
        "description": "BilalRahib's TinyLLama NSFW Chatbot",
        "hf_model_name": "bilalRahib/TinyLLama-NSFW-Chatbot"
    },
    "whisper-large-v3": {
        "task": "automatic-speech-recognition",
        "description": "openai whisper-large-v3",
        "hf_model_name": "openai/whisper-large-v3"
    },
    "Nusantara-4b-Indo-Chat": {
        "task": "text-generation",
        "description": "kalisai Nusantara-4b-Indo-Chat",
        "hf_model_name": "kalisai/Nusantara-4b-Indo-Chat"
    },
   "lb-reranker-0.5B-v1.0": {
        "task": "text-generation",
        "description": "lightblue lb-reranker-0.5B-v1.0",
        "hf_model_name": "lightblue/lb-reranker-0.5B-v1.0"
    },
   "harry-potter-gpt2": {
        "task": "text-generation",
        "description": "akahana harry-potter-gpt2",
        "hf_model_name": "akahana/harry-potter-gpt2"
    },
    "Sailor2-1B-Chat": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Chat",
        "hf_model_name": "sail/Sailor2-1B-Chat"
    },
    "gpt2-indo-textgen": {
        "task": "text-generation",
        "description": "anugrahap gpt2-indo-textgen",
        "hf_model_name": "anugrahap/gpt2-indo-textgen"
    },
    "cendol-mt5-small-inst": {
        "task": "text-generation",
        "description": "indonlp cendol-mt5-small-inst",
        "hf_model_name": "indonlp/cendol-mt5-small-inst"
    },
    "Sailor2-1B-Pre": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Pre",
        "hf_model_name": "sail/Sailor2-1B-Pre"
    },
    "gemini-small": {
        "task": "text-generation",
        "description": "describeai gemini-small",
        "hf_model_name": "describeai/gemini-small"
    }
}
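
# A quick sketch of how an ID above resolves to a Hugging Face path (the same
# rule get_model_pipeline() applies below); both example IDs come from the
# configuration above:
#
#   "GPT-2"            -> "Lyon28/GPT-2"              (default "Lyon28/<id>" prefix)
#   "whisper-large-v3" -> "openai/whisper-large-v3"   (explicit 'hf_model_name' override)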

# --- Lazy Loading ---
models = {}  # cache of loaded pipelines, keyed by model ID

# --- Lazy-loading utility ---
def get_model_pipeline(model_name):
    """
    Memuat model hanya jika belum dimuat (lazy loading).
    Mengembalikan pipeline model yang diminta.
    """
    if model_name not in models:
        logger.info(f"Model '{model_name}' belum dimuat. Memuat sekarang...")
        if model_name not in model_info:
            logger.error(f"Informasi model '{model_name}' tidak ditemukan di model_info.")
            raise ValueError(f"Model '{model_name}' tidak dikenal.")

        info = model_info[model_name]
        # External models override the default "Lyon28/<id>" path via 'hf_model_name'.
        hf_model_path = info.get("hf_model_name", f"Lyon28/{model_name}")
        try:
            models[model_name] = pipeline(
                info["task"],
                model=hf_model_path,
                device="cpu",
                torch_dtype=torch.float32
            )
            logger.info(f"✅ Model '{model_name}' (path: {hf_model_path}) loaded successfully.")
        except Exception as e:
            logger.error(f"❌ Failed to load model '{model_name}' (path: {hf_model_path}): {str(e)}", exc_info=True)
            raise RuntimeError(f"Failed to load model: {model_name}. Details: {str(e)}") from e
    return models[model_name]
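
# Optional warm-up sketch (assumption: PRELOAD_MODELS is a comma-separated
# environment variable you define yourself, e.g. PRELOAD_MODELS="GPT-2-Tinny,Bert-Tinny");
# it trades a slower startup for a fast first request. With the variable unset,
# this loop does nothing and all models stay lazily loaded.
for _name in os.getenv("PRELOAD_MODELS", "").split(","):
    _name = _name.strip()
    if _name in model_info:
        try:
            get_model_pipeline(_name)
        except RuntimeError:
            logger.warning(f"Warm-up failed for '{_name}'; it will retry lazily on first request.")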

# --- API Routes ---

@app.route('/')
def home():
    """Endpoint root untuk status API."""
    return jsonify({
        "message": "Flask API untuk Model Hugging Face",
        "status": "online",
        "loaded_models_count": len(models),
        "available_model_configs": list(model_info.keys()),
        "info": "Gunakan /api/models untuk daftar model yang tersedia."
    })

@app.route('/api/models', methods=['GET'])
def list_available_models():
    """Mengembalikan daftar semua model yang dikonfigurasi, termasuk status muatan."""
    available_models_data = [
        {
            "id": name,
            "name": info["description"],
            "task": info["task"],
            "status": "loaded" if name in models else "not_loaded", # Menunjukkan apakah sudah dimuat via lazy loading
            "endpoint": f"/api/{name}"
        }
        for name, info in model_info.items()
    ]
    return jsonify({
        "total_configured_models": len(model_info),
        "currently_loaded_models": len(models),
        "models": available_models_data
    })
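
# Example (a sketch; the counts depend on your configuration and on what has
# been loaded so far, and localhost:7860 assumes the default port set at the bottom):
#
#   curl http://localhost:7860/api/models
#   -> {"total_configured_models": 22, "currently_loaded_models": 0, "models": [...]}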

@app.route('/api/<model_id>', methods=['POST'])
def predict_with_model(model_id):
    """
    Endpoint utama untuk prediksi model.
    Menerima 'inputs' (teks pra-diformat) dan 'parameters' (dictionary) opsional.
    """
    logger.info(f"Menerima permintaan untuk model: {model_id}")
    if model_id not in model_info:
        logger.warning(f"Permintaan untuk model tidak dikenal: {model_id}")
        return jsonify({"error": f"Model '{model_id}' tidak dikenal. Lihat /api/models untuk daftar yang tersedia."}), 404

    try:
        model_pipeline = get_model_pipeline(model_id)  # loads the model on first use
        model_task = model_info[model_id]["task"]

        data = request.get_json(silent=True) or {}  # tolerate missing or invalid JSON bodies
        # The input is expected to be the fullPromptString sent by the frontend
        full_prompt_string_from_frontend = data.get('inputs', '')
        parameters = data.get('parameters', {})

        if not full_prompt_string_from_frontend:
            return jsonify({"error": "Input 'inputs' (full prompt string) tidak boleh kosong."}), 400

        logger.info(f"Inferensi: Model='{model_id}', Task='{model_task}', Full Prompt='{full_prompt_string_from_frontend[:200]}...', Params='{parameters}'")

        result = []
        # --- Parameter handling and inference by task type ---
        if model_task == "text-generation":
            gen_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
                "return_full_text": parameters.get("return_full_text", False), # Sangat penting untuk chatbot
                "num_return_sequences": parameters.get("num_return_sequences", 1),
                "top_k": parameters.get("top_k", 50),
                "top_p": parameters.get("top_p", 0.95),
                "repetition_penalty": parameters.get("repetition_penalty", 1.2),
            }
            # Pass full_prompt_string_from_frontend straight to the pipeline
            result = model_pipeline(full_prompt_string_from_frontend, **gen_params)

        elif model_task == "fill-mask":
            mask_params = {
                "top_k": parameters.get("top_k", 5)
            }
            # For fill-mask the input must be a plain string containing the mask token,
            # not a complex prompt; make sure the frontend does not send chat-style prompts here
            result = model_pipeline(full_prompt_string_from_frontend, **mask_params)

        elif model_task == "text2text-generation":
            t2t_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
            }
            result = model_pipeline(full_prompt_string_from_frontend, **t2t_params)

        else:
            # Generic fallback. Note: audio tasks such as automatic-speech-recognition
            # expect an audio file path or array, not a text prompt, so this branch
            # will not work for whisper-large-v3 as-is.
            result = model_pipeline(full_prompt_string_from_frontend, **parameters)

        # --- Consistent output format across task types ---
        response_output = {}
        if model_task == "text-generation" or model_task == "text2text-generation":
            if result and len(result) > 0 and 'generated_text' in result[0]:
                response_output['text'] = result[0]['generated_text'].strip()
            else:
                response_output['text'] = "[Tidak ada teks yang dihasilkan atau format tidak sesuai.]"
        elif model_task == "fill-mask":
            response_output['predictions'] = [
                {"sequence": p.get('sequence', ''), "score": p.get('score', 0.0), "token_str": p.get('token_str', '')}
                for p in result
            ]
        else:
            response_output = result

        logger.info(f"Inferensi berhasil untuk '{model_id}'. Output singkat: '{str(response_output)[:200]}'")
        return jsonify({"model": model_id, "inputs": full_prompt_string_from_frontend, "outputs": response_output})

    except ValueError as ve:
        logger.error(f"Validation or configuration error for model '{model_id}': {str(ve)}")
        return jsonify({"error": str(ve), "message": "Model configuration or input error."}), 400
    except RuntimeError as rte:  # not 're', to avoid shadowing the stdlib module name
        logger.error(f"Runtime error while loading model '{model_id}': {str(rte)}")
        return jsonify({"error": str(rte), "message": "Model failed to load."}), 503
    except Exception as e:
        logger.error(f"Unexpected error while predicting with model '{model_id}': {str(e)}", exc_info=True)
        return jsonify({"error": str(e), "message": "Internal server error."}), 500

@app.route('/health', methods=['GET'])
def health_check():
    """Endpoint untuk health check."""
    return jsonify({"status": "healthy", "loaded_models_count": len(models), "message": "API berfungsi normal."})

# --- Run the Application ---
if __name__ == '__main__':
    # On Hugging Face Spaces the port is usually 7860
    # Use HOST from the environment when available, defaulting to 0.0.0.0
    # debug=False for production
    app.run(host=os.getenv('HOST', '0.0.0.0'), port=int(os.getenv('PORT', 7860)), debug=False)
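
# Production sketch (assumption: gunicorn is installed; it is a common choice
# for Flask apps on Spaces Docker setups):
#
#   gunicorn -w 1 -b 0.0.0.0:7860 app:app
#
# A single worker keeps just one copy of each loaded model in memory.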