# app.py
from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import pipeline
import logging
import torch
import os  # For reading environment variables, e.g. on Hugging Face Spaces

app = Flask(__name__)
CORS(app)  # Enable CORS so your frontend can call this API from another origin

# --- Setup Logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Model Configuration ---
# Add 'hf_model_name' when the model's name on Hugging Face differs from the ID exposed here.
# If the Hugging Face name matches the ID, 'hf_model_name' is not needed.
model_info = {
    "Albert-Base-V2": {
        "task": "fill-mask", 
        "description": "Lyon28 Albert-Base-V2"
    },
    "GPT-2": {
        "task": "text-generation", 
        "description": "Lyon28 GPT-2"
    },
    "Tinny-Llama": {
        "task": "text-generation",
        "description": "Lyon 28 Tinny Llama"
    },
    "Electra-Small": {
        "task": "fill-mask", 
        "description": "Lyon28 Small ELECTRA"
    },
    "GPT-2-Tinny": {
        "task": "text-generation",
        "description": "Lyon28 Tiny GPT-2"
    },
    "Bert-Tinny": {
        "task": "fill-mask", 
        "description": "Lyon28 Tiny BERT"
    },
    "Distilbert-Base-Uncased": {
        "task": "fill-mask",
        "description": "Lyon28 Distilled BERT"
    },
    "Pythia": {
        "task": "text-generation",
        "description": "Lyon28 Pythia"
    },
    "T5-Small": {
        "task": "text2text-generation", 
        "description": "Lyon28 Small T5"
    },
    "GPT-Neo": {
        "task": "text-generation",
        "description": "Lyon28 GPT-Neo"
    },
    "Distil-GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 Distilled GPT-2"
    },
    "training-models": {
        "task": "text-generation",
        "description": "Lyon28 training-models"
    },
    # --- External models ---
    "TinyLLama-NSFW-Chatbot": {
        "task": "text-generation",
        "description": "BilalRahib's TinyLLama NSFW Chatbot",
        "hf_model_name": "bilalRahib/TinyLLama-NSFW-Chatbot"
    },
    "whisper-large-v3": {
        "task": "automatic-speech-recognition",
        "description": "openai whisper-large-v3",
        "hf_model_name": "openai/whisper-large-v3"
    },
    "Nusantara-4b-Indo-Chat": {
        "task": "text-generation",
        "description": "kalisai Nusantara-4b-Indo-Chat",
        "hf_model_name": "kalisai/Nusantara-4b-Indo-Chat"
    },
   "lb-reranker-0.5B-v1.0": {
        "task": "text-generation",
        "description": "lightblue lb-reranker-0.5B-v1.0",
        "hf_model_name": "lightblue/lb-reranker-0.5B-v1.0"
    },
   "harry-potter-gpt2": {
        "task": "text-generation",
        "description": "akahana harry-potter-gpt2",
        "hf_model_name": "akahana/harry-potter-gpt2"
    },
    "Sailor2-1B-Chat": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Chat",
        "hf_model_name": "sail/Sailor2-1B-Chat"
    },
    "gpt2-indo-textgen": {
        "task": "text-generation",
        "description": "anugrahap gpt2-indo-textgen",
        "hf_model_name": "anugrahap/gpt2-indo-textgen"
    },
    "cendol-mt5-small-inst": {
        "task": "text-generation",
        "description": "indonlp cendol-mt5-small-inst",
        "hf_model_name": "indonlp/cendol-mt5-small-inst"
    },
    "Sailor2-1B-Pre": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Pre",
        "hf_model_name": "sail/Sailor2-1B-Pre"
    },
    "gemini-small": {
        "task": "text-generation",
        "description": "describeai gemini-small",
        "hf_model_name": "describeai/gemini-small"
    }
}
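
# A quick sketch of how an ID above resolves to a Hugging Face path (the same
# rule get_model_pipeline() applies below); both example IDs come from the
# configuration above:
#
#   "GPT-2"            -> "Lyon28/GPT-2"              (default "Lyon28/<id>" prefix)
#   "whisper-large-v3" -> "openai/whisper-large-v3"   (explicit 'hf_model_name' override)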

# --- Lazy Loading ---
models = {}  # cache of loaded pipelines, keyed by model ID

# --- Lazy-loading utility ---
def get_model_pipeline(model_name):
    """
    Memuat model hanya jika belum dimuat (lazy loading).
    Mengembalikan pipeline model yang diminta.
    """
    if model_name not in models:
        logger.info(f"Model '{model_name}' belum dimuat. Memuat sekarang...")
        if model_name not in model_info:
            logger.error(f"Informasi model '{model_name}' tidak ditemukan di model_info.")
            raise ValueError(f"Model '{model_name}' tidak dikenal.")

        info = model_info[model_name]
        # External models override the default "Lyon28/<id>" path via 'hf_model_name'.
        hf_model_path = info.get("hf_model_name", f"Lyon28/{model_name}")
        try:
            models[model_name] = pipeline(
                info["task"],
                model=hf_model_path,
                device="cpu",
                torch_dtype=torch.float32
            )
            logger.info(f"✅ Model '{model_name}' (path: {hf_model_path}) loaded successfully.")
        except Exception as e:
            logger.error(f"❌ Failed to load model '{model_name}' (path: {hf_model_path}): {str(e)}", exc_info=True)
            raise RuntimeError(f"Failed to load model: {model_name}. Details: {str(e)}") from e
    return models[model_name]
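
# Optional warm-up sketch (assumption: PRELOAD_MODELS is a comma-separated
# environment variable you define yourself, e.g. PRELOAD_MODELS="GPT-2-Tinny,Bert-Tinny");
# it trades a slower startup for a fast first request. With the variable unset,
# this loop does nothing and all models stay lazily loaded.
for _name in os.getenv("PRELOAD_MODELS", "").split(","):
    _name = _name.strip()
    if _name in model_info:
        try:
            get_model_pipeline(_name)
        except RuntimeError:
            logger.warning(f"Warm-up failed for '{_name}'; it will retry lazily on first request.")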

# --- API Routes ---

@app.route('/')
def home():
    """Endpoint root untuk status API."""
    return jsonify({
        "message": "Flask API untuk Model Hugging Face",
        "status": "online",
        "loaded_models_count": len(models),
        "available_model_configs": list(model_info.keys()),
        "info": "Gunakan /api/models untuk daftar model yang tersedia."
    })

@app.route('/api/models', methods=['GET'])
def list_available_models():
    """Mengembalikan daftar semua model yang dikonfigurasi, termasuk status muatan."""
    available_models_data = [
        {
            "id": name,
            "name": info["description"],
            "task": info["task"],
            "status": "loaded" if name in models else "not_loaded", # Menunjukkan apakah sudah dimuat via lazy loading
            "endpoint": f"/api/{name}"
        }
        for name, info in model_info.items()
    ]
    return jsonify({
        "total_configured_models": len(model_info),
        "currently_loaded_models": len(models),
        "models": available_models_data
    })
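
# Example (a sketch; the counts depend on your configuration and on what has
# been loaded so far, and localhost:7860 assumes the default port set at the bottom):
#
#   curl http://localhost:7860/api/models
#   -> {"total_configured_models": 22, "currently_loaded_models": 0, "models": [...]}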

@app.route('/api/<model_id>', methods=['POST'])
def predict_with_model(model_id):
    """
    Endpoint utama untuk prediksi model.
    Menerima 'inputs' (teks pra-diformat) dan 'parameters' (dictionary) opsional.
    """
    logger.info(f"Menerima permintaan untuk model: {model_id}")
    if model_id not in model_info:
        logger.warning(f"Permintaan untuk model tidak dikenal: {model_id}")
        return jsonify({"error": f"Model '{model_id}' tidak dikenal. Lihat /api/models untuk daftar yang tersedia."}), 404

    try:
        model_pipeline = get_model_pipeline(model_id)  # loads the model on first use
        model_task = model_info[model_id]["task"]

        data = request.get_json(silent=True) or {}  # tolerate missing or invalid JSON bodies
        # The input is expected to be the fullPromptString sent by the frontend
        full_prompt_string_from_frontend = data.get('inputs', '')
        parameters = data.get('parameters', {})

        if not full_prompt_string_from_frontend:
            return jsonify({"error": "Input 'inputs' (full prompt string) tidak boleh kosong."}), 400

        logger.info(f"Inferensi: Model='{model_id}', Task='{model_task}', Full Prompt='{full_prompt_string_from_frontend[:200]}...', Params='{parameters}'")

        result = []
        # --- Parameter handling and inference by task type ---
        if model_task == "text-generation":
            gen_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
                "return_full_text": parameters.get("return_full_text", False), # Sangat penting untuk chatbot
                "num_return_sequences": parameters.get("num_return_sequences", 1),
                "top_k": parameters.get("top_k", 50),
                "top_p": parameters.get("top_p", 0.95),
                "repetition_penalty": parameters.get("repetition_penalty", 1.2),
            }
            # Pass full_prompt_string_from_frontend straight to the pipeline
            result = model_pipeline(full_prompt_string_from_frontend, **gen_params)

        elif model_task == "fill-mask":
            mask_params = {
                "top_k": parameters.get("top_k", 5)
            }
            # For fill-mask the input must be a plain string containing the mask token,
            # not a complex prompt; make sure the frontend does not send chat-style prompts here
            result = model_pipeline(full_prompt_string_from_frontend, **mask_params)

        elif model_task == "text2text-generation":
            t2t_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
            }
            result = model_pipeline(full_prompt_string_from_frontend, **t2t_params)

        else:
            # Generic fallback. Note: audio tasks such as automatic-speech-recognition
            # expect an audio file path or array, not a text prompt, so this branch
            # will not work for whisper-large-v3 as-is.
            result = model_pipeline(full_prompt_string_from_frontend, **parameters)

        # --- Consistent output format across task types ---
        response_output = {}
        if model_task == "text-generation" or model_task == "text2text-generation":
            if result and len(result) > 0 and 'generated_text' in result[0]:
                response_output['text'] = result[0]['generated_text'].strip()
            else:
                response_output['text'] = "[Tidak ada teks yang dihasilkan atau format tidak sesuai.]"
        elif model_task == "fill-mask":
            response_output['predictions'] = [
                {"sequence": p.get('sequence', ''), "score": p.get('score', 0.0), "token_str": p.get('token_str', '')}
                for p in result
            ]
        else:
            response_output = result

        logger.info(f"Inferensi berhasil untuk '{model_id}'. Output singkat: '{str(response_output)[:200]}'")
        return jsonify({"model": model_id, "inputs": full_prompt_string_from_frontend, "outputs": response_output})

    except ValueError as ve:
        logger.error(f"Validation or configuration error for model '{model_id}': {str(ve)}")
        return jsonify({"error": str(ve), "message": "Model configuration or input error."}), 400
    except RuntimeError as rte:  # not 're', to avoid shadowing the stdlib module name
        logger.error(f"Runtime error while loading model '{model_id}': {str(rte)}")
        return jsonify({"error": str(rte), "message": "Model failed to load."}), 503
    except Exception as e:
        logger.error(f"Unexpected error while predicting with model '{model_id}': {str(e)}", exc_info=True)
        return jsonify({"error": str(e), "message": "Internal server error."}), 500

@app.route('/health', methods=['GET'])
def health_check():
    """Endpoint untuk health check."""
    return jsonify({"status": "healthy", "loaded_models_count": len(models), "message": "API berfungsi normal."})

# --- Run the Application ---
if __name__ == '__main__':
    # On Hugging Face Spaces the port is usually 7860
    # Use HOST from the environment when available, defaulting to 0.0.0.0
    # debug=False for production
    app.run(host=os.getenv('HOST', '0.0.0.0'), port=int(os.getenv('PORT', 7860)), debug=False)
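
# Production sketch (assumption: gunicorn is installed; it is a common choice
# for Flask apps on Spaces Docker setups):
#
#   gunicorn -w 1 -b 0.0.0.0:7860 app:app
#
# A single worker keeps just one copy of each loaded model in memory.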