"""Flask API serving a collection of Hugging Face models.

Models are loaded lazily on first request, cached in memory, and run on CPU.
"""

from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import pipeline
import logging
import torch
import os

app = Flask(__name__)
CORS(app)

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Registry of model configurations. Entries without an explicit
# "hf_model_name" resolve to the Hugging Face repo "Lyon28/<model-id>"
# (see get_model_pipeline below).
model_info = {
    "Albert-Base-V2": {
        "task": "fill-mask",
        "description": "Lyon28 Albert-Base-V2"
    },
    "GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 GPT-2"
    },
    "Tinny-Llama": {
        "task": "text-generation",
        "description": "Lyon28 Tinny-Llama"
    },
    "Electra-Small": {
        "task": "fill-mask",
        "description": "Lyon28 Small ELECTRA"
    },
    "GPT-2-Tinny": {
        "task": "text-generation",
        "description": "Lyon28 Tiny GPT-2"
    },
    "Bert-Tinny": {
        "task": "fill-mask",
        "description": "Lyon28 Tiny BERT"
    },
    "Distilbert-Base-Uncased": {
        "task": "fill-mask",
        "description": "Lyon28 Distilled BERT"
    },
    "Pythia": {
        "task": "text-generation",
        "description": "Lyon28 Pythia"
    },
    "T5-Small": {
        "task": "text2text-generation",
        "description": "Lyon28 Small T5"
    },
    "GPT-Neo": {
        "task": "text-generation",
        "description": "Lyon28 GPT-Neo"
    },
    "Distil-GPT-2": {
        "task": "text-generation",
        "description": "Lyon28 Distilled GPT-2"
    },
    "training-models": {
        "task": "text-generation",
        "description": "Lyon28 training-models"
    },

    "TinyLLama-NSFW-Chatbot": {
        "task": "text-generation",
        "description": "BilalRahib's TinyLLama NSFW Chatbot",
        "hf_model_name": "bilalRahib/TinyLLama-NSFW-Chatbot"
    },
    "whisper-large-v3": {
        # Note: ASR pipelines expect audio input; the text-only
        # /api/<model_id> endpoint below cannot serve this model as-is.
        "task": "automatic-speech-recognition",
        "description": "openai whisper-large-v3",
        "hf_model_name": "openai/whisper-large-v3"
    },
    "Nusantara-4b-Indo-Chat": {
        "task": "text-generation",
        "description": "kalisai Nusantara-4b-Indo-Chat",
        "hf_model_name": "kalisai/Nusantara-4b-Indo-Chat"
    },
    "lb-reranker-0.5B-v1.0": {
        "task": "text-generation",
        "description": "lightblue lb-reranker-0.5B-v1.0",
        "hf_model_name": "lightblue/lb-reranker-0.5B-v1.0"
    },
    "harry-potter-gpt2": {
        "task": "text-generation",
        "description": "akahana harry-potter-gpt2",
        "hf_model_name": "akahana/harry-potter-gpt2"
    },
    "Sailor2-1B-Chat": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Chat",
        "hf_model_name": "sail/Sailor2-1B-Chat"
    },
    "gpt2-indo-textgen": {
        "task": "text-generation",
        "description": "anugrahap gpt2-indo-textgen",
        "hf_model_name": "anugrahap/gpt2-indo-textgen"
    },
    "cendol-mt5-small-inst": {
        # mT5 is an encoder-decoder model, so it needs the seq2seq pipeline.
        "task": "text2text-generation",
        "description": "indonlp cendol-mt5-small-inst",
        "hf_model_name": "indonlp/cendol-mt5-small-inst"
    },
    "Sailor2-1B-Pre": {
        "task": "text-generation",
        "description": "sail Sailor2-1B-Pre",
        "hf_model_name": "sail/Sailor2-1B-Pre"
    },
    "gemini-small": {
        "task": "text-generation",
        "description": "describeai gemini-small",
        "hf_model_name": "describeai/gemini-small"
    }
}

# Cache of already-loaded pipelines, keyed by model id.
models = {}

def get_model_pipeline(model_name):
    """
    Load a model only if it has not been loaded yet (lazy loading).
    Returns the pipeline for the requested model.
    """
    if model_name not in models:
        logger.info(f"Model '{model_name}' not loaded yet. Loading now...")
        if model_name not in model_info:
            logger.error(f"No entry for model '{model_name}' in model_info.")
            raise ValueError(f"Unknown model: '{model_name}'.")

        info = model_info[model_name]
        # Fall back to the Lyon28 namespace when no explicit HF path is set.
        hf_model_path = info.get("hf_model_name", f"Lyon28/{model_name}")
        try:
            models[model_name] = pipeline(
                info["task"],
                model=hf_model_path,
                device="cpu",
                torch_dtype=torch.float32
            )
            logger.info(f"✅ Model '{model_name}' (path: {hf_model_path}) loaded successfully.")
        except Exception as e:
            logger.error(f"❌ Failed to load model '{model_name}' (path: {hf_model_path}): {str(e)}", exc_info=True)
            raise RuntimeError(f"Failed to load model: {model_name}. Details: {str(e)}") from e
    return models[model_name]
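
# Illustrative use of the lazy-loading cache (a sketch, not part of the API):
# the first call downloads and loads the weights, later calls hit the cache.
#   pipe = get_model_pipeline("GPT-2")  # slow: loads the model
#   pipe = get_model_pipeline("GPT-2")  # fast: returned from the cache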


@app.route('/')
def home():
    """Root endpoint reporting API status."""
    return jsonify({
        "message": "Flask API for Hugging Face models",
        "status": "online",
        "loaded_models_count": len(models),
        "available_model_configs": list(model_info.keys()),
        "info": "Use /api/models for the list of available models."
    })


@app.route('/api/models', methods=['GET'])
def list_available_models():
    """Return every configured model, including its load status."""
    available_models_data = [
        {
            "id": name,
            "name": info["description"],
            "task": info["task"],
            "status": "loaded" if name in models else "not_loaded",
            "endpoint": f"/api/{name}"
        }
        for name, info in model_info.items()
    ]
    return jsonify({
        "total_configured_models": len(model_info),
        "currently_loaded_models": len(models),
        "models": available_models_data
    })
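
# Illustrative response shape for GET /api/models (values are examples only):
#   {
#     "total_configured_models": 22,
#     "currently_loaded_models": 0,
#     "models": [
#       {"id": "GPT-2", "name": "Lyon28 GPT-2", "task": "text-generation",
#        "status": "not_loaded", "endpoint": "/api/GPT-2"},
#       ...
#     ]
#   }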


@app.route('/api/<model_id>', methods=['POST'])
def predict_with_model(model_id):
    """
    Main prediction endpoint.
    Accepts 'inputs' (a pre-formatted prompt string) and an optional
    'parameters' dictionary.
    """
    logger.info(f"Received request for model: {model_id}")
    if model_id not in model_info:
        logger.warning(f"Request for unknown model: {model_id}")
        return jsonify({"error": f"Unknown model '{model_id}'. See /api/models for the available list."}), 404

    try:
        model_pipeline = get_model_pipeline(model_id)
        model_task = model_info[model_id]["task"]

        # get_json(silent=True) avoids raising on a missing or invalid JSON body.
        data = request.get_json(silent=True) or {}

        full_prompt_string_from_frontend = data.get('inputs', '')
        parameters = data.get('parameters', {})

        if not full_prompt_string_from_frontend:
            return jsonify({"error": "Input 'inputs' (full prompt string) must not be empty."}), 400

        logger.info(f"Inference: Model='{model_id}', Task='{model_task}', Full Prompt='{full_prompt_string_from_frontend[:200]}...', Params='{parameters}'")

        result = []

        if model_task == "text-generation":
            gen_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
                "return_full_text": parameters.get("return_full_text", False),
                "num_return_sequences": parameters.get("num_return_sequences", 1),
                "top_k": parameters.get("top_k", 50),
                "top_p": parameters.get("top_p", 0.95),
                "repetition_penalty": parameters.get("repetition_penalty", 1.2),
            }
            result = model_pipeline(full_prompt_string_from_frontend, **gen_params)

        elif model_task == "fill-mask":
            mask_params = {
                "top_k": parameters.get("top_k", 5)
            }
            result = model_pipeline(full_prompt_string_from_frontend, **mask_params)

        elif model_task == "text2text-generation":
            t2t_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 150),
                "temperature": parameters.get("temperature", 0.7),
                "do_sample": parameters.get("do_sample", True),
            }
            result = model_pipeline(full_prompt_string_from_frontend, **t2t_params)

        else:
            # Fallback for any other task: pass parameters through unchanged.
            result = model_pipeline(full_prompt_string_from_frontend, **parameters)

        # Normalize the pipeline output into a compact response payload.
        response_output = {}
        if model_task in ("text-generation", "text2text-generation"):
            if result and len(result) > 0 and 'generated_text' in result[0]:
                response_output['text'] = result[0]['generated_text'].strip()
            else:
                response_output['text'] = "[No text generated or unexpected output format.]"
        elif model_task == "fill-mask":
            response_output['predictions'] = [
                {"sequence": p.get('sequence', ''), "score": p.get('score', 0.0), "token_str": p.get('token_str', '')}
                for p in result
            ]
        else:
            response_output = result

        logger.info(f"Inference succeeded for '{model_id}'. Output preview: '{str(response_output)[:200]}'")
        return jsonify({"model": model_id, "inputs": full_prompt_string_from_frontend, "outputs": response_output})

    except ValueError as ve:
        logger.error(f"Validation or configuration error for model '{model_id}': {str(ve)}")
        return jsonify({"error": str(ve), "message": "Model configuration or input error."}), 400
    except RuntimeError as rte:
        # Named 'rte' to avoid shadowing the standard 're' module name.
        logger.error(f"Runtime error while loading model '{model_id}': {str(rte)}")
        return jsonify({"error": str(rte), "message": "Model failed to load."}), 503
    except Exception as e:
        logger.error(f"Unexpected error during prediction with model '{model_id}': {str(e)}", exc_info=True)
        return jsonify({"error": str(e), "message": "Internal server error."}), 500


@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint."""
    return jsonify({"status": "healthy", "loaded_models_count": len(models), "message": "API is running normally."})


if __name__ == '__main__':
    # HOST and PORT can be overridden via environment variables.
    app.run(host=os.getenv('HOST', '0.0.0.0'), port=int(os.getenv('PORT', '7860')), debug=False)
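
# One way to launch the server (assumes this module is saved as app.py; the
# filename is an assumption, adjust as needed):
#   HOST=0.0.0.0 PORT=7860 python app.py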