Spaces:

LAWSA07
/

medical_model

Runtime error

App Files Files Community

LAWSA07 commited on Feb 28

Commit

f0a5521

verified ·

1 Parent(s): 3e7f095

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -0

app.py CHANGED Viewed

	@@ -0,0 +1,72 @@

+from fastapi import FastAPI, HTTPException
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+import torch
+app = FastAPI()
+# Load model once at startup
+@app.on_event("startup")
+async def load_model():
+    try:
+        # Configuration
+        model_name = "unsloth/deepseek-r1-distill-llama-8b-unsloth-bnb-4bit"
+        adapter_name = "LAWSA07/medical_fine_tuned_deepseekR1"
+        # Load base model with 4-bit quantization
+        app.state.base_model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            load_in_4bit=True,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True,
+        )
+        # Attach PEFT adapter
+        app.state.model = PeftModel.from_pretrained(
+            app.state.base_model,
+            adapter_name,
+            adapter_weight_name="adapter_model.safetensors"
+        )
+        # Load tokenizer
+        app.state.tokenizer = AutoTokenizer.from_pretrained(model_name)
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Model loading failed: {str(e)}"
+        )
+@app.get("/")
+def health_check():
+    return {"status": "OK"}
+@app.post("/generate")
+async def generate_text(prompt: str, max_length: int = 200):
+    try:
+        inputs = app.state.tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True
+        ).to("cuda")
+        outputs = app.state.model.generate(
+            **inputs,
+            max_length=max_length,
+            temperature=0.7,
+            do_sample=True
+        )
+        decoded = app.state.tokenizer.decode(
+            outputs[0],
+            skip_special_tokens=True
+        )
+        return {"response": decoded}
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Generation failed: {str(e)}"
+        )