# rest/main.py
import torch
import transformers
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel

# Alternative (verified to work): load a pre-trained tokenizer and model directly.
# from transformers import AutoTokenizer, AutoModelForCausalLM
#
# model_name = "distilgpt2"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
#
# # Example usage: generate text from a prompt.
# prompt = "The quick brown fox"
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
# output = model.generate(input_ids, max_length=50, num_return_sequences=1)
# generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
# print(generated_text)
model_id = "deepcogito/cogito-v1-preview-llama-3B"
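# Note: torch.bfloat16 with device_map="auto" assumes an accelerator with bf16
# support (and the accelerate package installed); on CPU-only hosts it may be
# safer to omit model_kwargs and fall back to float32.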
# Named `generator` so it does not shadow transformers.pipeline itself.
generator = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

app = FastAPI()

class EchoMessage(BaseModel):
    message: str


class Item(BaseModel):
    prompt: str

@app.post("/generate/")
async def generate_text(item: Item):
    # Build a chat transcript around the caller-supplied prompt.
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": item.prompt},
    ]
    outputs = generator(
        messages,
        max_new_tokens=512,
    )
    # For chat-style input, generated_text holds the whole conversation; the
    # last entry is the newly generated assistant message.
    resp = outputs[0]["generated_text"][-1]
    return {"response": resp}

@app.get("/")
async def home():
    return {"msg": "hey"}


@app.post("/echo/")
async def echo(echo_msg: EchoMessage):
    return {"msg": echo_msg.message}