from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import uvicorn
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load pre-trained tokenizer and model (Works)
# model_name = "distilgpt2"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)
# # Example usage: Generate text
# prompt = "The quick brown fox"
# input_ids = tokenizer.encode(prompt, return_tensors="pt")
# output = model.generate(input_ids, max_length=50, num_return_sequences=1)
# generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
# print(generated_text)
import transformers
import torch

# Load the chat model once at startup; device_map="auto" places it on GPU if available.
model_id = "deepcogito/cogito-v1-preview-llama-3B"
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)
app = FastAPI()


class EchoMessage(BaseModel):
    message: str


class Item(BaseModel):
    prompt: str
# Route path "/generate" is an assumption; the original file did not show a decorator.
@app.post("/generate")
async def generate_text(item: Item):
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": item.prompt},
    ]
    outputs = pipeline(
        messages,
        max_new_tokens=512,
    )
    # The pipeline returns the full conversation; the last message is the assistant's reply.
    resp = outputs[0]["generated_text"][-1]
    return {"response": resp}
# Route path "/" is an assumption; the original file did not show a decorator.
@app.get("/")
async def home():
    return {"msg": "hey"}
# Route path "/echo" is an assumption; the original file did not show a decorator.
@app.post("/echo")
async def echo(echo_msg: EchoMessage):
    return {"msg": echo_msg.message}