Tahsin Hasem committed · Commit d8faee6 · 1 parent: 3aaba5d

Use cogito

Files changed:
- Dockerfile +8 -0
- README.md +14 -0
- main.py +38 -21
- requirements.txt +2 -1
Dockerfile
CHANGED

```diff
@@ -6,6 +6,14 @@ COPY ./requirements.txt /code/requirements.txt
 
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
+RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+
+
+# sudo apt-get install lzma
+# sudo apt-get install liblzma-dev
+# sudo apt-get install libbz2-dev
+
+
 COPY . .
 
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
```
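The new `RUN pip3 install ...` line pulls the CPU-only PyTorch wheels from the `whl/cpu` index, which keeps the image small and skips CUDA libraries that a CPU Space cannot use anyway. A minimal sanity check, assuming the image was built as `app:v1` per the README below; the file name `check_torch.py` is made up for this sketch:

```python
# check_torch.py (hypothetical helper, not part of the repo)
# Run inside the image: docker run --rm app:v1 python check_torch.py
import torch

# CPU-only wheels report a version with a "+cpu" suffix, e.g. "2.3.1+cpu"
print("torch version:", torch.__version__)

# The CPU wheel ships no CUDA runtime, so this should print False
print("cuda available:", torch.cuda.is_available())
```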
README.md
CHANGED

````diff
@@ -8,3 +8,17 @@ pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+## Instructions for local use:
+
+1. Build with docker
+
+```
+docker build -t app:v1 .
+```
+
+2. Run
+
+```
+docker run -p 7860:7860 app:v1
+```
````
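With the container from step 2 running, the Space can be exercised from any HTTP client. A minimal Python sketch, assuming the `/` and `/generate/` routes defined in main.py and that `requests` is installed on the host (it is not in requirements.txt):

```python
import requests

BASE = "http://localhost:7860"  # port published by `docker run -p 7860:7860`

# Home route as a quick liveness check
print(requests.get(f"{BASE}/").status_code)  # expect 200

# The body must match the Item model in main.py: a single "prompt" field
r = requests.post(f"{BASE}/generate/", json={"prompt": "Give me a short introduction to LLMs."})
print(r.json()["response"])
```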
main.py
CHANGED

```diff
@@ -3,8 +3,34 @@ from pydantic import BaseModel
 from huggingface_hub import InferenceClient
 import uvicorn
 from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-
+
+# Load pre-trained tokenizer and model (Works)
+# model_name = "distilgpt2"
+# tokenizer = AutoTokenizer.from_pretrained(model_name)
+# model = AutoModelForCausalLM.from_pretrained(model_name)
+
+# # Example usage: Generate text
+# prompt = "The quick brown fox"
+# input_ids = tokenizer.encode(prompt, return_tensors="pt")
+# output = model.generate(input_ids, max_length=50, num_return_sequences=1)
+# generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+# print(generated_text)
+
+
+import transformers
+import torch
+
+model_id = "deepcogito/cogito-v1-preview-llama-3B"
+
+pipeline = transformers.pipeline(
+    "text-generation",
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="auto",
+)
 
 
 app = FastAPI()
@@ -16,29 +42,20 @@ class Item(BaseModel):
     prompt: str
 
 
-
-
-
-
-
+@app.post("/generate/")
+async def generate_text(item: Item):
+    messages = [
+        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+        {"role": "user", "content": "Give me a short introduction to LLMs."},
+    ]
 
-
-
-
-        max_length=50, # Maximum length of the generated text
-        num_return_sequences=1, # Number of different sequences to generate
-        temperature=0.8, # Controls the randomness of the output
-        top_k=50, # Limits the number of top tokens to consider
-        top_p=0.95, # Nucleus sampling parameter
-        do_sample=True # Enable sampling for non-deterministic output
+    outputs = pipeline(
+        messages,
+        max_new_tokens=512,
     )
 
-
-
-
-@app.post("/generate/")
-async def generate_text(item: Item):
-    return {"response": generate(item)}
+    resp = outputs[0]["generated_text"][-1]
+    return {"response": resp}
 
 @app.get("/")
 async def home():
```
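Note that the rewritten `/generate/` endpoint never reads `item.prompt`: the user message is hard-coded, so every request answers the same fixed question. A sketch of how the request body could be wired into the chat template (a possible follow-up, not something this commit does; it assumes the module-level `pipeline`, `app`, and `Item` from main.py):

```python
@app.post("/generate/")
async def generate_text(item: Item):
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        # Use the caller's prompt instead of the fixed question from the commit
        {"role": "user", "content": item.prompt},
    ]
    outputs = pipeline(messages, max_new_tokens=512)
    # Chat pipelines return the whole conversation; the last entry is the new assistant message
    return {"response": outputs[0]["generated_text"][-1]}
```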
requirements.txt
CHANGED

```diff
@@ -2,4 +2,5 @@ fastapi
 uvicorn
 huggingface_hub
 pydantic
-transformers
+transformers
+accelerate
```