Tahsin Hasem committed on
Commit
d8faee6
·
1 Parent(s): 3aaba5d
Files changed (4) hide show
  1. Dockerfile +8 -0
  2. README.md +14 -0
  3. main.py +38 -21
  4. requirements.txt +2 -1
Dockerfile CHANGED
@@ -6,6 +6,14 @@ COPY ./requirements.txt /code/requirements.txt
6
 
7
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
 
 
 
 
 
 
 
 
 
9
  COPY . .
10
 
11
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
6
 
7
  RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
 
9
+ RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
10
+
11
+
12
+ # sudo apt-get install lzma
13
+ # sudo apt-get install liblzma-dev
14
+ # sudo apt-get install libbz2-dev
15
+
16
+
17
  COPY . .
18
 
19
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -8,3 +8,17 @@ pinned: false
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
11
+
12
+ ## Instructions for local use:
13
+
14
+ 1. Build the image with Docker
15
+
16
+ ```
17
+ docker build -t app:v1 .
18
+ ```
19
+
20
+ 2. Run the container
21
+
22
+ ```
23
+ docker run -p 7860:7860 app:v1
24
+ ```
main.py CHANGED
@@ -3,8 +3,34 @@ from pydantic import BaseModel
3
  from huggingface_hub import InferenceClient
4
  import uvicorn
5
  from transformers import pipeline
 
6
 
7
- model_name = "distilgpt2"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  app = FastAPI()
@@ -16,29 +42,20 @@ class Item(BaseModel):
16
  prompt: str
17
 
18
 
19
- def generate(item: Item):
20
- generator = pipeline("text-generation", model=model_name)
21
-
22
- # Your input prompt
23
- prompt = item.prompt
 
24
 
25
- # Generate text
26
- generated_texts = generator(
27
- prompt,
28
- max_length=50, # Maximum length of the generated text
29
- num_return_sequences=1, # Number of different sequences to generate
30
- temperature=0.8, # Controls the randomness of the output
31
- top_k=50, # Limits the number of top tokens to consider
32
- top_p=0.95, # Nucleus sampling parameter
33
- do_sample=True # Enable sampling for non-deterministic output
34
  )
35
 
36
- return generated_texts
37
-
38
-
39
- @app.post("/generate/")
40
- async def generate_text(item: Item):
41
- return {"response": generate(item)}
42
 
43
  @app.get("/")
44
  async def home():
 
3
  from huggingface_hub import InferenceClient
4
  import uvicorn
5
  from transformers import pipeline
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
8
+
9
+ # Load pre-trained tokenizer and model (Works)
10
+ # model_name = "distilgpt2"
11
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
12
+ # model = AutoModelForCausalLM.from_pretrained(model_name)
13
+
14
+ # # Example usage: Generate text
15
+ # prompt = "The quick brown fox"
16
+ # input_ids = tokenizer.encode(prompt, return_tensors="pt")
17
+ # output = model.generate(input_ids, max_length=50, num_return_sequences=1)
18
+ # generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
19
+
20
+ # print(generated_text)
21
+
22
+
23
+ import transformers
24
+ import torch
25
+
26
+ model_id = "deepcogito/cogito-v1-preview-llama-3B"
27
+
28
+ pipeline = transformers.pipeline(
29
+ "text-generation",
30
+ model=model_id,
31
+ model_kwargs={"torch_dtype": torch.bfloat16},
32
+ device_map="auto",
33
+ )
34
 
35
 
36
  app = FastAPI()
 
42
  prompt: str
43
 
44
 
45
+ @app.post("/generate/")
46
+ async def generate_text(item: Item):
47
+ messages = [
48
+ {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
49
+ {"role": "user", "content": "Give me a short introduction to LLMs."},
50
+ ]
51
 
52
+ outputs = pipeline(
53
+ messages,
54
+ max_new_tokens=512,
 
 
 
 
 
 
55
  )
56
 
57
+ resp = outputs[0]["generated_text"][-1]
58
+ return {"response": resp}
 
 
 
 
59
 
60
  @app.get("/")
61
  async def home():
requirements.txt CHANGED
@@ -2,4 +2,5 @@ fastapi
2
  uvicorn
3
  huggingface_hub
4
  pydantic
5
- transformers
 
 
2
  uvicorn
3
  huggingface_hub
4
  pydantic
5
+ transformers
6
+ accelerate