visit2sachin56 committed
Commit 3f35d0e · verified · 1 Parent(s): e770201

Upload 2 files

Files changed (2)
  1. app (4).py +35 -0
  2. requirements.txt +7 -0
app (4).py ADDED
@@ -0,0 +1,35 @@
+import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from langchain_community.llms import HuggingFacePipeline
+
+model_id = st.text_input("Enter model_id")
+# model_id = "meta-llama/Meta-Llama-3-8B"
+
+if model_id:
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        load_in_4bit=True,  # 4-bit quantization; requires bitsandbytes
+        # attn_implementation="flash_attention_2",  # if you have an Ampere GPU
+    )
+
+    # st.text_input returns strings, so cast to the numeric types the pipeline expects
+    max_new_tokens = int(st.text_input("Enter max_new_tokens", value="100"))
+    top_k = int(st.text_input("Enter top_k", value="50"))
+    temperature = float(st.text_input("Enter temperature", value="0.1"))
+
+    query = st.chat_input("Enter your query")
+    if query:
+        st.write(query)
+
+        pipe = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=max_new_tokens,
+            top_k=top_k,
+            temperature=temperature,
+        )
+        llm = HuggingFacePipeline(pipeline=pipe)
+        st.write(llm.invoke(query))
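A note on the 4-bit flag: passing load_in_4bit=True directly to from_pretrained works in the transformers versions this commit appears to target, but newer releases prefer an explicit BitsAndBytesConfig. A minimal sketch of the equivalent call, assuming bitsandbytes is installed:

    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Equivalent to load_in_4bit=True: quantize linear-layer weights to 4-bit on load
    bnb_config = BitsAndBytesConfig(load_in_4bit=True)
    model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config)
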
requirements.txt ADDED
@@ -0,0 +1,7 @@
+huggingface_hub==0.22.2
+streamlit
+transformers
+torch
+accelerate  # required by transformers for quantized model loading
+bitsandbytes  # provides the 4-bit kernels behind load_in_4bit
+langchain-community  # provides HuggingFacePipeline
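
To try the app locally, the standard Streamlit workflow should work, assuming the script keeps its committed filename:

    pip install -r requirements.txt
    streamlit run "app (4).py"

The quotes around the filename matter because of the space and parentheses in app (4).py.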