jonathantiedchen committed on
Commit 52931fc · verified · 1 Parent(s): fb5fd52

Update app.py

Files changed (1)
  1. app.py +61 -19
app.py CHANGED
@@ -4,29 +4,71 @@ import streamlit as st
 from huggingface_hub import hf_hub_download
 from unsloth import FastLanguageModel,is_bfloat16_supported
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteriaList
 import importlib
 import random
 from datasets import load_dataset
+from utils import SpecificStringStoppingCriteria
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Some Specifications
+generation_util = [
+    "Q:",
+    "</s>",
+    "<|im_end|>"
+]
+mistral_path = 'jonathantiedchen/MistralMath-CPT-IFT'
+
+#LOAD MISTRAL
+@st.cache_resource
+def load_mistral():
+    try:
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=mistral_path,
+            max_seq_length=2048,
+            dtype=torch.bfloat16 if is_bfloat16_supported() else torch.float16,
+            load_in_4bit=True
+        )
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        FastLanguageModel.for_inference(model)
+    except Exception as e:
+        st.sidebar.error(f"⚠️ Failed to load Mistral model with Unsloth: {e}")
+
+    return model, tokenizer
+
+st.sidebar.write("📥 Load Models.")
+mistral, mistral_tokenizer = load_mistral()
+st.sidebar.write(f"✅ Successfully loaded Mistral.")
 
-path = 'jonathantiedchen/MistralMath-CPT-IFT'
-
-#Sidebar Text
-st.sidebar.write("📥 Downloading models from Hugging Face...")
-try:
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name=path,
-        max_seq_length=2048,
-        dtype=torch.bfloat16 if is_bfloat16_supported() else torch.float16,
-        load_in_4bit=True
-    )
-    if tokenizer.pad_token is None:
-        tokenizer.pad_token = tokenizer.eos_token
-    FastLanguageModel.for_inference(model)
-    st.sidebar.write("Model Downloaded Successfully")
-except Exception as e:
-    st.sidebar.error(f"⚠️ Failed to load Mistral model with Unsloth: {e}")
 
 # Streamlit UI
 st.title("🧠 Math LLM Demo")
-st.write("💬 Ask me anything!")
+st.write("💬 Please prompt me something!")
+
+prompt = st.text_area("Enter your math prompt:", "Jasper has 5 apples and eats 2 of them. How many apples does he have left?")
+
+if st.button("Generate Response", key="manual"):
+    with st.sidebar.spinner("🔄 Generating..."):
+
+        #MISTRAL PROMPTING
+        inputs = mistral_tokenizer(prompt, return_tensors="pt").to(mistral.device)
+        stop_criteria = SpecificStringStoppingCriteria(mistral_tokenizer, generation_util, len(prompt))
+        stopping_criteria_list = StoppingCriteriaList([stop_criteria])
+        with torch.no_grad():
+            outputs = mistral.generate(
+                **inputs,
+                max_new_tokens=512,
+                pad_token_id=mistral_tokenizer.eos_token_id,
+                stopping_criteria=stopping_criteria_list
+            )
+        generated_text = mistral_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response_only = generated_text[len(prompt):].strip()
+
+        st.subheader("🔎 Prompt")
+        st.code(prompt)
+        st.subheader("🧠 Model Output")
+        st.code(generated_text)
+        st.subheader("✂️ Response Only")
+        st.success(response_only)
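
Note: the added code imports SpecificStringStoppingCriteria from a local utils module that is not part of this commit. A minimal sketch of what such a helper could look like is below, assuming it subclasses transformers.StoppingCriteria, decodes the running sequence, and halts generation once any of the configured strings in generation_util appears after the prompt. The class name and constructor arguments mirror the call site above; the body is an assumption, not the repository's actual implementation.

import torch
from transformers import StoppingCriteria

class SpecificStringStoppingCriteria(StoppingCriteria):
    # Hypothetical sketch: stop generation when any stop string appears
    # in the text generated after the prompt.
    def __init__(self, tokenizer, stop_strings, prompt_length):
        self.tokenizer = tokenizer
        self.stop_strings = stop_strings
        self.prompt_length = prompt_length  # character length of the prompt, as passed at the call site

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Decode the full sequence and inspect only the continuation beyond the prompt.
        text = self.tokenizer.decode(input_ids[0], skip_special_tokens=False)
        generated = text[self.prompt_length:]
        return any(s in generated for s in self.stop_strings)

The app then wraps an instance of this class in StoppingCriteriaList([stop_criteria]) and passes it to generate(), as shown in the diff, so decoding ends as soon as "Q:", "</s>", or "<|im_end|>" shows up in the response.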