MilanCalegari committed on
Commit
fc534ec
·
1 Parent(s): 4c52e42

feat: make it faster

Browse files
Files changed (2) hide show
  1. app.py +1 -4
  2. modules/llm/card_interpreter.py +9 -11
app.py CHANGED
@@ -27,10 +27,7 @@ st.title("🔮 Tarot Reading")
27
 
28
  # Secret configurations
29
  with st.sidebar:
30
- if 'expander_state' not in st.session_state:
31
- st.session_state['is_expanded'] = False
32
-
33
- with st.expander("⚙️ Settings", expanded=False):
34
  reversed_prob = st.slider(
35
  "Probability of reversed cards",
36
  min_value=0.0,
 
27
 
28
  # Secret configurations
29
  with st.sidebar:
30
+ with st.expander("Settings", expanded=False, icon="⚙️"):
 
 
 
31
  reversed_prob = st.slider(
32
  "Probability of reversed cards",
33
  min_value=0.0,
modules/llm/card_interpreter.py CHANGED
@@ -14,15 +14,16 @@ class CardInterpreter(CardInterpreterInterface):
14
  # Login to Hugging Face
15
  hf_token = os.getenv("HF_TOKEN")
16
  login(token=hf_token)
17
- # Initialize pipeline once and cache it
18
  self.pipeline = pipeline(
19
  "text-generation",
20
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
21
- device_map="auto",
22
  pad_token_id=2,
 
23
  )
24
 
25
- # Cache base prompt content that doesn't change
26
  self._base_content = """
27
  You are a powerful occultist and exceptional tarot reader. Provide a concise reading based on the given cards.
28
 
@@ -40,10 +41,6 @@ class CardInterpreter(CardInterpreterInterface):
40
  - With context: Focus on context-specific interpretation;
41
  - Without context: Give practical daily guidance;
42
 
43
- If the context is General reading:
44
- - Provide a general daily life reading;
45
- - Focus on practical matters;
46
-
47
  If other context is provided:
48
  - Focus on the context provided;
49
  - Provide a reading related to the context;
@@ -52,7 +49,7 @@ class CardInterpreter(CardInterpreterInterface):
52
  """
53
 
54
  def _format_card(self, card: Card) -> str:
55
- # Helper to format card name
56
  return f"{card.name} (Reversed)" if card.reversed else card.name
57
 
58
  def generate_prompt(
@@ -101,8 +98,9 @@ class CardInterpreter(CardInterpreterInterface):
101
  prompt = self.generate_prompt(cards, context or "General reading", method)
102
  result = self.pipeline(
103
  prompt,
104
- max_new_tokens=512, # Limit token generation
105
- num_return_sequences=1, # Only generate one response
106
- do_sample=False # Deterministic output
 
107
  )
108
  return result[0]["generated_text"][-1]["content"]
 
14
  # Login to Hugging Face
15
  hf_token = os.getenv("HF_TOKEN")
16
  login(token=hf_token)
17
+ # Initialize the pipeline with a small model, running on CPU
18
  self.pipeline = pipeline(
19
  "text-generation",
20
  model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
21
+ device_map="cpu", # Force CPU for HF Spaces compatibility
22
  pad_token_id=2,
23
+ model_kwargs={"low_cpu_mem_usage": True} # Reduce memory usage
24
  )
25
 
26
+ # Base prompt template
27
  self._base_content = """
28
  You are a powerful occultist and exceptional tarot reader. Provide a concise reading based on the given cards.
29
 
 
41
  - With context: Focus on context-specific interpretation;
42
  - Without context: Give practical daily guidance;
43
 
 
 
 
 
44
  If other context is provided:
45
  - Focus on the context provided;
46
  - Provide a reading related to the context;
 
49
  """
50
 
51
  def _format_card(self, card: Card) -> str:
52
+ # Format card name with reversed state
53
  return f"{card.name} (Reversed)" if card.reversed else card.name
54
 
55
  def generate_prompt(
 
98
  prompt = self.generate_prompt(cards, context or "General reading", method)
99
  result = self.pipeline(
100
  prompt,
101
+ max_new_tokens=256, # Reduced token limit for faster inference
102
+ num_return_sequences=1,
103
+ do_sample=False,
104
+ temperature=0.7 # NOTE: has no effect while do_sample=False (greedy decoding); set do_sample=True to actually sample
105
  )
106
  return result[0]["generated_text"][-1]["content"]