Frankie-walsh4 committed on
Commit
bca9833
·
1 Parent(s): a03b5fc

change for AI thinking

Browse files
Files changed (1) hide show
  1. app.py +51 -10
app.py CHANGED
@@ -10,7 +10,7 @@ For more information on `huggingface_hub` Inference API support, please check th
10
  client = InferenceClient("Trinoid/Data_Management")
11
 
12
  def clean_response(text):
13
- """Clean up response by removing meta-text and thinking artifacts"""
14
  # Remove thinking phrases
15
  thinking_patterns = [
16
  r"I need to figure out",
@@ -22,17 +22,55 @@ def clean_response(text):
22
  r"I'm not entirely sure",
23
  r"I believe this is",
24
  r"I imagine it involves",
 
 
 
 
25
  ]
26
 
27
  for pattern in thinking_patterns:
28
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
29
 
30
- # Remove repeating paragraphs
31
  paragraphs = text.split('\n\n')
32
- unique_paragraphs = []
 
 
33
  for p in paragraphs:
34
- if p and p not in unique_paragraphs and len(p.strip()) > 20:
35
- unique_paragraphs.append(p)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  return '\n\n'.join(unique_paragraphs)
38
 
@@ -44,18 +82,21 @@ def respond(
44
  temperature,
45
  top_p,
46
  ):
47
- # Create a more structured system prompt
48
  enhanced_system_message = f"""
49
  {system_message}
50
 
51
- IMPORTANT INSTRUCTIONS FOR YOUR RESPONSES:
52
  1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
53
  2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
54
  3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
55
  4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
56
- 5. BE CONCISE AND FOCUSED - AVOID UNNECESSARY REPETITION.
57
- 6. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
58
- 7. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
 
 
 
59
  """
60
 
61
  messages = [{"role": "system", "content": enhanced_system_message}]
 
10
  client = InferenceClient("Trinoid/Data_Management")
11
 
12
  def clean_response(text):
13
+ """Clean up response by removing meta-text, thinking artifacts, and repetitive content"""
14
  # Remove thinking phrases
15
  thinking_patterns = [
16
  r"I need to figure out",
 
22
  r"I'm not entirely sure",
23
  r"I believe this is",
24
  r"I imagine it involves",
25
+ r"Okay, so I need to",
26
+ r"From what I know,",
27
+ r"One of the main reasons to",
28
+ r"Another reason to",
29
  ]
30
 
31
  for pattern in thinking_patterns:
32
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
33
 
34
+ # Split into paragraphs for deduplication
35
  paragraphs = text.split('\n\n')
36
+
37
+ # Only keep meaningful paragraphs
38
+ filtered_paragraphs = []
39
  for p in paragraphs:
40
+ if p and len(p.strip()) > 20: # Only include non-empty paragraphs with substance
41
+ filtered_paragraphs.append(p)
42
+
43
+ # Remove similar paragraphs (not just exact duplicates)
44
+ unique_paragraphs = []
45
+ for i, current_para in enumerate(filtered_paragraphs):
46
+ is_duplicate = False
47
+
48
+ # Convert to lowercase and remove punctuation for comparison
49
+ clean_current = re.sub(r'[^\w\s]', '', current_para.lower())
50
+
51
+ for prev_para in unique_paragraphs:
52
+ # Clean previous paragraph too
53
+ clean_prev = re.sub(r'[^\w\s]', '', prev_para.lower())
54
+
55
+ # Check for similarity using word overlap
56
+ # If more than 50% of words match, consider it similar
57
+ words_current = set(clean_current.split())
58
+ words_prev = set(clean_prev.split())
59
+
60
+ if len(words_current) > 0 and len(words_prev) > 0:
61
+ common_words = words_current.intersection(words_prev)
62
+ similarity = len(common_words) / min(len(words_current), len(words_prev))
63
+
64
+ if similarity > 0.5: # If more than 50% similar, it's a duplicate concept
65
+ is_duplicate = True
66
+ break
67
+
68
+ if not is_duplicate:
69
+ unique_paragraphs.append(current_para)
70
+
71
+ # Add a summary paragraph if we removed a lot
72
+ if len(unique_paragraphs) < len(filtered_paragraphs) / 2:
73
+ unique_paragraphs.append("Note: Some repetitive content has been removed from this response for clarity.")
74
 
75
  return '\n\n'.join(unique_paragraphs)
76
 
 
82
  temperature,
83
  top_p,
84
  ):
85
+ # Create a more structured system prompt with strict instructions about repetition
86
  enhanced_system_message = f"""
87
  {system_message}
88
 
89
+ CRITICAL INSTRUCTIONS FOR YOUR RESPONSES:
90
  1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
91
  2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
92
  3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
93
  4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
94
+ 5. NEVER REPEAT THE SAME INFORMATION IN DIFFERENT WORDS.
95
+ 6. MENTION EACH CONCEPT EXACTLY ONCE - DO NOT ELABORATE ON THE SAME IDEA MULTIPLE TIMES.
96
+ 7. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
97
+ 8. LIMIT YOUR RESPONSE LENGTH TO WHAT IS NECESSARY - BE CONCISE.
98
+ 9. WHEN GIVING EXAMPLES, PROVIDE ONE CLEAR EXAMPLE RATHER THAN MULTIPLE SIMILAR ONES.
99
+ 10. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
100
  """
101
 
102
  messages = [{"role": "system", "content": enhanced_system_message}]