Spaces:
Sleeping
Sleeping
Commit
·
bca9833
1
Parent(s):
a03b5fc
change for AI thinking
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ For more information on `huggingface_hub` Inference API support, please check th
|
|
10 |
client = InferenceClient("Trinoid/Data_Management")
|
11 |
|
12 |
def clean_response(text):
|
13 |
-
"""Clean up response by removing meta-text and
|
14 |
# Remove thinking phrases
|
15 |
thinking_patterns = [
|
16 |
r"I need to figure out",
|
@@ -22,17 +22,55 @@ def clean_response(text):
|
|
22 |
r"I'm not entirely sure",
|
23 |
r"I believe this is",
|
24 |
r"I imagine it involves",
|
|
|
|
|
|
|
|
|
25 |
]
|
26 |
|
27 |
for pattern in thinking_patterns:
|
28 |
text = re.sub(pattern, "", text, flags=re.IGNORECASE)
|
29 |
|
30 |
-
#
|
31 |
paragraphs = text.split('\n\n')
|
32 |
-
|
|
|
|
|
33 |
for p in paragraphs:
|
34 |
-
if p and
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
return '\n\n'.join(unique_paragraphs)
|
38 |
|
@@ -44,18 +82,21 @@ def respond(
|
|
44 |
temperature,
|
45 |
top_p,
|
46 |
):
|
47 |
-
# Create a more structured system prompt
|
48 |
enhanced_system_message = f"""
|
49 |
{system_message}
|
50 |
|
51 |
-
|
52 |
1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
|
53 |
2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
|
54 |
3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
|
55 |
4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
|
56 |
-
5.
|
57 |
-
6.
|
58 |
-
7.
|
|
|
|
|
|
|
59 |
"""
|
60 |
|
61 |
messages = [{"role": "system", "content": enhanced_system_message}]
|
|
|
10 |
client = InferenceClient("Trinoid/Data_Management")
|
11 |
|
12 |
def clean_response(text):
|
13 |
+
"""Clean up response by removing meta-text, thinking artifacts, and repetitive content"""
|
14 |
# Remove thinking phrases
|
15 |
thinking_patterns = [
|
16 |
r"I need to figure out",
|
|
|
22 |
r"I'm not entirely sure",
|
23 |
r"I believe this is",
|
24 |
r"I imagine it involves",
|
25 |
+
r"Okay, so I need to",
|
26 |
+
r"From what I know,",
|
27 |
+
r"One of the main reasons to",
|
28 |
+
r"Another reason to",
|
29 |
]
|
30 |
|
31 |
for pattern in thinking_patterns:
|
32 |
text = re.sub(pattern, "", text, flags=re.IGNORECASE)
|
33 |
|
34 |
+
# Split into paragraphs for deduplication
|
35 |
paragraphs = text.split('\n\n')
|
36 |
+
|
37 |
+
# Only keep meaningful paragraphs
|
38 |
+
filtered_paragraphs = []
|
39 |
for p in paragraphs:
|
40 |
+
if p and len(p.strip()) > 20: # Only include non-empty paragraphs with substance
|
41 |
+
filtered_paragraphs.append(p)
|
42 |
+
|
43 |
+
# Remove similar paragraphs (not just exact duplicates)
|
44 |
+
unique_paragraphs = []
|
45 |
+
for i, current_para in enumerate(filtered_paragraphs):
|
46 |
+
is_duplicate = False
|
47 |
+
|
48 |
+
# Convert to lowercase and remove punctuation for comparison
|
49 |
+
clean_current = re.sub(r'[^\w\s]', '', current_para.lower())
|
50 |
+
|
51 |
+
for prev_para in unique_paragraphs:
|
52 |
+
# Clean previous paragraph too
|
53 |
+
clean_prev = re.sub(r'[^\w\s]', '', prev_para.lower())
|
54 |
+
|
55 |
+
# Check for similarity using word overlap between the two paragraphs
|
56 |
+
# If more than 50% of the shorter paragraph's unique words match, consider it similar
|
57 |
+
words_current = set(clean_current.split())
|
58 |
+
words_prev = set(clean_prev.split())
|
59 |
+
|
60 |
+
if len(words_current) > 0 and len(words_prev) > 0:
|
61 |
+
common_words = words_current.intersection(words_prev)
|
62 |
+
similarity = len(common_words) / min(len(words_current), len(words_prev))
|
63 |
+
|
64 |
+
if similarity > 0.5: # If more than 50% similar, it's a duplicate concept
|
65 |
+
is_duplicate = True
|
66 |
+
break
|
67 |
+
|
68 |
+
if not is_duplicate:
|
69 |
+
unique_paragraphs.append(current_para)
|
70 |
+
|
71 |
+
# Append a notice if fewer than half of the filtered paragraphs survived deduplication
|
72 |
+
if len(unique_paragraphs) < len(filtered_paragraphs) / 2:
|
73 |
+
unique_paragraphs.append("Note: Some repetitive content has been removed from this response for clarity.")
|
74 |
|
75 |
return '\n\n'.join(unique_paragraphs)
|
76 |
|
|
|
82 |
temperature,
|
83 |
top_p,
|
84 |
):
|
85 |
+
# Create a more structured system prompt with strict instructions about repetition
|
86 |
enhanced_system_message = f"""
|
87 |
{system_message}
|
88 |
|
89 |
+
CRITICAL INSTRUCTIONS FOR YOUR RESPONSES:
|
90 |
1. PROVIDE DIRECT, AUTHORITATIVE, AND COMPLETE ANSWERS ABOUT MICROSOFT 365 AND DATA MANAGEMENT.
|
91 |
2. DO NOT USE PHRASES LIKE "I think", "I believe", "I'm not sure", "I'll try to", "First, I need to".
|
92 |
3. DO NOT INCLUDE YOUR THINKING PROCESS IN RESPONSES.
|
93 |
4. USE CLEAR STRUCTURE WITH HEADINGS AND BULLET POINTS WHERE APPROPRIATE.
|
94 |
+
5. NEVER REPEAT THE SAME INFORMATION IN DIFFERENT WORDS.
|
95 |
+
6. MENTION EACH CONCEPT EXACTLY ONCE - DO NOT ELABORATE ON THE SAME IDEA MULTIPLE TIMES.
|
96 |
+
7. WHEN ANSWERING QUESTIONS ABOUT DOCUMENT MANAGEMENT, PROVIDE SPECIFIC DETAILS ABOUT THE ACTUAL TOOLS AND FEATURES.
|
97 |
+
8. LIMIT YOUR RESPONSE LENGTH TO WHAT IS NECESSARY - BE CONCISE.
|
98 |
+
9. WHEN GIVING EXAMPLES, PROVIDE ONE CLEAR EXAMPLE RATHER THAN MULTIPLE SIMILAR ONES.
|
99 |
+
10. ANSWER AS A MICROSOFT 365 EXPERT WITH AUTHORITATIVE KNOWLEDGE.
|
100 |
"""
|
101 |
|
102 |
messages = [{"role": "system", "content": enhanced_system_message}]
|