Spaces:
Running
Running
Update custom_utils.py
Browse files- custom_utils.py +52 -48
custom_utils.py
CHANGED
@@ -55,53 +55,9 @@ def rag_retrieval_advanced(openai_api_key,
|
|
55 |
|
56 |
# 1b) Post-retrieval processing: result filter (accommodates, bedrooms) plus...
|
57 |
|
58 |
-
#
|
59 |
-
# "$match": {
|
60 |
-
# "accommodates": { "$eq": 2},
|
61 |
-
# "bedrooms": { "$eq": 1}
|
62 |
-
# }
|
63 |
-
#}
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
review_average_stage = {
|
68 |
-
"$addFields": {
|
69 |
-
"averageReviewScore": {
|
70 |
-
"$divide": [
|
71 |
-
{
|
72 |
-
"$add": [
|
73 |
-
"$review_scores_rating",
|
74 |
-
"$review_scores_accuracy",
|
75 |
-
"$review_scores_cleanliness",
|
76 |
-
"$review_scores_checkin",
|
77 |
-
"$review_scores_communication",
|
78 |
-
"$review_scores_location",
|
79 |
-
"$review_scores_value",
|
80 |
-
]
|
81 |
-
},
|
82 |
-
7
|
83 |
-
]
|
84 |
-
},
|
85 |
-
"reviewCountBoost": "$number_of_reviews"
|
86 |
-
}
|
87 |
-
}
|
88 |
-
|
89 |
-
weighting_stage = {
|
90 |
-
"$addFields": {
|
91 |
-
"combinedScore": {
|
92 |
-
"$add": [
|
93 |
-
{"$multiply": ["$averageReviewScore", 0.9]},
|
94 |
-
{"$multiply": ["$reviewCountBoost", 0.1]},
|
95 |
-
]
|
96 |
-
}
|
97 |
-
}
|
98 |
-
}
|
99 |
-
|
100 |
-
sorting_stage_sort = {
|
101 |
-
"$sort": {"combinedScore": -1}
|
102 |
-
}
|
103 |
-
|
104 |
-
additional_stages = [review_average_stage, weighting_stage, sorting_stage_sort]
|
105 |
|
106 |
retrieval_result = vector_search_advanced(
|
107 |
openai_api_key,
|
@@ -201,7 +157,7 @@ def vector_search_advanced(openai_api_key,
|
|
201 |
if query_embedding is None:
|
202 |
return "Invalid query or embedding generation failed."
|
203 |
|
204 |
-
|
205 |
"$vectorSearch": {
|
206 |
"index": vector_index,
|
207 |
"queryVector": query_embedding,
|
@@ -217,7 +173,7 @@ def vector_search_advanced(openai_api_key,
|
|
217 |
}
|
218 |
}
|
219 |
|
220 |
-
pipeline = [
|
221 |
|
222 |
return invoke_search(db, collection, pipeline)
|
223 |
|
@@ -226,6 +182,54 @@ def get_remove_embedding_stage():
|
|
226 |
"$unset": "description_embedding"
|
227 |
}
|
228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
def invoke_search(db, collection, pipeline):
|
230 |
results = collection.aggregate(pipeline)
|
231 |
|
|
|
55 |
|
56 |
# 1b) Post-retrieval processing: result filter (accommodates, bedrooms) plus...
|
57 |
|
58 |
+
# 2) Weighted average review, sorted in descending order
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
+
additional_stages = [get_average_review_stage(), get_weighting_stage(), get_sorting_stage()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
retrieval_result = vector_search_advanced(
|
63 |
openai_api_key,
|
|
|
157 |
if query_embedding is None:
|
158 |
return "Invalid query or embedding generation failed."
|
159 |
|
160 |
+
vector_search_filter_stage = {
|
161 |
"$vectorSearch": {
|
162 |
"index": vector_index,
|
163 |
"queryVector": query_embedding,
|
|
|
173 |
}
|
174 |
}
|
175 |
|
176 |
+
pipeline = [vector_search_filter_stage, get_remove_embedding_stage()] + additional_stages
|
177 |
|
178 |
return invoke_search(db, collection, pipeline)
|
179 |
|
|
|
182 |
"$unset": "description_embedding"
|
183 |
}
|
184 |
|
185 |
+
def get_result_filter_stage():
|
186 |
+
return {
|
187 |
+
"$match": {
|
188 |
+
"accommodates": { "$eq": 2},
|
189 |
+
"bedrooms": { "$eq": 1}
|
190 |
+
}
|
191 |
+
}
|
192 |
+
|
193 |
+
def get_average_review_stage():
|
194 |
+
return {
|
195 |
+
"$addFields": {
|
196 |
+
"averageReview": {
|
197 |
+
"$divide": [
|
198 |
+
{
|
199 |
+
"$add": [
|
200 |
+
"$review_scores_rating",
|
201 |
+
"$review_scores_accuracy",
|
202 |
+
"$review_scores_cleanliness",
|
203 |
+
"$review_scores_checkin",
|
204 |
+
"$review_scores_communication",
|
205 |
+
"$review_scores_location",
|
206 |
+
"$review_scores_value",
|
207 |
+
]
|
208 |
+
},
|
209 |
+
7
|
210 |
+
]
|
211 |
+
},
|
212 |
+
"reviewCount": "$number_of_reviews"
|
213 |
+
}
|
214 |
+
}
|
215 |
+
|
216 |
+
def get_weighting_stage():
|
217 |
+
return {
|
218 |
+
"$addFields": {
|
219 |
+
"weightedAverageReview": {
|
220 |
+
"$add": [
|
221 |
+
{"$multiply": ["$averageReview", 0.9]},
|
222 |
+
{"$multiply": ["$reviewCount", 0.1]},
|
223 |
+
]
|
224 |
+
}
|
225 |
+
}
|
226 |
+
}
|
227 |
+
|
228 |
+
def get_sorting_stage():
|
229 |
+
return {
|
230 |
+
"$sort": {"weightedAverageReview": -1}
|
231 |
+
}
|
232 |
+
|
233 |
def invoke_search(db, collection, pipeline):
|
234 |
results = collection.aggregate(pipeline)
|
235 |
|