bstraehle committed on
Commit
c90c1d1
·
verified ·
1 Parent(s): 0958f7d

Update custom_utils.py

Browse files
Files changed (1) hide show
  1. custom_utils.py +52 -48
custom_utils.py CHANGED
@@ -55,53 +55,9 @@ def rag_retrieval_advanced(openai_api_key,
55
 
56
  # 1b) Post-retrieval processing: result filter (accommodates, bedrooms) plus...
57
 
58
- #match_stage = {
59
- # "$match": {
60
- # "accommodates": { "$eq": 2},
61
- # "bedrooms": { "$eq": 1}
62
- # }
63
- #}
64
 
65
- # 2) Average review score and review count boost, sorted in descending order
66
-
67
- review_average_stage = {
68
- "$addFields": {
69
- "averageReviewScore": {
70
- "$divide": [
71
- {
72
- "$add": [
73
- "$review_scores_rating",
74
- "$review_scores_accuracy",
75
- "$review_scores_cleanliness",
76
- "$review_scores_checkin",
77
- "$review_scores_communication",
78
- "$review_scores_location",
79
- "$review_scores_value",
80
- ]
81
- },
82
- 7
83
- ]
84
- },
85
- "reviewCountBoost": "$number_of_reviews"
86
- }
87
- }
88
-
89
- weighting_stage = {
90
- "$addFields": {
91
- "combinedScore": {
92
- "$add": [
93
- {"$multiply": ["$averageReviewScore", 0.9]},
94
- {"$multiply": ["$reviewCountBoost", 0.1]},
95
- ]
96
- }
97
- }
98
- }
99
-
100
- sorting_stage_sort = {
101
- "$sort": {"combinedScore": -1}
102
- }
103
-
104
- additional_stages = [review_average_stage, weighting_stage, sorting_stage_sort]
105
 
106
  retrieval_result = vector_search_advanced(
107
  openai_api_key,
@@ -201,7 +157,7 @@ def vector_search_advanced(openai_api_key,
201
  if query_embedding is None:
202
  return "Invalid query or embedding generation failed."
203
 
204
- vector_search_stage = {
205
  "$vectorSearch": {
206
  "index": vector_index,
207
  "queryVector": query_embedding,
@@ -217,7 +173,7 @@ def vector_search_advanced(openai_api_key,
217
  }
218
  }
219
 
220
- pipeline = [vector_search_stage, get_remove_embedding_stage()] + additional_stages
221
 
222
  return invoke_search(db, collection, pipeline)
223
 
@@ -226,6 +182,54 @@ def get_remove_embedding_stage():
226
  "$unset": "description_embedding"
227
  }
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  def invoke_search(db, collection, pipeline):
230
  results = collection.aggregate(pipeline)
231
 
 
55
 
56
  # 1b) Post-retrieval processing: result filter (accommodates, bedrooms) plus...
57
 
58
+ # 2) Weighted average review, sorted in descending order
 
 
 
 
 
59
 
60
+ additional_stages = [get_average_review_stage(), get_weighting_stage(), get_sorting_stage()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  retrieval_result = vector_search_advanced(
63
  openai_api_key,
 
157
  if query_embedding is None:
158
  return "Invalid query or embedding generation failed."
159
 
160
+ vector_search_filter_stage = {
161
  "$vectorSearch": {
162
  "index": vector_index,
163
  "queryVector": query_embedding,
 
173
  }
174
  }
175
 
176
+ pipeline = [vector_search_filter_stage, get_remove_embedding_stage()] + additional_stages
177
 
178
  return invoke_search(db, collection, pipeline)
179
 
 
182
  "$unset": "description_embedding"
183
  }
184
 
185
+ def get_result_filter_stage():
186
+ return {
187
+ "$match": {
188
+ "accommodates": { "$eq": 2},
189
+ "bedrooms": { "$eq": 1}
190
+ }
191
+ }
192
+
193
+ def get_average_review_stage():
194
+ return {
195
+ "$addFields": {
196
+ "averageReview": {
197
+ "$divide": [
198
+ {
199
+ "$add": [
200
+ "$review_scores_rating",
201
+ "$review_scores_accuracy",
202
+ "$review_scores_cleanliness",
203
+ "$review_scores_checkin",
204
+ "$review_scores_communication",
205
+ "$review_scores_location",
206
+ "$review_scores_value",
207
+ ]
208
+ },
209
+ 7
210
+ ]
211
+ },
212
+ "reviewCount": "$number_of_reviews"
213
+ }
214
+ }
215
+
216
+ def get_weighting_stage():
217
+ return {
218
+ "$addFields": {
219
+ "weightedAverageReview": {
220
+ "$add": [
221
+ {"$multiply": ["$averageReview", 0.9]},
222
+ {"$multiply": ["$reviewCount", 0.1]},
223
+ ]
224
+ }
225
+ }
226
+ }
227
+
228
+ def get_sorting_stage():
229
+ return {
230
+ "$sort": {"weightedAverageReview": -1}
231
+ }
232
+
233
  def invoke_search(db, collection, pipeline):
234
  results = collection.aggregate(pipeline)
235