Spaces:
Running
Running
Update custom_utils.py
Browse files- custom_utils.py +11 -16
custom_utils.py
CHANGED
@@ -55,9 +55,9 @@ def rag_retrieval_advanced(openai_api_key,
|
|
55 |
# 2) Weighted average review, sorted in descending order
|
56 |
|
57 |
additional_stages = [
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
]
|
62 |
|
63 |
retrieval_result = vector_search_advanced(
|
@@ -139,7 +139,7 @@ def vector_search_naive(openai_api_key,
|
|
139 |
}
|
140 |
}
|
141 |
|
142 |
-
pipeline = [vector_search_stage,
|
143 |
|
144 |
return invoke_search(db, collection, pipeline)
|
145 |
|
@@ -172,24 +172,20 @@ def vector_search_advanced(openai_api_key,
|
|
172 |
}
|
173 |
}
|
174 |
|
175 |
-
pipeline = [vector_search_and_filter_stage,
|
176 |
|
177 |
return invoke_search(db, collection, pipeline)
|
178 |
|
179 |
-
def
|
180 |
return {
|
181 |
"$unset": "description_embedding"
|
182 |
}
|
183 |
|
184 |
-
def
|
185 |
return {
|
186 |
"$project": {
|
187 |
-
"_id": 1,
|
188 |
"id": 1,
|
189 |
"listing_url": 1,
|
190 |
-
"scrape_id": 1,
|
191 |
-
"last_scraped": 1,
|
192 |
-
"source": 1,
|
193 |
"name": 1,
|
194 |
"description": 1,
|
195 |
"neighborhood_overview": 1,
|
@@ -240,7 +236,6 @@ def get_project_fields_stage():
|
|
240 |
"availability_60": 1,
|
241 |
"availability_90": 1,
|
242 |
"availability_365": 1,
|
243 |
-
"calendar_last_scraped": 1,
|
244 |
"number_of_reviews": 1,
|
245 |
"number_of_reviews_ltm": 1,
|
246 |
"number_of_reviews_l30d": 1,
|
@@ -263,7 +258,7 @@ def get_project_fields_stage():
|
|
263 |
}
|
264 |
}
|
265 |
|
266 |
-
def
|
267 |
return {
|
268 |
"$match": {
|
269 |
"accommodates": { "$eq": 2},
|
@@ -271,7 +266,7 @@ def get_filter_result_stage():
|
|
271 |
}
|
272 |
}
|
273 |
|
274 |
-
def
|
275 |
return {
|
276 |
"$addFields": {
|
277 |
"averageReview": {
|
@@ -294,7 +289,7 @@ def get_average_review_and_review_count_stage():
|
|
294 |
}
|
295 |
}
|
296 |
|
297 |
-
def
|
298 |
return {
|
299 |
"$addFields": {
|
300 |
"weightedAverageReview": {
|
@@ -306,7 +301,7 @@ def get_weighting_stage():
|
|
306 |
}
|
307 |
}
|
308 |
|
309 |
-
def
|
310 |
return {
|
311 |
"$sort": {"weightedAverageReview": -1}
|
312 |
}
|
|
|
55 |
# 2) Weighted average review, sorted in descending order
|
56 |
|
57 |
additional_stages = [
|
58 |
+
get_stage_average_review_and_review_count(),
|
59 |
+
get_stage_weighting(),
|
60 |
+
get_stage_sorting()
|
61 |
]
|
62 |
|
63 |
retrieval_result = vector_search_advanced(
|
|
|
139 |
}
|
140 |
}
|
141 |
|
142 |
+
pipeline = [vector_search_stage, get_stage_include_fields()]
|
143 |
|
144 |
return invoke_search(db, collection, pipeline)
|
145 |
|
|
|
172 |
}
|
173 |
}
|
174 |
|
175 |
+
pipeline = [vector_search_and_filter_stage, get_stage_include_fields()] + additional_stages
|
176 |
|
177 |
return invoke_search(db, collection, pipeline)
|
178 |
|
179 |
+
def get_stage_exclude_fields():
|
180 |
return {
|
181 |
"$unset": "description_embedding"
|
182 |
}
|
183 |
|
184 |
+
def get_stage_include_fields():
|
185 |
return {
|
186 |
"$project": {
|
|
|
187 |
"id": 1,
|
188 |
"listing_url": 1,
|
|
|
|
|
|
|
189 |
"name": 1,
|
190 |
"description": 1,
|
191 |
"neighborhood_overview": 1,
|
|
|
236 |
"availability_60": 1,
|
237 |
"availability_90": 1,
|
238 |
"availability_365": 1,
|
|
|
239 |
"number_of_reviews": 1,
|
240 |
"number_of_reviews_ltm": 1,
|
241 |
"number_of_reviews_l30d": 1,
|
|
|
258 |
}
|
259 |
}
|
260 |
|
261 |
+
def get_stage_filter_result():
|
262 |
return {
|
263 |
"$match": {
|
264 |
"accommodates": { "$eq": 2},
|
|
|
266 |
}
|
267 |
}
|
268 |
|
269 |
+
def get_stage_average_review_and_review_count():
|
270 |
return {
|
271 |
"$addFields": {
|
272 |
"averageReview": {
|
|
|
289 |
}
|
290 |
}
|
291 |
|
292 |
+
def get_stage_weighting():
|
293 |
return {
|
294 |
"$addFields": {
|
295 |
"weightedAverageReview": {
|
|
|
301 |
}
|
302 |
}
|
303 |
|
304 |
+
def get_stage_sorting():
|
305 |
return {
|
306 |
"$sort": {"weightedAverageReview": -1}
|
307 |
}
|