Spaces:
Sleeping
Sleeping
yolo
Browse files- .DS_Store +0 -0
- app/__pycache__/core.cpython-310.pyc +0 -0
- app/app.py +1 -0
- app/core.py +14 -0
- app/dashboard.ipynb +72 -3
- app/requirements.txt +2 -1
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
app/__pycache__/core.cpython-310.pyc
CHANGED
Binary files a/app/__pycache__/core.cpython-310.pyc and b/app/__pycache__/core.cpython-310.pyc differ
|
|
app/app.py
CHANGED
@@ -155,6 +155,7 @@ def recommend_wine_from_form(n_clicks):
|
|
155 |
user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO
|
156 |
user_rating_df.fillna(0, inplace=True)
|
157 |
user_rating_df['user'] = user_rating_df.index
|
|
|
158 |
|
159 |
|
160 |
wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)
|
|
|
155 |
user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO
|
156 |
user_rating_df.fillna(0, inplace=True)
|
157 |
user_rating_df['user'] = user_rating_df.index
|
158 |
+
user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)
|
159 |
|
160 |
|
161 |
wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)
|
app/core.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import pandas as pd
|
|
|
2 |
from sklearn.metrics.pairwise import cosine_similarity
|
3 |
def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> pd.DataFrame:
|
4 |
wine_row = df[df["NAME"] == wine_name].drop(columns=["NAME", "cluster"])
|
@@ -22,3 +23,16 @@ def recommend_wine_from_users(df:pd.DataFrame, user:str, n=5):
|
|
22 |
cluster_avg = cluster_users.mean()
|
23 |
cluster_avg = cluster_avg[user_unrated]
|
24 |
return cluster_avg.sort_values(ascending=False).keys()[:n].tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> pd.DataFrame:
|
5 |
wine_row = df[df["NAME"] == wine_name].drop(columns=["NAME", "cluster"])
|
|
|
23 |
cluster_avg = cluster_users.mean()
|
24 |
cluster_avg = cluster_avg[user_unrated]
|
25 |
return cluster_avg.sort_values(ascending=False).keys()[:n].tolist()
|
26 |
+
|
27 |
+
def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str) -> int:
    """Return the cluster of the existing user whose ratings are closest to ``new_user``.

    Closeness is the mean squared difference between the two users' ratings,
    averaged over the rating columns and ignoring NaN entries.

    Args:
        df2: One row per user (indexed by user label). Must contain a
            ``cluster`` column; a ``user`` column, when present, is ignored.
            Every other column is treated as a rating. The frame is not
            modified.
        new_user: Index label of the user to find a match for.

    Returns:
        The ``cluster`` value of the most similar *other* user, as an ``int``.

    Raises:
        KeyError: If ``new_user`` is not in the index or ``cluster`` is missing.
        ValueError: If there is no other user to compare against.
    """
    # Only rating columns take part in the distance; 'cluster' and 'user' are
    # metadata and would otherwise pollute (or break) the subtraction.
    ratings = df2.drop(columns=["cluster", "user"], errors="ignore")
    new_user_ratings = ratings.loc[new_user]

    # Exclude the new user explicitly instead of assuming they sort first:
    # an existing user with identical ratings ties at distance 0, and the
    # sort order between tied rows is not guaranteed.
    other_users = ratings.drop(index=new_user)
    if other_users.empty:
        raise ValueError(
            f"no other users to compare with {new_user!r}"
        )

    # DataFrame - Series aligns the Series on the columns, so this is the
    # row-wise squared difference; mean(axis=1) skips NaN like np.nanmean.
    distance = ((other_users - new_user_ratings) ** 2).mean(axis=1)
    most_similar_user = distance.idxmin()
    return int(df2.loc[most_similar_user, "cluster"])
|
app/dashboard.ipynb
CHANGED
@@ -2,14 +2,14 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
{
|
9 |
"name": "stderr",
|
10 |
"output_type": "stream",
|
11 |
"text": [
|
12 |
-
"/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/
|
13 |
"\n",
|
14 |
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
15 |
"\n"
|
@@ -30,7 +30,7 @@
|
|
30 |
" "
|
31 |
],
|
32 |
"text/plain": [
|
33 |
-
"<IPython.lib.display.IFrame at
|
34 |
]
|
35 |
},
|
36 |
"metadata": {},
|
@@ -55,6 +55,74 @@
|
|
55 |
"text": [
|
56 |
"0\n"
|
57 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
}
|
59 |
],
|
60 |
"source": [
|
@@ -215,6 +283,7 @@
|
|
215 |
" user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO\n",
|
216 |
" user_rating_df.fillna(0, inplace=True)\n",
|
217 |
" user_rating_df['user'] = user_rating_df.index\n",
|
|
|
218 |
"\n",
|
219 |
"\n",
|
220 |
" wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)\n",
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
"metadata": {},
|
7 |
"outputs": [
|
8 |
{
|
9 |
"name": "stderr",
|
10 |
"output_type": "stream",
|
11 |
"text": [
|
12 |
+
"/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_23769/1782084106.py:82: FutureWarning:\n",
|
13 |
"\n",
|
14 |
"Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
|
15 |
"\n"
|
|
|
30 |
" "
|
31 |
],
|
32 |
"text/plain": [
|
33 |
+
"<IPython.lib.display.IFrame at 0x2a978bdf0>"
|
34 |
]
|
35 |
},
|
36 |
"metadata": {},
|
|
|
55 |
"text": [
|
56 |
"0\n"
|
57 |
]
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"name": "stderr",
|
61 |
+
"output_type": "stream",
|
62 |
+
"text": [
|
63 |
+
"/Users/ruimelo/Documents/GitHub/eda/app/core.py:20: SettingWithCopyWarning:\n",
|
64 |
+
"\n",
|
65 |
+
"\n",
|
66 |
+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
67 |
+
"\n",
|
68 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
69 |
+
"\n",
|
70 |
+
"/Users/ruimelo/Documents/GitHub/eda/app/core.py:31: SettingWithCopyWarning:\n",
|
71 |
+
"\n",
|
72 |
+
"\n",
|
73 |
+
"A value is trying to be set on a copy of a slice from a DataFrame\n",
|
74 |
+
"\n",
|
75 |
+
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
76 |
+
"\n",
|
77 |
+
"[2024-09-17 19:32:00,521] ERROR in app: Exception on /_dash-update-component [POST]\n",
|
78 |
+
"Traceback (most recent call last):\n",
|
79 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 1473, in wsgi_app\n",
|
80 |
+
" response = self.full_dispatch_request()\n",
|
81 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 882, in full_dispatch_request\n",
|
82 |
+
" rv = self.handle_user_exception(e)\n",
|
83 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 880, in full_dispatch_request\n",
|
84 |
+
" rv = self.dispatch_request()\n",
|
85 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 865, in dispatch_request\n",
|
86 |
+
" return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]\n",
|
87 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/dash.py\", line 1376, in dispatch\n",
|
88 |
+
" ctx.run(\n",
|
89 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 514, in add_context\n",
|
90 |
+
" raise err\n",
|
91 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 503, in add_context\n",
|
92 |
+
" output_value = _invoke_callback(func, *func_args, **func_kwargs)\n",
|
93 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 43, in _invoke_callback\n",
|
94 |
+
" return func(*args, **kwargs) # %% callback invoked %%\n",
|
95 |
+
" File \"/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_23769/1782084106.py\", line 158, in recommend_wine_from_form\n",
|
96 |
+
" user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)\n",
|
97 |
+
" File \"/Users/ruimelo/Documents/GitHub/eda/app/core.py\", line 34, in get_most_similar_user_clust\n",
|
98 |
+
" cluster_avg = cluster_avg[new_user_ratings.index]\n",
|
99 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/series.py\", line 1153, in __getitem__\n",
|
100 |
+
" return self._get_with(key)\n",
|
101 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/series.py\", line 1194, in _get_with\n",
|
102 |
+
" return self.loc[key]\n",
|
103 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1191, in __getitem__\n",
|
104 |
+
" return self._getitem_axis(maybe_callable, axis=axis)\n",
|
105 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1420, in _getitem_axis\n",
|
106 |
+
" return self._getitem_iterable(key, axis=axis)\n",
|
107 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1360, in _getitem_iterable\n",
|
108 |
+
" keyarr, indexer = self._get_listlike_indexer(key, axis)\n",
|
109 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1558, in _get_listlike_indexer\n",
|
110 |
+
" keyarr, indexer = ax._get_indexer_strict(key, axis_name)\n",
|
111 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexes/base.py\", line 6200, in _get_indexer_strict\n",
|
112 |
+
" self._raise_if_missing(keyarr, indexer, axis_name)\n",
|
113 |
+
" File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexes/base.py\", line 6252, in _raise_if_missing\n",
|
114 |
+
" raise KeyError(f\"{not_found} not in index\")\n",
|
115 |
+
"KeyError: \"['user'] not in index\"\n"
|
116 |
+
]
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"name": "stdout",
|
120 |
+
"output_type": "stream",
|
121 |
+
"text": [
|
122 |
+
"Dedicado 2.0\n",
|
123 |
+
"user temporary_user\n",
|
124 |
+
"Name: temporary_user, dtype: object\n"
|
125 |
+
]
|
126 |
}
|
127 |
],
|
128 |
"source": [
|
|
|
283 |
" user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO\n",
|
284 |
" user_rating_df.fillna(0, inplace=True)\n",
|
285 |
" user_rating_df['user'] = user_rating_df.index\n",
|
286 |
+
" user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)\n",
|
287 |
"\n",
|
288 |
"\n",
|
289 |
" wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)\n",
|
app/requirements.txt
CHANGED
@@ -4,4 +4,5 @@ pandas
|
|
4 |
gunicorn
|
5 |
wandb==0.15.5
|
6 |
scipy
|
7 |
-
scikit-learn
|
|
|
|
4 |
gunicorn
|
5 |
wandb==0.15.5
|
6 |
scipy
|
7 |
+
scikit-learn
|
8 |
+
numpy
|