rufimelo committed on
Commit
bd7b6e0
·
1 Parent(s): 26485d2
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app/__pycache__/core.cpython-310.pyc CHANGED
Binary files a/app/__pycache__/core.cpython-310.pyc and b/app/__pycache__/core.cpython-310.pyc differ
 
app/app.py CHANGED
@@ -155,6 +155,7 @@ def recommend_wine_from_form(n_clicks):
155
  user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO
156
  user_rating_df.fillna(0, inplace=True)
157
  user_rating_df['user'] = user_rating_df.index
 
158
 
159
 
160
  wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)
 
155
  user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO
156
  user_rating_df.fillna(0, inplace=True)
157
  user_rating_df['user'] = user_rating_df.index
158
+ user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)
159
 
160
 
161
  wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)
app/core.py CHANGED
@@ -1,4 +1,5 @@
1
  import pandas as pd
 
2
  from sklearn.metrics.pairwise import cosine_similarity
3
  def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> pd.DataFrame:
4
  wine_row = df[df["NAME"] == wine_name].drop(columns=["NAME", "cluster"])
@@ -22,3 +23,16 @@ def recommend_wine_from_users(df:pd.DataFrame, user:str, n=5):
22
  cluster_avg = cluster_users.mean()
23
  cluster_avg = cluster_avg[user_unrated]
24
  return cluster_avg.sort_values(ascending=False).keys()[:n].tolist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
+ import numpy as np
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> pd.DataFrame:
5
  wine_row = df[df["NAME"] == wine_name].drop(columns=["NAME", "cluster"])
 
23
  cluster_avg = cluster_users.mean()
24
  cluster_avg = cluster_avg[user_unrated]
25
  return cluster_avg.sort_values(ascending=False).keys()[:n].tolist()
26
+
27
def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str):
    """Return the cluster of the existing user most similar to ``new_user``.

    Similarity is the mean squared difference between the rating rows of
    ``new_user`` and every other user (smaller means more similar). Missing
    values are skipped when averaging.

    Args:
        df2: Frame indexed by user label. It must contain a ``'cluster'``
            column; a ``'user'`` column, if present, is ignored. All other
            columns are treated as ratings. The frame is not modified.
        new_user: Index label of the user to find a neighbour for.

    Returns:
        The ``'cluster'`` value of the closest *other* user, as an ``int``.

    Raises:
        KeyError: If ``new_user`` is not in the index or ``'cluster'`` is
            missing.
        ValueError: If there is no other user to compare against.
    """
    # Compare on rating columns only: 'cluster' and 'user' are metadata and
    # must not take part in the distance computation.
    ratings = df2.drop(columns=["cluster", "user"], errors="ignore")
    target = ratings.loc[new_user]

    # Mean squared difference per user; ``mean`` skips NaN by default.
    distance = ratings.sub(target, axis=1).pow(2).mean(axis=1)

    # Exclude the new user by label instead of assuming it sorts first:
    # another user with identical ratings also has distance 0, and the sort
    # order between the two is not guaranteed.
    distance = distance.drop(index=new_user)
    if distance.empty:
        raise ValueError("no other users available to compare against")

    most_similar_user = distance.idxmin()
    return int(df2.loc[most_similar_user, "cluster"])
app/dashboard.ipynb CHANGED
@@ -2,14 +2,14 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 9,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stderr",
10
  "output_type": "stream",
11
  "text": [
12
- "/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_4768/3788270293.py:82: FutureWarning:\n",
13
  "\n",
14
  "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
15
  "\n"
@@ -30,7 +30,7 @@
30
  " "
31
  ],
32
  "text/plain": [
33
- "<IPython.lib.display.IFrame at 0x2b30dab90>"
34
  ]
35
  },
36
  "metadata": {},
@@ -55,6 +55,74 @@
55
  "text": [
56
  "0\n"
57
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
  ],
60
  "source": [
@@ -215,6 +283,7 @@
215
  " user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO\n",
216
  " user_rating_df.fillna(0, inplace=True)\n",
217
  " user_rating_df['user'] = user_rating_df.index\n",
 
218
  "\n",
219
  "\n",
220
  " wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)\n",
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": null,
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stderr",
10
  "output_type": "stream",
11
  "text": [
12
+ "/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_23769/1782084106.py:82: FutureWarning:\n",
13
  "\n",
14
  "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
15
  "\n"
 
30
  " "
31
  ],
32
  "text/plain": [
33
+ "<IPython.lib.display.IFrame at 0x2a978bdf0>"
34
  ]
35
  },
36
  "metadata": {},
 
55
  "text": [
56
  "0\n"
57
  ]
58
+ },
59
+ {
60
+ "name": "stderr",
61
+ "output_type": "stream",
62
+ "text": [
63
+ "/Users/ruimelo/Documents/GitHub/eda/app/core.py:20: SettingWithCopyWarning:\n",
64
+ "\n",
65
+ "\n",
66
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
67
+ "\n",
68
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
69
+ "\n",
70
+ "/Users/ruimelo/Documents/GitHub/eda/app/core.py:31: SettingWithCopyWarning:\n",
71
+ "\n",
72
+ "\n",
73
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
74
+ "\n",
75
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
76
+ "\n",
77
+ "[2024-09-17 19:32:00,521] ERROR in app: Exception on /_dash-update-component [POST]\n",
78
+ "Traceback (most recent call last):\n",
79
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 1473, in wsgi_app\n",
80
+ " response = self.full_dispatch_request()\n",
81
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 882, in full_dispatch_request\n",
82
+ " rv = self.handle_user_exception(e)\n",
83
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 880, in full_dispatch_request\n",
84
+ " rv = self.dispatch_request()\n",
85
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 865, in dispatch_request\n",
86
+ " return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]\n",
87
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/dash.py\", line 1376, in dispatch\n",
88
+ " ctx.run(\n",
89
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 514, in add_context\n",
90
+ " raise err\n",
91
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 503, in add_context\n",
92
+ " output_value = _invoke_callback(func, *func_args, **func_kwargs)\n",
93
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 43, in _invoke_callback\n",
94
+ " return func(*args, **kwargs) # %% callback invoked %%\n",
95
+ " File \"/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_23769/1782084106.py\", line 158, in recommend_wine_from_form\n",
96
+ " user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)\n",
97
+ " File \"/Users/ruimelo/Documents/GitHub/eda/app/core.py\", line 34, in get_most_similar_user_clust\n",
98
+ " cluster_avg = cluster_avg[new_user_ratings.index]\n",
99
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/series.py\", line 1153, in __getitem__\n",
100
+ " return self._get_with(key)\n",
101
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/series.py\", line 1194, in _get_with\n",
102
+ " return self.loc[key]\n",
103
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1191, in __getitem__\n",
104
+ " return self._getitem_axis(maybe_callable, axis=axis)\n",
105
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1420, in _getitem_axis\n",
106
+ " return self._getitem_iterable(key, axis=axis)\n",
107
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1360, in _getitem_iterable\n",
108
+ " keyarr, indexer = self._get_listlike_indexer(key, axis)\n",
109
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1558, in _get_listlike_indexer\n",
110
+ " keyarr, indexer = ax._get_indexer_strict(key, axis_name)\n",
111
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexes/base.py\", line 6200, in _get_indexer_strict\n",
112
+ " self._raise_if_missing(keyarr, indexer, axis_name)\n",
113
+ " File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexes/base.py\", line 6252, in _raise_if_missing\n",
114
+ " raise KeyError(f\"{not_found} not in index\")\n",
115
+ "KeyError: \"['user'] not in index\"\n"
116
+ ]
117
+ },
118
+ {
119
+ "name": "stdout",
120
+ "output_type": "stream",
121
+ "text": [
122
+ "Dedicado 2.0\n",
123
+ "user temporary_user\n",
124
+ "Name: temporary_user, dtype: object\n"
125
+ ]
126
  }
127
  ],
128
  "source": [
 
283
  " user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO\n",
284
  " user_rating_df.fillna(0, inplace=True)\n",
285
  " user_rating_df['user'] = user_rating_df.index\n",
286
+ " user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)\n",
287
  "\n",
288
  "\n",
289
  " wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)\n",
app/requirements.txt CHANGED
@@ -4,4 +4,5 @@ pandas
4
  gunicorn
5
  wandb==0.15.5
6
  scipy
7
- scikit-learn
 
 
4
  gunicorn
5
  wandb==0.15.5
6
  scipy
7
+ scikit-learn
8
+ numpy