Spaces:

TourdeVino
/

showcase

Sleeping

App Files Files Community

rufimelo committed on Sep 17, 2024

Commit

bd7b6e0

1 Parent(s): 26485d2

yolo

Browse files

Files changed (6) hide show

.DS_Store +0 -0
app/__pycache__/core.cpython-310.pyc +0 -0
app/app.py +1 -0
app/core.py +14 -0
app/dashboard.ipynb +72 -3
app/requirements.txt +2 -1

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

app/__pycache__/core.cpython-310.pyc CHANGED Viewed

Binary files a/app/__pycache__/core.cpython-310.pyc and b/app/__pycache__/core.cpython-310.pyc differ

app/app.py CHANGED Viewed

@@ -155,6 +155,7 @@ def recommend_wine_from_form(n_clicks):
         user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO
         user_rating_df.fillna(0, inplace=True)
         user_rating_df['user'] = user_rating_df.index
         wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)

         user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO
         user_rating_df.fillna(0, inplace=True)
         user_rating_df['user'] = user_rating_df.index
+        user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)
         wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)

app/core.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> pd.DataFrame:
     wine_row = df[df["NAME"] == wine_name].drop(columns=["NAME", "cluster"])
@@ -22,3 +23,16 @@ def recommend_wine_from_users(df:pd.DataFrame, user:str, n=5):
     cluster_avg = cluster_users.mean()
     cluster_avg = cluster_avg[user_unrated]
     return cluster_avg.sort_values(ascending=False).keys()[:n].tolist()

 import pandas as pd
+import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity
 def get_top_5_similar_wines(wine_name: str, df: pd.DataFrame) -> pd.DataFrame:
     wine_row = df[df["NAME"] == wine_name].drop(columns=["NAME", "cluster"])
     cluster_avg = cluster_users.mean()
     cluster_avg = cluster_avg[user_unrated]
     return cluster_avg.sort_values(ascending=False).keys()[:n].tolist()
def get_most_similar_user_clust(df2: pd.DataFrame, new_user: str) -> int:
    """Return the cluster of the existing user whose ratings best match ``new_user``.

    Similarity is the mean squared difference between rating rows; missing
    values (NaN) are ignored when averaging. The row of ``new_user`` itself is
    excluded from the candidates, so the result is always taken from a
    *different* user, even when someone else has identical ratings.

    Args:
        df2: Frame indexed by user label. Must contain a ``cluster`` column;
            a ``user`` column is ignored if present. All remaining columns are
            treated as ratings (assumed numeric -- verify against the caller).
            The frame is not modified.
        new_user: Index label of the user to find a cluster for.

    Returns:
        The ``cluster`` value of the closest other user, as an ``int``.

    Raises:
        KeyError: If ``new_user`` is not in the index or ``cluster`` is missing.
        ValueError: If there is no other user with a computable distance.
    """
    # Compare rating columns only: 'user' holds string labels and 'cluster' is
    # the value being looked up, so neither may take part in the distance.
    ratings = df2.drop(columns=["cluster", "user"], errors="ignore")
    target = ratings.loc[new_user]

    # Remove the new user explicitly rather than assuming it sorts first.
    candidates = ratings.drop(index=new_user)

    # Mean squared difference per candidate; mean() skips NaN by default.
    distances = candidates.sub(target, axis=1).pow(2).mean(axis=1).dropna()
    if distances.empty:
        raise ValueError(
            f"no other user with comparable ratings to match against {new_user!r}"
        )

    most_similar_user = distances.idxmin()
    return int(df2.loc[most_similar_user, "cluster"])

app/dashboard.ipynb CHANGED Viewed

@@ -2,14 +2,14 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_4768/3788270293.py:82: FutureWarning:\n",
       "\n",
       "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
       "\n"
@@ -30,7 +30,7 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x2b30dab90>"
       ]
      },
      "metadata": {},
@@ -55,6 +55,74 @@
      "text": [
       "0\n"
      ]
     }
    ],
    "source": [
@@ -215,6 +283,7 @@
     "        user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO\n",
     "        user_rating_df.fillna(0, inplace=True)\n",
     "        user_rating_df['user'] = user_rating_df.index\n",
     "\n",
     "\n",
     "        wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)\n",

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_23769/1782084106.py:82: FutureWarning:\n",
       "\n",
       "Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
       "\n"
        "        "
       ],
       "text/plain": [
+       "<IPython.lib.display.IFrame at 0x2a978bdf0>"
       ]
      },
      "metadata": {},
      "text": [
       "0\n"
      ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/ruimelo/Documents/GitHub/eda/app/core.py:20: SettingWithCopyWarning:\n",
+      "\n",
+      "\n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "\n",
+      "/Users/ruimelo/Documents/GitHub/eda/app/core.py:31: SettingWithCopyWarning:\n",
+      "\n",
+      "\n",
+      "A value is trying to be set on a copy of a slice from a DataFrame\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "\n",
+      "[2024-09-17 19:32:00,521] ERROR in app: Exception on /_dash-update-component [POST]\n",
+      "Traceback (most recent call last):\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 1473, in wsgi_app\n",
+      "    response = self.full_dispatch_request()\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 882, in full_dispatch_request\n",
+      "    rv = self.handle_user_exception(e)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 880, in full_dispatch_request\n",
+      "    rv = self.dispatch_request()\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/flask/app.py\", line 865, in dispatch_request\n",
+      "    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/dash.py\", line 1376, in dispatch\n",
+      "    ctx.run(\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 514, in add_context\n",
+      "    raise err\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 503, in add_context\n",
+      "    output_value = _invoke_callback(func, *func_args, **func_kwargs)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/dash/_callback.py\", line 43, in _invoke_callback\n",
+      "    return func(*args, **kwargs)  # %% callback invoked %%\n",
+      "  File \"/var/folders/b4/lwfgccm95kqd2skcwvrt2fr00000gn/T/ipykernel_23769/1782084106.py\", line 158, in recommend_wine_from_form\n",
+      "    user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)\n",
+      "  File \"/Users/ruimelo/Documents/GitHub/eda/app/core.py\", line 34, in get_most_similar_user_clust\n",
+      "    cluster_avg = cluster_avg[new_user_ratings.index]\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/series.py\", line 1153, in __getitem__\n",
+      "    return self._get_with(key)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/series.py\", line 1194, in _get_with\n",
+      "    return self.loc[key]\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1191, in __getitem__\n",
+      "    return self._getitem_axis(maybe_callable, axis=axis)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1420, in _getitem_axis\n",
+      "    return self._getitem_iterable(key, axis=axis)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1360, in _getitem_iterable\n",
+      "    keyarr, indexer = self._get_listlike_indexer(key, axis)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexing.py\", line 1558, in _get_listlike_indexer\n",
+      "    keyarr, indexer = ax._get_indexer_strict(key, axis_name)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexes/base.py\", line 6200, in _get_indexer_strict\n",
+      "    self._raise_if_missing(keyarr, indexer, axis_name)\n",
+      "  File \"/Users/ruimelo/anaconda3/envs/gan-nlp/lib/python3.10/site-packages/pandas/core/indexes/base.py\", line 6252, in _raise_if_missing\n",
+      "    raise KeyError(f\"{not_found} not in index\")\n",
+      "KeyError: \"['user'] not in index\"\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dedicado               2.0\n",
+      "user        temporary_user\n",
+      "Name: temporary_user, dtype: object\n"
+     ]
     }
    ],
    "source": [
     "        user_rating_df.loc[user] = TEMPORARY_WINE_RECOMMENDATION_FORM_INFO\n",
     "        user_rating_df.fillna(0, inplace=True)\n",
     "        user_rating_df['user'] = user_rating_df.index\n",
+    "        user_rating_df['cluster'] = core.get_most_similar_user_clust(user_rating_df, user)\n",
     "\n",
     "\n",
     "        wine_recommendation_from_user = core.recommend_wine_from_users(user_rating_df, user, 3)\n",

app/requirements.txt CHANGED Viewed

@@ -4,4 +4,5 @@ pandas
 gunicorn
 wandb==0.15.5
 scipy
-scikit-learn

 gunicorn
 wandb==0.15.5
 scipy
+scikit-learn
+numpy