Spaces: Sleeping
mads committed on
Commit · 29c7384 · 1 Parent(s): 54c2eb1

updated
Browse files
- RecommendationEngineAI_3_2.ipynb +0 -0
- app.py +33 -0
- app_v2.py +56 -0
- requirements.txt +2 -0
- requirements_v2.txt +4 -0
RecommendationEngineAI_3_2.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
app.py
CHANGED
@@ -54,3 +54,36 @@ if st.button("Get Recommendations"):
     else:
         st.write("No sufficient data to generate recommendations.")
 
+
+
+
+import pandas as pd
+from surprise import SVD, Dataset, Reader
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+# Load the dataset
+df_songsDB = pd.read_csv('song_dataset.csv')
+
+# Prepare surprise data
+reader = Reader(rating_scale=(1, df_songsDB['play_count'].max()))
+surpriseData = Dataset.load_from_df(df_songsDB[['user', 'song', 'play_count']], reader)
+trainset = surpriseData.build_full_trainset()
+
+# Train SVD model
+algo_SVD = SVD()
+algo_SVD.fit(trainset)
+
+# Define recommendation functions
+def recommend_songs(user_history):
+    user_songs = df_songsDB[df_songsDB['user'].isin(user_history)]['song']
+    unlistened_songs = df_songsDB[~df_songsDB['song'].isin(user_songs)]
+
+    # Content-based similarity
+    tfidf = TfidfVectorizer()
+    tfidf_matrix = tfidf.fit_transform(unlistened_songs['title'] + " " + unlistened_songs['artist_name'])
+    user_vector = tfidf.transform(user_songs['title'] + " " + user_songs['artist_name'])
+
+    cosine_sim = cosine_similarity(user_vector, tfidf_matrix)
+    recommendations = unlistened_songs.iloc[cosine_sim.argmax(axis=1)]
+    return recommendations
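Note on the recommend_songs function added above: user_songs is selected as a Series of song IDs (the 'song' column), so the later user_songs['title'] lookup in the TF-IDF step would raise a KeyError, and the trained algo_SVD model is not used inside the function. A minimal sketch of how the content-based step could be made runnable, assuming the listened rows are kept as a full DataFrame (the helper name recommend_songs_sketch and its variable names are illustrative, not part of this commit):

def recommend_songs_sketch(user_history):
    # Keep full rows for the user's history so the title/artist columns stay available
    listened = df_songsDB[df_songsDB['user'].isin(user_history)]
    unlistened = df_songsDB[~df_songsDB['song'].isin(listened['song'])]

    # Fit TF-IDF on the unheard catalogue, then project the listened songs into the same space
    tfidf = TfidfVectorizer()
    catalogue_matrix = tfidf.fit_transform(unlistened['title'] + " " + unlistened['artist_name'])
    listened_matrix = tfidf.transform(listened['title'] + " " + listened['artist_name'])

    # For each listened song, return the most similar unheard song
    cosine_sim = cosine_similarity(listened_matrix, catalogue_matrix)
    return unlistened.iloc[cosine_sim.argmax(axis=1)]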
app_v2.py
ADDED
@@ -0,0 +1,56 @@
+import streamlit as st
+import pandas as pd
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.feature_extraction.text import TfidfVectorizer
+import pickle
+import numpy as np
+import random
+
+# Load the dataset
+df_songsDB = pd.read_csv('song_dataset.csv')
+
+# Build a dropdown for user selection
+st.title("Song Recommendation System")
+user_id = st.selectbox("Select User", options=df_songsDB['user'].unique())
+
+# Dynamically update the dropdown based on user selection
+if user_id:
+    listened_songs = df_songsDB[df_songsDB['user'] == user_id]['title'].unique()
+    selected_songs = st.multiselect("Select Songs You've Liked", options=listened_songs)
+
+# Collaborative Filtering Recommendation
+if st.button("Get Recommendations"):
+    # Prepare user-item interaction matrix
+    user_item_matrix = df_songsDB.pivot_table(
+        index='user', columns='title', values='play_count', fill_value=0
+    )
+
+    # Compute cosine similarity
+    if user_id in user_item_matrix.index:
+        user_vector = user_item_matrix.loc[user_id].values.reshape(1, -1)
+        similarity = cosine_similarity(user_vector, user_item_matrix.values)
+        recommendations = (
+            user_item_matrix.columns[np.argsort(-similarity[0])].tolist()
+        )
+
+        # Filter out already listened songs
+        recommendations = [
+            song for song in recommendations if song not in listened_songs
+        ]
+
+        # Shuffle recommendations to mix up starting letters
+        random.shuffle(recommendations)
+
+        # Select 10 random recommendations if there are enough songs
+        # if len(recommendations) >= 10:
+        #     random_recommendations = random.sample(recommendations, 10)
+        # else:
+        random_recommendations = recommendations
+
+        # Display Recommendations
+        st.write("Recommended Songs for You:")
+        for song in random_recommendations:
+            st.write(song)
+    else:
+        st.write("No sufficient data to generate recommendations.")
+
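Note on the similarity step in app_v2.py: similarity[0] contains one score per user (one per row of user_item_matrix), but it is used to reorder user_item_matrix.columns, which are song titles, so the ranking mixes the user axis with the song axis. A short user-based scoring sketch that keeps the axes consistent, assuming the same user_item_matrix, user_vector, and listened_songs variables (illustrative only, not part of this commit):

similarity = cosine_similarity(user_vector, user_item_matrix.values)[0]    # one score per user
weighted_plays = user_item_matrix.mul(similarity, axis=0).sum(axis=0)      # similarity-weighted play count per song title
ranked_songs = weighted_plays.sort_values(ascending=False).index.tolist()  # highest-scoring titles first
recommendations = [song for song in ranked_songs if song not in listened_songs]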
requirements.txt
CHANGED
@@ -2,3 +2,5 @@ streamlit>=1.4.0
 pandas>=1.0.0
 scikit-learn>=1.0.0
 numpy>=1.19.5
+
+surprise
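Note on the added surprise dependency: the Surprise library imported in app.py is commonly installed from PyPI as scikit-surprise (the import name stays surprise), so if plain surprise does not resolve, the requirement line would likely need to read:

scikit-surprise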
requirements_v2.txt
ADDED
@@ -0,0 +1,4 @@
+streamlit>=1.4.0
+pandas>=1.0.0
+scikit-learn>=1.0.0
+numpy>=1.19.5