mads committed on
Commit
a8d7b98
·
1 Parent(s): 9b69687

added app file

Browse files
Files changed (1) hide show
  1. app.py +177 -0
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from sklearn.decomposition import TruncatedSVD
7
+
8
# Configure the page layout. This is the first Streamlit command issued by
# this script; Streamlit expects set_page_config to run before other st.* calls.
st.set_page_config(layout="wide")

# Inject custom CSS: a 200px cap for .small-select, pink buttons and
# multiselect tags, and a white border/shadow on select widgets.
# unsafe_allow_html=True is passed so the <style> block is emitted as raw HTML.
st.markdown("""
<style>
.small-select {
max-width: 200px !important;
}
.stButton > button {
background-color: pink;
color: black !important;
border: none;
}
.stButton > button:hover {
background-color: pink !important;
color: white !important;
border: 2px solid white !important;
}
/* Style for selected items in multiselect */
.stMultiSelect [data-baseweb="tag"] {
background-color: pink !important;
color: black !important;
}
/* Add focus/click style for multiselect and select */
.stMultiSelect [data-baseweb="select"] > div:first-child,
.stSelectbox [data-baseweb="select"] > div:first-child {
border-color: white !important;
box-shadow: 0 0 0 1px white !important;
}
</style>
""", unsafe_allow_html=True)
40
+
41
+ # Cache data loading
42
@st.cache_data
def load_data():
    """Read ``song_dataset.csv`` from the working directory.

    Returns:
        The parsed CSV as a pandas DataFrame. The function is decorated with
        ``st.cache_data``.
    """
    return pd.read_csv('song_dataset.csv')
46
+
47
+ # Cache matrix computations
48
@st.cache_data
def compute_matrices(df_songsDB):
    """Pivot the play data into a user-by-song matrix and factorise it.

    Args:
        df_songsDB: DataFrame providing ``user``, ``song`` and ``play_count``
            columns.

    Returns:
        A tuple ``(user_item_matrix, svd_matrix, item_factors)``:
        the pivoted play-count table (missing pairs filled with 0), the
        output of ``TruncatedSVD.fit_transform`` on that table (one row per
        user, 20 components), and the fitted ``components_`` array (one
        column per song).
    """
    interactions = df_songsDB.pivot_table(
        index='user',
        columns='song',
        values='play_count',
        fill_value=0,
    )
    # Fixed random_state keeps the factorisation reproducible between runs.
    reducer = TruncatedSVD(n_components=20, random_state=20)
    user_factors = reducer.fit_transform(interactions)
    return interactions, user_factors, reducer.components_
55
+
56
# Module-level state shared by the helper functions below. Both calls go
# through st.cache_data-decorated functions.
df_songsDB = load_data()
user_item_matrix, svd_matrix, item_factors = compute_matrices(df_songsDB)
59
+
60
+ # Cache the TF-IDF computation
61
@st.cache_data
def compute_tfidf(df_songsDB):
    """Fit a TF-IDF model on each row's artist, release and title text.

    Args:
        df_songsDB: DataFrame providing ``artist_name``, ``release`` and
            ``title`` columns. It is not modified.

    Returns:
        A tuple ``(tfidf, tfidf_matrix)``: the fitted ``TfidfVectorizer`` and
        the sparse matrix produced by ``fit_transform``, with one row per row
        of ``df_songsDB``.
    """
    text_columns = ['artist_name', 'release', 'title']
    # Build the text in a local Series instead of writing a new column onto
    # the argument: a cached function should not have side effects on its
    # inputs. Missing values become '' because NaN + str yields NaN, which
    # TfidfVectorizer rejects as a document.
    text_parts = df_songsDB[text_columns].fillna('').astype(str)
    combined_features = (
        text_parts['artist_name'] + " " +
        text_parts['release'] + " " +
        text_parts['title']
    )
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(combined_features)
    return tfidf, tfidf_matrix
71
+
72
+ # Helper functions
73
def content_score_calculator(selected_songs, unlistened_songs):
    """Score candidate songs by text similarity to the songs the user picked.

    Each song is described by the string "artist_name release title". The
    strings are embedded with TF-IDF, and every candidate receives the mean
    cosine similarity between its embedding and the embeddings of the
    selected songs.

    Args:
        selected_songs: Iterable of song *titles* chosen by the user.
        unlistened_songs: Iterable of candidate song *ids* (values of the
            ``song`` column).

    Returns:
        Dict mapping each candidate song id to its average similarity. All
        scores are 0.0 when no row matches ``selected_songs``; the dict is
        empty when there are no candidates.
    """
    text_columns = ['artist_name', 'release', 'title']
    # Work on local data instead of adding a column to the shared
    # module-level DataFrame on every call. NaN metadata becomes '' so the
    # concatenation never yields NaN documents.
    text_parts = df_songsDB[text_columns].fillna('').astype(str)
    combined = (
        text_parts['artist_name'] + " " +
        text_parts['release'] + " " +
        text_parts['title']
    )

    # Freeze the candidate order once: the scores are computed and zipped
    # back in exactly this order. (Previously the feature rows came out in
    # DataFrame row order, with repeats, while the ids were taken in set
    # iteration order, so scores were attached to the wrong songs.)
    candidate_ids = list(unlistened_songs)
    if not candidate_ids:
        return {}

    # The DataFrame can hold many rows for the same song; keep one feature
    # string per song id so lookups by id are unambiguous.
    features_by_song = pd.Series(
        combined.to_numpy(), index=df_songsDB['song'].to_numpy()
    )
    features_by_song = features_by_song[
        ~features_by_song.index.duplicated(keep='first')
    ]

    # Resolve the selected titles to distinct song ids so each liked song
    # contributes once to the average.
    title_matches = df_songsDB['title'].isin(selected_songs)
    selected_ids = df_songsDB.loc[title_matches, 'song'].dropna().unique()
    if len(selected_ids) == 0:
        # Nothing to compare against; transforming an empty selection would
        # raise, so report a neutral score for every candidate.
        return dict.fromkeys(candidate_ids, 0.0)

    # Vocabulary and IDF weights are still learned from every row, as before.
    tfidf = TfidfVectorizer()
    tfidf.fit(combined)

    selected_matrix = tfidf.transform(features_by_song.loc[selected_ids])
    candidate_matrix = tfidf.transform(
        features_by_song.reindex(candidate_ids).fillna('')
    )
    similarity_scores = cosine_similarity(selected_matrix, candidate_matrix)

    # Rows are selected songs, columns are candidates: average over rows.
    avg_similarity = similarity_scores.mean(axis=0)

    return dict(zip(candidate_ids, avg_similarity))
93
+
94
def collaborative_score_calculator(user_id, unlistened_songs):
    """Compute a collaborative-filtering score for each candidate song.

    The score is the dot product of the user's row in ``svd_matrix`` with the
    song's column in ``item_factors``.

    Args:
        user_id: Label of the user in ``user_item_matrix.index``.
        unlistened_songs: Iterable of candidate song ids.

    Returns:
        Dict mapping every candidate song id to its score; songs that are not
        a column of ``user_item_matrix`` get 0.

    Raises:
        KeyError: If ``user_id`` is not in ``user_item_matrix.index``.
    """
    latent_user = svd_matrix[user_item_matrix.index.get_loc(user_id)]
    known_songs = user_item_matrix.columns

    def _score(song_id):
        # Songs absent from the pivot table have no latent column to use.
        if song_id not in known_songs:
            return 0
        latent_song = item_factors[:, known_songs.get_loc(song_id)]
        return np.dot(latent_user, latent_song)

    return {song_id: _score(song_id) for song_id in unlistened_songs}
107
+
108
def hybridRecommendationEngine(user_id, selected_songs, alpha=0.5, top_n=10):
    """Recommend songs by blending collaborative and content-based scores.

    Args:
        user_id: User to recommend for (a value of the ``user`` column).
        selected_songs: Titles the user marked as liked; used for the
            content-based part of the score.
        alpha: Weight of the collaborative score in the blend; the content
            score gets ``1 - alpha``. Defaults to 0.5.
        top_n: Maximum number of recommendations to return. Defaults to 10.

    Returns:
        List of at most ``top_n`` strings of the form ``"<title> by <artist>"``,
        best match first. Empty if the user has already listened to every
        song in the dataset.
    """
    listened_songs = df_songsDB.loc[df_songsDB['user'] == user_id, 'song'].unique()
    unlistened_songs = set(df_songsDB['song'].unique()) - set(listened_songs)
    if not unlistened_songs:
        # Nothing left to recommend; avoids min()/max() on an empty sequence
        # and scoring an empty candidate set.
        return []

    cf_scores = collaborative_score_calculator(user_id, unlistened_songs)
    content_scores = content_score_calculator(selected_songs, unlistened_songs)

    # NOTE(review): the two scores are blended on their raw scales (SVD dot
    # product vs. cosine similarity); confirm whether each should be
    # normalised before weighting.
    final_scores = {
        song_id: (
            alpha * cf_scores.get(song_id, 0)
            + (1 - alpha) * content_scores.get(song_id, 0)
        )
        for song_id in unlistened_songs
    }

    # Min-max normalise the blended scores to [0, 1].
    min_score = min(final_scores.values())
    max_score = max(final_scores.values())
    if max_score > min_score:
        score_range = max_score - min_score
        normalized_scores = {
            song_id: (score - min_score) / score_range
            for song_id, score in final_scores.items()
        }
    else:
        # All candidates tie: give them the same mid-range score.
        normalized_scores = dict.fromkeys(final_scores, 0.5)

    ranked = sorted(normalized_scores.items(), key=lambda item: item[1], reverse=True)
    recommended_song_ids = [song_id for song_id, _ in ranked[:top_n]]

    # One metadata row per song id, so the left merge cannot multiply a
    # recommendation when the same id appears with differing metadata.
    metadata = (
        df_songsDB[['song', 'title', 'release', 'artist_name']]
        .drop_duplicates(subset='song')
    )
    recommended_songs = pd.DataFrame(recommended_song_ids, columns=['song']).merge(
        metadata, on='song', how='left'
    )
    # Missing metadata would otherwise produce NaN (a float) instead of a
    # string in the returned list.
    titles = recommended_songs['title'].fillna('Unknown title').astype(str)
    artists = recommended_songs['artist_name'].fillna('Unknown artist').astype(str)
    return (titles + ' by ' + artists).tolist()
145
+
146
+ # Streamlit app
147
st.title("Delta Melody Match 🎶")

# Two columns: a narrow one for the user picker, a wider one for the songs.
col1, col2 = st.columns([2, 4])

with col1:
    with st.container():
        user_id = st.selectbox(
            "👤 Select User ID",
            options=df_songsDB['user'].unique().tolist(),
            key="small_select"
        )
        st.markdown('<style>div[data-testid="stSelectbox"] > div:first-child {max-width: 200px;}</style>', unsafe_allow_html=True)

# Only titles that appear in the chosen user's rows can be marked as liked.
# Rows without a title are dropped so no NaN option is offered.
songs_selectable = df_songsDB[df_songsDB['user'] == user_id]['title'].dropna().unique()

with col2:
    song_titles = st.multiselect(
        "🎵 Select Songs You Like",
        options=songs_selectable,
        default=songs_selectable[:1]
    )

if st.button("Get Recommendations"):
    if not song_titles:
        # The content-based score needs at least one liked song to compare to.
        st.warning("Please select at least one song to get recommendations.")
    else:
        st.subheader("Recommended Songs")
        recommendations = hybridRecommendationEngine(user_id, song_titles)
        for i, rec in enumerate(recommendations, 1):
            # Split on the LAST ' by ' so titles that themselves contain
            # " by " (e.g. "Stand by Me") do not break the unpacking.
            title, _, artist = rec.rpartition(' by ')
            st.write(f"{i}. ***{title}*** by {artist}")