madskazi committed on
Commit
9b69687
·
verified ·
1 Parent(s): c34322a

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -177
app.py DELETED
@@ -1,177 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
- from sklearn.metrics.pairwise import cosine_similarity
6
- from sklearn.decomposition import TruncatedSVD
7
-
8
- # Add this at the very top of the file, before any other Streamlit commands
9
- st.set_page_config(layout="wide")
10
-
11
- # Add custom CSS after the st.set_page_config
12
- st.markdown("""
13
- <style>
14
- .small-select {
15
- max-width: 200px !important;
16
- }
17
- .stButton > button {
18
- background-color: pink;
19
- color: black !important;
20
- border: none;
21
- }
22
- .stButton > button:hover {
23
- background-color: pink !important;
24
- color: white !important;
25
- border: 2px solid white !important;
26
- }
27
- /* Style for selected items in multiselect */
28
- .stMultiSelect [data-baseweb="tag"] {
29
- background-color: pink !important;
30
- color: black !important;
31
- }
32
- /* Add focus/click style for multiselect and select */
33
- .stMultiSelect [data-baseweb="select"] > div:first-child,
34
- .stSelectbox [data-baseweb="select"] > div:first-child {
35
- border-color: white !important;
36
- box-shadow: 0 0 0 1px white !important;
37
- }
38
- </style>
39
- """, unsafe_allow_html=True)
40
-
41
- # Cache data loading
42
- @st.cache_data
43
- def load_data():
44
- df = pd.read_csv('song_dataset.csv')
45
- return df
46
-
47
- # Cache matrix computations
48
- @st.cache_data
49
- def compute_matrices(df_songsDB):
50
- user_item_matrix = df_songsDB.pivot_table(index='user', columns='song', values='play_count', fill_value=0)
51
- svd = TruncatedSVD(n_components=20, random_state=20)
52
- svd_matrix = svd.fit_transform(user_item_matrix)
53
- item_factors = svd.components_
54
- return user_item_matrix, svd_matrix, item_factors
55
-
56
- # Load data and compute matrices once
57
- df_songsDB = load_data()
58
- user_item_matrix, svd_matrix, item_factors = compute_matrices(df_songsDB)
59
-
60
- # Cache the TF-IDF computation
61
- @st.cache_data
62
- def compute_tfidf(df_songsDB):
63
- df_songsDB['combined_features'] = (
64
- df_songsDB['artist_name'] + " " +
65
- df_songsDB['release'] + " " +
66
- df_songsDB['title']
67
- )
68
- tfidf = TfidfVectorizer()
69
- tfidf_matrix = tfidf.fit_transform(df_songsDB['combined_features'])
70
- return tfidf, tfidf_matrix
71
-
72
- # Helper functions
73
- def content_score_calculator(selected_songs, unlistened_songs):
74
- df_songsDB['combined_features'] = (
75
- df_songsDB['artist_name'] + " " +
76
- df_songsDB['release'] + " " +
77
- df_songsDB['title']
78
- )
79
-
80
- selected_song_features = df_songsDB[df_songsDB['title'].isin(selected_songs)]['combined_features']
81
- unlistened_song_features = df_songsDB[df_songsDB['song'].isin(unlistened_songs)]['combined_features']
82
-
83
- tfidf = TfidfVectorizer()
84
- tfidf_matrix = tfidf.fit_transform(df_songsDB['combined_features'])
85
-
86
- selected_matrix = tfidf.transform(selected_song_features)
87
- unlistened_matrix = tfidf.transform(unlistened_song_features)
88
- similarity_scores = cosine_similarity(selected_matrix, unlistened_matrix)
89
-
90
- avg_similarity = similarity_scores.mean(axis=0)
91
-
92
- return dict(zip(unlistened_songs, avg_similarity))
93
-
94
- def collaborative_score_calculator(user_id, unlistened_songs):
95
- user_idx = user_item_matrix.index.get_loc(user_id)
96
- user_vector = svd_matrix[user_idx]
97
- cf_scores = {}
98
-
99
- for song_id in unlistened_songs:
100
- if (song_id in user_item_matrix.columns):
101
- song_idx = user_item_matrix.columns.get_loc(song_id)
102
- song_vector = item_factors[:, song_idx]
103
- cf_scores[song_id] = np.dot(user_vector, song_vector)
104
- else:
105
- cf_scores[song_id] = 0
106
- return cf_scores
107
-
108
- def hybridRecommendationEngine(user_id, selected_songs):
109
- alpha = 0.5
110
-
111
- listened_songs = df_songsDB[df_songsDB['user'] == user_id]['song'].unique()
112
- all_songs = df_songsDB['song'].unique()
113
- unlistened_songs = set(all_songs) - set(listened_songs)
114
-
115
- cf_scores = collaborative_score_calculator(user_id, unlistened_songs)
116
- content_scores = content_score_calculator(selected_songs, unlistened_songs)
117
-
118
- final_scores = {}
119
- for song_id in unlistened_songs:
120
- cf_score = cf_scores.get(song_id, 0)
121
- content_score = content_scores.get(song_id, 0)
122
- final_scores[song_id] = alpha * cf_score + (1 - alpha) * content_score
123
-
124
- scores = list(final_scores.values())
125
- min_score = min(scores) if scores else 0
126
- max_score = max(scores) if scores else 1
127
-
128
- if max_score > min_score:
129
- normalized_scores = {
130
- song_id: (score - min_score) / (max_score - min_score)
131
- for song_id, score in final_scores.items()
132
- }
133
- else:
134
- normalized_scores = {song_id: 0.5 for song_id in final_scores}
135
-
136
- sorted_songs = sorted(normalized_scores.items(), key=lambda x: x[1], reverse=True)
137
- recommended_song_ids = [song_id for song_id, _ in sorted_songs[:10]]
138
-
139
- recommended_songs = (
140
- pd.DataFrame(recommended_song_ids, columns=['song'])
141
- .merge(df_songsDB[['song', 'title', 'release', 'artist_name']].drop_duplicates(), on='song', how='left')
142
- .assign(recommendation=lambda x: x['title'] + ' by ' + x['artist_name'])
143
- )
144
- return recommended_songs['recommendation'].tolist()
145
-
146
- # Streamlit app
147
- st.title("Delta Melody Match 🎶")
148
-
149
- # Make columns take more width
150
- col1, col2 = st.columns([2, 4])
151
-
152
- with col1:
153
- with st.container():
154
- user_id = st.selectbox(
155
- "👤 Select User ID",
156
- options=df_songsDB['user'].unique().tolist(),
157
- key="small_select"
158
- )
159
- st.markdown('<style>div[data-testid="stSelectbox"] > div:first-child {max-width: 200px;}</style>', unsafe_allow_html=True)
160
-
161
- songs_selectable = df_songsDB[df_songsDB['user'] == user_id]['title'].unique()
162
-
163
- with col2:
164
- song_titles = st.multiselect(
165
- "🎵 Select Songs You Like",
166
- options=songs_selectable,
167
- default=songs_selectable[:1]
168
- )
169
-
170
- # Make the recommendations table wider
171
- if st.button("Get Recommendations"):
172
- st.subheader("Recommended Songs")
173
- recommendations = hybridRecommendationEngine(user_id, song_titles)
174
- for i, rec in enumerate(recommendations, 1):
175
- # Split the recommendation into title and artist
176
- title, artist = rec.split(' by ')
177
- st.write(f"{i}. ***{title}*** by {artist}")