import streamlit as st
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

# Add this at the very top of the file, before any other Streamlit commands
st.set_page_config(layout="wide")

# Add custom CSS after the st.set_page_config
st.markdown(""" """, unsafe_allow_html=True)


# Cache data loading
@st.cache_data
def load_data():
    """Read the song dataset from ``song_dataset.csv`` into a DataFrame.

    The rest of this script reads the columns ``user``, ``song``,
    ``play_count``, ``title``, ``release`` and ``artist_name`` from it.
    """
    df = pd.read_csv('song_dataset.csv')
    return df


# Cache matrix computations
@st.cache_data
def compute_matrices(df_songsDB):
    """Build the user x song play-count matrix and its truncated SVD.

    Returns:
        A tuple ``(user_item_matrix, svd_matrix, item_factors)``:
        the pivoted play counts (missing pairs filled with 0), the
        per-user latent vectors (one row per row of the pivot) and the
        SVD components (one column per column of the pivot).
    """
    user_item_matrix = df_songsDB.pivot_table(
        index='user', columns='song', values='play_count', fill_value=0
    )
    svd = TruncatedSVD(n_components=20, random_state=20)
    svd_matrix = svd.fit_transform(user_item_matrix)
    item_factors = svd.components_
    return user_item_matrix, svd_matrix, item_factors


# Load data and compute matrices once
df_songsDB = load_data()
user_item_matrix, svd_matrix, item_factors = compute_matrices(df_songsDB)


def _build_combined_features(df):
    """Return one "artist release title" text per row, treating NaN as ''."""
    text = df[['artist_name', 'release', 'title']].fillna('').astype(str)
    return text['artist_name'] + " " + text['release'] + " " + text['title']


# Cache the TF-IDF computation
@st.cache_data
def compute_tfidf(df_songsDB):
    """Fit a TF-IDF model on the combined artist/release/title text.

    The input frame is not modified: a side effect inside a cached
    function would silently be skipped on every cache hit.

    Returns:
        A tuple ``(tfidf, tfidf_matrix)`` where row ``i`` of the matrix
        corresponds to row ``i`` (by position) of ``df_songsDB``.
    """
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(_build_combined_features(df_songsDB))
    return tfidf, tfidf_matrix


# Helper functions
def content_score_calculator(selected_songs, unlistened_songs):
    """Score candidate songs by text similarity to the selected songs.

    Args:
        selected_songs: Iterable of song *titles* the user picked.
        unlistened_songs: Iterable of candidate *song ids*.

    Returns:
        Dict mapping every candidate song id to the mean cosine
        similarity between its TF-IDF vector and those of the selected
        songs. Candidates that cannot be scored (no selection, or id
        absent from the dataset) get 0.0.
    """
    candidate_ids = list(unlistened_songs)
    scores = dict.fromkeys(candidate_ids, 0.0)
    if not candidate_ids:
        return scores

    # The dataset holds one row per (user, song) play record, so use the
    # first row of each song id as its single representative. This keeps
    # popular songs from being counted once per listener.
    is_first_row = ~df_songsDB['song'].duplicated()
    is_selected = df_songsDB['title'].isin(list(selected_songs))
    selected_pos = np.flatnonzero((is_first_row & is_selected).to_numpy())
    if selected_pos.size == 0:
        # Nothing to compare against; cosine_similarity rejects empty input.
        return scores

    first_pos_by_song = pd.Series(
        np.flatnonzero(is_first_row.to_numpy()),
        index=df_songsDB.loc[is_first_row, 'song'],
    )
    known_ids = [
        song_id for song_id in candidate_ids
        if song_id in first_pos_by_song.index
    ]
    if not known_ids:
        return scores
    candidate_pos = first_pos_by_song.loc[known_ids].to_numpy()

    # Reuse the cached TF-IDF matrix instead of refitting on every call.
    _, tfidf_matrix = compute_tfidf(df_songsDB)
    similarity = cosine_similarity(
        tfidf_matrix[selected_pos], tfidf_matrix[candidate_pos]
    )
    # Columns of `similarity` follow `known_ids`, so ids and scores line up.
    scores.update(zip(known_ids, similarity.mean(axis=0).tolist()))
    return scores


def collaborative_score_calculator(user_id, unlistened_songs):
    """Score candidate songs from the SVD factorisation of play counts.

    Args:
        user_id: Value from the ``user`` column.
        unlistened_songs: Iterable of candidate song ids.

    Returns:
        Dict mapping every candidate song id to the dot product of the
        user's latent vector with that song's latent vector. Songs that
        are not columns of the user-item matrix, and every song when the
        user is not a row of it, score 0.
    """
    song_ids = list(unlistened_songs)
    if user_id not in user_item_matrix.index:
        # Unknown user: no collaborative signal rather than a KeyError.
        return dict.fromkeys(song_ids, 0)

    user_vector = svd_matrix[user_item_matrix.index.get_loc(user_id)]
    # One matrix product scores every song column at once.
    all_scores = user_vector @ item_factors
    # get_indexer returns -1 for ids that are not columns.
    column_pos = user_item_matrix.columns.get_indexer(song_ids)
    return {
        song_id: (all_scores[pos] if pos >= 0 else 0)
        for song_id, pos in zip(song_ids, column_pos)
    }


def hybridRecommendationEngine(user_id, selected_songs, alpha=0.5, top_n=10):
    """Recommend songs the user has not played, blending CF and content scores.

    Args:
        user_id: Value from the ``user`` column.
        selected_songs: Iterable of song titles the user likes.
        alpha: Weight of the collaborative score; the content score gets
            ``1 - alpha``.
        top_n: Maximum number of recommendations to return.

    Returns:
        List of ``"<title> by <artist_name>"`` strings, best match first.

    NOTE(review): the collaborative score is an unbounded dot product
    while the content score is a cosine similarity, so ``alpha`` does not
    weight them on a common scale. Consider normalising each component
    before blending; the formula is left unchanged here.
    """
    listened = set(df_songsDB.loc[df_songsDB['user'] == user_id, 'song'])
    # Keep dataset order (not set order) so ties break deterministically.
    candidates = [
        song_id for song_id in df_songsDB['song'].unique()
        if song_id not in listened
    ]
    if not candidates:
        return []

    cf_scores = collaborative_score_calculator(user_id, candidates)
    content_scores = content_score_calculator(selected_songs, candidates)
    final_scores = {
        song_id: alpha * cf_scores.get(song_id, 0)
        + (1 - alpha) * content_scores.get(song_id, 0)
        for song_id in candidates
    }

    # Min-max scaling would not change the order, so rank the raw blend.
    top_ids = sorted(final_scores, key=final_scores.get, reverse=True)[:top_n]

    # One metadata row per song id so a song is never recommended twice.
    metadata = df_songsDB[['song', 'title', 'artist_name']].drop_duplicates(
        subset='song'
    )
    recommended = pd.DataFrame(top_ids, columns=['song']).merge(
        metadata, on='song', how='left'
    )
    # Missing metadata would otherwise turn the whole string into NaN.
    titles = recommended['title'].fillna('Unknown title').astype(str)
    artists = recommended['artist_name'].fillna('Unknown artist').astype(str)
    return (titles + ' by ' + artists).tolist()


# Streamlit app
st.title("Delta Melody Match 🎶")

# Make columns take more width
col1, col2 = st.columns([2, 4])

with col1:
    with st.container():
        user_id = st.selectbox(
            "👤 Select User ID",
            options=df_songsDB['user'].unique().tolist(),
            key="small_select"
        )
        st.markdown('', unsafe_allow_html=True)

# Titles the chosen user has played; rows without a title cannot be offered.
songs_selectable = (
    df_songsDB.loc[df_songsDB['user'] == user_id, 'title']
    .dropna()
    .unique()
    .tolist()
)

with col2:
    song_titles = st.multiselect(
        "🎵 Select Songs You Like",
        options=songs_selectable,
        default=songs_selectable[:1]
    )

# Make the recommendations table wider
if st.button("Get Recommendations"):
    if not song_titles:
        # The content-based part needs at least one seed song.
        st.warning("Select at least one song to get recommendations.")
    else:
        st.subheader("Recommended Songs")
        recommendations = hybridRecommendationEngine(user_id, song_titles)
        if not recommendations:
            st.info("No recommendations available for this user.")
        for i, rec in enumerate(recommendations, 1):
            # Split on the LAST ' by ' so titles that themselves contain
            # ' by ' do not break the unpacking.
            title, separator, artist = rec.rpartition(' by ')
            if not separator:
                title, artist = rec, ''
            st.write(f"{i}. ***{title}*** by {artist}")