spotify-genre-analyzer

Paused

App Files Files Community

spotify-genre-analyzer / app.py

plozia

Update app.py

21700f4 verified 5 months ago

raw

history blame

14.7 kB

	import os
	import sys
	import io
	import random
	import math
	import requests
	import spotipy
	import gradio as gr
	import matplotlib.pyplot as plt
	import pandas as pd
	from spotipy.oauth2 import SpotifyClientCredentials
	from PIL import Image

	# Default Spotify credentials come from the environment. They back the shared
	# client that is used whenever the user does not supply their own keys.
	ENV_SPOTIFY_CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID")
	ENV_SPOTIFY_CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET")
	if not (ENV_SPOTIFY_CLIENT_ID and ENV_SPOTIFY_CLIENT_SECRET):
	    # Without both values the shared client below cannot be built: abort startup.
	    print("Error: Spotify credentials not set.")
	    sys.exit(1)

	# Shared client built from the environment credentials (client-credentials flow).
	global_sp = spotipy.Spotify(
	    client_credentials_manager=SpotifyClientCredentials(
	        client_id=ENV_SPOTIFY_CLIENT_ID,
	        client_secret=ENV_SPOTIFY_CLIENT_SECRET,
	    )
	)

	def get_musicbrainz_genre(artist_name):
	    """Look up an artist's most relevant genre on MusicBrainz.

	    The artist is first searched by name; an exact (case-insensitive) name
	    match wins, otherwise the candidate with the highest search score is used.
	    The chosen artist is then fetched with its genres and tags, and the entry
	    with the highest ``count`` is returned (genres are preferred over tags).

	    Args:
	        artist_name: Artist name to search for.

	    Returns:
	        The genre/tag name, or ``"Unknown"`` when nothing usable was found or
	        any step of the lookup failed.
	    """
	    request_timeout = 10  # seconds; without it a stalled connection blocks forever
	    search_url = "https://musicbrainz.org/ws/2/artist/"
	    # NOTE(review): the contact address in this User-Agent looks redacted - confirm.
	    headers = {"User-Agent": "SpotifyAnalyzer/1.0 ([email protected])"}
	    params = {"query": artist_name, "fmt": "json"}
	    try:
	        search_response = requests.get(
	            search_url, params=params, headers=headers, timeout=request_timeout
	        )
	        search_data = search_response.json()
	        candidates = search_data.get("artists") or []

	        best_artist = None
	        best_score = 0
	        wanted_name = artist_name.lower()
	        for artist in candidates:
	            name = artist.get("name", "")
	            score = int(artist.get("score", 0))
	            if name.lower() == wanted_name:
	                # An exact name match beats any score-based pick.
	                best_artist = artist
	                break
	            if score > best_score:
	                best_score = score
	                best_artist = artist

	        mbid = best_artist.get("id") if best_artist else None
	        if not mbid:
	            return "Unknown"

	        lookup_response = requests.get(
	            f"https://musicbrainz.org/ws/2/artist/{mbid}",
	            params={"inc": "tags+genres", "fmt": "json"},
	            headers=headers,
	            timeout=request_timeout,
	        )
	        lookup_data = lookup_response.json()

	        # Prefer curated genres, then free-form tags; within each list take
	        # the entry with the highest count (first one wins on ties).
	        for field in ("genres", "tags"):
	            entries = lookup_data.get(field, [])
	            if entries:
	                top_entry = max(entries, key=lambda entry: entry.get("count", 0))
	                return top_entry.get("name", "Unknown")
	    except Exception:
	        # Deliberate best-effort: this is only a fallback source, so network
	        # errors, timeouts and malformed payloads all degrade to "Unknown".
	        return "Unknown"
	    return "Unknown"

	def get_audiodb_genre(artist_name):
	    """Look up an artist's genre on TheAudioDB.

	    Args:
	        artist_name: Artist name to search for.

	    Returns:
	        The ``strGenre`` value of the first matching artist, or ``"Unknown"``
	        when the request fails, no artist matches, or the genre is empty.
	    """
	    url = "https://theaudiodb.com/api/v1/json/1/search.php"
	    try:
	        # A timeout keeps a stalled connection from blocking the analysis forever.
	        response = requests.get(url, params={"s": artist_name}, timeout=10)
	        if not response.ok:
	            return "Unknown"
	        data = response.json()
	        if data and data.get("artists"):
	            genre = data["artists"][0].get("strGenre", "")
	            if genre:
	                return genre
	    except Exception:
	        # Deliberate best-effort: this is the last fallback source, so any
	        # network or parsing failure simply yields "Unknown".
	        return "Unknown"
	    return "Unknown"

	def extract_playlist_id(url: str) -> str:
	    """Extract the playlist ID from a Spotify playlist URL or URI.

	    Supported forms:
	        * ``https://open.spotify.com/playlist/<id>[?query]``
	        * ``spotify:playlist:<id>``

	    Args:
	        url: Playlist URL or URI as entered by the user.

	    Returns:
	        The playlist ID, or an empty string when none can be found.
	    """
	    if "playlist" not in url:
	        return ""
	    # URIs separate their segments with ":", web URLs with "/".
	    separator = ":" if url.startswith("spotify:") else "/"
	    segments = url.split(separator)
	    try:
	        candidate = segments[segments.index("playlist") + 1]
	    except (ValueError, IndexError):
	        # No standalone "playlist" segment, or nothing follows it.
	        return ""
	    # Drop any query string such as "?si=...".
	    return candidate.split("?")[0]

	def get_playlist_tracks(playlist_id: str, spotify_client) -> list:
	    """Collect every item of a playlist by following its result pages.

	    Args:
	        playlist_id: ID of the playlist to read.
	        spotify_client: Client exposing ``playlist_tracks`` and ``next``.

	    Returns:
	        The concatenated ``items`` of all pages, or an empty list when the
	        client raises ``spotipy.SpotifyException`` at any point.
	    """
	    collected = []
	    try:
	        page = spotify_client.playlist_tracks(playlist_id)
	        while True:
	            collected += page["items"]
	            if not page["next"]:
	                break
	            page = spotify_client.next(page)
	    except spotipy.SpotifyException:
	        # Any API failure discards what was gathered so far.
	        return []
	    return collected

	def _resolve_artist_genres(artist_id, artist_name, spotify_client, artist_cache):
	    """Return the genre list for one artist, caching the result by artist ID.

	    Spotify genres are used when present; otherwise MusicBrainz is tried and,
	    if that yields nothing, TheAudioDB. An empty list means no source knew
	    the artist's genre.
	    """
	    if not artist_id:
	        return []
	    if artist_id in artist_cache:
	        return artist_cache[artist_id]
	    try:
	        genres = spotify_client.artist(artist_id).get("genres", [])
	    except spotipy.SpotifyException:
	        genres = []
	    if not genres:
	        mb_genre = get_musicbrainz_genre(artist_name)
	        if mb_genre != "Unknown":
	            genres = [mb_genre]
	        else:
	            audiodb_genre = get_audiodb_genre(artist_name)
	            if audiodb_genre != "Unknown":
	                genres = [audiodb_genre]
	    artist_cache[artist_id] = genres
	    return genres


	def analyze_playlist(playlist_url: str, spotify_client_id: str, spotify_client_secret: str):
	    """Analyze a Spotify playlist and build every output shown in the UI.

	    Args:
	        playlist_url: URL of the playlist to analyze.
	        spotify_client_id: Optional user-supplied client ID.
	        spotify_client_secret: Optional user-supplied client secret. When
	            either credential is blank the shared ``global_sp`` client is used.

	    Returns:
	        A 6-tuple ``(genres_df, chart_image, tracks_html, recommended_html,
	        analysis_state, processed_info)``. On failure the first five items
	        are ``None`` and ``processed_info`` carries the error message.
	    """
	    import html
	    from urllib.parse import quote

	    max_recommendations = 75
	    spotify_search_max_limit = 50  # the search endpoint rejects larger limits

	    if spotify_client_id.strip() and spotify_client_secret.strip():
	        local_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
	            client_id=spotify_client_id.strip(),
	            client_secret=spotify_client_secret.strip()
	        ))
	    else:
	        local_sp = global_sp

	    # Error messages go in the last slot (the processing-info output); the
	    # first slot feeds the genre dataframe and must not receive plain text.
	    playlist_id = extract_playlist_id(playlist_url.strip())
	    if not playlist_id:
	        return (None, None, None, None, None, "Invalid playlist URL.")

	    tracks = get_playlist_tracks(playlist_id, local_sp)
	    if not tracks:
	        return (None, None, None, None, None, "No tracks found or playlist is private.")

	    genre_count = {}
	    artist_cache = {}
	    tracks_table = []
	    existing_tracks = set()
	    for item in tracks:
	        track = item.get("track")
	        if not track:
	            continue
	        artists = track.get("artists", [])
	        if not artists:
	            continue
	        track_name = track.get("name") or "Unknown Track"
	        artist_info = artists[0]
	        artist_name = artist_info.get("name") or "Unknown Artist"
	        genres = _resolve_artist_genres(artist_info.get("id"), artist_name, local_sp, artist_cache)
	        for g in genres:
	            genre_count[g] = genre_count.get(g, 0) + 1
	        primary_genre = genres[0] if genres else "Unknown"
	        spotify_url = (track.get("external_urls") or {}).get("spotify", "#")
	        query = f"{track_name} {artist_name}"
	        yt_link = f'<a href="https://music.youtube.com/search?q={quote(query)}" target="_blank">YouTube Music</a>'
	        # Raw (unescaped) names are kept for duplicate detection in recommendations.
	        existing_tracks.add((track_name.strip().lower(), artist_name.strip().lower()))
	        # The table is rendered with escape=False so the links work, which
	        # means all catalog-provided text must be escaped here.
	        tracks_table.append([
	            html.escape(track_name),
	            html.escape(artist_name),
	            html.escape(primary_genre),
	            f'<a href="{html.escape(spotify_url, quote=True)}" target="_blank">Listen on Spotify</a>',
	            yt_link,
	        ])

	    total_occurrences = sum(genre_count.values())
	    genres_table_data = [[genre, count, f"{(count / total_occurrences * 100):.2f}%"]
	                         for genre, count in genre_count.items()]
	    genres_table_data.sort(key=lambda x: x[1], reverse=True)
	    genres_df = pd.DataFrame(genres_table_data, columns=["Genre", "Count", "Percentage"])

	    top15 = genres_df.head(15)
	    buf = io.BytesIO()
	    fig = plt.figure(figsize=(10, 6))
	    try:
	        plt.bar(top15["Genre"], top15["Count"], color='skyblue')
	        plt.xticks(rotation=45, ha="right")
	        plt.xlabel("Genre")
	        plt.ylabel("Count")
	        plt.title("Top 15 Genres")
	        plt.tight_layout()
	        fig.savefig(buf, format='png')
	    finally:
	        # Always release the figure, even if plotting fails.
	        plt.close(fig)
	    buf.seek(0)
	    chart_image = Image.open(buf).convert("RGB")

	    tracks_df = pd.DataFrame(tracks_table, columns=["Song Name", "Artist", "Genre", "Spotify Link", "YouTube Music Link"])
	    table_style = """
	<style>
	.nice-table { width: 100%; border-collapse: collapse; margin-top: 1em; }
	.nice-table th, .nice-table td { border: 1px solid #ccc; padding: 8px; }
	.nice-table th { background-color: #f9f9f9; font-weight: bold; }
	.nice-table tr:nth-child(even) { background-color: #f2f2f2; }
	.nice-table a { color: #007bff; text-decoration: none; }
	.nice-table a:hover { text-decoration: underline; }
	</style>
	"""
	    tracks_html = table_style + tracks_df.to_html(escape=False, index=False, classes="nice-table")

	    # Prepare state for recommendations: exclude "Unknown" genres.
	    top_genres = [genre for genre in genres_df["Genre"].head(15).tolist() if genre.lower() != "unknown"]
	    if not top_genres:
	        top_genres = ["pop"]
	    # Spread the recommendations over the genres, but never ask the search
	    # endpoint for more than it accepts per request.
	    rec_per_genre = min(math.ceil(max_recommendations / len(top_genres)), spotify_search_max_limit)
	    analysis_state = {
	        "top_genres": top_genres,
	        "existing_tracks": existing_tracks,
	        "sp_client_id": spotify_client_id,
	        "sp_client_secret": spotify_client_secret,
	        "rec_per_genre": rec_per_genre
	    }

	    recommended_html = generate_recommendations(analysis_state, local_sp, table_style)
	    processed_info = f"Processed {len(tracks_table)} songs from the playlist."

	    return (genres_df, chart_image, tracks_html, recommended_html, analysis_state, processed_info)

	def generate_recommendations(state, local_sp, table_style):
	    """Build the HTML table of recommended tracks for the analyzed playlist.

	    For each top genre a Spotify track search is run (at a random offset when
	    enough results exist). Tracks already in the playlist are skipped, and at
	    most one track per first-listed artist is kept.

	    Args:
	        state: Analysis state with ``top_genres``, ``existing_tracks`` (set of
	            lower-cased ``(track, artist)`` pairs) and ``rec_per_genre``.
	        local_sp: Spotify client exposing ``search``.
	        table_style: ``<style>`` markup prepended to the table.

	    Returns:
	        ``table_style`` followed by the recommendations table as HTML.
	    """
	    import html
	    from urllib.parse import quote

	    max_recommendations = 75
	    # Larger limits are rejected by the search endpoint, and the error would
	    # be swallowed below, leaving the table empty.
	    per_genre = min(state["rec_per_genre"], 50)

	    rec_tracks = []
	    recommended_artists = set()
	    for genre in state["top_genres"]:
	        genre_query = f'genre:"{genre}"'
	        try:
	            search_result = local_sp.search(q=genre_query, type="track", limit=per_genre)
	            total = search_result.get("tracks", {}).get("total", 0)
	            if total > per_genre:
	                # Re-run at a random offset so refreshes return different tracks.
	                offset = random.randint(0, min(total - per_genre, 100))
	                search_result = local_sp.search(q=genre_query, type="track", limit=per_genre, offset=offset)
	            items = search_result.get("tracks", {}).get("items", [])
	            for t in items:
	                if not t:
	                    continue
	                track_name = t.get("name") or "Unknown Track"
	                artists_list = [a.get("name", "") for a in t.get("artists", [])]
	                if not artists_list:
	                    continue
	                artist_str = ", ".join(artists_list)
	                first_artist = artists_list[0].strip().lower()
	                if (track_name.strip().lower(), first_artist) in state["existing_tracks"] or first_artist in recommended_artists:
	                    continue
	                spotify_url = (t.get("external_urls") or {}).get("spotify", "#")
	                query = f"{track_name} {artist_str}"
	                yt_link = f'<a href="https://music.youtube.com/search?q={quote(query)}" target="_blank">YouTube Music</a>'
	                # Rendered with escape=False below, so escape catalog text here.
	                rec_tracks.append([
	                    html.escape(f"{track_name} by {artist_str}"),
	                    html.escape(genre),
	                    f'<a href="{html.escape(spotify_url, quote=True)}" target="_blank">Listen on Spotify</a>',
	                    yt_link,
	                ])
	                recommended_artists.add(first_artist)
	        except Exception as exc:
	            # Best-effort per genre: report the failure and try the next one.
	            print(f"Recommendation search failed for genre {genre!r}: {exc}", file=sys.stderr)
	            continue

	    if len(rec_tracks) > max_recommendations:
	        rec_tracks = random.sample(rec_tracks, max_recommendations)
	    rec_tracks_df = pd.DataFrame(rec_tracks, columns=["Title + Author", "Genre", "Spotify Link", "YouTube Music Link"])
	    return table_style + rec_tracks_df.to_html(escape=False, index=False, classes="nice-table")

	def refresh_recommendations(state):
	    """Generate a fresh set of recommendations from a stored analysis state.

	    Args:
	        state: The analysis state produced by a successful playlist analysis,
	            or ``None``/empty when no analysis is available yet.

	    Returns:
	        The recommendations table as HTML, or a short hint asking the user to
	        analyze a playlist first when there is no state to work from.
	    """
	    if not state:
	        # Refresh was clicked before any (successful) analysis: nothing to use.
	        return "<p>Analyze a playlist first to get recommendations.</p>"

	    client_id = state["sp_client_id"].strip()
	    client_secret = state["sp_client_secret"].strip()
	    if client_id and client_secret:
	        local_sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(
	            client_id=client_id,
	            client_secret=client_secret
	        ))
	    else:
	        local_sp = global_sp
	    table_style = """
	<style>
	.nice-table { width: 100%; border-collapse: collapse; margin-top: 1em; }
	.nice-table th, .nice-table td { border: 1px solid #ccc; padding: 8px; }
	.nice-table th { background-color: #f9f9f9; font-weight: bold; }
	.nice-table tr:nth-child(even) { background-color: #f2f2f2; }
	.nice-table a { color: #007bff; text-decoration: none; }
	.nice-table a:hover { text-decoration: underline; }
	</style>
	"""
	    return generate_recommendations(state, local_sp, table_style)

	# Main interface description and disclaimer.
	# Both strings are user-facing text shown through gr.Markdown at the top of
	# the page (the disclaimer first, then the description).
	# NOTE(review): the description says API keys are not stored, yet
	# analyze_playlist puts the supplied client ID/secret into the analysis state
	# so refreshes can reuse them - confirm the wording is still accurate.
	description_text = (
	"This agent analyzes a public Spotify playlist (must be user-shared; providing a playlist uploaded by Spotify will result in an error) "
	"by generating a genre distribution, a track list (with direct Spotify and YouTube Music search links), and a table of recommended tracks "
	"based on the top genres found in the playlist. API keys are not stored. Use the 'Refresh recommendations' button to get a new set of recommendations."
	)

	# Contains inline HTML (<b>, <a>, <br>), so it relies on the Markdown
	# component passing that markup through.
	disclaimer_text = (
	"<b>Disclaimer:</b> This tool works best for playlists with around 100-200 songs (30-60s). For larger playlists, processing may take multiple minutes. "
	"A default API key is provided, but if you reach the limits, you can supply your own API keys, which you can quickly obtain from "
	"<a href='https://developer.spotify.com/' target='_blank'>Spotify Developer</a>.<br>"
	"Note: If the agent is processing for too long, check the logs. If you see a message like 'Your application has reached a rate/request limit', "
	"it means that the provided Spotify API key has reached its limits. Please generate your own API keys and add them."
	)

	# Gradio UI: inputs on top, results split over two tabs, and a per-session
	# state object that carries the analysis between the two buttons.
	with gr.Blocks() as demo:
	    gr.Markdown("# Spotify Playlist Analyzer & Recommendations + YouTube Music Links")
	    gr.Markdown(disclaimer_text)
	    gr.Markdown(description_text)

	    with gr.Row():
	        playlist_url = gr.Textbox(
	            label="Spotify Playlist URL",
	            placeholder="e.g. https://open.spotify.com/playlist/1zgenIMomxFp4irGwgW4Rb"
	        )
	    with gr.Row():
	        sp_client_id = gr.Textbox(label="Spotify Client ID (optional)")
	        # Mask the secret so it is not displayed in clear text while typing.
	        sp_client_secret = gr.Textbox(label="Spotify Client Secret (optional)", type="password")

	    analyze_button = gr.Button("Analyze Playlist")

	    with gr.Tab("Analysis Results"):
	        output_genres = gr.Dataframe(label="Genre Distribution Table")
	        output_chart = gr.Image(label="Top 15 Genre Chart")
	        output_tracks_html = gr.HTML(label="Playlist Tracks Table")
	        output_processed = gr.HTML(label="Processing Info")

	    with gr.Tab("Recommended Tracks"):
	        refresh_button = gr.Button("Refresh recommendations")
	        recommended_html_output = gr.HTML(label="Recommended Tracks Table")

	    state_out = gr.State()  # Will hold the analysis state

	    def run_analysis(playlist_url, sp_client_id, sp_client_secret):
	        """Run the playlist analysis and return its 6-tuple unchanged."""
	        # result: (genres_df, chart_image, tracks_html, recommended_html, analysis_state, processed_info)
	        result = analyze_playlist(playlist_url, sp_client_id, sp_client_secret)
	        return result

	    # The order of `outputs` must match the order of the tuple returned above.
	    analyze_button.click(
	        fn=run_analysis,
	        inputs=[playlist_url, sp_client_id, sp_client_secret],
	        outputs=[output_genres, output_chart, output_tracks_html, recommended_html_output, state_out, output_processed]
	    )

	    # Refreshing reuses the stored analysis state instead of re-reading the playlist.
	    refresh_button.click(
	        fn=refresh_recommendations,
	        inputs=[state_out],
	        outputs=[recommended_html_output]
	    )

	if __name__ == "__main__":
	    demo.launch(share=True)