"""Gradio app that recommends Netflix titles by sentence-embedding similarity.

A free-text query is embedded with a SentenceTransformer model and compared
(cosine similarity) against embeddings built from each catalogue entry's
description, genre and title. The best matches are shown in a text box.
"""

from functools import lru_cache

import gradio as gr
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Pre-trained sentence-embedding model, shared by catalogue and query encoding.
embedding_model = SentenceTransformer("thenlper/gte-large")

# Netflix catalogue; only the "train" split is searched below.
dataset = load_dataset("hugginglearners/netflix-shows")


def combine_description_title_and_genre(description, listed_in, title):
    """Return the single text string that gets embedded for one catalogue entry.

    Args:
        description: Value of the entry's "description" field.
        listed_in: Value of the entry's "listed_in" field (labelled "Genre").
        title: Value of the entry's "title" field.

    Returns:
        The three values joined as "<description> Genre: <listed_in> Title: <title>".
    """
    return f"{description} Genre: {listed_in} Title: {title}"


def get_embedding(text):
    """Return the embedding produced by ``embedding_model.encode`` for *text*."""
    return embedding_model.encode(text)


@lru_cache(maxsize=1)
def _corpus_embeddings():
    """Embed every entry of the train split once and cache the result.

    The catalogue does not change between queries, so re-encoding it on each
    search (as the previous version did) only added latency. The texts are
    passed to ``encode`` as one list so the model can batch them.

    Returns:
        Array with one embedding row per catalogue entry, in dataset order.
    """
    texts = [
        combine_description_title_and_genre(
            row["description"], row["listed_in"], row["title"]
        )
        for row in dataset["train"]
    ]
    # np.asarray guards against encode() returning a non-ndarray container.
    return np.asarray(embedding_model.encode(texts))


def vector_search(query, top_n=3):
    """Return the catalogue entries most similar to *query* as display text.

    Args:
        query: Free-text search string.
        top_n: Maximum number of results to return (default 3).

    Returns:
        One line per match, most similar first, formatted as
        "Title: ..., Description: ..., Genre: ...". Each line ends with a
        newline. An empty string is returned for a blank query or a
        non-positive ``top_n``.
    """
    # A blank query carries no signal; skip the model call entirely.
    if not query or not query.strip():
        return ""
    # Guard explicitly: a slice of [-0:] would select *every* row.
    if top_n <= 0:
        return ""

    query_embedding = get_embedding(query)
    similarities = cosine_similarity([query_embedding], _corpus_embeddings())

    # argsort is ascending: take the last top_n indices, then reverse them so
    # the best match comes first.
    top_indices = similarities[0].argsort()[-top_n:][::-1]

    train_split = dataset["train"]
    # Cast numpy integers to plain int before indexing the dataset.
    top_items = [train_split[int(index)] for index in top_indices]

    return "".join(
        f"Title: {item['title']}, Description: {item['description']}, "
        f"Genre: {item['listed_in']}\n"
        for item in top_items
    )


def movie_search(query):
    """Gradio callback: run ``vector_search`` on the text box contents."""
    return vector_search(query)


# Gradio interface: one query box, one output box, and a submit button.
with gr.Blocks() as demo:
    gr.Markdown("# Netflix Recommendation System")
    gr.Markdown(
        "Enter a query to receive Netflix show recommendations based on title, description, and genre."
    )
    query = gr.Textbox(label="Enter your query")
    output = gr.Textbox(label="Recommendations")
    submit_button = gr.Button("Submit")
    submit_button.click(fn=movie_search, inputs=query, outputs=output)

demo.launch()