from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

def prediction(file_paths, uploaded_text):
    sentences = []
    for file_path in file_paths:
        with open(file_path, 'r') as file:
            text = file.read()
            sentences.append(text)
    sentences.append(uploaded_text)

    # Load the pre-trained BERT-based model
    model_name = 'sentence-transformers/bert-base-nli-mean-tokens'
    model = SentenceTransformer(model_name)

    # Use the model to encode the sentences
    sentence_embeddings = model.encode(sentences)

    # Calculate cosine similarity between the uploaded text and the rest of the sentence embeddings
    query_embedding = sentence_embeddings[-1]       # Uploaded text embedding
    candidate_embeddings = sentence_embeddings[:-1] # Embeddings of other files
    cosine_similarities = cosine_similarity([query_embedding], candidate_embeddings)[0]

    # Combine the candidate file paths with their corresponding cosine similarities
    predictions = list(zip(file_paths, cosine_similarities))

    # Sort the predictions based on cosine similarity (highest to lowest)
    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions
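A minimal usage sketch of the function above; the file names and query string here are hypothetical placeholders, not files that ship with this project:

if __name__ == "__main__":
    # Hypothetical candidate documents on disk (replace with real paths)
    candidate_files = ["doc1.txt", "doc2.txt", "doc3.txt"]
    query = "Text to compare against the candidate documents."

    # Returns (file_path, similarity) pairs, most similar first
    ranked = prediction(candidate_files, query)
    for path, score in ranked:
        print(f"{path}: {score:.4f}")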