Spaces:
Paused
Paused
| #!/usr/bin/env python3.10 | |
| # """ | |
| # Semantrix Game Module | |
| # This module defines the Semantrix class, which implements a word guessing game using word embeddings. The game can be configured to use either a Word2Vec model or a SentenceTransformer model for word embeddings. The game supports multiple languages and difficulty levels. | |
| # Classes: | |
| # Semantrix: A class that implements the Semantrix word guessing game. | |
| # Semantrix.DictWrapper: A helper class to wrap configuration dictionaries. | |
| # Functions: | |
| # __init__(self, lang=0, model_type="SentenceTransformer", session_hash=None): Initializes the Semantrix game with the specified language, model type and session hash. | |
| # prepare_game(self, secret_word_used, difficulty): Prepares the game with the secret word at the given index and the selected difficulty level. | |
| # gen_rank(self, repeated): Generates the ranking file based on the scores. | |
| # play_game(self, word, model_class): Plays the game with the selected word, using the models held by model_class, and returns feedback. | |
| # curiosity(self): Generates a curiosity hint about the secret word once the game is over. | |
| # Attributes: | |
| # model (KeyedVectors): The word embeddings model. | |
| # config_file_path (str): Path to the configuration file. | |
| # secret_file_path (str): Path to the secret words file. | |
| # data_path (str): Path to the data directory. | |
| # Config_full (dict): Full configuration data. | |
| # secret (dict): Secret words data. | |
| # lang (int): Language of the game (0 for Spanish, 1 for English). | |
| # model_type (str): Type of the model ("word2vec" or "SentenceTransformer"). | |
| # Config (DictWrapper): Configuration data for the selected language. | |
| # secret_dict (dict): Secret words for the selected language. | |
| # secret_list (list): List of secret words for the selected difficulty. | |
| # words (list): List of words guessed by the player. | |
| # scores (list): List of scores for the guessed words. | |
| # win (bool): Indicates if the player has won the game. | |
| # n (int): Number of hints given. | |
| # recent_hint (int): Counter for recent hints. | |
| # f_dev_avg (float): Moving average of the tendency slope. | |
| # last_hint (int): Index of the last hint given. | |
| # difficulty (int): Difficulty level of the game. | |
| # """ | |
| import os | |
| import sys | |
| import json | |
| import uuid | |
| import random | |
| from datetime import datetime | |
| import time | |
| from tqdm import tqdm | |
| import numpy as np | |
| from gensim.models import KeyedVectors | |
| from hints import curiosity, hint | |
| from tracking import ( | |
| calculate_moving_average, | |
| calculate_tendency_slope, | |
| ) | |
| from sentence_transformers import SentenceTransformer, util | |
| import warnings | |
| from huggingface_hub import snapshot_download | |
| warnings.filterwarnings(action="ignore", category=UserWarning, module="gensim") | |
class Model_class:
    """Load the embedding models used by the game.

    Attributes:
        model: gensim ``KeyedVectors`` holding the vocabulary used to score
            guesses. Only set when ``model_type`` is ``"word2vec"`` or
            ``"SentenceTransformer"``; any other value leaves it unset.
        model_st: ``SentenceTransformer`` encoder, always loaded.
    """

    base_path = os.path.dirname(os.path.abspath(__file__))

    # model_type -> (Hub repository, local directory relative to base_path,
    #                suffix of the model file name)
    _MODEL_SOURCES = {
        "SentenceTransformer": (
            "Jsevisal/strans_models",
            "config/strans_models/",
            "strans_model",
        ),
        "word2vec": (
            "Jsevisal/w2v_models",
            "config/w2v_models/",
            "w2v_model",
        ),
    }

    def __init__(self, lang=0, model_type="SentenceTransformer"):
        """Load the vectors for ``lang`` and ``model_type``.

        Args:
            lang: 1 selects the English (``eng_``) model; any other value
                selects the Spanish (``esp_``) one.
            model_type: ``"SentenceTransformer"`` or ``"word2vec"``.
        """
        source = self._MODEL_SOURCES.get(model_type)
        if source is not None:
            repo_id, local_dir, suffix = source
            file_name = ("eng_" if lang == 1 else "esp_") + suffix
            local_file = os.path.join(self.base_path, local_dir, file_name)

            # Use the local copy only when the exact model file is there;
            # otherwise fetch the repository snapshot. Previously the model
            # path was only assigned when a directory was missing, so having
            # both local directories in place crashed with an unbound name.
            # NOTE(review): assumes a local copy uses the same file names as
            # the Hub repository - confirm.
            if os.path.exists(local_file):
                model_file = local_file
            else:
                model_file = os.path.join(
                    snapshot_download(repo_id=repo_id), file_name
                )

            self.model = KeyedVectors.load(model_file, mmap="r")

        self.model_st = SentenceTransformer(
            "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        )
class Semantrix:
    """Word-guessing game scored by embedding similarity to a secret word.

    Each guess receives a score (similarity to the secret word times 10,
    rounded to two decimals). A score of 10 or more wins the game. Guesses
    and scores are kept in the parallel lists ``words`` and ``scores``;
    index 0 of both is a placeholder (``Config.secret_word`` / 10) that is
    replaced by the real secret word once the player wins.
    """

    # Paths for the configuration files and the data directory
    base_path = os.path.dirname(os.path.abspath(__file__))
    config_file_path = os.path.join(base_path, "config/lang.json")
    secret_file_path = os.path.join(base_path, "config/secret.json")
    data_path = os.path.join(base_path, "data/")

    class DictWrapper:
        """Expose the keys of a configuration dictionary as attributes."""

        def __init__(self, data_dict):
            self.__dict__.update(data_dict)

    def __init__(self, lang=0, model_type="SentenceTransformer", session_hash=None):
        """Load the configuration and create the session's ranking file.

        Args:
            lang: 1 selects the "ENG" configuration; any other value "SPA".
            model_type: stored in ``self.model_type``.
            session_hash: identifier used in the ranking file name.
        """
        # Load the configuration file
        with open(self.config_file_path, "r", encoding="utf-8") as file:
            self.Config_full = json.load(file)

        # Load the secret file where the secret words are stored
        with open(self.secret_file_path, "r", encoding="utf-8") as file:
            self.secret = json.load(file)

        self.lang = lang
        self.session_id = str(uuid.uuid4().hex)
        self.model_type = model_type
        self.session_hash = session_hash
        self.ranking_path = "rankings/ranking_" + str(self.session_hash) + ".txt"
        self.ranking_data = []
        self.ranking_msg = ""

        lang_key = self._lang_key()
        self.Config = self.DictWrapper(self.Config_full[lang_key]["Game"])
        self.secret_dict = self.secret[lang_key]
        self.secret_list = self.secret_dict["basic"]

        # Create the ranking file; make sure its directory exists first so a
        # fresh checkout without data/rankings/ does not fail here.
        ranking_file = self._ranking_file()
        os.makedirs(os.path.dirname(ranking_file), exist_ok=True)
        with open(ranking_file, "w+", encoding="utf-8") as file:
            file.write("---------------------------")

    def _lang_key(self):
        """Return the configuration key ("ENG" or "SPA") for ``self.lang``."""
        return "ENG" if self.lang == 1 else "SPA"

    def _hint_config(self):
        """Return the "Hint" configuration of the current language."""
        return self.DictWrapper(self.Config_full[self._lang_key()]["Hint"])

    def _ranking_file(self):
        """Return the full path of this session's ranking file."""
        return os.path.join(self.data_path, self.ranking_path)

    def _rank_suffix(self):
        """Return the ranking block appended to feedback messages."""
        if len(self.words) > 1:
            return "[rank]" + self.ranking_msg
        return "\n\n"

    def reset_game(self):
        """Start a new session: new session id and a freshly loaded secret list."""
        self.session_id = str(uuid.uuid4().hex)

        # prepare_game() overwrites self.secret with the chosen word, so the
        # secret file has to be read again.
        with open(self.secret_file_path, "r", encoding="utf-8") as file:
            self.secret = json.load(file)

        # Use the session language (this used to be hard-coded to "SPA",
        # which handed Spanish secrets to English games after a reset).
        self.secret_dict = self.secret[self._lang_key()]
        self.secret_list = self.secret_dict["basic"]

    def generate_gensim_model(self, model_class, batch_size=32):
        """Encode the vocabulary with the sentence transformer and save it.

        Every key of ``model_class.model`` is encoded with
        ``model_class.model_st`` in batches of ``batch_size`` and added to a
        new 768-dimensional ``KeyedVectors`` stored in ``self.model_trans``.

        Args:
            model_class: object exposing ``model`` and ``model_st``.
            batch_size: number of words encoded per call.
        """
        from tqdm import tqdm

        self.model_trans = KeyedVectors(768)
        self.model_trans.init_sims(replace=True)

        words = list(model_class.model.key_to_index.keys())

        for start_index in tqdm(range(0, len(words), batch_size)):
            batch_words = words[start_index : start_index + batch_size]

            # Encode the batch of words
            encoded_vectors = model_class.model_st.encode(
                batch_words,
                convert_to_tensor=True,
                prompt="Encuentra el valor semántico de la palabra: ",
            ).tolist()

            # Add vectors to the model
            self.model_trans.add_vectors(batch_words, encoded_vectors)

        # NOTE(review): this path is relative to the current working
        # directory, unlike the other paths of this class - confirm intent.
        self.model_trans.save("config/strans_models/esp_strans_model_prompt")

    def prepare_game(self, secret_word_used, difficulty):
        """Select the secret word and reset the per-game state.

        Args:
            secret_word_used: index into ``self.secret_list``.
            difficulty: difficulty level; level 1 starts with ``n = 3``.
        """
        self.secret = self.secret_list[secret_word_used].lower()
        self.init_time = time.time()

        # Index 0 of both lists is the placeholder for the secret word
        self.words = [self.Config.secret_word]
        self.scores = [10]

        # Initialize the game variables
        self.win = False
        self.n = 0
        self.recent_hint = 0
        self.f_dev_avg = 0
        self.last_hint = -1
        self.difficulty = difficulty

        # Set the number of hints depending on the difficulty
        if self.difficulty == 1:
            self.n = 3

    def gen_rank(self, repeated):
        """Rebuild the ranking and write it to the ranking file.

        The first entry is the most recent guess (or the earlier occurrence
        when the guess was repeated), followed by a separator and all
        guesses sorted by descending score. The placeholder at index 0 is
        left out of the sorted part.

        Args:
            repeated: index of the repeated word, or -1 for a new word.
        """
        descending_indices = list(np.argsort(self.scores)[::-1])

        self.ranking_data.clear()
        k = repeated if repeated != -1 else len(self.words) - 1

        self.ranking_data.append(["#" + str(k), self.words[k], self.scores[k]])
        self.ranking_data.append("---------------------------")
        for i in descending_indices:
            if i == 0:
                continue
            self.ranking_data.append(["#" + str(i), self.words[i], self.scores[i]])

        # The file and the in-memory message carry the same text
        self.ranking_msg = "".join(f"{item}\n" for item in self.ranking_data)
        with open(self._ranking_file(), "w+", encoding="utf-8") as file:
            file.write(self.ranking_msg)

    def play_game(self, word, model_class):
        """Score a guess and return the feedback message.

        Args:
            word: the player's guess; ``"give_up"`` ends the game.
            model_class: object exposing ``model`` (``key_to_index`` and
                ``similarity``) and ``model_st`` (used for hints).

        Returns:
            The feedback text. It may start with ``[lose]`` or ``[win]`` and
            may contain ``[hint]`` and ``[rank]`` sections.
        """
        word = word.lower().strip()

        # Check if the user wants to give up
        if word == "give_up":
            return (
                "[lose]"
                + str(self.Config.Feedback_9)
                + self.secret
                + "\n\n"
                + self.Config.Feedback_10
            )

        # Reject unknown words before touching the game state. The word used
        # to be appended first and popped afterwards, which removed a valid
        # earlier guess when the unknown word was already in the list.
        if word not in model_class.model.key_to_index:
            feedback = (
                "I don't know that word. Try another word."
                if self.lang == 1
                else "No conozco esa palabra. Prueba con otra palabra."
            )
            return feedback + self._rank_suffix()

        repeated = self.words.index(word) if word in self.words else -1
        # Score of the latest recorded guess, taken before this one is added
        previous_score = self.scores[-1]

        similarity = model_class.model.similarity(self.secret, word)
        # Keep a plain float: NumPy scalars inside ranking_data are rendered
        # as e.g. "np.float32(5.23)" by NumPy >= 2 in the ranking text.
        score = round(float(similarity) * 10, 2)

        # Repeated words keep their original position and score
        if repeated == -1:
            self.words.append(word)
            self.scores.append(score)

        # Generate the feedback message depending on the score
        if score <= 2.5:
            feedback = self.Config.Feedback_0 + str(score)
        elif score <= 6.0:
            feedback = self.Config.Feedback_1 + str(score)
        elif score <= 7.0:
            feedback = self.Config.Feedback_2 + str(score)
        elif score <= 8:
            feedback = self.Config.Feedback_3 + str(score)
        elif score <= 9.0:
            feedback = self.Config.Feedback_4 + str(score)
        elif score < 10.0:
            feedback = self.Config.Feedback_5 + str(score)
        else:
            # A score of 10 means the user wins the game
            self.win = True
            feedback = "[win]" + self.Config.Feedback_8
            self.words[0] = self.secret
            # The secret now lives at index 0, so drop the entry that was
            # just appended. Nothing was appended for a repeated guess.
            if repeated == -1:
                self.words.pop()
                self.scores.pop()

        # Tell the user whether this guess is better or worse than the last
        if not self.win:
            if score > previous_score:
                feedback += "\n" + self.Config.Feedback_6
            elif score < previous_score:
                feedback += "\n" + self.Config.Feedback_7

        # Hint generation: skipped for difficulty 4
        if self.difficulty != 4 and len(self.scores) > 1:
            mov_avg = calculate_moving_average(self.scores[1:], 5)

            if len(mov_avg) > 1 and not self.win:
                f_dev = calculate_tendency_slope(mov_avg)
                f_dev_avg = calculate_moving_average(f_dev, 3)

                # Give a hint when the latest averaged slope is negative and
                # no hint was given recently. The length guard avoids
                # indexing an empty result.
                if (
                    len(f_dev_avg) > 0
                    and f_dev_avg[len(f_dev_avg) - 1] < 0
                    and self.recent_hint == 0
                ):
                    # Random introduction followed by a dynamic hint
                    feedback += "\n\n[hint]" + random.choice(self.Config.hint_intro)
                    hint_text, self.n, self.last_hint = hint(
                        self.secret,
                        self.n,
                        model_class.model_st,
                        self.last_hint,
                        self.lang,
                        self._hint_config(),
                    )
                    feedback += "\n" + hint_text
                    self.recent_hint = 3

            if self.recent_hint != 0:
                self.recent_hint -= 1

        # Generate the ranking file and add it to the feedback message
        self.gen_rank(repeated)
        feedback += self._rank_suffix()

        return feedback

    def curiosity(self):
        """Return a curiosity about the secret word and store the play.

        The ranking file (without its first two lines) is saved together
        with session data in ``data/plays/<session_id>-<secret>.json``.
        """
        # Generate a curiosity about the secret word
        feedback = curiosity(self.secret, self._hint_config())

        # Skip the first two lines of the ranking file: the latest-guess
        # entry and the separator written by gen_rank()
        with open(self._ranking_file(), "r", encoding="utf-8") as original_file:
            file_content = original_file.readlines()[2:]

        new_file_name = f"{self.session_id}-{self.secret}.json"

        play_data = {
            "session_id": self.session_id,
            "datetime": str(datetime.now()),
            "time": time.time() - self.init_time,
            "data": file_content,
            "win": self.win,
            "secret": self.secret,
            "number_of_hints": self.n,
        }

        plays_dir = os.path.join(self.data_path, "plays")
        os.makedirs(plays_dir, exist_ok=True)
        with open(
            os.path.join(plays_dir, new_file_name), "w", encoding="utf-8"
        ) as new_file:
            json.dump(play_data, new_file, indent=4)

        return feedback

    def get_session_id(self):
        """Return the identifier of the current session."""
        return self.session_id