# inference.py
from text_embeddings_runner import EmbeddingRunner


def create_runner():
    """Build the embedding runner used for inference.

    Calls ``EmbeddingRunner.from_pretrained`` with a fixed configuration:
    the ``magnifi/optimized-semcache-embeds-en-final`` repository, the
    ``pytorch`` backend and the ``cuda`` device. Nothing is configurable by
    the caller; every call requests the same setup.

    Returns:
        Whatever ``EmbeddingRunner.from_pretrained`` returns for that
        configuration (presumably an ``EmbeddingRunner`` instance -- confirm
        against the library).
    """
    # Force PyTorch runner on GPU
    return EmbeddingRunner.from_pretrained(
        repo_id="magnifi/optimized-semcache-embeds-en-final",
        backend="pytorch",
        device="cuda",
    )