| import asyncio | |
| from infinity_emb import AsyncEngineArray, EngineArgs, AsyncEmbeddingEngine | |
# Instruction prefix that gets prepended to the raw query text below.
INSTRUCTION = "Query:"

# Candidate documents to be scored against the query
# (the cities Beijing and Shanghai).
docs = ["beijing", "shanghai"]

# Raw question, in Chinese: "What is the capital of China?"
query = "中国的首都是哪里?"
# The text handed to the reranker has the form "<INSTRUCTION> <question>".
query = f"{INSTRUCTION} {query}"

# Engine array built from a single EngineArgs entry; the entry-point call at
# the bottom of the script uses its first element.
array = AsyncEngineArray.from_args(
    [
        EngineArgs(
            model_name_or_path="OpenBMB/MiniCPM-Reranker-Light",
            engine="torch",
            dtype="float16",
            bettertransformer=False,
            trust_remote_code=True,
            model_warmup=False,
        )
    ]
)
async def rerank(engine: AsyncEmbeddingEngine):
    """Rerank the module-level ``docs`` against ``query`` and print the result.

    Args:
        engine: Engine entered as an async context manager for the duration
            of the rerank call.

    The function returns nothing; it prints a list of ``(result, doc)``
    tuples built by zipping the returned ranking with ``docs``.
    """
    async with engine:
        # ``engine.rerank`` yields a pair; the second element is not used here.
        results, _usage = await engine.rerank(query=query, docs=docs)
        # NOTE(review): the sample output shows each result already carrying
        # ``document`` and ``index``. If the library returns results ordered
        # by score rather than by input position, zipping with ``docs`` would
        # pair results with the wrong document -- confirm the ordering.
        paired = list(zip(results, docs))
        print(paired)
# Drive the coroutine to completion on the first engine of the array.
# Sample output:
# [(RerankReturnType(relevance_score=0.017917344, document='beijing', index=0), 'beijing'),
#  (RerankReturnType(relevance_score=0.00024729347, document='shanghai', index=1), 'shanghai')]
asyncio.run(rerank(array[0]))