import pinecone
from langchain.vectorstores import Pinecone
import os
from transformers import AutoTokenizer, AutoModel
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
import torch
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder
import gradio as gr
import time
from db_func import insert_one
from langchain.agents import AgentExecutor
import wordninja
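# Dr. V AI: a Gradio chat assistant for ophthalmologists. GPT-4 answers are
# grounded in a Pinecone index of reference texts via a retriever tool, and
# every exchange is logged through db_func.insert_one.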
def clean_text(text):
    # Lowercase the text, drop all whitespace, then re-segment the concatenated
    # string into words with wordninja (repairs run-together PDF extractions).
    text = text.strip().lower()
    collapsed = "".join(text.split())
    return " ".join(wordninja.split(collapsed))
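# Note: clean_text is not called anywhere in this file; it is presumably
# applied upstream when the source PDFs are chunked and indexed.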
def get_bert_embeddings(sentence):
    # Embed a single sentence with the BGE encoder and return the [CLS] token
    # vector as a flat list of floats (the shape the Pinecone wrapper expects;
    # the original returned a nested [1 x dim] list, which breaks queries).
    input_ids = tokenizer.encode(sentence, return_tensors="pt", truncation=True)
    with torch.no_grad():
        output = model(input_ids)
    return output.last_hidden_state[0, 0, :].numpy().tolist()
model_name = "BAAI/bge-base-en-v1.5"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
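# Quick sanity check for the embedding shape (bge-base produces 768-dim
# vectors; run once locally rather than at Space startup):
# assert len(get_bert_embeddings("retinal detachment")) == 768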
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),  # find at app.pinecone.io
    environment=os.getenv("PINECONE_ENV"),  # next to the API key in the console
)
index_name = "ophtal-knowledge-base"
index = pinecone.Index(index_name)
vectorstore = Pinecone(index, get_bert_embeddings, "text")
retriever = vectorstore.as_retriever()
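# as_retriever() with no arguments uses plain similarity search (k=4 in this
# LangChain version), so up to four chunks come back per query.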
tool = create_retriever_tool(
    retriever,
    "search_ophtal-knowledge-base",
    "Searches and returns documents about ophthalmology from the ophtal-knowledge-base index.",
)
tools = [tool]
system_message = SystemMessage(content="Your name is Dr. V AI. You are an assistant to ophthalmologists. Your task is to assist them with their questions. Communicate in medical terms and be professional. You are working with a doctor, so do not warn that you are an AI agent; be confident.")
memory_key = "history"
llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4", temperature=0.2)
prompt = OpenAIFunctionsAgent.create_prompt(
    system_message=system_message,
    extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
)
memory = AgentTokenBufferMemory(memory_key=memory_key, llm=llm, max_token_limit=4000)
agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=memory,
    verbose=True,
    return_intermediate_steps=True,
    max_iterations=2,
)
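# max_iterations=2 caps the agent at roughly one retrieval call before it must
# answer, keeping latency and token usage bounded.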
user_name = None

def run(input_):
    output = agent_executor({"input": input_})
    output_text = output["output"]
    print(output_text)
    source_text = ""
    doc_text = ""
    if len(output["intermediate_steps"]) > 0:
        # The first intermediate step holds the retriever tool's result: the
        # documents the agent fetched before answering.
        documents = output["intermediate_steps"][0][1]
        sources = []
        docs = []
        for doc in documents:
            # Deduplicate by source file so each reference is listed once.
            if doc.metadata["source"] not in sources:
                sources.append(doc.metadata["source"])
                docs.append(doc.page_content)
        for i in range(len(sources)):
            # Turn a raw path like "data/book/AAO ... 2022-2023.pdf" into a
            # readable citation title.
            temp = (
                sources[i]
                .replace(".pdf", "")
                .replace(".txt", "")
                .replace("AAO", "")
                .replace("2022-2023", "")
                .replace("data/book", "")
                .replace("text", "")
                .replace("  ", " ")
            )
            source_text += f"{i+1}. {temp}\n"
            doc_text += f"{i+1}. {docs[i]}\n"
        output_text = f"{output_text} \n\nSources: \n{source_text}\n\nDocuments: \n{doc_text}"
    # Log the full exchange for later review.
    doc_to_insert = {
        "user": user_name,
        "input": input_,
        "output": output_text,
        "source": source_text,
        "documents": doc_text,
    }
    insert_one(doc_to_insert)
    return output_text
def make_conversation(message, history=None):
    # Stream the answer character by character so the chat UI shows a typing
    # effect; history is managed by Gradio and by the agent's own memory.
    text_ = run(message)
    for i in range(len(text_)):
        time.sleep(0.001)
        yield text_[: i + 1]
# def auth_function(username, password):
#     global user_name  # needed, or the assignment below stays local
#     user_name = username
#     return username == password
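# The file defines make_conversation but never builds or launches an interface,
# which leaves the Space with nothing to serve. A minimal sketch of the missing
# wiring (assumed, not from the original; re-enable auth_function above and
# pass auth=auth_function to launch() to restore the login gate):
demo = gr.ChatInterface(make_conversation, title="Dr. V AI")
demo.launch()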