import os
import pickle
import traceback

import faiss
import langchain
from langchain import HuggingFaceHub
from langchain.cache import InMemoryCache
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatGooglePalm, ChatOpenAI
from langchain.document_loaders import (
    DirectoryLoader,
    PyPDFLoader,
    TextLoader,
    UnstructuredHTMLLoader,
    UnstructuredPDFLoader,
    UnstructuredWordDocumentLoader,
)
from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
from langchain.llms.openai import OpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

# Cache LLM calls in memory to avoid repeating identical requests.
langchain.llm_cache = InMemoryCache()

models = ["GPT-3.5", "Flan UL2", "GPT-4", "Flan T5", "Palm"]

pickle_file = "_vs.pkl"
updated_pickle_file = "_vs_updated.pkl"
index_file = "_vs.index"
models_folder = "models/"

llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

chat_history = []
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=10)
vectorstore_index = None

# Name used in the prompt, read from the `name` environment variable.
name = os.environ.get("name", "Rohan")

system_template = """You are ROAR, {name}'s personal assistant, supposed to ANSWER QUESTIONS ON HIS BEHALF.
STRICTLY FOLLOW THIS: FOR OPINIONS, PREFERENCES, EXPERIENCES, ALWAYS ANSWER IN FIRST PERSON AS IF YOU ARE {name}.
ALWAYS generate this response with experiences/opinions using {name}'s RESUME available in the context/vectorstore.
The response should be in professional language and tone, impressive, catchy, and grammatically correct.
Use {name}'s resume and your knowledge of his experience and skills to answer questions to the best of your ability.
Answer the question as if you are assisting {name} or answering on his behalf.
----------------
This activity of answering questions on {name}'s behalf will be called Roar.
For example: if someone wants to ask you a question, they will say "Roar it" and you will answer the question on {name}'s behalf by generating a response using {name}'s resume and your knowledge of his experience and skills.
Add a quirky and funny line at the end to encourage the user to try more Roars, as they are free.
----------------
{context}
"""

# Insert the name into the system template while leaving {context} as a prompt variable.
system_template = system_template.format(name=name, context="{context}")

messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)


def set_model_and_embeddings(model):
    global chat_history
    set_model(model)
    # set_embeddings(model)
    chat_history = []


def set_model(model):
    global llm
    print("Setting model to " + str(model))
    if model == "GPT-3.5":
        print("Loading GPT-3.5")
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.5)
    elif model == "GPT-4":
        print("Loading GPT-4")
        llm = ChatOpenAI(model_name="gpt-4", temperature=1)
    elif model == "Flan UL2":
        print("Loading Flan-UL2")
        llm = HuggingFaceHub(repo_id="google/flan-ul2",
                             model_kwargs={"temperature": 0.1, "max_new_tokens": 500})
    elif model == "Flan T5":
        print("Loading Flan T5")
        llm = HuggingFaceHub(repo_id="google/flan-t5-base",
                             model_kwargs={"temperature": 0.1})
    elif model == "Palm":
        print("Loading Palm")
        llm = ChatGooglePalm(temperature=0)
    else:
        print("Loading GPT-3.5 from else")
        llm = OpenAI(model_name="text-davinci-002", temperature=0.1)


def set_embeddings(model):
    global embeddings
    if model == "GPT-3.5" or model == "GPT-4":
        print("Loading OpenAI embeddings")
        embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
    elif model == "Flan UL2" or model == "Flan T5":
        print("Loading Hugging Face embeddings")
        embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/all-MiniLM-L6-v2")


def get_search_index(model, first_time=False):
    global vectorstore_index
    if not first_time:
        print("Using updated pickle file")
        file = updated_pickle_file
    else:
        print("Using base pickle file")
        file = pickle_file

    if os.path.isfile(get_file_path(model, file)) and os.path.isfile(
            get_file_path(model, index_file)) and os.path.getsize(get_file_path(model, file)) > 0:
        # Load the existing index from the selected pickle file.
        search_index = load_index(model, file)
    else:
        search_index = create_index(model)

    vectorstore_index = search_index
    return search_index


def load_index(model, file=pickle_file):
    with open(get_file_path(model, file), "rb") as f:
        search_index = pickle.load(f)
    print("Loaded index")
    return search_index


def create_index(model):
    sources = fetch_data_for_embeddings()
    source_chunks = split_docs(sources)
    search_index = search_index_from_docs(source_chunks)
    faiss.write_index(search_index.index, get_file_path(model, index_file))
    # Save the index to a pickle file.
    with open(get_file_path(model, pickle_file), "wb") as f:
        pickle.dump(search_index, f)
    print("Created index")
    return search_index


def get_file_path(model, file):
    # Prefix the file name with the provider folder: "openai", "palm", or "hf".
    if model == "GPT-3.5" or model == "GPT-4":
        return models_folder + "openai" + file
    elif model == "Palm":
        return models_folder + "palm" + file
    else:
        return models_folder + "hf" + file


def search_index_from_docs(source_chunks):
    # print("source chunks: " + str(len(source_chunks)))
    # print("embeddings: " + str(embeddings))
    search_index = FAISS.from_documents(source_chunks, embeddings)
    return search_index


def get_html_files():
    loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
    document_list = loader.load()
    return document_list


def get_word_files():
    loader = DirectoryLoader('docs', glob="**/*.docx", loader_cls=UnstructuredWordDocumentLoader, recursive=True)
    document_list = loader.load()
    return document_list


def fetch_data_for_embeddings():
    document_list = get_word_files()
    document_list.extend(get_html_files())
    print("document list: " + str(len(document_list)))
    return document_list


def split_docs(docs):
    splitter = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)
    source_chunks = splitter.split_documents(docs)
    print("chunks: " + str(len(source_chunks)))
    return source_chunks


def load_documents(file_paths):
    # Pick a loader based on the file extension.
    document_list = []
    for file_path in file_paths:
        if file_path.endswith(".txt"):
            loader = TextLoader(file_path)
        elif file_path.endswith(".docx"):
            loader = UnstructuredWordDocumentLoader(file_path)
        elif file_path.endswith(".html"):
            loader = UnstructuredHTMLLoader(file_path)
        elif file_path.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        else:
            print("Unsupported file type")
            raise Exception("Unsupported file type")
        docs = loader.load()
        document_list.extend(docs)
        # print("Loaded " + file_path)
    print("Loaded " + str(len(document_list)) + " documents")
    return document_list


def add_to_index(docs, index, model):
    global vectorstore_index
    index.add_documents(docs)
    with open(get_file_path(model, updated_pickle_file), "wb") as f:
        pickle.dump(index, f)
    vectorstore_index = index
    print("Vectorstore index updated")
    return True


def ingest(file_paths, model):
    print("Ingesting files")
    try:
        # Handle txt, docx, html, and pdf files.
        docs = load_documents(file_paths)
        docs = split_docs(docs)
        add_to_index(docs, vectorstore_index, model)
        print("Ingestion complete")
    except Exception:
        traceback.print_exc()
        return False
    return True


def get_qa_chain(vectorstore_index):
    global llm
    print(llm)
    # embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
    # compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=gpt_3_5_index.as_retriever())
    retriever = vectorstore_index.as_retriever(search_type="similarity_score_threshold",
                                               search_kwargs={"score_threshold": .8})
    chain = ConversationalRetrievalChain.from_llm(llm, retriever, return_source_documents=True, verbose=True,
                                                  get_chat_history=get_chat_history,
                                                  combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
    return chain


def get_chat_history(inputs) -> str:
    res = []
    for human, ai in inputs:
        res.append(f"Human:{human}\nAI:{ai}")
    return "\n".join(res)


def generate_answer(question) -> str:
    global chat_history, vectorstore_index
    chain = get_qa_chain(vectorstore_index)

    result = chain(
        {"question": question, "chat_history": chat_history, "vectordbkwargs": {"search_distance": 0.6}})
    chat_history = [(question, result["answer"])]
    sources = []
    print(result)

    for document in result['source_documents']:
        # sources.append(document.metadata['url'])
        sources.append(document.metadata['source'].split('/')[-1].split('.')[0])
    print(sources)

    source = ',\n'.join(set(sources))
    return result['answer'] + '\nSOURCES: ' + source