Spaces:
Runtime error
Runtime error
import getpass | |
import os | |
import numpy as np | |
from langchain_core.runnables import RunnablePassthrough | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_openai import ChatOpenAI | |
from langchain.chains import create_retrieval_chain | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_core.documents import Document | |
def generate_openai_response(input_prompt, system_prompt=None, temperature=0): | |
if system_prompt is None: | |
system_prompt = '''You are an assistant designed to provide answers when no (0) relevant documents are retrieved from the vector database. When this happens, you should follow these steps: | |
1) First, determine if you can answer the user's query using general knowledge or internal information. If so, generate a confident, helpful response in a straightforward narrative style. Do not use phrases such as 'According to me,' 'As of my knowledge,' 'I don’t know but,' or mention knowledge cutoffs or lack of information. Simply provide the answer as if you are certain of the facts. | |
2) If the question is domain-specific, too specific (e.g., about a particular person or object that could mislead), or outside your knowledge, do not attempt to answer. Politely respond with: 'I'm sorry, I currently do not have enough information to answer your question. | |
When asked about you, answer from the file(s): | |
- Who-is-Wagner-Chatbot-Response.docx | |
When asked about Daniel, answer from the file(s): | |
- Ringel_Daniel_CV_V1.docx | |
''' | |
llm = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY"), temperature=temperature) | |
return llm(system_prompt+input_prompt).content | |
class ChatbotDataQuery: | |
def __init__(self, vector_store): | |
self.llm = ChatOpenAI(model="gpt-4o", api_key=os.getenv("OPENAI_API_KEY")) | |
self.system_prompt = '''You are Wagner, a highly intelligent and friendly AI assistant. | |
You are developed to provide answers to given prompts about you, Daniel and Daniel's work. You will be given some context to generate the answer from to the user. | |
Answer in a very affirmative, concise, and clear manner. Never refuse to answer, always provide relevant information. | |
When asked about you, answer from the file(s): | |
- Who-is-Wagner-Chatbot-Response.docx | |
When asked about Daniel, answer from the file(s): | |
- Ringel_Daniel_CV_V1.docx | |
''' | |
if vector_store is None: | |
raise ValueError("Vector store cannot be None") | |
else: | |
self.vector_store = vector_store | |
def __generate_response(self, query_text, k, reranker=None, reranker_docs=0): | |
retrieved_results = self.vector_store.similarity_search_with_score( | |
query_text, k=k | |
) | |
Highest_Similarity_Score = retrieved_results[0][1] | |
Highest_Similarity_Score = np.around(Highest_Similarity_Score, decimals=1) | |
print(f'The highest similarity score is {Highest_Similarity_Score}') | |
if Highest_Similarity_Score > 0.51: | |
context_docs = [] | |
for res, score in retrieved_results: | |
context_docs.append(res) | |
context_docs_texts = [] | |
for res in context_docs: | |
context_docs_texts.append(res.page_content) | |
if (reranker is not None) and (reranker_docs > 0) and (len(context_docs_texts) > 0): | |
relevant_docs = reranker.rerank(query_text, context_docs_texts, k=reranker_docs) | |
All_Scores = [doc['score'] for doc in relevant_docs] | |
Min = min(All_Scores) | |
Max = max(All_Scores) | |
Normalized_Scores = [(doc['score'] - Min) / (Max - Min) for doc in relevant_docs] | |
for idx,doc in enumerate(relevant_docs): | |
doc['score'] = Normalized_Scores[idx] | |
final_reranked_docs = [] | |
for reranked_doc in relevant_docs: | |
idx_of_content_in_context_doc = reranked_doc['result_index'] | |
meta_data = context_docs[idx_of_content_in_context_doc].metadata | |
print(f'Reranked_Doc: {meta_data}') | |
if reranked_doc['score'] < 0.25: | |
continue | |
else: | |
idx_of_content_in_context_doc = reranked_doc['result_index'] | |
meta_data = context_docs[idx_of_content_in_context_doc].metadata | |
final_reranked_docs.append(Document(page_content=reranked_doc['content'], metadata=meta_data)) | |
context_docs = [final_reranked_docs[0]] | |
prompt = ChatPromptTemplate.from_template( | |
"You are a helpful assistant that only answers questions about the context. " | |
"You try your best to extract the relevant answers from the context. " | |
"The context is:\n\n{context}\n\n" | |
"Question: {question}\n" | |
"Helpful Answer:" | |
) | |
chain = create_stuff_documents_chain( | |
llm=self.llm, | |
prompt=prompt, | |
document_variable_name="context", | |
) | |
context = '\n\n'.join([doc.page_content for doc in context_docs]) | |
query = [ | |
("system", f"{self.system_prompt}"), | |
("human", f"context: {context}\nInput: {query_text}"), | |
] | |
response = '' | |
for chunk in self.llm.stream(query): | |
response += chunk.content | |
revised_context_docs = [] | |
for doc in context_docs: | |
if doc.metadata['source'] not in revised_context_docs: | |
revised_context_docs.append(doc.metadata['source']) | |
return response, revised_context_docs | |
else: | |
system_prompt = ''' | |
You are an intelligent assistant designed to provide clear, accurate, and helpful responses. | |
Focus on understanding user intent, give concise answers, and offer step-by-step solutions when necessary. | |
Be friendly, professional, and avoid unnecessary information.\n''' | |
input_prompt = f'Query: {query_text}' | |
response = generate_openai_response(input_prompt, system_prompt) | |
return response, 'GPT Response' | |
def query(self, query_text, k=1, reranker=None): | |
try: | |
return self.__generate_response(query_text=query_text, k=k, reranker=reranker, reranker_docs=k//2) | |
except Exception as e: | |
print(f"Failed to retrieve documents: {str(e)}") | |
return None |