Spaces:

AseemD
/

WikipediaAgent

Sleeping

App Files Files Community

WikipediaAgent / app.py

AseemD

Update app.py

18a99a1 verified 11 months ago

raw

history blame contribute delete

5.96 kB

	import re
	import os
	import wikipediaapi
	import gradio as gr
	from groq import Groq
	from langchain_community.vectorstores import FAISS
	from langchain_openai import OpenAIEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter

	from utils.context import system_prompt

	# Agent Class
	class Agent:
	    """Chat agent that keeps the whole conversation as a list of messages.

	    Each message is a dict with ``"role"`` and ``"content"`` keys. The history
	    always starts with the system message and grows by one user message (when
	    given) and one assistant message per call.

	    Args:
	        client: Object exposing ``chat.completions.create(messages=..., model=...)``
	            whose result provides ``choices[0].message.content``.
	        system: Text of the system message that seeds the conversation.
	        model: Model identifier forwarded to the completion call.
	    """

	    def __init__(self, client, system, model="llama3-70b-8192"):
	        self.client = client
	        self.system = system
	        self.model = model
	        # A new agent has no history yet, so the conversation is always
	        # seeded with the system message.
	        self.memory = [{"role": "system", "content": self.system}]

	    def __call__(self, message=""):
	        """Send ``message`` (if non-empty) and return the assistant's reply.

	        Both the user message and the reply are appended to ``self.memory``.
	        An empty ``message`` asks the model to continue from the current
	        history without adding a user turn.
	        """
	        if message:
	            self.memory.append({"role": "user", "content": message})
	        result = self.execute()
	        self.memory.append({"role": "assistant", "content": result})
	        return result

	    def execute(self):
	        """Request a completion for the current history and return its text."""
	        # Use the client handed to this instance rather than a module-level
	        # global, so the agent works with whatever client it was built with.
	        completion = self.client.chat.completions.create(
	            messages=self.memory,
	            model=self.model,
	        )
	        return completion.choices[0].message.content

	# Global variables shared by the agent and its tools
	# Groq chat client; the API key is read from the GROQ_API_KEY environment variable
	client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
	# English-language Wikipedia client
	wiki = wikipediaapi.Wikipedia(language='en', user_agent="aseem" )
	# Embedding model used when building the FAISS vector store
	embeddings = OpenAIEmbeddings()
	# Current FAISS vector store; stays None until wikipedia_search runs an advanced search
	faiss_store = None

	# Utils/Tools for the agent
	def calculate(operation):
	    """Evaluate ``operation`` as a Python expression and return its value.

	    Args:
	        operation: Source text of the expression, e.g. ``"2 + 3 * 4"``.

	    Returns:
	        Whatever the expression evaluates to.
	    """
	    # SECURITY: eval() runs arbitrary Python. The text reaching this tool is
	    # produced by the language model, so it must be treated as untrusted;
	    # consider a restricted arithmetic parser before exposing this publicly.
	    value = eval(operation)
	    return value

	def wikipedia_search(query, advanced_query, advanced_search=False, top_k=5):
	    """Look up the Wikipedia page titled ``query`` and return text from it.

	    Args:
	        query: Title of the page to fetch.
	        advanced_query: Question used to rank passages; only read when
	            ``advanced_search`` is true.
	        advanced_search: When true, index the full page text in FAISS and
	            return the passages most similar to ``advanced_query``; otherwise
	            return the page summary.
	        top_k: Number of passages to retrieve in advanced mode.

	    Returns:
	        A string starting with ``"Context: "`` (advanced mode) or
	        ``"Summary: "``, or a message saying the page does not exist.

	    Side effects:
	        In advanced mode the module-level ``faiss_store`` is replaced with an
	        index built from this page.
	    """
	    global faiss_store

	    article = wiki.page(query)

	    # Nothing to search when the page is missing
	    if not article.exists():
	        return f"The page '{query}' does not exist on Wikipedia."

	    # Plain lookup: the summary is enough
	    if not advanced_search:
	        return f"Summary: {article.summary}\n"

	    # Advanced lookup: chunk the full page, index the chunks, then pull the
	    # passages closest to the advanced query
	    faiss_store = store_in_faiss(chunk_text(article.text))
	    passages = retrieve_top_k(advanced_query, top_k)
	    return f"Context: {' '.join(passages)}\n"


	def chunk_text(text, chunk_size=512, chunk_overlap=50):
	    """
	    Split ``text`` into pieces with LangChain's RecursiveCharacterTextSplitter.

	    ``chunk_size`` and ``chunk_overlap`` are passed straight to the splitter;
	    the list it produces is returned unchanged.
	    """
	    return RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	    ).split_text(text)

	def store_in_faiss(chunks):
	    """
	    Build and return a FAISS vector store from ``chunks``, embedded with the
	    module-level ``embeddings`` object.
	    """
	    return FAISS.from_texts(chunks, embeddings)

	def retrieve_top_k(query, top_k=5):
	    """
	    Retrieve the text of the ``top_k`` chunks most similar to ``query`` from
	    the module-level ``faiss_store``.

	    Args:
	        query: Text to search the vector store with.
	        top_k: Number of chunks to request from the store.

	    Returns:
	        A list of chunk texts. When no vector store has been built yet, a
	        one-element list holding an explanatory message.
	    """
	    if faiss_store is None:
	        # Always hand back a list: callers join the result with " ", and a
	        # bare string would be joined character by character.
	        return ["No vector data available. Perform advanced search first."]

	    # The store yields (document, score) pairs; only the text is needed
	    docs_and_scores = faiss_store.similarity_search_with_score(query, top_k)
	    return [doc.page_content for doc, _score in docs_and_scores]

	# Automatic execution of the agent
	def run_agent(max_iterations=10, query: str = "", display_reasoning=True):
	    """Run the agent's Thought/Action/Observation loop and stream its output.

	    This is a generator. Each yielded value is the full text accumulated so
	    far, not just the newest piece.

	    Args:
	        max_iterations: Maximum number of model calls before giving up.
	        query: The user's question; sent as the first prompt.
	        display_reasoning: When true, every model reply and (truncated)
	            observation is appended to the output. When false, only the text
	            after the last ``"Answer:"`` in the final reply is emitted.

	    Yields:
	        The accumulated output text. If the iteration budget is used up
	        without an answer, a final notice is appended and yielded.
	    """
	    agent = Agent(client=client, system=system_prompt)
	    tools = ["calculate", "wikipedia_search"]
	    next_prompt = query
	    iteration = 0
	    steps = 1
	    partial_results = ""

	    while iteration < max_iterations:
	        iteration += 1
	        result = agent(next_prompt)

	        if display_reasoning:
	            partial_results += f" -------- (Step {steps}) -------- \n"
	            steps += 1
	            partial_results += result + "\n\n"
	            yield partial_results

	        if "Thought" in result and "Action" in result:
	            action = re.findall(r"Action: ([a-z_]+): (.+)", result, re.IGNORECASE)
	            if not action:
	                # The reply mentions an action but not in the expected
	                # "Action: <tool>: <arguments>" form; tell the model instead
	                # of failing on an empty match list.
	                next_prompt = (
	                    "Observation: Could not parse the action. "
	                    "Use the format 'Action: <tool>: <arguments>'."
	                )
	            else:
	                chosen_tool, args = action[0]
	                if chosen_tool in tools:
	                    try:
	                        if chosen_tool == "calculate":
	                            # The raw argument text is the expression itself
	                            tool_result = calculate(args)
	                        else:
	                            # SECURITY: the argument text is written by the
	                            # model and is evaluated as Python call arguments.
	                            # Treat it as untrusted; consider parsing literals
	                            # with ast.literal_eval instead.
	                            tool_result = eval(f"{chosen_tool}({args})")
	                    except Exception as exc:
	                        # Model-written arguments can fail in arbitrary ways;
	                        # report the failure as an observation so the model
	                        # can retry rather than aborting the stream.
	                        tool_result = f"Tool error: {exc}"
	                    next_prompt = f"Observation: {tool_result}"
	                else:
	                    next_prompt = "Observation: Tool not found"

	            if display_reasoning:
	                partial_results += f" -------- (Step {steps}) -------- \n"
	                steps += 1
	                # Observations can be long; show only the first 100 characters
	                partial_results += next_prompt[:100] + " ..." + "\n\n"
	                yield partial_results
	            continue

	        if "Answer" in result:
	            if display_reasoning:
	                yield partial_results
	            else:
	                partial_results += result.split("Answer:")[-1].strip()
	                yield partial_results
	            break
	    else:
	        # Reached only when the loop ends without an answer (no break), so a
	        # reply that arrives on the last allowed iteration is not mislabelled.
	        partial_results += "\nThe Wikipedia AI Agent is likely hallucinating. Please try again :("
	        yield partial_results

	def generate_response_stream(message, show_reasoning):
	    """Stream the agent's output for ``message``.

	    With ``show_reasoning`` true every intermediate step is included in the
	    streamed text; with it false only the final answer is emitted.
	    """
	    agent_stream = run_agent(query=message, display_reasoning=show_reasoning)
	    yield from agent_stream

	def main():
	    """Build the Gradio interface for the agent and launch it.

	    The interface takes a question and a "Show reasoning" checkbox, and
	    streams the text produced by ``generate_response_stream`` into a textbox.
	    """
	    # Adjacent string literals are concatenated with nothing in between, so
	    # the first sentence needs its own trailing space before "For eg:".
	    description = (
	        "Ask a question to the Wikipedia AI Agent. "
	        "For eg: \n"
	        "- \"What is the weight of a tiger?\" \n"
	        "- \"Why are fiber optic cables so fragile?\" \n"
	        "- \"How does an internal combustion engine work?\" \n"
	    )
	    interface = gr.Interface(
	        fn=generate_response_stream,
	        inputs=[
	            gr.Textbox(label="Ask your question here:"),
	            gr.Checkbox(label="Show reasoning"),
	        ],
	        outputs=gr.Textbox(label="Agent Output"),
	        title="Wikipedia AI Agent",
	        description=description,
	    )
	    interface.launch()


	# Start the app only when this file is executed as a script, not when imported
	if __name__ == "__main__":
	main()