mjschock committed
Commit e4c7240 · unverified · 1 parent: 837e221

Enhance agent functionality in main_v2.py by adding WikipediaSearchTool and updating DuckDuckGoSearchTool and VisitWebpageTool parameters. Modify agent initialization to accommodate new tools and increase max results and output length. Update requirements.txt to include Wikipedia-API dependency. Refactor imports for better organization across agent modules.
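
The net effect on the manager agent in main_v2.py is summarized by the sketch below. It is a condensed illustration of the diff that follows, not part of the commit itself: the model id is a placeholder, and the prompt-template and telemetry setup that main_v2.py performs is elided.

from smolagents import CodeAgent, LiteLLMModel
from smolagents.default_tools import (
    DuckDuckGoSearchTool,
    VisitWebpageTool,
    WikipediaSearchTool,
)
from smolagents.monitoring import LogLevel

# Placeholder model; main_v2.py builds its own LiteLLMModel and prompt templates.
model = LiteLLMModel(model_id="openai/gpt-4o-mini")

agent = CodeAgent(
    model=model,
    tools=[
        DuckDuckGoSearchTool(max_results=3),       # raised from max_results=1
        VisitWebpageTool(max_output_length=1024),  # raised from max_output_length=256
        WikipediaSearchTool(),                     # newly added in this commit
    ],
    step_callbacks=None,
    verbosity_level=LogLevel.ERROR,
)

result = agent.run(
    task="When was the Eiffel Tower completed?",  # example task
    max_steps=5,  # raised from 3
    reset=True,
    stream=False,
)
print(result)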

agents/__init__.py CHANGED
@@ -1,9 +1,5 @@
-from .web_agent import create_web_agent
 from .data_agent import create_data_agent
 from .media_agent import create_media_agent
+from .web_agent import create_web_agent
 
-__all__ = [
-    'create_web_agent',
-    'create_data_agent',
-    'create_media_agent'
-]
+__all__ = ["create_web_agent", "create_data_agent", "create_media_agent"]

agents/data_agent/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .agent import create_data_agent
 
-__all__ = ['create_data_agent']
+__all__ = ["create_data_agent"]

agents/data_agent/agent.py CHANGED
@@ -1,8 +1,11 @@
 import importlib
+
 import yaml
 from smolagents import CodeAgent
+
 from tools import parse_csv, perform_calculation
 
+
 def create_data_agent(model):
     """
     Create a specialized agent for data analysis tasks.
@@ -30,4 +33,4 @@ def create_data_agent(model):
         prompt_templates=prompt_templates,
     )
 
-    return data_agent
+    return data_agent

agents/media_agent/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .agent import create_media_agent
 
-__all__ = ['create_media_agent']
+__all__ = ["create_media_agent"]

agents/media_agent/agent.py CHANGED
@@ -1,8 +1,11 @@
 import importlib
+
 import yaml
 from smolagents import CodeAgent
+
 from tools import analyze_image, read_pdf
 
+
 def create_media_agent(model):
     """
     Create a specialized agent for handling media (images, PDFs).
@@ -30,4 +33,4 @@ def create_media_agent(model):
         prompt_templates=prompt_templates,
    )
 
-    return media_agent
+    return media_agent

agents/web_agent/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .agent import create_web_agent
 
-__all__ = ['create_web_agent']
+__all__ = ["create_web_agent"]

agents/web_agent/agent.py CHANGED
@@ -1,7 +1,10 @@
 import importlib
+
 import yaml
 from smolagents import CodeAgent
-from tools import web_search, browse_webpage, find_in_page, extract_dates
+
+from tools import browse_webpage, extract_dates, find_in_page, web_search
+
 
 def create_web_agent(model):
     """
@@ -30,4 +33,4 @@ def create_web_agent(model):
         prompt_templates=prompt_templates,
     )
 
-    return web_agent
+    return web_agent

main.py CHANGED
@@ -14,12 +14,12 @@ from langgraph.graph import END, START, StateGraph
 from openinference.instrumentation.smolagents import SmolagentsInstrumentor
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
 from phoenix.otel import register
+from prompts import MANAGER_SYSTEM_PROMPT
 from smolagents import CodeAgent, LiteLLMModel
 from smolagents.memory import ActionStep, FinalAnswerStep
 from smolagents.monitoring import LogLevel
 
 from agents import create_data_analysis_agent, create_media_agent, create_web_agent
-from prompts import MANAGER_SYSTEM_PROMPT
 from tools import perform_calculation, web_search
 from utils import extract_final_answer
 

main_v2.py CHANGED
@@ -11,7 +11,11 @@ from phoenix.otel import register
 
 # from smolagents import CodeAgent, LiteLLMModel, LiteLLMRouterModel
 from smolagents import CodeAgent, LiteLLMModel
-from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool
+from smolagents.default_tools import (
+    DuckDuckGoSearchTool,
+    VisitWebpageTool,
+    WikipediaSearchTool,
+)
 from smolagents.monitoring import LogLevel
 
 from agents.data_agent.agent import create_data_agent
@@ -67,8 +71,9 @@ agent = CodeAgent(
     model=model,
     prompt_templates=prompt_templates,
     tools=[
-        DuckDuckGoSearchTool(max_results=1),
-        VisitWebpageTool(max_output_length=256),
+        DuckDuckGoSearchTool(max_results=3),
+        VisitWebpageTool(max_output_length=1024),
+        WikipediaSearchTool(),
     ],
     step_callbacks=None,
     verbosity_level=LogLevel.ERROR,
@@ -81,7 +86,7 @@ def main(task: str):
     result = agent.run(
         additional_args=None,
         images=None,
-        max_steps=3,
+        max_steps=5,
         reset=True,
         stream=False,
         task=task,

requirements.txt CHANGED
@@ -14,3 +14,4 @@ wikipedia-api>=0.8.1
 langchain>=0.1.0
 langchain-community>=0.0.10
 pandas>=2.0.0
+Wikipedia-API>=0.8.1

tools/__init__.py CHANGED
@@ -1,21 +1,23 @@
-from .wikipedia_rag import WikipediaRAGTool
-from .web_search import web_search
-from .browse_webpage import browse_webpage
 from .analyze_image import analyze_image
-from .read_pdf import read_pdf
-from .parse_csv import parse_csv
-from .find_in_page import find_in_page
+from .browse_webpage import browse_webpage
 from .extract_dates import extract_dates
+from .find_in_page import find_in_page
+from .parse_csv import parse_csv
 from .perform_calculation import perform_calculation
+from .read_pdf import read_pdf
+from .web_search import web_search
+from .wiki_search.tool import wiki
+from .wikipedia_rag import WikipediaRAGTool
 
 __all__ = [
-    'WikipediaRAGTool',
-    'web_search',
-    'browse_webpage',
-    'analyze_image',
-    'read_pdf',
-    'parse_csv',
-    'find_in_page',
-    'extract_dates',
-    'perform_calculation'
-]
+    "WikipediaRAGTool",
+    "web_search",
+    "browse_webpage",
+    "analyze_image",
+    "read_pdf",
+    "parse_csv",
+    "find_in_page",
+    "extract_dates",
+    "perform_calculation",
+    "wiki",
+]

tools/analyze_image/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import analyze_image
 
-__all__ = ['analyze_image']
+__all__ = ["analyze_image"]

tools/analyze_image/tool.py CHANGED
@@ -1,9 +1,11 @@
 import io
-from typing import Dict, Any
+from typing import Any, Dict
+
 import requests
 from PIL import Image
 from smolagents import tool
 
+
 @tool
 def analyze_image(image_url: str) -> Dict[str, Any]:
     """
@@ -36,4 +38,4 @@ def analyze_image(image_url: str) -> Dict[str, Any]:
             "aspect_ratio": width / height,
         }
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": str(e)}

tools/browse_webpage/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import browse_webpage
 
-__all__ = ['browse_webpage']
+__all__ = ["browse_webpage"]

tools/browse_webpage/tool.py CHANGED
@@ -1,8 +1,10 @@
-from typing import Dict, Any
+from typing import Any, Dict
+
 import requests
 from bs4 import BeautifulSoup
 from smolagents import tool
 
+
 @tool
 def browse_webpage(url: str) -> Dict[str, Any]:
     """
@@ -40,4 +42,4 @@ def browse_webpage(url: str) -> Dict[str, Any]:
 
         return {"title": title, "content": text_content, "links": links}
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": str(e)}

tools/extract_dates/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import extract_dates
 
-__all__ = ['extract_dates']
+__all__ = ["extract_dates"]

tools/extract_dates/tool.py CHANGED
@@ -1,7 +1,9 @@
-from typing import List
 import re
+from typing import List
+
 from smolagents import tool
 
+
 @tool
 def extract_dates(text: str) -> List[str]:
     """
@@ -27,4 +29,4 @@ def extract_dates(text: str) -> List[str]:
         matches = re.findall(pattern, text, re.IGNORECASE)
         results.extend(matches)
 
-    return results
+    return results

tools/find_in_page/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import find_in_page
 
-__all__ = ['find_in_page']
+__all__ = ["find_in_page"]

tools/find_in_page/tool.py CHANGED
@@ -1,7 +1,9 @@
-from typing import List, Dict, Any
 import re
+from typing import Any, Dict, List
+
 from smolagents import tool
 
+
 @tool
 def find_in_page(page_content: Dict[str, Any], query: str) -> List[str]:
     """
@@ -25,4 +27,4 @@ def find_in_page(page_content: Dict[str, Any], query: str) -> List[str]:
         if query.lower() in sentence.lower():
             results.append(sentence)
 
-    return results
+    return results

tools/parse_csv/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import parse_csv
 
-__all__ = ['parse_csv']
+__all__ = ["parse_csv"]

tools/parse_csv/tool.py CHANGED
@@ -1,9 +1,11 @@
 import io
-from typing import Dict, Any
-import requests
+from typing import Any, Dict
+
 import pandas as pd
+import requests
 from smolagents import tool
 
+
 @tool
 def parse_csv(csv_url: str) -> Dict[str, Any]:
     """
@@ -35,4 +37,4 @@ def parse_csv(csv_url: str) -> Dict[str, Any]:
             "column_dtypes": {col: str(df[col].dtype) for col in columns},
         }
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": str(e)}

tools/perform_calculation/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import perform_calculation
 
-__all__ = ['perform_calculation']
+__all__ = ["perform_calculation"]

tools/perform_calculation/tool.py CHANGED
@@ -1,7 +1,9 @@
-from typing import Dict, Any
 import math
+from typing import Any, Dict
+
 from smolagents import tool
 
+
 @tool
 def perform_calculation(expression: str) -> Dict[str, Any]:
     """
@@ -35,4 +37,4 @@ def perform_calculation(expression: str) -> Dict[str, Any]:
 
         return {"result": result}
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": str(e)}

tools/read_pdf/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import read_pdf
 
-__all__ = ['read_pdf']
+__all__ = ["read_pdf"]

tools/read_pdf/tool.py CHANGED
@@ -1,6 +1,7 @@
 import requests
 from smolagents import tool
 
+
 @tool
 def read_pdf(pdf_url: str) -> str:
     """
@@ -21,4 +22,4 @@ def read_pdf(pdf_url: str) -> str:
         # such as PyPDF2, pdfplumber, or pdf2text
         return "PDF content extraction would happen here in a real implementation"
     except Exception as e:
-        return f"Error: {str(e)}"
+        return f"Error: {str(e)}"

tools/web_search/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import web_search
 
-__all__ = ['web_search']
+__all__ = ["web_search"]

tools/web_search/tool.py CHANGED
@@ -1,6 +1,7 @@
 from smolagents import tool
 from smolagents.default_tools import DuckDuckGoSearchTool
 
+
 @tool
 def web_search(query: str) -> str:
     """

tools/wiki_search/tool.py ADDED
@@ -0,0 +1,40 @@
+import wikipediaapi
+from smolagents import tool
+
+
+@tool
+def wiki(query: str) -> str:
+    """
+    Search and retrieve information from Wikipedia using the Wikipedia-API library.
+
+    Args:
+        query: The search query to look up on Wikipedia
+
+    Returns:
+        A string containing the Wikipedia page summary and relevant sections
+    """
+    # Initialize Wikipedia API with a user agent
+    wiki_wiki = wikipediaapi.Wikipedia(
+        user_agent="HF-Agents-Course (https://huggingface.co/courses/agents-course)",
+        language="en",
+    )
+
+    # Search for the page
+    page = wiki_wiki.page(query)
+
+    if not page.exists():
+        return f"No Wikipedia page found for query: {query}"
+
+    # Get the page summary
+    result = f"Title: {page.title}\n\n"
+    result += f"Summary: {page.summary}\n\n"
+
+    # Add the first few sections if they exist
+    if page.sections:
+        result += "Sections:\n"
+        for section in page.sections[
+            :3
+        ]:  # Limit to first 3 sections to avoid too much text
+            result += f"\n{section.title}:\n{section.text[:500]}...\n"
+
+    return result

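A quick way to try the new tool in isolation is sketched below. It assumes that calling a smolagents @tool-decorated object invokes the underlying function, and note that wikipediaapi looks pages up by title rather than full-text search, so the query should be close to an actual article title.

from tools.wiki_search.tool import wiki  # also re-exported as `wiki` from the tools package

print(wiki(query="Alan Turing"))        # title, summary, and first few sections
print(wiki(query="no-such-page-xyz"))   # -> "No Wikipedia page found for query: ..."
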
tools/wikipedia_rag/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from .tool import WikipediaRAGTool
 
-__all__ = ['WikipediaRAGTool']
+__all__ = ["WikipediaRAGTool"]

tools/wikipedia_rag/run.py CHANGED
@@ -1,22 +1,30 @@
-import os
 import argparse
+import os
+
 from dotenv import load_dotenv
 from tool import WikipediaRAGTool
 
+
 def main():
     # Load environment variables
     load_dotenv()
-
+
     # Set up argument parser
-    parser = argparse.ArgumentParser(description='Run Wikipedia RAG Tool')
-    parser.add_argument('--query', type=str, required=True, help='Search query for Wikipedia articles')
-    parser.add_argument('--dataset-path', type=str, default='wikipedia-structured-contents',
-                        help='Path to the Wikipedia dataset')
+    parser = argparse.ArgumentParser(description="Run Wikipedia RAG Tool")
+    parser.add_argument(
+        "--query", type=str, required=True, help="Search query for Wikipedia articles"
+    )
+    parser.add_argument(
+        "--dataset-path",
+        type=str,
+        default="wikipedia-structured-contents",
+        help="Path to the Wikipedia dataset",
+    )
     args = parser.parse_args()
-
+
     # Initialize the tool
     tool = WikipediaRAGTool(dataset_path=args.dataset_path)
-
+
     # Run the query
     print(f"\nQuery: {args.query}")
     print("-" * 50)
@@ -24,5 +32,6 @@ def main():
     print(f"Result: {result}")
     print("-" * 50)
 
+
 if __name__ == "__main__":
-    main()
+    main()

tools/wikipedia_rag/tool.py CHANGED
@@ -1,17 +1,19 @@
 import os
-import pandas as pd
 from typing import List, Optional
+
+import pandas as pd
 from langchain.docstore.document import Document
 from langchain_community.retrievers import BM25Retriever
 from smolagents import Tool
 
+
 class WikipediaRAGTool(Tool):
     name = "wikipedia_rag"
     description = "Retrieves relevant information from Wikipedia articles using RAG."
     inputs = {
         "query": {
             "type": "string",
-            "description": "The search query to find relevant Wikipedia content."
+            "description": "The search query to find relevant Wikipedia content.",
         }
     }
     output_type = "string"
@@ -27,24 +29,24 @@ class WikipediaRAGTool(Tool):
         try:
             # Load the dataset
             df = pd.read_csv(os.path.join(self.dataset_path, "wikipedia_articles.csv"))
-
+
             # Convert each article into a Document
             self.docs = [
                 Document(
                     page_content=f"Title: {row['title']}\n\nContent: {row['content']}",
                     metadata={
-                        "title": row['title'],
-                        "url": row['url'],
-                        "category": row.get('category', '')
-                    }
+                        "title": row["title"],
+                        "url": row["url"],
+                        "category": row.get("category", ""),
+                    },
                 )
                 for _, row in df.iterrows()
             ]
-
+
             # Initialize the retriever
             self.retriever = BM25Retriever.from_documents(self.docs)
             self.is_initialized = True
-
+
         except Exception as e:
             print(f"Error loading documents: {e}")
             raise
@@ -53,17 +55,17 @@ class WikipediaRAGTool(Tool):
         """Process the query and return relevant Wikipedia content."""
         if not self.is_initialized:
            self._load_documents()
-
+
        if not self.retriever:
            return "Error: Retriever not initialized properly."
-
+
        try:
            # Get relevant documents
            results = self.retriever.get_relevant_documents(query)
-
+
            if not results:
                return "No relevant Wikipedia articles found."
-
+
            # Format the results
            formatted_results = []
            for doc in results[:3]:  # Return top 3 most relevant results
@@ -74,8 +76,8 @@ class WikipediaRAGTool(Tool):
                    f"Category: {metadata['category']}\n"
                    f"Content: {doc.page_content[:500]}...\n"
                )
-
+
            return "\n\n".join(formatted_results)
-
+
        except Exception as e:
-            return f"Error retrieving information: {str(e)}"
+            return f"Error retrieving information: {str(e)}"
