"""Tools for data enrichment.

This module contains functions that are directly exposed to the LLM as tools.
These tools can be used for tasks such as web searching and scraping.
Users can edit and extend these tools as needed.
"""

import json
from typing import Any, Optional, cast
from urllib.parse import quote

import aiohttp
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import InjectedToolArg
from langgraph.prebuilt import InjectedState
from typing_extensions import Annotated

from researchgraph.configuration import Configuration
from researchgraph.state import State
from researchgraph.utils import init_model

# Base URL of the scoring service's file endpoint; the task ID is appended
# as the final path segment.
_FILES_BASE_URL = "https://agents-course-unit4-scoring.hf.space/files"

# Maximum number of characters of scraped page text placed into the prompt.
_MAX_CONTENT_CHARS = 40_000


async def get_file_content(
    task_id: str, *, config: Annotated[RunnableConfig, InjectedToolArg]
) -> Optional[str]:
    """Fetch and process a file from the scoring system.

    Args:
        task_id: The ID of the task/file to fetch.
        config: Runtime configuration. Not read by this function; it is part
            of the tool signature.

    Returns:
        Optional[str]: The content of the file if successful (HTTP 200),
        None otherwise.
    """
    # Percent-encode the ID so characters such as "/", "?" or "#" stay inside
    # the final path segment instead of changing which URL is requested.
    encoded_id = quote(task_id, safe="")
    url = f"{_FILES_BASE_URL}/{encoded_id}"

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            if response.status != 200:
                return None
            return await response.text()


async def search(
    query: str, *, config: Annotated[RunnableConfig, InjectedToolArg]
) -> Optional[list[dict[str, Any]]]:
    """Query a search engine.

    This function queries the web to fetch comprehensive, accurate, and trusted results. It's particularly useful
    for answering questions about current events. Provide as much context in the query as needed to ensure high recall.
    """
    configuration = Configuration.from_runnable_config(config)
    # The number of results is capped by the runtime configuration.
    tavily = TavilySearchResults(max_results=configuration.max_search_results)
    results = await tavily.ainvoke({"query": query})
    return cast(list[dict[str, Any]], results)


# Prompt template for summarizing a scraped page. Written as adjacent string
# literals so the exact runtime text (including trailing spaces and the single
# newline) is explicit. Placeholders: {info}, {url}, {content}.
_INFO_PROMPT = (
    "You are doing web research on behalf of a user. \n"
    "You are trying to find out this information: {info} "
    "You just scraped the following website: {url} "
    "Based on the website content below, jot down some notes about the website. "
    "{content} "
)


async def scrape_website(
    url: str,
    *,
    state: Annotated[State, InjectedState],
    config: Annotated[RunnableConfig, InjectedToolArg],
) -> str:
    """Scrape and summarize content from a given URL.

    Args:
        url: Address of the page to download.
        state: Injected graph state. Not read by this function.
        config: Runtime configuration; supplies the extraction schema and is
            passed to ``init_model`` to build the model.

    Returns:
        str: A summary of the scraped content, tailored to the extraction schema.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # NOTE(review): the HTTP status is not checked, so error pages are
            # summarized like any other content.
            # Decode leniently: a single mis-encoded byte on a page should not
            # abort the whole scrape with UnicodeDecodeError.
            content = await response.text(errors="replace")

    configuration = Configuration.from_runnable_config(config)
    prompt = _INFO_PROMPT.format(
        info=json.dumps(configuration.extraction_schema, indent=2),
        url=url,
        # Only the leading part of the page is sent to the model.
        content=content[:_MAX_CONTENT_CHARS],
    )
    raw_model = init_model(config)
    result = await raw_model.ainvoke(prompt)
    return str(result.content)