Spaces (status: Sleeping)
Commit: 3c0a133
Parent(s): 81917a3
Commit message: score-45, gpt-4.1
Files changed:
- .gitignore  +6 -0
- .python-version  +1 -0
- app.py  +73 -37
- pyproject.toml  +15 -0
- requirements.txt  +305 -2
- researchgraph/configuration.py  +71 -0
- researchgraph/graph.py  +257 -0
- researchgraph/prompts.py  +27 -0
- researchgraph/schema.py  +10 -0
- researchgraph/state.py  +88 -0
- researchgraph/tools.py  +94 -0
- researchgraph/utils.py  +34 -0
- uv.lock  +0 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
+.env
+.lock
+
+__pycache__/
+.venv/
+
.python-version
ADDED
@@ -0,0 +1 @@
+3.12
app.py
CHANGED
@@ -1,34 +1,48 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
+from dotenv import load_dotenv
+from researchgraph.graph import researchgraph
 
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
+ENV_FILE = ".env"
+if os.path.exists(ENV_FILE):
+    load_dotenv(ENV_FILE)
+
+
 # --- Basic Agent Definition ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
+
+    async def __call__(self, question: str, task_id: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
 
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+        research_result = await researchgraph.ainvoke(
+            {"question": question, "task_id": task_id}
+        )
+        result = research_result.get("info", {})
+        answer = result.get("result", "No answer found.")
+
+        print(f"Agent returning answer: {answer}")
+        return answer
+
+
+async def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -38,13 +52,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
 
-    # 1. Instantiate Agent ( modify this part to create your agent)
+    # 1. Instantiate Agent
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-
+
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
 
@@ -55,16 +69,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -80,22 +94,41 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            submitted_answer = await agent(question_text, task_id)
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
+
+    # ... rest of function remains the same ...
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
+    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(status_update)
 
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-
     # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
@@ -162,20 +195,19 @@ with gr.Blocks() as demo:
 
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +215,18 @@ if __name__ == "__main__":
     else:
         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+        print(
+            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
     else:
-        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+        print(
+            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
 
-    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("-" * (60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.queue().launch(debug=True, share=False)  # Added queue() for async support
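For reference, a minimal sketch (not part of the commit) of driving the new async BasicAgent outside the Gradio UI; it assumes app.py is importable as a module, the required API keys (e.g. OPENAI_API_KEY, TAVILY_API_KEY) are set, and uses a placeholder task_id:

import asyncio

from app import BasicAgent  # importing app.py builds the Gradio UI but does not launch it

async def main() -> None:
    agent = BasicAgent()
    # "dummy-task-id" is a placeholder; real IDs come from the /questions endpoint.
    answer = await agent("What is the capital of France?", task_id="dummy-task-id")
    print(answer)

if __name__ == "__main__":
    asyncio.run(main())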
pyproject.toml
ADDED
@@ -0,0 +1,15 @@
+[project]
+name = "agent-course-final-assesment"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "aiohttp>=3.11.18",
+    "gradio[oauth]>=5.26.0",
+    "langchain>=0.3.24",
+    "langchain-community>=0.3.22",
+    "langchain-core>=0.3.55",
+    "langchain-openai>=0.3.14",
+    "langgraph>=0.3.34",
+]
requirements.txt
CHANGED
@@ -1,2 +1,305 @@
-gradio
-requests
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml --output-file requirements.txt
+aiofiles==24.1.0
+    # via gradio
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.11.18
+    # via
+    #   agent-course-final-assesment (pyproject.toml)
+    #   langchain-community
+aiosignal==1.3.2
+    # via aiohttp
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.9.0
+    # via
+    #   gradio
+    #   httpx
+    #   openai
+    #   starlette
+attrs==25.3.0
+    # via aiohttp
+authlib==1.5.2
+    # via gradio
+certifi==2025.1.31
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==1.17.1
+    # via cryptography
+charset-normalizer==3.4.1
+    # via requests
+click==8.1.8
+    # via
+    #   typer
+    #   uvicorn
+colorama==0.4.6
+    # via
+    #   click
+    #   tqdm
+cryptography==44.0.2
+    # via authlib
+dataclasses-json==0.6.7
+    # via langchain-community
+distro==1.9.0
+    # via openai
+fastapi==0.115.12
+    # via gradio
+ffmpy==0.5.0
+    # via gradio
+filelock==3.18.0
+    # via huggingface-hub
+frozenlist==1.6.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2025.3.2
+    # via
+    #   gradio-client
+    #   huggingface-hub
+gradio==5.26.0
+    # via agent-course-final-assesment (pyproject.toml)
+gradio-client==1.9.0
+    # via gradio
+greenlet==3.2.1
+    # via sqlalchemy
+groovy==0.1.2
+    # via gradio
+h11==0.14.0
+    # via
+    #   httpcore
+    #   uvicorn
+httpcore==1.0.8
+    # via httpx
+httpx==0.28.1
+    # via
+    #   gradio
+    #   gradio-client
+    #   langgraph-sdk
+    #   langsmith
+    #   openai
+    #   safehttpx
+httpx-sse==0.4.0
+    # via langchain-community
+huggingface-hub==0.30.2
+    # via
+    #   gradio
+    #   gradio-client
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+itsdangerous==2.2.0
+    # via gradio
+jinja2==3.1.6
+    # via gradio
+jiter==0.9.0
+    # via openai
+jsonpatch==1.33
+    # via langchain-core
+jsonpointer==3.0.0
+    # via jsonpatch
+langchain==0.3.24
+    # via
+    #   agent-course-final-assesment (pyproject.toml)
+    #   langchain-community
+langchain-community==0.3.22
+    # via agent-course-final-assesment (pyproject.toml)
+langchain-core==0.3.56
+    # via
+    #   agent-course-final-assesment (pyproject.toml)
+    #   langchain
+    #   langchain-community
+    #   langchain-openai
+    #   langchain-text-splitters
+    #   langgraph
+    #   langgraph-checkpoint
+    #   langgraph-prebuilt
+langchain-openai==0.3.14
+    # via agent-course-final-assesment (pyproject.toml)
+langchain-text-splitters==0.3.8
+    # via langchain
+langgraph==0.3.34
+    # via agent-course-final-assesment (pyproject.toml)
+langgraph-checkpoint==2.0.24
+    # via
+    #   langgraph
+    #   langgraph-prebuilt
+langgraph-prebuilt==0.1.8
+    # via langgraph
+langgraph-sdk==0.1.63
+    # via langgraph
+langsmith==0.3.33
+    # via
+    #   langchain
+    #   langchain-community
+    #   langchain-core
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via
+    #   gradio
+    #   jinja2
+marshmallow==3.26.1
+    # via dataclasses-json
+mdurl==0.1.2
+    # via markdown-it-py
+multidict==6.4.3
+    # via
+    #   aiohttp
+    #   yarl
+mypy-extensions==1.1.0
+    # via typing-inspect
+numpy==2.2.5
+    # via
+    #   gradio
+    #   langchain-community
+    #   pandas
+openai==1.76.0
+    # via langchain-openai
+orjson==3.10.16
+    # via
+    #   gradio
+    #   langgraph-sdk
+    #   langsmith
+ormsgpack==1.9.1
+    # via langgraph-checkpoint
+packaging==24.2
+    # via
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   langchain-core
+    #   langsmith
+    #   marshmallow
+pandas==2.2.3
+    # via gradio
+pillow==11.2.1
+    # via gradio
+propcache==0.3.1
+    # via
+    #   aiohttp
+    #   yarl
+pycparser==2.22
+    # via cffi
+pydantic==2.11.3
+    # via
+    #   fastapi
+    #   gradio
+    #   langchain
+    #   langchain-core
+    #   langsmith
+    #   openai
+    #   pydantic-settings
+pydantic-core==2.33.1
+    # via pydantic
+pydantic-settings==2.9.1
+    # via langchain-community
+pydub==0.25.1
+    # via gradio
+pygments==2.19.1
+    # via rich
+python-dateutil==2.9.0.post0
+    # via pandas
+python-dotenv==1.1.0
+    # via pydantic-settings
+python-multipart==0.0.20
+    # via gradio
+pytz==2025.2
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   gradio
+    #   huggingface-hub
+    #   langchain
+    #   langchain-community
+    #   langchain-core
+regex==2024.11.6
+    # via tiktoken
+requests==2.32.3
+    # via
+    #   huggingface-hub
+    #   langchain
+    #   langchain-community
+    #   langsmith
+    #   requests-toolbelt
+    #   tiktoken
+requests-toolbelt==1.0.0
+    # via langsmith
+rich==14.0.0
+    # via typer
+ruff==0.11.7
+    # via gradio
+safehttpx==0.1.6
+    # via gradio
+semantic-version==2.10.0
+    # via gradio
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+sniffio==1.3.1
+    # via
+    #   anyio
+    #   openai
+sqlalchemy==2.0.40
+    # via
+    #   langchain
+    #   langchain-community
+starlette==0.46.2
+    # via
+    #   fastapi
+    #   gradio
+tenacity==9.1.2
+    # via
+    #   langchain-community
+    #   langchain-core
+tiktoken==0.9.0
+    # via langchain-openai
+tomlkit==0.13.2
+    # via gradio
+tqdm==4.67.1
+    # via
+    #   huggingface-hub
+    #   openai
+typer==0.15.2
+    # via gradio
+typing-extensions==4.13.2
+    # via
+    #   anyio
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   langchain-core
+    #   openai
+    #   pydantic
+    #   pydantic-core
+    #   sqlalchemy
+    #   typer
+    #   typing-inspect
+    #   typing-inspection
+typing-inspect==0.9.0
+    # via dataclasses-json
+typing-inspection==0.4.0
+    # via
+    #   pydantic
+    #   pydantic-settings
+tzdata==2025.2
+    # via pandas
+urllib3==2.4.0
+    # via requests
+uvicorn==0.34.2
+    # via gradio
+websockets==15.0.1
+    # via gradio-client
+xxhash==3.5.0
+    # via langgraph
+yarl==1.20.0
+    # via aiohttp
+zstandard==0.23.0
+    # via langsmith
researchgraph/configuration.py
ADDED
@@ -0,0 +1,71 @@
+"""Define the configurable parameters for the agent."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field, fields
+from typing import Annotated, Optional
+
+from langchain_core.runnables import RunnableConfig, ensure_config
+
+from researchgraph import prompts
+from researchgraph import schema
+
+
+@dataclass(kw_only=True)
+class Configuration:
+    """The configuration for the agent."""
+
+    model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field(
+        default="openai/gpt-4.1",
+        metadata={
+            "description": "The name of the language model to use for the agent. "
+            "Should be in the form: provider/model-name."
+        },
+    )
+
+    prompt: str = field(
+        default=prompts.MAIN_PROMPT,
+        metadata={
+            "description": "The main prompt template to use for the agent's interactions. "
+            "Expects two f-string arguments: {info} and {question}."
+        },
+    )
+
+    extraction_schema: dict = field(
+        default_factory=lambda: schema.extraction_schema,
+        metadata={
+            "description": "The schema to use for extracting information from the agent's responses. "
+            "Should be a valid JSON schema."
+        },
+    )
+
+    max_search_results: int = field(
+        default=25,
+        metadata={
+            "description": "The maximum number of search results to return for each search query."
+        },
+    )
+
+    max_info_tool_calls: int = field(
+        default=25,
+        metadata={
+            "description": "The maximum number of times the Info tool can be called during a single interaction."
+        },
+    )
+
+    max_loops: int = field(
+        default=25,
+        metadata={
+            "description": "The maximum number of interaction loops allowed before the agent terminates."
+        },
+    )
+
+    @classmethod
+    def from_runnable_config(
+        cls, config: Optional[RunnableConfig] = None
+    ) -> Configuration:
+        """Load configuration w/ defaults for the given invocation."""
+        config = ensure_config(config)
+        configurable = config.get("configurable") or {}
+        _fields = {f.name for f in fields(cls) if f.init}
+        return cls(**{k: v for k, v in configurable.items() if k in _fields})
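A sketch of how these fields can be overridden per run through the standard "configurable" mapping of a RunnableConfig (the model name and values below are illustrative, not part of the commit):

from researchgraph.configuration import Configuration

config = {"configurable": {"model": "openai/gpt-4o-mini", "max_loops": 5}}
configuration = Configuration.from_runnable_config(config)

print(configuration.model)               # openai/gpt-4o-mini
print(configuration.max_loops)           # 5
print(configuration.max_search_results)  # 25 (default retained)
# Keys in "configurable" that are not dataclass fields are filtered out.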
researchgraph/graph.py
ADDED
@@ -0,0 +1,257 @@
+"""Define a data enrichment agent.
+
+Works with a chat model with tool calling support.
+"""
+
+import json
+from typing import Any, Dict, List, Literal, Optional, cast
+
+from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage
+from langchain_core.runnables import RunnableConfig
+from langgraph.graph import StateGraph
+from langgraph.prebuilt import ToolNode
+from pydantic import BaseModel, Field
+
+from researchgraph import prompts
+from researchgraph.configuration import Configuration
+from researchgraph.state import InputState, OutputState, State
+from researchgraph.tools import scrape_website, search, get_file_content
+from researchgraph.utils import init_model
+
+
+async def call_agent_model(
+    state: State, *, config: Optional[RunnableConfig] = None
+) -> Dict[str, Any]:
+    """Call the primary Language Model (LLM) to decide on the next research action.
+
+    This asynchronous function performs the following steps:
+    1. Initializes configuration and sets up the 'Info' tool, which is the user-defined extraction schema.
+    2. Prepares the prompt and message history for the LLM.
+    3. Initializes and configures the LLM with available tools.
+    4. Invokes the LLM and processes its response.
+    5. Handles the LLM's decision to either continue research or submit final info.
+    """
+    # Load configuration from the provided RunnableConfig
+    configuration = Configuration.from_runnable_config(config)
+
+    # Define the 'Info' tool, which is the user-defined extraction schema
+    info_tool = {
+        "name": "Info",
+        "description": "Call this when you have gathered all the relevant info",
+        "parameters": configuration.extraction_schema,
+    }
+
+    # Define the GetFile tool
+    get_file_tool = {
+        "name": "GetFile",
+        "description": "Fetch content from the scoring system for a given task ID",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "task_id": {
+                    "type": "string",
+                    "description": "The ID of the task/file to fetch",
+                }
+            },
+            "required": ["task_id"],
+        },
+    }
+
+    # Format the prompt defined in prompts.py with the extraction schema, question and task_id
+    p = configuration.prompt.format(
+        info=json.dumps(configuration.extraction_schema, indent=2),
+        question=state.question,
+        task_id=state.task_id,
+    )
+
+    # Create the messages list with the formatted prompt and the previous messages
+    messages = [HumanMessage(content=p)] + state.messages
+
+    # Initialize the raw model with the provided configuration and bind the tools
+    raw_model = init_model(config)
+    model = raw_model.bind_tools(
+        [scrape_website, search, get_file_content, info_tool, get_file_tool],
+        tool_choice="any",
+    )
+    response = cast(AIMessage, await model.ainvoke(messages))
+
+    # Initialize info to None
+    info = None
+
+    # Check if the response has tool calls
+    if response.tool_calls:
+        for tool_call in response.tool_calls:
+            if tool_call["name"] == "Info":
+                info = tool_call["args"]
+                break
+    if info is not None:
+        # The agent is submitting their answer;
+        # ensure it isn't erroneously attempting to simultaneously perform research
+        response.tool_calls = [
+            next(tc for tc in response.tool_calls if tc["name"] == "Info")
+        ]
+    response_messages: List[BaseMessage] = [response]
+    if not response.tool_calls:  # If LLM didn't respect the tool_choice
+        response_messages.append(
+            HumanMessage(content="Please respond by calling one of the provided tools.")
+        )
+    return {
+        "messages": response_messages,
+        "info": info,
+        # Add 1 to the step count
+        "loop_step": 1,
+    }
+
+
+class InfoIsSatisfactory(BaseModel):
+    """Validate whether the current extracted info is satisfactory and complete."""
+
+    reason: List[str] = Field(
+        description="First, provide reasoning for why this is either good or bad as a final result. Must include at least 3 reasons."
+    )
+    is_satisfactory: bool = Field(
+        description="After providing your reasoning, provide a value indicating whether the result is satisfactory. If not, you will continue researching."
+    )
+    improvement_instructions: Optional[str] = Field(
+        description="If the result is not satisfactory, provide clear and specific instructions on what needs to be improved or added to make the information satisfactory."
+        " This should include details on missing information, areas that need more depth, or specific aspects to focus on in further research.",
+        default=None,
+    )
+
+
+async def reflect(
+    state: State, *, config: Optional[RunnableConfig] = None
+) -> Dict[str, Any]:
+    """Validate the quality of the data enrichment agent's output.
+
+    This asynchronous function performs the following steps:
+    1. Prepares the initial prompt using the main prompt template.
+    2. Constructs a message history for the model.
+    3. Prepares a checker prompt to evaluate the presumed info.
+    4. Initializes and configures a language model with structured output.
+    5. Invokes the model to assess the quality of the gathered information.
+    6. Processes the model's response and determines if the info is satisfactory.
+    """
+
+    configuration = Configuration.from_runnable_config(config)
+
+    p = prompts.MAIN_PROMPT.format(
+        info=json.dumps(configuration.extraction_schema, indent=2),
+        question=state.question,
+        task_id=state.task_id,
+    )
+    last_message = state.messages[-1]
+    if not isinstance(last_message, AIMessage):
+        raise ValueError(
+            f"{reflect.__name__} expects the last message in the state to be an AI message with tool calls."
+            f" Got: {type(last_message)}"
+        )
+    messages = [HumanMessage(content=p)] + state.messages[:-1]
+    presumed_info = state.info
+    checker_prompt = """I am thinking of calling the info tool with the info below. \
+Is this good? Give your reasoning as well. \
+You can encourage the Assistant to look at specific URLs if that seems relevant, or do more searches.
+If you don't think it is good, you should be very specific about what could be improved.
+
+{presumed_info}"""
+    p1 = checker_prompt.format(presumed_info=json.dumps(presumed_info or {}, indent=2))
+    messages.append(HumanMessage(content=p1))
+    raw_model = init_model(config)
+    bound_model = raw_model.with_structured_output(InfoIsSatisfactory)
+    response = cast(InfoIsSatisfactory, await bound_model.ainvoke(messages))
+    if response.is_satisfactory and presumed_info:
+        return {
+            "info": presumed_info,
+            "messages": [
+                ToolMessage(
+                    tool_call_id=last_message.tool_calls[0]["id"],
+                    content="\n".join(response.reason),
+                    name="Info",
+                    additional_kwargs={"artifact": response.model_dump()},
+                    status="success",
+                )
+            ],
+        }
+    else:
+        return {
+            "messages": [
+                ToolMessage(
+                    tool_call_id=last_message.tool_calls[0]["id"],
+                    content=f"Unsatisfactory response:\n{response.improvement_instructions}",
+                    name="Info",
+                    additional_kwargs={"artifact": response.model_dump()},
+                    status="error",
+                )
+            ]
+        }
+
+
+def route_after_agent(
+    state: State,
+) -> Literal["reflect", "tools", "call_agent_model", "__end__"]:
+    """Schedule the next node after the agent's action.
+
+    This function determines the next step in the research process based on the
+    last message in the state. It handles three main scenarios:
+
+    1. Error recovery: If the last message is unexpectedly not an AIMessage.
+    2. Info submission: If the agent has called the "Info" tool to submit findings.
+    3. Continued research: If the agent has called any other tool.
+    """
+    last_message = state.messages[-1]
+
+    # If for some reason the last message is not an AIMessage (due to a bug or unexpected behavior elsewhere in the code),
+    # it ensures the system doesn't crash but instead tries to recover by calling the agent model again.
+    if not isinstance(last_message, AIMessage):
+        return "call_agent_model"
+    # If the "Info" tool was called, then the model provided its extraction output. Reflect on the result
+    if last_message.tool_calls and last_message.tool_calls[0]["name"] == "Info":
+        return "reflect"
+    # The last message is a tool call that is not "Info" (extraction output)
+    else:
+        return "tools"
+
+
+def route_after_checker(
+    state: State, config: RunnableConfig
+) -> Literal["__end__", "call_agent_model"]:
+    """Schedule the next node after the checker's evaluation.
+
+    This function determines whether to continue the research process or end it
+    based on the checker's evaluation and the current state of the research.
+    """
+    configurable = Configuration.from_runnable_config(config)
+    last_message = state.messages[-1]
+
+    if state.loop_step < configurable.max_loops:
+        if not state.info:
+            return "call_agent_model"
+        if not isinstance(last_message, ToolMessage):
+            raise ValueError(
+                f"{route_after_checker.__name__} expected a tool message. Received: {type(last_message)}."
+            )
+        if last_message.status == "error":
+            # Research deemed unsatisfactory
+            return "call_agent_model"
+        # It's great!
+        return "__end__"
+    else:
+        return "__end__"
+
+
+# Create the researcher graph
+researcher_workflow = StateGraph(
+    State, input=InputState, output=OutputState, config_schema=Configuration
+)
+researcher_workflow.add_node(call_agent_model)
+researcher_workflow.add_node(reflect)
+researcher_workflow.add_node(
+    "tools", ToolNode([search, scrape_website, get_file_content])
+)
+researcher_workflow.add_edge("__start__", "call_agent_model")
+researcher_workflow.add_conditional_edges("call_agent_model", route_after_agent)
+researcher_workflow.add_edge("tools", "call_agent_model")
+researcher_workflow.add_conditional_edges("reflect", route_after_checker)
+
+researchgraph = researcher_workflow.compile()
+researchgraph.name = "Agent"
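A sketch of invoking the compiled graph directly (this mirrors what BasicAgent does in app.py; the question and task_id below are placeholders, and the per-run max_loops override is shown as an assumption):

import asyncio

from researchgraph.graph import researchgraph

async def main() -> None:
    result = await researchgraph.ainvoke(
        {"question": "Who wrote 'Le Petit Prince'?", "task_id": "dummy-task-id"},
        config={"configurable": {"max_loops": 10}},
    )
    # OutputState exposes the extracted info dict with its "result" field.
    print(result.get("info", {}).get("result"))

asyncio.run(main())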
researchgraph/prompts.py
ADDED
@@ -0,0 +1,27 @@
+"""Default prompts used in this project."""
+
+MAIN_PROMPT = """You are a general AI assistant. I will ask you a question.
+Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+
+<info>
+{info}
+</info>
+
+You have access to the following tools:
+
+- `Search`: call this tool to find relevant web sources.
+- `ScrapeWebsite`: use this to extract detailed insights from specific web pages. This will update your notes.
+- `GetFile`: use this to fetch specific task file content. You can access the file using the task ID: {task_id}
+- `Info`: call this when you have collected and structured all the necessary information.
+
+Here is the question you need to uncover:
+
+question: {question}
+
+Be thorough, organize your findings according to the above structure, and validate for accuracy and completeness.
+"""
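The template takes three format arguments ({info}, {question}, {task_id}); a sketch of how call_agent_model in researchgraph/graph.py fills it (the question and task_id below are placeholders):

import json

from researchgraph import prompts, schema

p = prompts.MAIN_PROMPT.format(
    info=json.dumps(schema.extraction_schema, indent=2),
    question="What is 2 + 2?",
    task_id="dummy-task-id",
)
print(p)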
researchgraph/schema.py
ADDED
@@ -0,0 +1,10 @@
+extraction_schema = {
+    "type": "object",
+    "properties": {
+        "result": {
+            "type": "string",
+            "description": "The answer to the question"
+        }
+    },
+    "required": ["result"]
+}
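Under this schema, the "Info" tool call that ends a run carries a single required string; an illustrative payload (not from the commit):

# The tool-call args the agent is expected to produce for "Info":
info_args = {"result": "Paris"}
assert isinstance(info_args["result"], str)  # "result" is the only required key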
researchgraph/state.py
ADDED
@@ -0,0 +1,88 @@
+"""State definitions.
+
+State is the interface between the graph and end user as well as the
+data model used internally by the graph.
+"""
+
+import operator
+from dataclasses import dataclass, field
+from typing import Annotated, Any, List, Optional
+
+from langchain_core.messages import BaseMessage
+from langgraph.graph import add_messages
+
+
+@dataclass(kw_only=True)
+class InputState:
+    """Input state defines the interface between the graph and the user (external API)."""
+
+    question: str
+    "The question for which the agent is tasked to gather information."
+
+    task_id: str
+    "The ID of the task being processed"
+
+    info: Optional[dict[str, Any]] = field(default=None)
+    "The info state tracks the current extracted data for the given question, conforming to the provided schema. This is primarily populated by the agent."
+
+
+@dataclass(kw_only=True)
+class State(InputState):
+    """A graph's State defines three main things.
+
+    1. The structure of the data to be passed between nodes (which "channels" to read from/write to and their types)
+    2. Default values for each field
+    3. Reducers for the state's fields. Reducers are functions that determine how to apply updates to the state.
+    See [Reducers](https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers) for more information.
+    """
+
+    messages: Annotated[List[BaseMessage], add_messages] = field(default_factory=list)
+    """
+    Messages track the primary execution state of the agent.
+
+    Typically accumulates a pattern of:
+
+    1. HumanMessage - user input
+    2. AIMessage with .tool_calls - agent picking tool(s) to use to collect
+        information
+    3. ToolMessage(s) - the responses (or errors) from the executed tools
+
+        (... repeat steps 2 and 3 as needed ...)
+    4. AIMessage without .tool_calls - agent responding in unstructured
+        format to the user.
+
+    5. HumanMessage - user responds with the next conversational turn.
+
+        (... repeat steps 2-5 as needed ... )
+
+    Merges two lists of messages, updating existing messages by ID.
+
+    By default, this ensures the state is "append-only", unless the
+    new message has the same ID as an existing message.
+
+    Returns:
+        A new list of messages with the messages from `right` merged into `left`.
+        If a message in `right` has the same ID as a message in `left`, the
+        message from `right` will replace the message from `left`.
+    """
+
+    loop_step: Annotated[int, operator.add] = field(default=0)
+
+    # Feel free to add additional attributes to your state as needed.
+    # Common examples include retrieved documents, extracted entities, API connections, etc.
+
+
+@dataclass(kw_only=True)
+class OutputState:
+    """The response object for the end user.
+
+    This class defines the structure of the output that will be provided
+    to the user after the graph's execution is complete.
+    """
+
+    info: dict[str, Any]
+    """
+    A dictionary containing the extracted and processed information
+    based on the user's query and the graph's execution.
+    This is the primary output of the enrichment process.
+    """
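A small sketch of the two reducers declared above (illustrative, not part of the commit): add_messages merges message lists by ID, and operator.add accumulates loop_step, so a node returning {"loop_step": 1} increments the counter:

import operator

from langchain_core.messages import HumanMessage
from langgraph.graph import add_messages

left = [HumanMessage(content="first", id="1")]
right = [HumanMessage(content="second", id="2")]
print(len(add_messages(left, right)))  # 2 -- appended, since the IDs differ

print(operator.add(0, 1))  # 1 -- how each node's loop_step update is applied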
researchgraph/tools.py
ADDED
@@ -0,0 +1,94 @@
+"""Tools for data enrichment.
+
+This module contains functions that are directly exposed to the LLM as tools.
+These tools can be used for tasks such as web searching and scraping.
+Users can edit and extend these tools as needed.
+"""
+
+import json
+from typing import Any, Optional, cast
+
+import aiohttp
+from langchain_community.tools.tavily_search import TavilySearchResults
+from langchain_core.runnables import RunnableConfig
+from langchain_core.tools import InjectedToolArg
+from langgraph.prebuilt import InjectedState
+from typing_extensions import Annotated
+
+from researchgraph.configuration import Configuration
+from researchgraph.state import State
+from researchgraph.utils import init_model
+
+
+async def get_file_content(
+    task_id: str, *, config: Annotated[RunnableConfig, InjectedToolArg]
+) -> Optional[str]:
+    """Fetch and process a file from the scoring system.
+
+    Args:
+        task_id: The ID of the task/file to fetch.
+        config: Runtime configuration.
+
+    Returns:
+        Optional[str]: The content of the file if successful, None otherwise.
+    """
+    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            if response.status == 200:
+                return await response.text()
+    return None
+
+
+async def search(
+    query: str, *, config: Annotated[RunnableConfig, InjectedToolArg]
+) -> Optional[list[dict[str, Any]]]:
+    """Query a search engine.
+
+    This function queries the web to fetch comprehensive, accurate, and trusted results. It's particularly useful
+    for answering questions about current events. Provide as much context in the query as needed to ensure high recall.
+    """
+    configuration = Configuration.from_runnable_config(config)
+    wrapped = TavilySearchResults(max_results=configuration.max_search_results)
+    result = await wrapped.ainvoke({"query": query})
+    return cast(list[dict[str, Any]], result)
+
+
+_INFO_PROMPT = """You are doing web research on behalf of a user. You are trying to find out this information:
+
+<info>
+{info}
+</info>
+
+You just scraped the following website: {url}
+
+Based on the website content below, jot down some notes about the website.
+
+<Website content>
+{content}
+</Website content>"""
+
+
+async def scrape_website(
+    url: str,
+    *,
+    state: Annotated[State, InjectedState],
+    config: Annotated[RunnableConfig, InjectedToolArg],
+) -> str:
+    """Scrape and summarize content from a given URL.
+
+    Returns:
+        str: A summary of the scraped content, tailored to the extraction schema.
+    """
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            content = await response.text()
+    configuration = Configuration.from_runnable_config(config)
+    p = _INFO_PROMPT.format(
+        info=json.dumps(configuration.extraction_schema, indent=2),
+        url=url,
+        content=content[:40_000],
+    )
+    raw_model = init_model(config)
+    result = await raw_model.ainvoke(p)
+    return str(result.content)
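A sketch of exercising get_file_content outside the graph (the task_id is a placeholder; inside the graph, config is injected rather than passed by hand):

import asyncio

from researchgraph.tools import get_file_content

async def main() -> None:
    content = await get_file_content("dummy-task-id", config={})
    print(content[:200] if content else "No file for this task.")

asyncio.run(main())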
researchgraph/utils.py
ADDED
@@ -0,0 +1,34 @@
+"""Utility functions used in our graph."""
+
+from typing import Optional
+
+from langchain.chat_models import init_chat_model
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import AnyMessage
+from langchain_core.runnables import RunnableConfig
+
+from researchgraph.configuration import Configuration
+
+
+def get_message_text(msg: AnyMessage) -> str:
+    """Get the text content of a message."""
+    content = msg.content
+    if isinstance(content, str):
+        return content
+    elif isinstance(content, dict):
+        return content.get("text", "")
+    else:
+        txts = [c if isinstance(c, str) else (c.get("text") or "") for c in content]
+        return "".join(txts).strip()
+
+
+def init_model(config: Optional[RunnableConfig] = None) -> BaseChatModel:
+    """Initialize the configured chat model."""
+    configuration = Configuration.from_runnable_config(config)
+    fully_specified_name = configuration.model
+    if "/" in fully_specified_name:
+        provider, model = fully_specified_name.split("/", maxsplit=1)
+    else:
+        provider = None
+        model = fully_specified_name
+    return init_chat_model(model, model_provider=provider)
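The provider/model split that init_model performs on the default "openai/gpt-4.1" can be checked in isolation (illustrative sketch):

fully_specified_name = "openai/gpt-4.1"
provider, model = fully_specified_name.split("/", maxsplit=1)
assert (provider, model) == ("openai", "gpt-4.1")
# init_chat_model("gpt-4.1", model_provider="openai") is then called.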
uv.lock
ADDED
The diff for this file is too large to render. See raw diff.