from smolagents import Tool


class SimpleTool(Tool):
    """Tool that extracts the text of every PDF file found in a folder.

    The PDF reader class from `pypdf` is resolved once in `__init__` and
    stored on the instance; `forward` performs the actual extraction.
    """

    name = "pdf_extraction"
    description = """Reads and extracts the text from all PDF files in the given folder and returns the combined text."""
    inputs = {
        "path": {
            "type": "string",
            "description": "Folder location of PDF files",
            "default": "pdfs",
            "nullable": True,
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        """Initialise the tool and bind the `pypdf` reader class.

        Raises:
            ImportError: If the `pypdf` package is not installed.
        """
        super().__init__(*args, **kwargs)
        try:
            from pypdf import PdfReader
        except ImportError as err:
            raise ImportError(
                "You must install package `pypdf` to run this tool: for instance, run `pip install pypdf`."
            ) from err
        self.reader_class = PdfReader

    def forward(self, path: str = "pdfs") -> str:
        """Extract and combine the text of all PDF files in `path`.

        Args:
            path: Folder containing the PDF files. `None` is treated as the
                default folder "pdfs", since the input is declared nullable.

        Returns:
            One section per PDF (sorted by file name), each introduced by a
            "### File: <name>" header and separated by blank lines. A file
            that cannot be read contributes an error line instead of text.
            If the folder is missing, is not a folder, or holds no PDF files,
            a human-readable message describing the problem is returned.
        """
        # Imported locally so the method is self-contained, mirroring the
        # lazy `pypdf` import in `__init__`; the module has no top-level
        # `import os`.
        import os

        # The input schema marks `path` as nullable, so fall back to the
        # declared default rather than handing None to os.path.
        if path is None:
            path = "pdfs"

        if not os.path.exists(path):
            return f"Error: The folder '{path}' does not exist."
        if not os.path.isdir(path):
            # os.listdir would raise on a regular file; report it the same
            # way other problems are reported by this tool.
            return f"Error: '{path}' is not a folder."

        # Match the extension case-insensitively (".PDF" is common) and sort
        # so the output order does not depend on the filesystem.
        pdf_files = sorted(
            entry for entry in os.listdir(path) if entry.lower().endswith(".pdf")
        )
        if not pdf_files:
            return f"No PDF files found in the folder '{path}'."

        combined_text = []
        for pdf_file in pdf_files:
            pdf_path = os.path.join(path, pdf_file)
            try:
                reader = self.reader_class(pdf_path)
                # `or ""` is a defensive guard in case a page yields no text
                # object at all; pages are concatenated without a separator.
                file_text = "".join(
                    page.extract_text() or "" for page in reader.pages
                )
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch.
                combined_text.append(
                    f"### File: {pdf_file}\nError reading file: {e}"
                )
            else:
                combined_text.append(f"### File: {pdf_file}\n{file_text.strip()}")

        return "\n\n".join(combined_text)