import os

from cerebras.cloud.sdk import Cerebras
from PyPDF2 import PdfReader
from docx import Document
import gradio as gr

# API key is read from the "LitReview" environment variable
Cerekey = os.getenv("LitReview")

# Initialize Cerebras AI client with the API key
client = Cerebras(api_key=Cerekey)


def extract_text_from_file(file):
    """Extracts text from uploaded PDF or DOCX files."""
    if file.name.lower().endswith(".pdf"):
        reader = PdfReader(file)
        text = ""
        for page in reader.pages:
            # extract_text() can return None for image-only pages
            text += page.extract_text() or ""
        return text
    elif file.name.lower().endswith(".docx"):
        doc = Document(file)
        text = "\n".join([p.text for p in doc.paragraphs])
        return text
    else:
        return "Unsupported file format. Please upload a PDF or DOCX file."


def chunk_text(text, max_chars=4000):
    """Splits text into manageable chunks of roughly max_chars characters."""
    words = text.split()
    chunks = []
    current_chunk = []
    for word in words:
        current_chunk.append(word)
        if len(" ".join(current_chunk)) > max_chars:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


def analyze_chunk(chunk):
    """Analyzes a single chunk using the Cerebras model."""
    messages = [
        {
            "role": "system",
            "content": (
                "You are an experienced scholar tasked with analyzing research articles. "
                "Focus on extracting insights such as Author, Year, Title; Problem addressed; "
                "Methodology; Results; and Remarks. Only include relevant content."
            )
        },
        {"role": "user", "content": chunk}
    ]
    try:
        response = client.chat.completions.create(
            messages=messages,
            model="llama-3.3-70b",
            stream=False,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"An error occurred while processing a chunk: {e}"


def save_as_docx(content):
    """Generates and saves a DOCX file containing the analysis."""
    document = Document()
    document.add_heading("Literature Analysis", level=1)
    document.add_paragraph(content)
    file_path = "Literature_Analysis.docx"
    document.save(file_path)
    return file_path


def analyze_document(file):
    """Processes and analyzes the uploaded document."""
    text = extract_text_from_file(file)
    if text.startswith("Unsupported file format"):
        return "**Error:** Unsupported file format. Please upload a valid PDF or DOCX file.", None

    chunks = chunk_text(text)
    all_insights = []
    progress_output = ""

    for i, chunk in enumerate(chunks, 1):
        progress_output += f"**Processing chunk {i} of {len(chunks)}...**\n"
        result = analyze_chunk(chunk)
        if result.strip():  # Only append non-empty results
            all_insights.append(result)
            progress_output += f"**Chunk {i} Analysis Complete:**\n{result}\n\n"

    if not all_insights:
        return "**Error:** No valid insights were extracted from the document.", None

    # Consolidate the per-chunk insights into a final summary
    consolidated_summary_prompt = (
        "Below are insights extracted from multiple chunks. "
        "Consolidate these insights into a single output organized as follows: "
        "Author, Year, Title; Problem addressed; Methodology; Results; and Remarks. "
        "Make the output concise and coherent."
    )
    try:
        stream = client.chat.completions.create(
            messages=[
                {"role": "system", "content": consolidated_summary_prompt},
                {"role": "user", "content": "\n\n".join(all_insights)}
            ],
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1
        )
        # Accumulate the streamed completion into a single string
        final_summary = ""
        for event in stream:
            content = event.choices[0].delta.content or ""
            final_summary += content
        progress_output += f"**Final Consolidated Summary:**\n\n{final_summary}"

        # Generate the downloadable DOCX file after processing
        docx_file = save_as_docx(final_summary)
        return progress_output, docx_file
    except Exception as e:
        return f"**Error:** An error occurred during consolidation: {e}", None


# Define the Gradio interface
def interface_logic(file):
    markdown_output, docx_file = analyze_document(file)
    return markdown_output, docx_file


interface = gr.Interface(
    fn=interface_logic,
    inputs=gr.File(label="Upload a PDF or DOCX file"),
    outputs=[
        gr.Markdown(label="Progress and Analysis"),
        gr.File(label="Download Analysis as DOCX")
    ],
    title="Automated Literature Review",
    description=(
        "Upload a PDF or DOCX document. The tool will analyze it chunk by chunk, display progress, "
        "and generate a final summary. You can download the consolidated report as a DOCX file "
        "after processing."
    ),
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()