Spaces:

Afeezee
/

Literature_Review_App

Sleeping

App Files Community

Afeezee committed on Dec 27, 2024

Commit

4740109

verified ·

1 Parent(s): 9fc8a1b

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -48

app.py CHANGED Viewed

@@ -26,11 +26,9 @@ def extract_text_from_file(file):
     else:
         return "Unsupported file format. Please upload a PDF or DOCX file."
 def chunk_text(text, max_tokens=4000):
-    """
-    Splits text into chunks small enough for the Llama model to process.
-    Each chunk is limited to `max_tokens` for safe processing.
-    """
     words = text.split()
     chunks = []
     current_chunk = []
@@ -45,25 +43,19 @@ def chunk_text(text, max_tokens=4000):
     return chunks
 def analyze_chunk(chunk):
-    """
-    Analyzes a single chunk of text using the Cerebras model.
-    """
     messages = [
         {
             "role": "system",
             "content": (
                 "You are an experienced scholar tasked with analyzing research articles. "
-                "Focus on extracting insights based on: Author (APA format with et al if applicable), Year of publication, Title of the article; "
-                "Problem addressed; Methodology (datasets, tools, techniques, algorithms); "
-                "Results (specific, quantifiable metrics); and Remarks (strengths, weaknesses, improvements). "
-                "Summarize only insights related to these fields and disregard irrelevant content."
             )
         },
-        {
-            "role": "user",
-            "content": chunk
-        }
     ]
     try:
@@ -77,49 +69,43 @@ def analyze_chunk(chunk):
         )
         return stream.choices[0].message.content
     except Exception as e:
-        return f"An error occurred while processing a chunk: {e}"
-def save_as_docx(content, file_name="Literature_Analysis.docx"):
-    """
-    Saves the given content to a DOCX file.
-    Parameters:
-        content (str): The text content to save.
-        file_name (str): The name of the DOCX file (default: 'Literature_Analysis.docx').
-    """
     document = Document()
     document.add_heading("Literature Analysis", level=1)
     document.add_paragraph(content)
-    document.save(file_name)
-    return file_name
 def analyze_document(file):
-    """Processes and analyzes the uploaded document."""
     text = extract_text_from_file(file)
     if text.startswith("Unsupported file format"):
-        yield f"**Error:** {text}"
-        return
     chunks = chunk_text(text)
     all_insights = []
-    yield "**Processing the document. Please wait...**\n"
     for i, chunk in enumerate(chunks, 1):
-        yield f"**Processing chunk {i} of {len(chunks)}...**"
         result = analyze_chunk(chunk)
         if result.strip():  # Only append non-empty results
             all_insights.append(result)
     if not all_insights:
-        yield "**Error:** No valid insights were extracted from the document."
-        return
-    yield "**Consolidating all insights into a final summary...**"
     consolidated_summary_prompt = (
-        "Below are insights extracted from multiple chunks of a document. "
         "Consolidate these insights into a single output organized as follows: "
         "Author, Year, Title; Problem addressed; Methodology; Results; and Remarks. "
-        "Make the final output concise and coherent."
     )
     try:
@@ -136,27 +122,37 @@ def analyze_document(file):
         )
         final_summary = ""
         for chunk in stream:
-            content = chunk.choices[0].delta.content or ""
-            final_summary += content
-        # Show the final summary first
-        yield f"**Final Summary:**\n\n{final_summary}"
-        # Save the final summary as a .docx file
-        docx_file = save_as_docx(final_summary)
-        yield f"**Download the DOCX file here:** [Download {docx_file}](file:///{docx_file})"
     except Exception as e:
-        yield f"**Error:** An error occurred during consolidation: {e}"
-# Define the Gradio interface
 interface = gr.Interface(
-    fn=analyze_document,
     inputs=gr.File(label="Upload a PDF or DOCX file"),
-    outputs=gr.Markdown(label="Literature Analysis"),
     title="Automated Literature Review",
     description=(
         "Upload a PDF or DOCX document, and this tool will analyze it to extract and consolidate its content. "
-        "It might take a while, be patient. You are advised to upload smaller documents with shorter text as it may take a while to process longer files."
     ),
 )

     else:
         return "Unsupported file format. Please upload a PDF or DOCX file."
 def chunk_text(text, max_tokens=4000):
+    """Splits text into manageable chunks."""
     words = text.split()
     chunks = []
     current_chunk = []
     return chunks
 def analyze_chunk(chunk):
+    """Analyzes a single chunk using the Cerebras model."""
     messages = [
         {
             "role": "system",
             "content": (
                 "You are an experienced scholar tasked with analyzing research articles. "
+                "Focus on extracting insights such as Author, Year, Title; Problem addressed; "
+                "Methodology; Results; and Remarks. Only include relevant content."
             )
         },
+        {"role": "user", "content": chunk}
     ]
     try:
         )
         return stream.choices[0].message.content
     except Exception as e:
+        return f"Error while processing chunk: {e}"
+def generate_docx(content):
+    """Generates a DOCX file from content."""
     document = Document()
     document.add_heading("Literature Analysis", level=1)
     document.add_paragraph(content)
+    file_path = "Literature_Analysis.docx"
+    document.save(file_path)
+    return file_path
 def analyze_document(file):
+    """Processes the document and generates insights."""
     text = extract_text_from_file(file)
     if text.startswith("Unsupported file format"):
+        return None, f"**Error:** {text}"
     chunks = chunk_text(text)
     all_insights = []
+    markdown_output = ""
     for i, chunk in enumerate(chunks, 1):
         result = analyze_chunk(chunk)
         if result.strip():  # Only append non-empty results
             all_insights.append(result)
+            markdown_output += f"### Chunk {i} Analysis\n{result}\n\n"
     if not all_insights:
+        return None, "**Error:** No valid insights were extracted from the document."
     consolidated_summary_prompt = (
+        "Below are insights extracted from multiple chunks. "
         "Consolidate these insights into a single output organized as follows: "
         "Author, Year, Title; Problem addressed; Methodology; Results; and Remarks. "
+        "Make the output concise and coherent."
     )
     try:
         )
         final_summary = ""
         for chunk in stream:
+            final_summary += chunk.choices[0].delta.content or ""
+        # Return the final summary for display, and the content for DOCX generation
+        return markdown_output + f"\n\n### Final Summary\n\n{final_summary}", final_summary
     except Exception as e:
+        return None, f"**Error:** An error occurred during consolidation: {e}"
+def interface_logic(file):
+    """Handles the Gradio interface logic."""
+    markdown_output, docx_content = analyze_document(file)
+    if docx_content:
+        # Generate the DOCX file after analysis is complete
+        docx_file = generate_docx(docx_content)
+        return markdown_output, docx_file
+    else:
+        return markdown_output, None
+# Define Gradio interface
 interface = gr.Interface(
+    fn=interface_logic,
     inputs=gr.File(label="Upload a PDF or DOCX file"),
+    outputs=[
+        gr.Markdown(label="Literature Analysis"),
+        gr.File(label="Download Analysis as DOCX")
+    ],
     title="Automated Literature Review",
     description=(
         "Upload a PDF or DOCX document, and this tool will analyze it to extract and consolidate its content. "
+        "Progress updates will be shown during processing. After analysis, you can download the report as a DOCX file."
     ),
 )