Afeezee committed on
Commit
7d64aa2
·
verified ·
1 Parent(s): 4740109

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -24
app.py CHANGED
@@ -69,11 +69,11 @@ def analyze_chunk(chunk):
69
  )
70
  return stream.choices[0].message.content
71
  except Exception as e:
72
- return f"Error while processing chunk: {e}"
73
 
74
 
75
- def generate_docx(content):
76
- """Generates a DOCX file from content."""
77
  document = Document()
78
  document.add_heading("Literature Analysis", level=1)
79
  document.add_paragraph(content)
@@ -83,24 +83,26 @@ def generate_docx(content):
83
 
84
 
85
  def analyze_document(file):
86
- """Processes the document and generates insights."""
87
  text = extract_text_from_file(file)
88
  if text.startswith("Unsupported file format"):
89
- return None, f"**Error:** {text}"
90
 
91
  chunks = chunk_text(text)
92
  all_insights = []
93
- markdown_output = ""
94
 
95
  for i, chunk in enumerate(chunks, 1):
 
96
  result = analyze_chunk(chunk)
97
  if result.strip(): # Only append non-empty results
98
  all_insights.append(result)
99
- markdown_output += f"### Chunk {i} Analysis\n{result}\n\n"
100
 
101
  if not all_insights:
102
- return None, "**Error:** No valid insights were extracted from the document."
103
 
 
104
  consolidated_summary_prompt = (
105
  "Below are insights extracted from multiple chunks. "
106
  "Consolidate these insights into a single output organized as follows: "
@@ -122,37 +124,35 @@ def analyze_document(file):
122
  )
123
  final_summary = ""
124
  for chunk in stream:
125
- final_summary += chunk.choices[0].delta.content or ""
 
126
 
127
- # Return the final summary for display, and the content for DOCX generation
128
- return markdown_output + f"\n\n### Final Summary\n\n{final_summary}", final_summary
 
 
 
129
  except Exception as e:
130
- return None, f"**Error:** An error occurred during consolidation: {e}"
131
 
132
 
 
133
  def interface_logic(file):
134
- """Handles the Gradio interface logic."""
135
- markdown_output, docx_content = analyze_document(file)
136
- if docx_content:
137
- # Generate the DOCX file after analysis is complete
138
- docx_file = generate_docx(docx_content)
139
- return markdown_output, docx_file
140
- else:
141
- return markdown_output, None
142
 
143
 
144
- # Define Gradio interface
145
  interface = gr.Interface(
146
  fn=interface_logic,
147
  inputs=gr.File(label="Upload a PDF or DOCX file"),
148
  outputs=[
149
- gr.Markdown(label="Literature Analysis"),
150
  gr.File(label="Download Analysis as DOCX")
151
  ],
152
  title="Automated Literature Review",
153
  description=(
154
- "Upload a PDF or DOCX document, and this tool will analyze it to extract and consolidate its content. "
155
- "Progress updates will be shown during processing. After analysis, you can download the report as a DOCX file."
156
  ),
157
  )
158
 
 
69
  )
70
  return stream.choices[0].message.content
71
  except Exception as e:
72
+ return f"An error occurred while processing a chunk: {e}"
73
 
74
 
75
+ def save_as_docx(content):
76
+ """Generates and saves a DOCX file."""
77
  document = Document()
78
  document.add_heading("Literature Analysis", level=1)
79
  document.add_paragraph(content)
 
83
 
84
 
85
  def analyze_document(file):
86
+ """Processes and analyzes the uploaded document."""
87
  text = extract_text_from_file(file)
88
  if text.startswith("Unsupported file format"):
89
+ return "**Error:** Unsupported file format. Please upload a valid PDF or DOCX file.", None
90
 
91
  chunks = chunk_text(text)
92
  all_insights = []
93
+ progress_output = ""
94
 
95
  for i, chunk in enumerate(chunks, 1):
96
+ progress_output += f"**Processing chunk {i} of {len(chunks)}...**\n"
97
  result = analyze_chunk(chunk)
98
  if result.strip(): # Only append non-empty results
99
  all_insights.append(result)
100
+ progress_output += f"**Chunk {i} Analysis Complete:**\n{result}\n\n"
101
 
102
  if not all_insights:
103
+ return "**Error:** No valid insights were extracted from the document.", None
104
 
105
+ # Consolidate final summary
106
  consolidated_summary_prompt = (
107
  "Below are insights extracted from multiple chunks. "
108
  "Consolidate these insights into a single output organized as follows: "
 
124
  )
125
  final_summary = ""
126
  for chunk in stream:
127
+ content = chunk.choices[0].delta.content or ""
128
+ final_summary += content
129
 
130
+ progress_output += f"**Final Consolidated Summary:**\n\n{final_summary}"
131
+
132
+ # Generate DOCX file after processing
133
+ docx_file = save_as_docx(final_summary)
134
+ return progress_output, docx_file
135
  except Exception as e:
136
+ return f"**Error:** An error occurred during consolidation: {e}", None
137
 
138
 
139
+ # Define Gradio interface
140
  def interface_logic(file):
141
+ markdown_output, docx_file = analyze_document(file)
142
+ return markdown_output, docx_file
 
 
 
 
 
 
143
 
144
 
 
145
  interface = gr.Interface(
146
  fn=interface_logic,
147
  inputs=gr.File(label="Upload a PDF or DOCX file"),
148
  outputs=[
149
+ gr.Markdown(label="Progress and Analysis"),
150
  gr.File(label="Download Analysis as DOCX")
151
  ],
152
  title="Automated Literature Review",
153
  description=(
154
+ "Upload a PDF or DOCX document. The tool will analyze it chunk by chunk, display progress, and generate a final summary. "
155
+ "You can download the consolidated report as a DOCX file after processing."
156
  ),
157
  )
158