Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -69,11 +69,11 @@ def analyze_chunk(chunk):
|
|
69 |
)
|
70 |
return stream.choices[0].message.content
|
71 |
except Exception as e:
|
72 |
-
return f"
|
73 |
|
74 |
|
75 |
-
def
|
76 |
-
"""Generates a DOCX file
|
77 |
document = Document()
|
78 |
document.add_heading("Literature Analysis", level=1)
|
79 |
document.add_paragraph(content)
|
@@ -83,24 +83,26 @@ def generate_docx(content):
|
|
83 |
|
84 |
|
85 |
def analyze_document(file):
|
86 |
-
"""Processes
|
87 |
text = extract_text_from_file(file)
|
88 |
if text.startswith("Unsupported file format"):
|
89 |
-
return
|
90 |
|
91 |
chunks = chunk_text(text)
|
92 |
all_insights = []
|
93 |
-
|
94 |
|
95 |
for i, chunk in enumerate(chunks, 1):
|
|
|
96 |
result = analyze_chunk(chunk)
|
97 |
if result.strip(): # Only append non-empty results
|
98 |
all_insights.append(result)
|
99 |
-
|
100 |
|
101 |
if not all_insights:
|
102 |
-
return
|
103 |
|
|
|
104 |
consolidated_summary_prompt = (
|
105 |
"Below are insights extracted from multiple chunks. "
|
106 |
"Consolidate these insights into a single output organized as follows: "
|
@@ -122,37 +124,35 @@ def analyze_document(file):
|
|
122 |
)
|
123 |
final_summary = ""
|
124 |
for chunk in stream:
|
125 |
-
|
|
|
126 |
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
129 |
except Exception as e:
|
130 |
-
return
|
131 |
|
132 |
|
|
|
133 |
def interface_logic(file):
|
134 |
-
|
135 |
-
markdown_output,
|
136 |
-
if docx_content:
|
137 |
-
# Generate the DOCX file after analysis is complete
|
138 |
-
docx_file = generate_docx(docx_content)
|
139 |
-
return markdown_output, docx_file
|
140 |
-
else:
|
141 |
-
return markdown_output, None
|
142 |
|
143 |
|
144 |
-
# Define Gradio interface
|
145 |
interface = gr.Interface(
|
146 |
fn=interface_logic,
|
147 |
inputs=gr.File(label="Upload a PDF or DOCX file"),
|
148 |
outputs=[
|
149 |
-
gr.Markdown(label="
|
150 |
gr.File(label="Download Analysis as DOCX")
|
151 |
],
|
152 |
title="Automated Literature Review",
|
153 |
description=(
|
154 |
-
"Upload a PDF or DOCX document
|
155 |
-
"
|
156 |
),
|
157 |
)
|
158 |
|
|
|
69 |
)
|
70 |
return stream.choices[0].message.content
|
71 |
except Exception as e:
|
72 |
+
return f"An error occurred while processing a chunk: {e}"
|
73 |
|
74 |
|
75 |
+
def save_as_docx(content):
|
76 |
+
"""Generates and saves a DOCX file."""
|
77 |
document = Document()
|
78 |
document.add_heading("Literature Analysis", level=1)
|
79 |
document.add_paragraph(content)
|
|
|
83 |
|
84 |
|
85 |
def analyze_document(file):
|
86 |
+
"""Processes and analyzes the uploaded document."""
|
87 |
text = extract_text_from_file(file)
|
88 |
if text.startswith("Unsupported file format"):
|
89 |
+
return "**Error:** Unsupported file format. Please upload a valid PDF or DOCX file.", None
|
90 |
|
91 |
chunks = chunk_text(text)
|
92 |
all_insights = []
|
93 |
+
progress_output = ""
|
94 |
|
95 |
for i, chunk in enumerate(chunks, 1):
|
96 |
+
progress_output += f"**Processing chunk {i} of {len(chunks)}...**\n"
|
97 |
result = analyze_chunk(chunk)
|
98 |
if result.strip(): # Only append non-empty results
|
99 |
all_insights.append(result)
|
100 |
+
progress_output += f"**Chunk {i} Analysis Complete:**\n{result}\n\n"
|
101 |
|
102 |
if not all_insights:
|
103 |
+
return "**Error:** No valid insights were extracted from the document.", None
|
104 |
|
105 |
+
# Consolidate final summary
|
106 |
consolidated_summary_prompt = (
|
107 |
"Below are insights extracted from multiple chunks. "
|
108 |
"Consolidate these insights into a single output organized as follows: "
|
|
|
124 |
)
|
125 |
final_summary = ""
|
126 |
for chunk in stream:
|
127 |
+
content = chunk.choices[0].delta.content or ""
|
128 |
+
final_summary += content
|
129 |
|
130 |
+
progress_output += f"**Final Consolidated Summary:**\n\n{final_summary}"
|
131 |
+
|
132 |
+
# Generate DOCX file after processing
|
133 |
+
docx_file = save_as_docx(final_summary)
|
134 |
+
return progress_output, docx_file
|
135 |
except Exception as e:
|
136 |
+
return f"**Error:** An error occurred during consolidation: {e}", None
|
137 |
|
138 |
|
139 |
+
# Define Gradio interface
|
140 |
def interface_logic(file):
|
141 |
+
markdown_output, docx_file = analyze_document(file)
|
142 |
+
return markdown_output, docx_file
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
|
|
|
145 |
interface = gr.Interface(
|
146 |
fn=interface_logic,
|
147 |
inputs=gr.File(label="Upload a PDF or DOCX file"),
|
148 |
outputs=[
|
149 |
+
gr.Markdown(label="Progress and Analysis"),
|
150 |
gr.File(label="Download Analysis as DOCX")
|
151 |
],
|
152 |
title="Automated Literature Review",
|
153 |
description=(
|
154 |
+
"Upload a PDF or DOCX document. The tool will analyze it chunk by chunk, display progress, and generate a final summary. "
|
155 |
+
"You can download the consolidated report as a DOCX file after processing."
|
156 |
),
|
157 |
)
|
158 |
|