Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,6 @@ Cerekey = os.getenv("LitReview")
|
|
9 |
# Initialize Cerebras AI client with the API key
|
10 |
client = Cerebras(api_key=Cerekey)
|
11 |
|
12 |
-
|
13 |
def extract_text_from_file(file):
|
14 |
"""Extracts text from uploaded PDF or DOCX files."""
|
15 |
if file.name.endswith(".pdf"):
|
@@ -99,19 +98,20 @@ def analyze_document(file):
|
|
99 |
|
100 |
chunks = chunk_text(text)
|
101 |
all_insights = []
|
102 |
-
progress = []
|
103 |
|
104 |
-
progress
|
|
|
105 |
for i, chunk in enumerate(chunks, 1):
|
106 |
-
|
107 |
result = analyze_chunk(chunk)
|
108 |
if result.strip(): # Only append non-empty results
|
109 |
all_insights.append(result)
|
110 |
|
111 |
if not all_insights:
|
112 |
-
|
|
|
113 |
|
114 |
-
|
115 |
consolidated_summary_prompt = (
|
116 |
"Below are insights extracted from multiple chunks of a document. "
|
117 |
"Consolidate these insights into a single output organized as follows: "
|
@@ -134,35 +134,28 @@ def analyze_document(file):
|
|
134 |
final_summary = ""
|
135 |
for chunk in stream:
|
136 |
final_summary += chunk.choices[0].delta.content or ""
|
137 |
-
|
138 |
-
|
139 |
|
140 |
# Generate DOCX file after processing
|
141 |
docx_file = save_as_docx(final_summary)
|
142 |
-
return
|
143 |
-
|
144 |
except Exception as e:
|
145 |
-
|
146 |
|
147 |
|
148 |
# Define the Gradio interface
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
submit_button = gr.Button("Analyze Document")
|
163 |
-
submit_button.click(process_input, [file_input], [analysis_output, download_button])
|
164 |
-
|
165 |
-
iface.launch(share=True)
|
166 |
-
|
167 |
if __name__ == "__main__":
|
168 |
-
|
|
|
9 |
# Initialize Cerebras AI client with the API key
|
10 |
client = Cerebras(api_key=Cerekey)
|
11 |
|
|
|
12 |
def extract_text_from_file(file):
|
13 |
"""Extracts text from uploaded PDF or DOCX files."""
|
14 |
if file.name.endswith(".pdf"):
|
|
|
98 |
|
99 |
chunks = chunk_text(text)
|
100 |
all_insights = []
|
|
|
101 |
|
102 |
+
# Yield progress for chunking and analysis
|
103 |
+
yield "**Processing the document. Please wait...**\n"
|
104 |
for i, chunk in enumerate(chunks, 1):
|
105 |
+
yield f"**Processing chunk {i} of {len(chunks)}...**"
|
106 |
result = analyze_chunk(chunk)
|
107 |
if result.strip(): # Only append non-empty results
|
108 |
all_insights.append(result)
|
109 |
|
110 |
if not all_insights:
|
111 |
+
yield "**Error:** No valid insights were extracted from the document."
|
112 |
+
return
|
113 |
|
114 |
+
yield "**Consolidating all insights into a final summary...**\n"
|
115 |
consolidated_summary_prompt = (
|
116 |
"Below are insights extracted from multiple chunks of a document. "
|
117 |
"Consolidate these insights into a single output organized as follows: "
|
|
|
134 |
final_summary = ""
|
135 |
for chunk in stream:
|
136 |
final_summary += chunk.choices[0].delta.content or ""
|
137 |
+
|
138 |
+
yield f"**Final Summary:**\n\n{final_summary}"
|
139 |
|
140 |
# Generate DOCX file after processing
|
141 |
docx_file = save_as_docx(final_summary)
|
142 |
+
return final_summary, docx_file
|
|
|
143 |
except Exception as e:
|
144 |
+
yield f"**Error:** An error occurred during consolidation: {e}", None
|
145 |
|
146 |
|
147 |
# Define the Gradio interface
|
148 |
+
# Build the single-page UI: one uploaded file is handed to analyze_document,
# and the results are shown in a Markdown panel next to a file component
# labelled for downloading the DOCX report.
interface = gr.Interface(
    fn=analyze_document,
    inputs=gr.File(label="Upload a PDF or DOCX file"),
    outputs=[
        gr.Markdown(label="Progress and Analysis"),
        gr.File(label="Download the DOCX report"),
    ],
    title="Automated Literature Review",
    description=(
        "Upload a PDF or DOCX document, and this tool will analyze it to extract and consolidate its content. "
        "It might take a while, be patient. You are advised to upload smaller documents with shorter text as it may take a while to process longer files."
    ),
)


# Start serving only when this file is executed directly, not when imported.
if __name__ == "__main__":
    interface.launch()
|