Update main.py
Browse files
main.py
CHANGED
@@ -181,35 +181,41 @@ def query_index(question, top_k=5):
|
|
181 |
|
182 |
# === Main Execution ===
|
183 |
if __name__ == "__main__":
|
184 |
-
|
185 |
-
REBUILD_ALL = True # Set True to force re-indexing everything
|
186 |
|
187 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
processed_pdfs = load_cache(PDF_CACHE_FILE)
|
189 |
processed_urls = load_cache(URL_CACHE_FILE)
|
190 |
|
191 |
-
if
|
192 |
print("π Rebuilding index from scratch...")
|
193 |
processed_pdfs = set()
|
194 |
processed_urls = set()
|
195 |
|
196 |
index_loaded = load_index()
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
|
203 |
-
if REBUILD_ALL or not index_loaded:
|
204 |
-
print("π Creating or updating index...")
|
205 |
process_pdf_folder(processed_files=processed_pdfs)
|
206 |
for url in load_urls():
|
207 |
crawl_url(url, depth=1, processed_urls=processed_urls)
|
|
|
208 |
save_index()
|
209 |
save_cache(processed_pdfs, PDF_CACHE_FILE)
|
210 |
save_cache(processed_urls, URL_CACHE_FILE)
|
211 |
else:
|
212 |
-
print(f"β
Loaded
|
|
|
213 |
|
214 |
print("\nβ Ready to query your legal database (type 'exit' to quit)")
|
215 |
while True:
|
|
|
181 |
|
182 |
# === Main Execution ===
|
183 |
if __name__ == "__main__":
|
184 |
+
print("π Starting BC Land Survey Legal Assistant")
|
|
|
185 |
|
186 |
+
# Default behavior: load existing index
|
187 |
+
update_mode = "none" # can be "none", "update", or "updateall"
|
188 |
+
if args.updateall:
|
189 |
+
update_mode = "updateall"
|
190 |
+
elif args.update:
|
191 |
+
update_mode = "update"
|
192 |
+
|
193 |
+
# Load caches for local PDF and URL tracking
|
194 |
processed_pdfs = load_cache(PDF_CACHE_FILE)
|
195 |
processed_urls = load_cache(URL_CACHE_FILE)
|
196 |
|
197 |
+
if update_mode == "updateall":
|
198 |
print("π Rebuilding index from scratch...")
|
199 |
processed_pdfs = set()
|
200 |
processed_urls = set()
|
201 |
|
202 |
index_loaded = load_index()
|
203 |
|
204 |
+
if update_mode == "updateall" or not index_loaded or update_mode == "update":
|
205 |
+
if not index_loaded:
|
206 |
+
print("⚠️ Index not found — will rebuild from source.")
|
207 |
+
print("π Indexing content...")
|
208 |
|
|
|
|
|
209 |
process_pdf_folder(processed_files=processed_pdfs)
|
210 |
for url in load_urls():
|
211 |
crawl_url(url, depth=1, processed_urls=processed_urls)
|
212 |
+
|
213 |
save_index()
|
214 |
save_cache(processed_pdfs, PDF_CACHE_FILE)
|
215 |
save_cache(processed_urls, URL_CACHE_FILE)
|
216 |
else:
|
217 |
+
print(f"✅ Loaded FAISS index with {vector_index.ntotal} vectors.")
|
218 |
+
print(f"✅ Loaded {len(documents)} legal chunks.")
|
219 |
|
220 |
print("\nβ Ready to query your legal database (type 'exit' to quit)")
|
221 |
while True:
|