Spaces:

allenlsl
/

legal_rag

Running

allenlsl committed on Apr 10

Commit

f9e4ddb

verified ·

1 Parent(s): 59802e3

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -181,35 +181,41 @@ def query_index(question, top_k=5):
 # === Main Execution ===
 if __name__ == "__main__":
-    # Optional manual override for full rebuild
-    REBUILD_ALL = True  # Set True to force re-indexing everything
-    # Try to load cache
     processed_pdfs = load_cache(PDF_CACHE_FILE)
     processed_urls = load_cache(URL_CACHE_FILE)
-    if REBUILD_ALL:
         print("🔁 Rebuilding index from scratch...")
         processed_pdfs = set()
         processed_urls = set()
     index_loaded = load_index()
-    # started = start_ollama_model()
-    # if not started:
-    #     print("❌ Could not connect to Ollama")
-    #     exit(1)
-    if REBUILD_ALL or not index_loaded:
-        print("🔄 Creating or updating index...")
         process_pdf_folder(processed_files=processed_pdfs)
         for url in load_urls():
             crawl_url(url, depth=1, processed_urls=processed_urls)
         save_index()
         save_cache(processed_pdfs, PDF_CACHE_FILE)
         save_cache(processed_urls, URL_CACHE_FILE)
     else:
-        print(f"✅ Loaded existing index with {len(documents)} chunks. Ready to query.")
     print("\n❓ Ready to query your legal database (type 'exit' to quit)")
     while True:

 # === Main Execution ===
 if __name__ == "__main__":
+    print("🚀 Starting BC Land Survey Legal Assistant")
+    # Default behavior: load existing index
+    update_mode = "none"  # can be "none", "update", or "updateall"
+    if args.updateall:
+        update_mode = "updateall"
+    elif args.update:
+        update_mode = "update"
+    # Load caches for local PDF and URL tracking
     processed_pdfs = load_cache(PDF_CACHE_FILE)
     processed_urls = load_cache(URL_CACHE_FILE)
+    if update_mode == "updateall":
         print("🔁 Rebuilding index from scratch...")
         processed_pdfs = set()
         processed_urls = set()
     index_loaded = load_index()
+    if update_mode == "updateall" or not index_loaded or update_mode == "update":
+        if not index_loaded:
+            print("⚠️ Index not found — will rebuild from source.")
+        print("🔄 Indexing content...")
         process_pdf_folder(processed_files=processed_pdfs)
         for url in load_urls():
             crawl_url(url, depth=1, processed_urls=processed_urls)
         save_index()
         save_cache(processed_pdfs, PDF_CACHE_FILE)
         save_cache(processed_urls, URL_CACHE_FILE)
     else:
+        print(f"✅ Loaded FAISS index with {vector_index.ntotal} vectors.")
+        print(f"✅ Loaded {len(documents)} legal chunks.")
     print("\n❓ Ready to query your legal database (type 'exit' to quit)")
     while True: