allenlsl committed
Commit f9e4ddb · verified · 1 Parent(s): 59802e3

Update main.py

Files changed (1)
  1. main.py +17 -11
main.py CHANGED
@@ -181,35 +181,41 @@ def query_index(question, top_k=5):
 
 # === Main Execution ===
 if __name__ == "__main__":
-    # Optional manual override for full rebuild
-    REBUILD_ALL = True  # Set True to force re-indexing everything
+    print("🚀 Starting BC Land Survey Legal Assistant")
 
-    # Try to load cache
+    # Default behavior: load existing index
+    update_mode = "none"  # can be "none", "update", or "updateall"
+    if args.updateall:
+        update_mode = "updateall"
+    elif args.update:
+        update_mode = "update"
+
+    # Load caches for local PDF and URL tracking
     processed_pdfs = load_cache(PDF_CACHE_FILE)
     processed_urls = load_cache(URL_CACHE_FILE)
 
-    if REBUILD_ALL:
+    if update_mode == "updateall":
         print("🔁 Rebuilding index from scratch...")
         processed_pdfs = set()
         processed_urls = set()
 
     index_loaded = load_index()
 
-    # started = start_ollama_model()
-    # if not started:
-    #     print("❌ Could not connect to Ollama")
-    #     exit(1)
+    if update_mode == "updateall" or not index_loaded or update_mode == "update":
+        if not index_loaded:
+            print("⚠️ Index not found; will rebuild from source.")
+        print("🔄 Indexing content...")
 
-    if REBUILD_ALL or not index_loaded:
-        print("🔄 Creating or updating index...")
         process_pdf_folder(processed_files=processed_pdfs)
         for url in load_urls():
             crawl_url(url, depth=1, processed_urls=processed_urls)
+
         save_index()
         save_cache(processed_pdfs, PDF_CACHE_FILE)
         save_cache(processed_urls, URL_CACHE_FILE)
     else:
-        print(f"✅ Loaded existing index with {len(documents)} chunks. Ready to query.")
+        print(f"✅ Loaded FAISS index with {vector_index.ntotal} vectors.")
+        print(f"✅ Loaded {len(documents)} legal chunks.")
 
     print("\n❓ Ready to query your legal database (type 'exit' to quit)")
     while True:
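
Note: the new code reads args.update and args.updateall, so it assumes an argument parser defined earlier in main.py, outside this hunk. A minimal sketch of what that argparse block could look like, with flag names inferred from the attributes used above (not the file's confirmed parser):

import argparse

# Hypothetical CLI setup implied by args.update / args.updateall above;
# the actual parser lives elsewhere in main.py and may differ.
parser = argparse.ArgumentParser(description="BC Land Survey Legal Assistant")
parser.add_argument("--update", action="store_true",
                    help="index only new PDFs and URLs, keeping existing caches")
parser.add_argument("--updateall", action="store_true",
                    help="clear the caches and rebuild the index from scratch")
args = parser.parse_args()

Under that assumption, running python main.py loads the saved index, python main.py --update indexes new sources on top of the cached PDF and URL sets, and python main.py --updateall clears both caches and rebuilds everything.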