naamaslomi committed
Commit 5c4dda7 · verified · 1 parent: 979de1b

Update app.py

Files changed (1): app.py (+60, -102)

app.py CHANGED
@@ -8,8 +8,6 @@ import pickle
 from tqdm import tqdm
 from datetime import datetime
 from collections import OrderedDict
-import threading
-

 # ==== CONFIG ==== #
 PERSISTENT_DIR = "/data" if os.path.exists("/data") else "."
@@ -17,31 +15,25 @@ RESULTS_FILE = os.path.join(PERSISTENT_DIR, "review_results.csv")
 GROUPS_FILE = "subgroups_4876.json"
 MAPPING_FILE = "file_mapping.csv"
 DRIVE_LINK_TEMPLATE = "https://drive.google.com/uc?id={}"
-CACHE_FILE = os.path.join(PERSISTENT_DIR, "groups_cache.pkl")
-RESET = False  # Set to True to clear previous results and cache
-CACHE_LIMIT = 30  # Feel free to tweak this
+BATCH_SIZE = 50
+CACHE_LIMIT = 100  # Cache up to 100 groups in memory

+# ==== Globals ==== #
 image_cache = OrderedDict()
+file_dict = pd.read_csv(MAPPING_FILE).set_index("name")["id"].to_dict()
+with open(GROUPS_FILE) as f:
+    sample_names = json.load(f)

-# ==== Optional Reset ====
-if RESET:
-    for filename in [RESULTS_FILE, CACHE_FILE]:
-        path = os.path.join(PERSISTENT_DIR, filename)
-        if os.path.exists(path):
-            os.remove(path)
-            print(f"🗑️ Deleted {path}")
-
+# ==== Core Functions ==== #
+def get_drive_image_url(file_name):
+    file_id = file_dict.get(file_name)
+    return DRIVE_LINK_TEMPLATE.format(file_id) if file_id else None

-def preload_next_group(remaining_groups):
-    if len(remaining_groups) >= 2:
-        next_group = remaining_groups[1]  # next after the one being shown
-        load_group_with_cache(next_group)  # this fills the cache
-
 def load_group_with_cache(group, resize=(256, 256)):
     key = tuple(group)
     if key in image_cache:
         return image_cache[key]
-
+
     imgs = []
     for file_name in group:
         try:
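
Reviewer note on the new constants: with BATCH_SIZE = 50 and CACHE_LIMIT = 100, two consecutive batches fit in the cache at once, so preloading batch N+1 cannot evict groups from batch N while they are still on screen (assuming groups are reviewed in order). If either constant is tuned later, a one-line guard would preserve that invariant; this assert is a suggestion, not part of the commit:

    assert 2 * BATCH_SIZE <= CACHE_LIMIT, "preloading could evict the batch under review"
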
@@ -52,16 +44,20 @@ def load_group_with_cache(group, resize=(256, 256)):
         except Exception as e:
             print(f"❌ Error loading {file_name}: {e}")
             imgs.append(None)
-
+
     image_cache[key] = imgs
     if len(image_cache) > CACHE_LIMIT:
-        image_cache.popitem(last=False)  # Remove oldest group
+        image_cache.popitem(last=False)

     return imgs
-# ==== Helpers ====
-def get_drive_image_url(file_name):
-    file_id = file_dict.get(file_name)
-    return DRIVE_LINK_TEMPLATE.format(file_id) if file_id else None
+
+def preload_batch(start_idx, batch_size=BATCH_SIZE):
+    end_idx = min(start_idx + batch_size, len(sample_names))
+    batch_groups = sample_names[start_idx:end_idx]
+    preloaded = []
+    for group in tqdm(batch_groups, desc="Preloading batch"):
+        preloaded.append(load_group_with_cache(group))
+    return preloaded

 def load_reviewed_ids():
     try:
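
Reviewer note on the eviction path: a cache hit in load_group_with_cache returns without refreshing the entry's position, so popitem(last=False) always removes the oldest inserted group; the OrderedDict behaves as a FIFO cache, not a true LRU. That is fine for strictly sequential review, but if least-recently-used semantics are ever wanted, hits should call move_to_end. A minimal sketch; cache_get and cache_put are illustrative names, not from this commit:

    from collections import OrderedDict

    CACHE_LIMIT = 100
    image_cache = OrderedDict()

    def cache_get(key):
        if key not in image_cache:
            return None
        image_cache.move_to_end(key)  # refresh recency on every hit
        return image_cache[key]

    def cache_put(key, value):
        image_cache[key] = value
        if len(image_cache) > CACHE_LIMIT:
            image_cache.popitem(last=False)  # evicts the least recently used
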
@@ -73,69 +69,44 @@ def load_reviewed_ids():
 def get_remaining_groups():
     reviewed, reviewed_ids = load_reviewed_ids()
     remaining = [g for g in sample_names if tuple(g) not in reviewed_ids]
-    return reviewed, reviewed_ids, remaining
-
-def review_group(decision, group):
-    reviewed, reviewed_ids = load_reviewed_ids()
-
-    reviewed.append({
-        "group": json.dumps(group),
-        "decision": decision
-    })
-
-    try:
-        os.makedirs(os.path.dirname(RESULTS_FILE), exist_ok=True)
-        pd.DataFrame(reviewed).to_csv(RESULTS_FILE, index=False)
-        print(f"✅ Saved to {RESULTS_FILE}")
-    except Exception as e:
-        print(f"❌ Error saving results: {e}")
-
-    _, _, remaining = get_remaining_groups()
-    if remaining:
-        current_group = remaining[0]
-        next_images = load_group_with_cache(current_group)
-        threading.Thread(target=preload_next_group, args=(remaining,)).start()
-        return next_images, current_group, f"Group {len(reviewed)+1} / {len(sample_names)}"
-
+    return reviewed, remaining
+
+def review_group(decision, current_index, preloaded_batch):
+    reviewed, _ = load_reviewed_ids()
+    current_group = sample_names[current_index]
+    reviewed.append({"group": json.dumps(current_group), "decision": decision})
+    pd.DataFrame(reviewed).to_csv(RESULTS_FILE, index=False)
+
+    next_index = current_index + 1
+    if next_index < len(sample_names):
+        batch_start = (next_index // BATCH_SIZE) * BATCH_SIZE
+        if next_index % BATCH_SIZE == 0:
+            return [], next_index, [], f"⏳ Preloading next batch..."
+        else:
+            next_group = sample_names[next_index]
+            return load_group_with_cache(next_group), next_index, preloaded_batch, f"Group {next_index+1} / {len(sample_names)}"
     else:
-        return [], None, "✅ All groups reviewed!"
-
+        return [], next_index, preloaded_batch, "✅ All groups reviewed!"

 def prepare_download():
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     filename = f"review_results_{timestamp}.csv"
-
-    src = RESULTS_FILE
     dst = os.path.join(PERSISTENT_DIR, filename)
-    try:
-        pd.read_csv(src).to_csv(dst, index=False)
-        print(f"📁 Prepared file: {dst}")
-        return dst
-    except Exception as e:
-        print(f"⚠️ Error preparing download: {e}")
-        return None
-
-def get_first_group():
-    reviewed, _, remaining = get_remaining_groups()
-    if remaining:
-        group = remaining[0]
-        return load_group_with_cache(group), group, f"Group {len(reviewed)+1} / {len(sample_names)}"
-    else:
-        return [], None, "✅ All groups reviewed!"
-
-# ==== Load Data ====
-file_dict = pd.read_csv(MAPPING_FILE).set_index("name")["id"].to_dict()
-with open(GROUPS_FILE) as f:
-    sample_names = json.load(f)
-
-# ==== Gradio UI ====
+    pd.read_csv(RESULTS_FILE).to_csv(dst, index=False)
+    return dst
+
+def get_first_batch():
+    reviewed, remaining = get_remaining_groups()
+    current_index = len(reviewed)
+    batch_start = (current_index // BATCH_SIZE) * BATCH_SIZE
+    preloaded_batch = preload_batch(batch_start)
+    group = sample_names[current_index]
+    return load_group_with_cache(group), current_index, preloaded_batch, f"Group {current_index+1} / {len(sample_names)}"
+
+# ==== Gradio UI ==== #
 with gr.Blocks() as demo:
-    current_group = gr.State(value=None)
-    gallery = gr.Gallery(label="Group", columns=4, height="auto")
-    progress_text = gr.Markdown()
-
-with gr.Blocks() as demo:
-    current_group = gr.State(value=None)
+    current_index = gr.State(0)
+    preloaded_batch = gr.State([])
     gallery = gr.Gallery(label="Group", columns=4, height="auto")
     progress_text = gr.Markdown()

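
Reviewer note on review_group: at a batch boundary (next_index % BATCH_SIZE == 0) the handler returns an empty gallery and a "⏳ Preloading next batch..." message, but nothing ever calls preload_batch again after startup, and batch_start is computed without being used; the reviewer lands on a blank screen, and the next click records a decision for a group that was never shown. One way to keep the flow seamless is to preload synchronously inside the handler. The following is a sketch built only from names in this commit, not the committed behavior:

    def review_group(decision, current_index, preloaded_batch):
        reviewed, _ = load_reviewed_ids()
        current_group = sample_names[current_index]
        reviewed.append({"group": json.dumps(current_group), "decision": decision})
        pd.DataFrame(reviewed).to_csv(RESULTS_FILE, index=False)

        next_index = current_index + 1
        if next_index >= len(sample_names):
            return [], next_index, preloaded_batch, "✅ All groups reviewed!"
        if next_index % BATCH_SIZE == 0:
            # Blocks the click until the next batch is cached, then shows
            # the next group immediately instead of a blank gallery.
            preloaded_batch = preload_batch(next_index)
        next_group = sample_names[next_index]
        return (load_group_with_cache(next_group), next_index, preloaded_batch,
                f"Group {next_index+1} / {len(sample_names)}")
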
@@ -143,34 +114,21 @@ with gr.Blocks() as demo:
     like = gr.Button("👍 Like")
     dislike = gr.Button("👎 Dislike")
     download_btn = gr.Button("⬇️ Download Results")
-
     download_file = gr.File(label="Download CSV")
-
-

     like.click(
-        fn=lambda group: review_group("like", group),
-        inputs=[current_group],
-        outputs=[gallery, current_group, progress_text]
+        fn=lambda idx, batch: review_group("like", idx, batch),
+        inputs=[current_index, preloaded_batch],
+        outputs=[gallery, current_index, preloaded_batch, progress_text]
     )
-
     dislike.click(
-        fn=lambda group: review_group("dislike", group),
-        inputs=[current_group],
-        outputs=[gallery, current_group, progress_text]
-    )
-
-    download_btn.click(
-        fn=prepare_download,
-        inputs=[],
-        outputs=[download_file]
-    )
-    demo.load(
-        fn=get_first_group,
-        outputs=[gallery, current_group, progress_text]
+        fn=lambda idx, batch: review_group("dislike", idx, batch),
+        inputs=[current_index, preloaded_batch],
+        outputs=[gallery, current_index, preloaded_batch, progress_text]
     )
+    download_btn.click(fn=prepare_download, inputs=[], outputs=[download_file])

+    demo.load(fn=get_first_batch, outputs=[gallery, current_index, preloaded_batch, progress_text])

 if __name__ == "__main__":
     demo.launch(allowed_paths=["/data"])
-
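
Style note on the wiring: the two lambdas exist only to bake in the decision string; functools.partial expresses the same binding without restating the argument list, since Gradio passes the inputs values positionally after the bound argument. Equivalent wiring, shown for illustration only:

    from functools import partial

    like.click(
        fn=partial(review_group, "like"),
        inputs=[current_index, preloaded_batch],
        outputs=[gallery, current_index, preloaded_batch, progress_text],
    )
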
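
One last observation: get_first_batch resumes at current_index = len(reviewed), which is correct only while the rows in review_results.csv line up one-to-one, in order, with the front of sample_names; a duplicated or out-of-order row would silently skip or repeat groups. A more defensive resume derives the index from the first unreviewed group. first_unreviewed_index below is a hypothetical helper, not part of the commit:

    def first_unreviewed_index():
        # Resume at the first group without a saved decision, independent
        # of row order or duplicates in the results CSV.
        _, remaining = get_remaining_groups()
        return sample_names.index(remaining[0]) if remaining else None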