naamaslomi committed
Commit 364e3c8 · verified · 1 Parent(s): 7b1b0fd

Update app.py


Changed the caching method to one better suited to the large number of subgroups we have (see the sketch below).

Files changed (1):
  1. app.py +34 -31
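The change below replaces a one-shot preload of every group (pickled to CACHE_FILE) with a small in-memory cache that holds at most CACHE_LIMIT groups and evicts the oldest one when full. A minimal, self-contained sketch of that pattern, assuming an OrderedDict-backed bounded cache (fetch_item is a hypothetical stand-in for downloading one group's images; it is not part of app.py):

from collections import OrderedDict

CACHE_LIMIT = 30       # maximum number of cached entries
cache = OrderedDict()  # insertion-ordered mapping used as a bounded cache

def fetch_item(key):
    # Hypothetical loader; in app.py this is the Drive download + resize.
    return f"payload for {key}"

def get_cached(key):
    if key in cache:         # cache hit: reuse the stored value
        return cache[key]
    value = fetch_item(key)  # cache miss: load and store
    cache[key] = value
    if len(cache) > CACHE_LIMIT:
        cache.popitem(last=False)  # evict the oldest entry (FIFO)
    return value

Keeping the bound small matters here because each cached entry is a list of decoded, resized PIL images, which is far heavier in memory than the file names themselves.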
app.py CHANGED

@@ -7,6 +7,8 @@ import requests
 import pickle
 from tqdm import tqdm
 from datetime import datetime
+from collections import OrderedDict
+
 
 # ==== CONFIG ==== #
 PERSISTENT_DIR = "/data" if os.path.exists("/data") else "."
@@ -16,6 +18,9 @@ MAPPING_FILE = "file_mapping.csv"
 DRIVE_LINK_TEMPLATE = "https://drive.google.com/uc?id={}"
 CACHE_FILE = os.path.join(PERSISTENT_DIR, "groups_cache.pkl")
 RESET = False  # Set to True to clear previous results and cache
+CACHE_LIMIT = 30  # Feel free to tweak this
+
+image_cache = OrderedDict()
 
 # ==== Optional Reset ====
 if RESET:
@@ -25,33 +30,32 @@ if RESET:
         os.remove(path)
         print(f"🗑️ Deleted {path}")
 
+def load_group_with_cache(group, resize=(256, 256)):
+    key = tuple(group)
+    if key in image_cache:
+        return image_cache[key]
+
+    imgs = []
+    for file_name in group:
+        try:
+            url = get_drive_image_url(file_name)
+            response = requests.get(url, stream=True, timeout=10)
+            img = Image.open(response.raw).convert("RGB").resize(resize)
+            imgs.append(img)
+        except Exception as e:
+            print(f"❌ Error loading {file_name}: {e}")
+            imgs.append(None)
+
+    image_cache[key] = imgs
+    if len(image_cache) > CACHE_LIMIT:
+        image_cache.popitem(last=False)  # Remove oldest group
+
+    return imgs
 # ==== Helpers ====
 def get_drive_image_url(file_name):
     file_id = file_dict.get(file_name)
     return DRIVE_LINK_TEMPLATE.format(file_id) if file_id else None
 
-def preload_groups(groups, resize=(256, 256)):
-    if os.path.exists(CACHE_FILE):
-        with open(CACHE_FILE, "rb") as f:
-            return pickle.load(f)
-
-    print("🖼️ Preloading groups from Drive...")
-    preloaded = []
-    for group in tqdm(groups):
-        try:
-            imgs = []
-            for file_name in group:
-                url = get_drive_image_url(file_name)
-                response = requests.get(url, stream=True, timeout=10)
-                img = Image.open(response.raw).convert("RGB").resize(resize)
-                imgs.append(img)
-            preloaded.append(imgs)
-        except Exception as e:
-            print(f"Error loading group {group}: {e}")
-    with open(CACHE_FILE, "wb") as f:
-        pickle.dump(preloaded, f)
-    return preloaded
-
 def load_reviewed_ids():
     try:
         reviewed = pd.read_csv(RESULTS_FILE).to_dict(orient="records")
@@ -62,13 +66,11 @@ def load_reviewed_ids():
 def get_remaining_groups():
     reviewed, reviewed_ids = load_reviewed_ids()
     remaining = [g for g in sample_names if tuple(g) not in reviewed_ids]
-    filtered_preloaded = [pg for g, pg in zip(sample_names, preloaded_groups) if tuple(g) not in reviewed_ids]
-    return reviewed, reviewed_ids, remaining, filtered_preloaded
+    return reviewed, reviewed_ids, remaining
 
 def review_group(decision, group):
     reviewed, reviewed_ids = load_reviewed_ids()
 
-    # Save updated results
     reviewed.append({
         "group": json.dumps(group),
         "decision": decision
@@ -81,14 +83,15 @@ def review_group(decision, group):
     except Exception as e:
         print(f"❌ Error saving results: {e}")
 
-    # Get updated remaining groups
-    _, _, remaining, filtered_preloaded = get_remaining_groups()
+    _, _, remaining = get_remaining_groups()
 
     if remaining:
-        return filtered_preloaded[0], remaining[0], f"Group {len(reviewed)+1} / {len(preloaded_groups)}"
+        group = remaining[0]
+        return load_group_with_cache(group), group, f"Group {len(reviewed)+1} / {len(sample_names)}"
     else:
         return [], None, "✅ All groups reviewed!"
 
+
 def prepare_download():
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     filename = f"review_results_{timestamp}.csv"
@@ -104,9 +107,10 @@ def prepare_download():
     return None
 
 def get_first_group():
-    reviewed, _, remaining, filtered_preloaded = get_remaining_groups()
+    reviewed, _, remaining = get_remaining_groups()
     if remaining:
-        return filtered_preloaded[0], remaining[0], f"Group {len(reviewed)+1} / {len(preloaded_groups)}"
+        group = remaining[0]
+        return load_group_with_cache(group), group, f"Group {len(reviewed)+1} / {len(sample_names)}"
     else:
         return [], None, "✅ All groups reviewed!"
 
@@ -114,7 +118,6 @@
 file_dict = pd.read_csv(MAPPING_FILE).set_index("name")["id"].to_dict()
 with open(GROUPS_FILE) as f:
     sample_names = json.load(f)
-preloaded_groups = preload_groups(sample_names)
 
 # ==== Gradio UI ====
 with gr.Blocks() as demo:
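One note on eviction order in the new load_group_with_cache: a cache hit returns the stored entry without reordering it, so the cache evicts in insertion order (FIFO) rather than least-recently-used order. If LRU behavior were preferred, the usual OrderedDict idiom refreshes an entry's position on every hit. A sketch under that assumption (get_cached_lru and loader are illustrative names, not part of this commit):

from collections import OrderedDict

CACHE_LIMIT = 30
cache = OrderedDict()

def get_cached_lru(key, loader):
    if key in cache:
        cache.move_to_end(key)     # refresh: mark as most recently used
        return cache[key]
    value = loader(key)
    cache[key] = value
    if len(cache) > CACHE_LIMIT:
        cache.popitem(last=False)  # now evicts the least recently used
    return value

For sequential review of groups, FIFO and LRU behave almost identically, so the simpler FIFO version in the commit is a reasonable choice.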