1990two committed on
Commit 1c69333 · verified · 1 Parent(s): e049698

Upload 2 files

Files changed (2)
  1. hebbian_bloom.py +503 -0
  2. hebbian_bloom_docs.py +888 -0
hebbian_bloom.py ADDED
@@ -0,0 +1,503 @@
1
+ ###########################################################################################################################################
2
+ #||- - - |6.25.2025| - - - || HEBBIAN BLOOM || - - - | 1990two | - - -||#
3
+ ###########################################################################################################################################
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ import numpy as np
8
+ import math
9
+ import hashlib
10
+ from collections import defaultdict, deque
11
+ from typing import List, Dict, Tuple, Optional, Union
12
+
13
+ SAFE_MIN = -1e6
14
+ SAFE_MAX = 1e6
15
+ EPS = 1e-8
16
+
17
+ #||- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 𓅸 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -||#
18
+
19
+ def make_safe(tensor, min_val=SAFE_MIN, max_val=SAFE_MAX):
20
+ tensor = torch.where(torch.isnan(tensor), torch.tensor(0.0, device=tensor.device, dtype=tensor.dtype), tensor)
21
+ tensor = torch.where(torch.isinf(tensor), torch.tensor(max_val, device=tensor.device, dtype=tensor.dtype), tensor)
22
+ return torch.clamp(tensor, min_val, max_val)
23
+
24
+ def safe_cosine_similarity(a, b, dim=-1, eps=EPS):
25
+ if a.dtype != torch.float32:
26
+ a = a.float()
27
+ if b.dtype != torch.float32:
28
+ b = b.float()
29
+ a_norm = torch.norm(a, dim=dim, keepdim=True).clamp(min=eps)
30
+ b_norm = torch.norm(b, dim=dim, keepdim=True).clamp(min=eps)
31
+ return torch.sum(a * b, dim=dim, keepdim=True) / (a_norm * b_norm)
32
+
33
+ def item_to_vector(item, vector_dim=64):
34
+ if isinstance(item, str):
35
+ hash_obj = hashlib.md5(item.encode())
36
+ hash_bytes = hash_obj.digest()
37
+ vector = torch.tensor([b / 255.0 for b in hash_bytes], dtype=torch.float32)
38
+ if len(vector) < vector_dim:
39
+ padding = torch.zeros(vector_dim - len(vector), dtype=torch.float32)
40
+ vector = torch.cat([vector, padding])
41
+ else:
42
+ vector = vector[:vector_dim]
43
+ elif isinstance(item, (int, float)):
44
+ vector = torch.zeros(vector_dim, dtype=torch.float32)
45
+ for i in range(vector_dim // 2):
46
+ freq = 10000 ** (-2 * i / vector_dim)
47
+ vector[2*i] = math.sin(item * freq)
48
+ vector[2*i + 1] = math.cos(item * freq)
49
+ elif torch.is_tensor(item):
50
+ vector = item.flatten().float()
51
+ if len(vector) < vector_dim:
52
+ padding = torch.zeros(vector_dim - len(vector), dtype=torch.float32, device=vector.device)
53
+ vector = torch.cat([vector, padding])
54
+ else:
55
+ vector = vector[:vector_dim]
56
+ else:
57
+ hash_val = hash(str(item)) % (2**31)
58
+ gen = torch.Generator(device='cpu')
59
+ gen.manual_seed(hash_val)
60
+ vector = torch.randn(vector_dim, generator=gen, dtype=torch.float32)
61
+
62
+ return make_safe(vector)
63
+
64
+ ###########################################################################################################################################
65
+ ###############################################- - - LEARNABLE HASH FUNCTION - - -#####################################################
66
+
67
+ class LearnableHashFunction(nn.Module):
68
+ def __init__(self, input_dim, hash_output_bits=32, learning_rate=0.01):
69
+ super().__init__()
70
+ self.input_dim = input_dim
71
+ self.hash_output_bits = hash_output_bits
72
+ self.learning_rate = learning_rate
73
+
74
+ self.hash_network = nn.Sequential(
75
+ nn.Linear(input_dim, input_dim * 2),
76
+ nn.LayerNorm(input_dim * 2),
77
+ nn.Tanh(),
78
+ nn.Linear(input_dim * 2, hash_output_bits),
79
+ nn.Tanh() # Output in [-1, 1]
80
+ )
81
+
82
+ self.hebbian_weights = nn.Parameter(torch.ones(hash_output_bits) * 0.1)
83
+ self.plasticity_rate = nn.Parameter(torch.tensor(learning_rate))
84
+
85
+ self.register_buffer('activity_history', torch.zeros(100, hash_output_bits))
86
+ self.register_buffer('history_pointer', torch.tensor(0, dtype=torch.long))
87
+
88
+ self.coactivation_matrix = nn.Parameter(torch.eye(hash_output_bits) * 0.1)
89
+
90
+ self.activation_threshold = nn.Parameter(torch.zeros(hash_output_bits))
91
+
92
+ def compute_hash_activation(self, item_vector):
93
+ if item_vector.dim() == 1:
94
+ item_vector = item_vector.unsqueeze(0)
95
+ item_vector = item_vector.to(next(self.hash_network.parameters()).device, dtype=torch.float32)
96
+
97
+ base_hash = self.hash_network(item_vector).squeeze(0)
98
+
99
+ hebbian_modulation = torch.tanh(self.hebbian_weights)
100
+ modulated_hash = base_hash * hebbian_modulation
101
+
102
+ thresholded = modulated_hash - self.activation_threshold
103
+
104
+ hash_probs = torch.sigmoid(thresholded * 10.0) # Sharp sigmoid
105
+
106
+ return hash_probs, modulated_hash
107
+
108
+ def get_hash_bits(self, item_vector, deterministic=False):
109
+ hash_probs, _ = self.compute_hash_activation(item_vector)
110
+
111
+ if deterministic:
112
+ hash_bits = (hash_probs > 0.5).float()
113
+ else:
114
+ hash_bits = torch.bernoulli(hash_probs)
115
+
116
+ return hash_bits
117
+
118
+ def hebbian_update(self, item_vector, co_occurring_items=None):
119
+ hash_probs, modulated_hash = self.compute_hash_activation(item_vector)
120
+
121
+ with torch.no_grad():
122
+ ptr = int(self.history_pointer.item())
123
+ self.activity_history[ptr % self.activity_history.size(0)].copy_(hash_probs.detach())
124
+ self.history_pointer.add_(1)
125
+ self.history_pointer.remainder_(self.activity_history.size(0))
126
+
127
+ plasticity_rate = torch.clamp(self.plasticity_rate, 0.001, 0.1)
128
+
129
+ activity_strength = torch.abs(modulated_hash)
130
+ hebbian_delta = plasticity_rate * activity_strength * hash_probs
131
+
132
+ with torch.no_grad():
133
+ self.hebbian_weights.data.add_(hebbian_delta * 0.05)
134
+ self.hebbian_weights.data.clamp_(-2.0, 2.0)
135
+
136
+ if co_occurring_items is not None:
137
+ self.update_coactivation_matrix(hash_probs, co_occurring_items)
138
+
139
+ return hash_probs
140
+
141
+ def update_coactivation_matrix(self, current_activation, co_occurring_items):
142
+ with torch.no_grad():
143
+ for co_item in co_occurring_items:
144
+ co_item_vector = item_to_vector(co_item, self.input_dim).to(current_activation.device)
145
+ co_activation, _ = self.compute_hash_activation(co_item_vector)
146
+
147
+ coactivation_update = torch.outer(current_activation, co_activation)
148
+
149
+ learning_rate = 0.01
150
+ self.coactivation_matrix.data.add_(learning_rate * coactivation_update)
151
+ self.coactivation_matrix.data.clamp_(-1.0, 1.0)
152
+
153
+ def get_similar_patterns(self, item_vector, top_k=5):
154
+ current_probs, _ = self.compute_hash_activation(item_vector)
155
+
156
+ similarities = []
157
+ for i in range(self.activity_history.shape[0]):
158
+ hist_pattern = self.activity_history[i]
159
+ if torch.sum(hist_pattern) > 0: # Non-zero pattern
160
+ similarity = safe_cosine_similarity(
161
+ current_probs.unsqueeze(0),
162
+ hist_pattern.unsqueeze(0)
163
+ ).squeeze()
164
+ similarities.append((i, float(similarity.item())))
165
+
166
+ similarities.sort(key=lambda x: x[1], reverse=True)
167
+
168
+ return similarities[:top_k]
169
+
170
+ def apply_forgetting(self, forget_rate=0.99):
171
+ with torch.no_grad():
172
+ self.hebbian_weights.data.mul_(forget_rate)
173
+ self.coactivation_matrix.data.mul_(forget_rate)
174
+
175
+ ###########################################################################################################################################
176
+ ################################################- - - HEBBIAN BLOOM FILTER - - -#######################################################
177
+
178
+ class HebbianBloomFilter(nn.Module):
179
+ def __init__(self, capacity=10000, error_rate=0.01, vector_dim=64, num_hash_functions=8):
180
+ super().__init__()
181
+ self.capacity = capacity
182
+ self.error_rate = error_rate
183
+ self.vector_dim = vector_dim
184
+ self.num_hash_functions = num_hash_functions
185
+
186
+ self.bit_array_size = self._calculate_bit_array_size(capacity, error_rate)
187
+
188
+ self.hash_functions = nn.ModuleList([
189
+ LearnableHashFunction(vector_dim, hash_output_bits=32)
190
+ for _ in range(num_hash_functions)
191
+ ])
192
+
193
+ self.register_buffer('bit_array', torch.zeros(self.bit_array_size))
194
+ self.register_buffer('confidence_array', torch.zeros(self.bit_array_size))
195
+
196
+ self.stored_items = {}
197
+ self.item_vectors = {}
198
+
199
+ self.register_buffer('access_counts', torch.zeros(self.bit_array_size))
200
+ self.register_buffer('total_items_added', torch.tensor(0, dtype=torch.long))
201
+
202
+ self.association_strength = nn.Parameter(torch.tensor(0.1))
203
+ self.confidence_threshold = nn.Parameter(torch.tensor(0.5))
204
+
205
+ self.decay_rate = nn.Parameter(torch.tensor(0.999))
206
+
207
+ def _calculate_bit_array_size(self, capacity, error_rate):
208
+ return int(-capacity * math.log(error_rate) / (math.log(2) ** 2))
209
+
210
+ def _get_bit_indices(self, item_vector):
211
+ indices = []
212
+ confidences = []
213
+
214
+ for hash_func in self.hash_functions:
215
+ hash_bits = hash_func.get_hash_bits(item_vector, deterministic=True)
216
+
217
+ weights = (1 << torch.arange(len(hash_bits), device=hash_bits.device, dtype=torch.int64))
218
+ bit_index = int((hash_bits.to(dtype=torch.int64) * weights).sum().item())
219
+ bit_index = bit_index % self.bit_array_size
220
+
221
+ hash_probs, _ = hash_func.compute_hash_activation(item_vector)
222
+ confidence = torch.mean(torch.abs(hash_probs - 0.5)) * 2 # Distance from uncertain (0.5)
223
+
224
+ indices.append(bit_index)
225
+ confidences.append(confidence.item())
226
+
227
+ return indices, confidences
228
+
229
+ def add(self, item, associated_items=None):
230
+ item_vector = item_to_vector(item, self.vector_dim)
231
+
232
+ item_key = str(item)
233
+ self.stored_items[item_key] = item
234
+ self.item_vectors[item_key] = item_vector
235
+
236
+ indices, confidences = self._get_bit_indices(item_vector)
237
+
238
+ with torch.no_grad():
239
+ for idx, conf in zip(indices, confidences):
240
+ self.bit_array[idx] = 1.0
241
+ self.confidence_array[idx] = max(float(self.confidence_array[idx].item()), conf)
242
+ self.access_counts[idx] += 1
243
+
244
+ for hash_func in self.hash_functions:
245
+ hash_func.hebbian_update(item_vector, associated_items)
246
+
247
+ with torch.no_grad():
248
+ self.total_items_added.add_(1)
249
+
250
+ if associated_items:
251
+ self._learn_associations(item, associated_items)
252
+
253
+ return indices
254
+
255
+ def _learn_associations(self, primary_item, associated_items):
256
+ primary_vector = item_to_vector(primary_item, self.vector_dim)
257
+
258
+ for assoc_item in associated_items:
259
+ assoc_vector = item_to_vector(assoc_item, self.vector_dim)
260
+
261
+ similarity = safe_cosine_similarity(
262
+ primary_vector.unsqueeze(0),
263
+ assoc_vector.unsqueeze(0)
264
+ ).squeeze()
265
+
266
+ association_strength = torch.clamp(self.association_strength, 0.01, 1.0)
267
+ _ = association_strength # computed but not yet applied; reserved for weighting future association updates
268
+
269
+ for hash_func in self.hash_functions:
270
+ if float(similarity.item()) > 0.5:
271
+ hash_func.hebbian_update(primary_vector, [assoc_item])
272
+
273
+ def query(self, item, return_confidence=False):
274
+ item_vector = item_to_vector(item, self.vector_dim)
275
+ indices, confidences = self._get_bit_indices(item_vector)
276
+
277
+ bit_checks = [self.bit_array[idx].item() > 0 for idx in indices]
278
+ is_member = all(bit_checks)
279
+
280
+ if return_confidence:
281
+ bit_confidences = [self.confidence_array[idx].item() for idx in indices]
282
+ hash_confidences = confidences
283
+
284
+ bit_conf = np.mean(bit_confidences) if bit_confidences else 0.0
285
+ hash_conf = np.mean(hash_confidences) if hash_confidences else 0.0
286
+
287
+ access_conf = np.mean([self.access_counts[idx].item() for idx in indices])
288
+ access_conf = min(access_conf / 10.0, 1.0) # Normalize
289
+
290
+ overall_confidence = (bit_conf + hash_conf + access_conf) / 3.0
291
+
292
+ return is_member, overall_confidence
293
+
294
+ return is_member
295
+
296
+ def find_similar_items(self, query_item, top_k=5):
297
+ query_vector = item_to_vector(query_item, self.vector_dim)
298
+
299
+ coact_weights = []
300
+ for hash_func in self.hash_functions:
301
+ q_act, _ = hash_func.compute_hash_activation(query_vector)
302
+ q_weight = torch.matmul(hash_func.coactivation_matrix.t(), q_act)
303
+ coact_weights.append((q_act, q_weight))
304
+
305
+ similarities = []
306
+ for item_key, item_vector in self.item_vectors.items():
307
+ base_sim = safe_cosine_similarity(
308
+ query_vector.unsqueeze(0),
309
+ item_vector.unsqueeze(0)
310
+ ).squeeze().item()
311
+
312
+ co_sim_sum = 0.0
313
+ for (hash_func, (q_act, q_weight)) in zip(self.hash_functions, coact_weights):
314
+ i_act, _ = hash_func.compute_hash_activation(item_vector)
315
+ co_sim_sum += torch.dot(q_weight, i_act).item() / max(1, len(i_act))
316
+ co_sim = co_sim_sum / max(1, len(self.hash_functions))
317
+
318
+ alpha, beta = 0.6, 0.4
319
+ score = alpha * base_sim + beta * co_sim
320
+ similarities.append((self.stored_items[item_key], score))
321
+
322
+ similarities.sort(key=lambda x: x[1], reverse=True)
323
+ return similarities[:top_k]
324
+
325
+ def get_hash_statistics(self):
326
+ stats = {
327
+ 'total_items': int(self.total_items_added.item()),
328
+ 'bit_array_utilization': (self.bit_array > 0).float().mean().item(),
329
+ 'average_confidence': self.confidence_array.mean().item(),
330
+ 'hash_function_stats': []
331
+ }
332
+
333
+ for i, hash_func in enumerate(self.hash_functions):
334
+ hash_stats = {
335
+ 'function_id': i,
336
+ 'hebbian_weights_mean': hash_func.hebbian_weights.mean().item(),
337
+ 'plasticity_rate': hash_func.plasticity_rate.item(),
338
+ 'activation_threshold_mean': hash_func.activation_threshold.mean().item()
339
+ }
340
+ stats['hash_function_stats'].append(hash_stats)
341
+
342
+ return stats
343
+
344
+ def apply_temporal_decay(self):
345
+ decay_rate = torch.clamp(self.decay_rate, 0.9, 0.999)
346
+
347
+ with torch.no_grad():
348
+ self.confidence_array.mul_(decay_rate)
349
+ self.access_counts.mul_(decay_rate)
350
+
351
+ low_confidence_mask = self.confidence_array < 0.1
352
+ self.bit_array[low_confidence_mask] = 0.0
353
+ self.confidence_array[low_confidence_mask] = 0.0
354
+
355
+ for hash_func in self.hash_functions:
356
+ hash_func.apply_forgetting(float(decay_rate.item()))
357
+
358
+ def optimize_structure(self):
359
+ with torch.no_grad():
360
+ high_access_ratio = (self.access_counts > self.access_counts.mean()).float().mean().item()
361
+ adjustment = -0.01 * high_access_ratio
362
+ for hash_func in self.hash_functions:
363
+ hash_func.activation_threshold.data.add_(adjustment)
364
+ hash_func.activation_threshold.data.clamp_(-1.0, 1.0)
365
+
366
+ ###########################################################################################################################################
367
+ ############################################- - - ASSOCIATIVE HEBBIAN BLOOM SYSTEM - - -###############################################
368
+
369
+ class AssociativeHebbianBloomSystem(nn.Module):
370
+ def __init__(self, capacity=10000, vector_dim=64, num_filters=3):
371
+ super().__init__()
372
+ self.capacity = capacity
373
+ self.vector_dim = vector_dim
374
+ self.num_filters = num_filters
375
+
376
+ self.filters = nn.ModuleList([
377
+ HebbianBloomFilter(
378
+ capacity=capacity // num_filters,
379
+ error_rate=0.01,
380
+ vector_dim=vector_dim,
381
+ num_hash_functions=6
382
+ ) for _ in range(num_filters)
383
+ ])
384
+
385
+ self.filter_selector = nn.Sequential(
386
+ nn.Linear(vector_dim, vector_dim // 2),
387
+ nn.ReLU(),
388
+ nn.Linear(vector_dim // 2, num_filters),
389
+ nn.Softmax(dim=-1)
390
+ )
391
+
392
+ self.global_association_net = nn.Sequential(
393
+ nn.Linear(vector_dim * 2, vector_dim),
394
+ nn.Tanh(),
395
+ nn.Linear(vector_dim, 1),
396
+ nn.Sigmoid()
397
+ )
398
+
399
+ self.register_buffer('global_access_count', torch.tensor(0, dtype=torch.long))
400
+
401
+ def add_item(self, item, category=None, associated_items=None):
402
+ item_vector = item_to_vector(item, self.vector_dim)
403
+
404
+ filter_weights = self.filter_selector(item_vector.unsqueeze(0)).squeeze(0)
405
+
406
+ with torch.no_grad():
407
+ loads = torch.tensor([f.total_items_added.item() / max(1, f.capacity) for f in self.filters], dtype=filter_weights.dtype, device=filter_weights.device)
408
+ filter_weights = filter_weights - 0.1 * loads
409
+
410
+ top_k_filters = min(2, self.num_filters) # Use top 2 filters
411
+ _, top_indices = torch.topk(filter_weights, top_k_filters)
412
+
413
+ added_to_filters = []
414
+ for filter_idx in top_indices:
415
+ filter_obj = self.filters[filter_idx.item()]
416
+ indices = filter_obj.add(item, associated_items)
417
+ added_to_filters.append((filter_idx.item(), indices))
418
+
419
+ with torch.no_grad():
420
+ self.global_access_count.add_(1)
421
+
422
+ return added_to_filters
423
+
424
+ def query_item(self, item, return_detailed=False):
425
+ item_vector = item_to_vector(item, self.vector_dim)
426
+
427
+ results = []
428
+ confidences = []
429
+
430
+ for i, filter_obj in enumerate(self.filters):
431
+ is_member, confidence = filter_obj.query(item, return_confidence=True)
432
+ results.append(is_member)
433
+ confidences.append(confidence)
434
+
435
+ positive_votes = sum(results)
436
+ avg_confidence = np.mean(confidences)
437
+
438
+ ensemble_decision = positive_votes > len(self.filters) // 2
439
+
440
+ if return_detailed:
441
+ return {
442
+ 'is_member': ensemble_decision,
443
+ 'confidence': avg_confidence,
444
+ 'individual_results': list(zip(results, confidences)),
445
+ 'positive_votes': positive_votes,
446
+ 'total_filters': len(self.filters)
447
+ }
448
+
449
+ return ensemble_decision
450
+
451
+ def find_associations(self, query_item, top_k=10):
452
+ all_similarities = []
453
+
454
+ for filter_obj in self.filters:
455
+ similarities = filter_obj.find_similar_items(query_item, top_k)
456
+ all_similarities.extend(similarities)
457
+
458
+ unique_items = {}
459
+ for item, similarity in all_similarities:
460
+ item_key = str(item)
461
+ if item_key in unique_items:
462
+ unique_items[item_key] = max(unique_items[item_key], similarity)
463
+ else:
464
+ unique_items[item_key] = similarity
465
+
466
+ ranked_items = sorted(unique_items.items(), key=lambda x: x[1], reverse=True)
467
+
468
+ return ranked_items[:top_k]
469
+
470
+ def system_maintenance(self):
471
+ for filter_obj in self.filters:
472
+ filter_obj.apply_temporal_decay()
473
+ filter_obj.optimize_structure()
474
+
475
+ if self.global_access_count % 1000 == 0:
476
+ self._global_optimization()
477
+
478
+ def _global_optimization(self):
479
+ print("Performing global Hebbian Bloom system optimization...")
480
+
481
+ filter_utilizations = []
482
+ for filter_obj in self.filters:
483
+ stats = filter_obj.get_hash_statistics()
484
+ utilization = stats['bit_array_utilization']
485
+ filter_utilizations.append(utilization)
486
+
487
+ def get_system_statistics(self):
488
+ """Get comprehensive system statistics."""
489
+ stats = {
490
+ 'global_access_count': int(self.global_access_count.item()),
491
+ 'num_filters': self.num_filters,
492
+ 'filter_statistics': []
493
+ }
494
+
495
+ for i, filter_obj in enumerate(self.filters):
496
+ filter_stats = filter_obj.get_hash_statistics()
497
+ filter_stats['filter_id'] = i
498
+ stats['filter_statistics'].append(filter_stats)
499
+
500
+ return stats
501
+
502
+
503
+ ###########################################################################################################################################
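For orientation, a minimal usage sketch of the classes defined above (illustrative only, not part of the uploaded file; it assumes the module is importable as hebbian_bloom and that torch/numpy are installed):

from hebbian_bloom import HebbianBloomFilter, AssociativeHebbianBloomSystem

# Single filter: add items with optional associations, then test membership.
bf = HebbianBloomFilter(capacity=500, vector_dim=32, num_hash_functions=4)
bf.add("apple", associated_items=["fruit", "red"])
bf.add("banana", associated_items=["fruit", "yellow"])
print(bf.query("apple", return_confidence=True))   # usually (True, confidence in [0, 1]); Hebbian drift can occasionally flip bits
print(bf.query("pear"))                            # False, up to the configured false-positive rate
print(bf.find_similar_items("apple", top_k=2))     # [(stored item, blended similarity score), ...]
bf.apply_temporal_decay()                          # gradual forgetting of low-confidence bits

# Ensemble: learned routing to the top-2 filters plus majority voting at query time.
system = AssociativeHebbianBloomSystem(capacity=1000, vector_dim=32, num_filters=3)
system.add_item("apple", associated_items=["fruit"])
print(system.query_item("apple", return_detailed=True))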
hebbian_bloom_docs.py ADDED
@@ -0,0 +1,888 @@
1
+ ###########################################################################################################################################
2
+ #||- - - |6.25.2025| - - - || HEBBIAN BLOOM || - - - | 1990two | - - -||#
3
+ ###########################################################################################################################################
4
+ """
5
+ Mathematical Foundation & Conceptual Documentation
6
+ -------------------------------------------------
7
+
8
+ CORE PRINCIPLE:
9
+ Combines Hebbian learning ("neurons that fire together, wire together") with
10
+ Bloom filter probabilistic membership testing to create self-organizing
11
+ associative memory systems that adapt based on usage patterns.
12
+
13
+ MATHEMATICAL FOUNDATION:
14
+ =======================
15
+
16
+ 1. HEBBIAN LEARNING RULE:
17
+ Δw_ij = η * a_i * a_j
18
+
19
+ Where:
20
+ - w_ij: connection strength between neurons i and j
21
+ - η: learning rate (plasticity parameter)
22
+ - a_i, a_j: activation levels of neurons i and j
23
+
24
+ In our context:
25
+ - Strengthens hash function weights for co-occurring patterns
26
+ - Adapts activation thresholds based on usage frequency
27
+ - Creates associative links between related items
28
+
29
+ 2. BLOOM FILTER MATHEMATICS:
30
+
31
+ Optimal bit array size: m = -n * ln(p) / (ln(2))²
32
+ Optimal hash functions: k = (m/n) * ln(2)
33
+
34
+ Where:
35
+ - n: expected number of items
36
+ - p: desired false positive rate
37
+ - m: bit array size
38
+ - k: number of hash functions
39
+
40
+ False positive probability: P_fp ≈ (1 - e^(-kn/m))^k
41
+
42
+ 3. CONFIDENCE ESTIMATION:
43
+
44
+ C_total = (C_bit + C_hash + C_access) / 3
45
+
46
+ Where:
47
+ - C_bit: confidence from bit array activation strength
48
+ - C_hash: confidence from hash activation patterns
49
+ - C_access: confidence from historical access frequency
50
+
51
+ 4. TEMPORAL DECAY:
52
+
53
+ w_t+1 = γ * w_t
54
+
55
+ Where γ ∈ [0.9, 0.999] is the decay rate, implementing forgetting.
56
+
57
+ CONCEPTUAL REASONING:
58
+ ====================
59
+
60
+ WHY HEBBIAN + BLOOM FILTERS?
61
+ - Traditional Bloom filters use static hash functions
62
+ - Real-world data has temporal and associative patterns
63
+ - Hebbian learning enables dynamic adaptation to these patterns
64
+ - Results in more efficient memory utilization and better retrieval
65
+
66
+ KEY INNOVATIONS:
67
+ 1. **Learnable Hash Functions**: Neural networks that adapt their mappings
68
+ 2. **Associative Strengthening**: Related items develop similar hash patterns
69
+ 3. **Confidence Estimation**: Multi-factor confidence scoring
70
+ 4. **Temporal Adaptation**: Gradual forgetting prevents overfitting
71
+ 5. **Ensemble Filtering**: Multiple filters with voting for robustness
72
+
73
+ APPLICATIONS:
74
+ - Caching systems that learn access patterns
75
+ - Recommendation engines with temporal adaptation
76
+ - Memory systems for neural architectures
77
+ - Similarity search with learned associations
78
+
79
+ COMPLEXITY ANALYSIS:
80
+ - Space: O(m + n*d) where m=bit array size, n=items, d=vector dimension
81
+ - Time: O(k*d²) per operation, where k=hash functions (the two-layer hash MLP dominates)
82
+ - Learning: O(k*b²) for co-activation matrix updates, where b=hash output bits
83
+ """
84
+
85
+ import torch
86
+ import torch.nn as nn
87
+ import torch.nn.functional as F
88
+ import numpy as np
89
+ import math
90
+ import hashlib
91
+ from collections import defaultdict, deque
92
+ from typing import List, Dict, Tuple, Optional, Union
93
+
94
+ SAFE_MIN = -1e6
95
+ SAFE_MAX = 1e6
96
+ EPS = 1e-8
97
+
98
+ #||- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 𓅸 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -||#
99
+
100
+ def make_safe(tensor, min_val=SAFE_MIN, max_val=SAFE_MAX):
101
+ tensor = torch.where(torch.isnan(tensor), torch.tensor(0.0, device=tensor.device, dtype=tensor.dtype), tensor)
102
+ tensor = torch.where(torch.isinf(tensor), torch.tensor(max_val, device=tensor.device, dtype=tensor.dtype), tensor)
103
+ return torch.clamp(tensor, min_val, max_val)
104
+
105
+ def safe_cosine_similarity(a, b, dim=-1, eps=EPS):
106
+ if a.dtype != torch.float32:
107
+ a = a.float()
108
+ if b.dtype != torch.float32:
109
+ b = b.float()
110
+ a_norm = torch.norm(a, dim=dim, keepdim=True).clamp(min=eps)
111
+ b_norm = torch.norm(b, dim=dim, keepdim=True).clamp(min=eps)
112
+ return torch.sum(a * b, dim=dim, keepdim=True) / (a_norm * b_norm)
113
+
114
+ def item_to_vector(item, vector_dim=64):
115
+ """Convert arbitrary item to fixed-size vector representation.
116
+
117
+ Uses different encoding strategies:
118
+ - Strings: MD5 hash-based encoding
119
+ - Numbers: Sinusoidal positional encoding
120
+ - Tensors: Flattening with padding/truncation
121
+ - Other: Deterministic hash-based random vector
122
+ """
123
+ if isinstance(item, str):
124
+ # String to vector via hashing
125
+ hash_obj = hashlib.md5(item.encode())
126
+ hash_bytes = hash_obj.digest()
127
+ # Convert bytes to float vector
128
+ vector = torch.tensor([b / 255.0 for b in hash_bytes], dtype=torch.float32)
129
+ # Pad or truncate to desired dimension
130
+ if len(vector) < vector_dim:
131
+ padding = torch.zeros(vector_dim - len(vector), dtype=torch.float32)
132
+ vector = torch.cat([vector, padding])
133
+ else:
134
+ vector = vector[:vector_dim]
135
+ elif isinstance(item, (int, float)):
136
+ # Numeric to vector via sinusoidal encoding
137
+ vector = torch.zeros(vector_dim, dtype=torch.float32)
138
+ for i in range(vector_dim // 2):
139
+ freq = 10000 ** (-2 * i / vector_dim)
140
+ vector[2*i] = math.sin(item * freq)
141
+ vector[2*i + 1] = math.cos(item * freq)
142
+ elif torch.is_tensor(item):
143
+ # Tensor to vector via projection
144
+ vector = item.flatten().float()
145
+ if len(vector) < vector_dim:
146
+ padding = torch.zeros(vector_dim - len(vector), dtype=torch.float32, device=vector.device)
147
+ vector = torch.cat([vector, padding])
148
+ else:
149
+ vector = vector[:vector_dim]
150
+ else:
151
+ # Default: pseudo-random vector seeded from hash(str(item)); stable within a process (set PYTHONHASHSEED for cross-run stability), no global RNG side-effects
152
+ hash_val = hash(str(item)) % (2**31)
153
+ gen = torch.Generator(device='cpu')
154
+ gen.manual_seed(hash_val)
155
+ vector = torch.randn(vector_dim, generator=gen, dtype=torch.float32)
156
+
157
+ return make_safe(vector)
158
+
159
+ ###########################################################################################################################################
160
+ ###############################################- - - LEARNABLE HASH FUNCTION - - -#####################################################
161
+
162
+ class LearnableHashFunction(nn.Module):
163
+ """Neural hash function with Hebbian plasticity.
164
+
165
+ Implements learnable hash functions that adapt through Hebbian learning,
166
+ strengthening patterns that co-occur and developing associative mappings.
167
+
168
+ Mathematical Details:
169
+ - Base hash: h = tanh(W2 * tanh(W1 * x + b1) + b2)
170
+ - Hebbian modulation: h_mod = h * tanh(w_hebbian)
171
+ - Threshold adaptation: h_thresh = h_mod - θ
172
+ - Binary conversion: p = sigmoid(10 * h_thresh)
173
+ """
174
+ def __init__(self, input_dim, hash_output_bits=32, learning_rate=0.01):
175
+ super().__init__()
176
+ self.input_dim = input_dim
177
+ self.hash_output_bits = hash_output_bits
178
+ self.learning_rate = learning_rate
179
+
180
+ # Neural hash function
181
+ self.hash_network = nn.Sequential(
182
+ nn.Linear(input_dim, input_dim * 2),
183
+ nn.LayerNorm(input_dim * 2),
184
+ nn.Tanh(),
185
+ nn.Linear(input_dim * 2, hash_output_bits),
186
+ nn.Tanh() # Output in [-1, 1]
187
+ )
188
+
189
+ # Hebbian plasticity parameters
190
+ self.hebbian_weights = nn.Parameter(torch.ones(hash_output_bits) * 0.1)
191
+ self.plasticity_rate = nn.Parameter(torch.tensor(learning_rate))
192
+
193
+ # Activity history for Hebbian learning
194
+ self.register_buffer('activity_history', torch.zeros(100, hash_output_bits))
195
+ self.register_buffer('history_pointer', torch.tensor(0, dtype=torch.long))
196
+
197
+ # Co-activation tracking
198
+ self.coactivation_matrix = nn.Parameter(torch.eye(hash_output_bits) * 0.1)
199
+
200
+ # Adaptive threshold
201
+ self.activation_threshold = nn.Parameter(torch.zeros(hash_output_bits))
202
+
203
+ def compute_hash_activation(self, item_vector):
204
+ """Compute hash activation pattern for an item."""
205
+ # Ensure correct shape/dtype/device
206
+ if item_vector.dim() == 1:
207
+ item_vector = item_vector.unsqueeze(0)
208
+ item_vector = item_vector.to(next(self.hash_network.parameters()).device, dtype=torch.float32)
209
+
210
+ # Base neural hash
211
+ base_hash = self.hash_network(item_vector).squeeze(0)
212
+
213
+ # Apply Hebbian modulation
214
+ hebbian_modulation = torch.tanh(self.hebbian_weights)
215
+ modulated_hash = base_hash * hebbian_modulation
216
+
217
+ # Apply adaptive threshold
218
+ thresholded = modulated_hash - self.activation_threshold
219
+
220
+ # Convert to binary pattern (probabilistic)
221
+ hash_probs = torch.sigmoid(thresholded * 10.0) # Sharp sigmoid
222
+
223
+ return hash_probs, modulated_hash
224
+
225
+ def get_hash_bits(self, item_vector, deterministic=False):
226
+ """Get binary hash bits for an item."""
227
+ hash_probs, _ = self.compute_hash_activation(item_vector)
228
+
229
+ if deterministic:
230
+ hash_bits = (hash_probs > 0.5).float()
231
+ else:
232
+ hash_bits = torch.bernoulli(hash_probs)
233
+
234
+ return hash_bits
235
+
236
+ def hebbian_update(self, item_vector, co_occurring_items=None):
237
+ """Apply Hebbian learning rule: Δw = η * pre * post.
238
+
239
+ Strengthens connections between co-activated hash bits and updates
240
+ the co-activation matrix for associative learning.
241
+ """
242
+ hash_probs, modulated_hash = self.compute_hash_activation(item_vector)
243
+
244
+ # Store activity in history
245
+ with torch.no_grad():
246
+ ptr = int(self.history_pointer.item())
247
+ self.activity_history[ptr % self.activity_history.size(0)].copy_(hash_probs.detach())
248
+ self.history_pointer.add_(1)
249
+ self.history_pointer.remainder_(self.activity_history.size(0))
250
+
251
+ # Hebbian weight update: strengthen active bits
252
+ plasticity_rate = torch.clamp(self.plasticity_rate, 0.001, 0.1)
253
+
254
+ # Basic Hebbian rule: Δw = η * pre * post
255
+ activity_strength = torch.abs(modulated_hash)
256
+ hebbian_delta = plasticity_rate * activity_strength * hash_probs
257
+
258
+ # Update Hebbian weights
259
+ with torch.no_grad():
260
+ self.hebbian_weights.data.add_(hebbian_delta * 0.05)
261
+ self.hebbian_weights.data.clamp_(-2.0, 2.0)
262
+
263
+ # Co-activation matrix update if multiple items provided
264
+ if co_occurring_items is not None:
265
+ self.update_coactivation_matrix(hash_probs, co_occurring_items)
266
+
267
+ return hash_probs
268
+
269
+ def update_coactivation_matrix(self, current_activation, co_occurring_items):
270
+ """Update co-activation matrix based on items that occur together."""
271
+ with torch.no_grad():
272
+ for co_item in co_occurring_items:
273
+ co_item_vector = item_to_vector(co_item, self.input_dim).to(current_activation.device)
274
+ co_activation, _ = self.compute_hash_activation(co_item_vector)
275
+
276
+ # Outer product for co-activation strengthening
277
+ coactivation_update = torch.outer(current_activation, co_activation)
278
+
279
+ # Update co-activation matrix
280
+ learning_rate = 0.01
281
+ self.coactivation_matrix.data.add_(learning_rate * coactivation_update)
282
+ self.coactivation_matrix.data.clamp_(-1.0, 1.0)
283
+
284
+ def get_similar_patterns(self, item_vector, top_k=5):
285
+ """Find historically similar activation patterns."""
286
+ current_probs, _ = self.compute_hash_activation(item_vector)
287
+
288
+ # Compare with history
289
+ similarities = []
290
+ for i in range(self.activity_history.shape[0]):
291
+ hist_pattern = self.activity_history[i]
292
+ if torch.sum(hist_pattern) > 0: # Non-zero pattern
293
+ similarity = safe_cosine_similarity(
294
+ current_probs.unsqueeze(0),
295
+ hist_pattern.unsqueeze(0)
296
+ ).squeeze()
297
+ similarities.append((i, float(similarity.item())))
298
+
299
+ # Sort by similarity
300
+ similarities.sort(key=lambda x: x[1], reverse=True)
301
+
302
+ return similarities[:top_k]
303
+
304
+ def apply_forgetting(self, forget_rate=0.99):
305
+ """Apply gradual forgetting to prevent overfitting."""
306
+ with torch.no_grad():
307
+ self.hebbian_weights.data.mul_(forget_rate)
308
+ self.coactivation_matrix.data.mul_(forget_rate)
309
+
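To make the pipeline in the class docstring concrete (base hash → Hebbian modulation → threshold → sharp sigmoid), a small illustrative sketch using only the names defined above; the manual_seed call is just for reproducibility and is not part of the upload:

torch.manual_seed(0)
hf = LearnableHashFunction(input_dim=16, hash_output_bits=8)
v = item_to_vector("lion", vector_dim=16)
probs, modulated = hf.compute_hash_activation(v)      # probs = sigmoid(10 * (h * tanh(w_hebbian) - θ))
bits = hf.get_hash_bits(v, deterministic=True)        # probs thresholded at 0.5
hf.hebbian_update(v, co_occurring_items=["savanna"])  # Δw ∝ η * |h_mod| * probs, plus a co-activation update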
310
+ ###########################################################################################################################################
311
+ ################################################- - - HEBBIAN BLOOM FILTER - - -#######################################################
312
+
313
+ class HebbianBloomFilter(nn.Module):
314
+ """Probabilistic set membership filter with Hebbian learning.
315
+
316
+ Combines traditional Bloom filter efficiency with adaptive hash functions
317
+ that learn from usage patterns and develop associative mappings.
318
+
319
+ Key Features:
320
+ - Learnable hash functions with neural plasticity
321
+ - Confidence-based membership testing
322
+ - Associative learning between related items
323
+ - Temporal decay for forgetting old patterns
324
+ """
325
+ def __init__(self, capacity=10000, error_rate=0.01, vector_dim=64, num_hash_functions=8):
326
+ super().__init__()
327
+ self.capacity = capacity
328
+ self.error_rate = error_rate
329
+ self.vector_dim = vector_dim
330
+ self.num_hash_functions = num_hash_functions
331
+
332
+ # Calculate optimal bit array size
333
+ self.bit_array_size = self._calculate_bit_array_size(capacity, error_rate)
334
+
335
+ # Learnable hash functions
336
+ self.hash_functions = nn.ModuleList([
337
+ LearnableHashFunction(vector_dim, hash_output_bits=32)
338
+ for _ in range(num_hash_functions)
339
+ ])
340
+
341
+ # Bit array with confidence scores (not just binary)
342
+ self.register_buffer('bit_array', torch.zeros(self.bit_array_size))
343
+ self.register_buffer('confidence_array', torch.zeros(self.bit_array_size))
344
+
345
+ # Item storage for association learning
346
+ self.stored_items = {}
347
+ self.item_vectors = {}
348
+
349
+ # Usage statistics
350
+ self.register_buffer('access_counts', torch.zeros(self.bit_array_size))
351
+ self.register_buffer('total_items_added', torch.tensor(0, dtype=torch.long))
352
+
353
+ # Associative learning parameters
354
+ self.association_strength = nn.Parameter(torch.tensor(0.1))
355
+ self.confidence_threshold = nn.Parameter(torch.tensor(0.5))
356
+
357
+ # Temporal decay for forgetting
358
+ self.decay_rate = nn.Parameter(torch.tensor(0.999))
359
+
360
+ def _calculate_bit_array_size(self, capacity, error_rate):
361
+ """Calculate optimal bit array size for given capacity and error rate."""
362
+ return int(-capacity * math.log(error_rate) / (math.log(2) ** 2))
363
+
364
+ def _get_bit_indices(self, item_vector):
365
+ """Get bit indices from all hash functions for an item."""
366
+ indices = []
367
+ confidences = []
368
+
369
+ for hash_func in self.hash_functions:
370
+ hash_bits = hash_func.get_hash_bits(item_vector, deterministic=True)
371
+
372
+ # Convert hash bits to index in bit array using binary encoding -> integer -> modulo
373
+ weights = (1 << torch.arange(len(hash_bits), device=hash_bits.device, dtype=torch.int64))
374
+ bit_index = int((hash_bits.to(dtype=torch.int64) * weights).sum().item())
375
+ bit_index = bit_index % self.bit_array_size
376
+
377
+ # Compute confidence based on hash activation strength
378
+ hash_probs, _ = hash_func.compute_hash_activation(item_vector)
379
+ confidence = torch.mean(torch.abs(hash_probs - 0.5)) * 2 # Distance from uncertain (0.5)
380
+
381
+ indices.append(bit_index)
382
+ confidences.append(confidence.item())
383
+
384
+ return indices, confidences
385
+
386
+ def add(self, item, associated_items=None):
387
+ """Add item to the Hebbian Bloom filter with optional associations.
388
+
389
+ Args:
390
+ item: Item to add to the filter
391
+ associated_items: Optional list of items to associate with this item
392
+
393
+ Returns:
394
+ List of bit indices that were set for this item
395
+ """
396
+ # Convert item to vector
397
+ item_vector = item_to_vector(item, self.vector_dim)
398
+
399
+ # Store item information
400
+ item_key = str(item)
401
+ self.stored_items[item_key] = item
402
+ self.item_vectors[item_key] = item_vector
403
+
404
+ # Get bit indices and confidences
405
+ indices, confidences = self._get_bit_indices(item_vector)
406
+
407
+ # Update bit array and confidence array
408
+ with torch.no_grad():
409
+ for idx, conf in zip(indices, confidences):
410
+ self.bit_array[idx] = 1.0
411
+ self.confidence_array[idx] = max(float(self.confidence_array[idx].item()), conf)
412
+ self.access_counts[idx] += 1
413
+
414
+ # Apply Hebbian learning to hash functions
415
+ for hash_func in self.hash_functions:
416
+ hash_func.hebbian_update(item_vector, associated_items)
417
+
418
+ # Update item count
419
+ with torch.no_grad():
420
+ self.total_items_added.add_(1)
421
+
422
+ # Learn associations if provided
423
+ if associated_items:
424
+ self._learn_associations(item, associated_items)
425
+
426
+ return indices
427
+
428
+ def _learn_associations(self, primary_item, associated_items):
429
+ """Learn associations between items using Hebbian principles."""
430
+ primary_vector = item_to_vector(primary_item, self.vector_dim)
431
+
432
+ for assoc_item in associated_items:
433
+ assoc_vector = item_to_vector(assoc_item, self.vector_dim)
434
+
435
+ # Compute similarity
436
+ similarity = safe_cosine_similarity(
437
+ primary_vector.unsqueeze(0),
438
+ assoc_vector.unsqueeze(0)
439
+ ).squeeze()
440
+
441
+ # Strengthen hash functions based on similarity
442
+ association_strength = torch.clamp(self.association_strength, 0.01, 1.0)
443
+ _ = association_strength # keep variable used to respect format
444
+
445
+ for hash_func in self.hash_functions:
446
+ # If items are similar, encourage similar hash patterns
447
+ if float(similarity.item()) > 0.5:
448
+ hash_func.hebbian_update(primary_vector, [assoc_item])
449
+
450
+ def query(self, item, return_confidence=False):
451
+ """Query membership with optional confidence estimation.
452
+
453
+ Args:
454
+ item: Item to query
455
+ return_confidence: Whether to return confidence score
456
+
457
+ Returns:
458
+ Boolean membership result, optionally with confidence score
459
+ """
460
+ item_vector = item_to_vector(item, self.vector_dim)
461
+ indices, confidences = self._get_bit_indices(item_vector)
462
+
463
+ # Check if all bits are set
464
+ bit_checks = [self.bit_array[idx].item() > 0 for idx in indices]
465
+ is_member = all(bit_checks)
466
+
467
+ if return_confidence:
468
+ # Compute confidence based on multiple factors
469
+ bit_confidences = [self.confidence_array[idx].item() for idx in indices]
470
+ hash_confidences = confidences
471
+
472
+ # Combined confidence
473
+ bit_conf = np.mean(bit_confidences) if bit_confidences else 0.0
474
+ hash_conf = np.mean(hash_confidences) if hash_confidences else 0.0
475
+
476
+ # Access frequency confidence
477
+ access_conf = np.mean([self.access_counts[idx].item() for idx in indices])
478
+ access_conf = min(access_conf / 10.0, 1.0) # Normalize
479
+
480
+ overall_confidence = (bit_conf + hash_conf + access_conf) / 3.0
481
+
482
+ return is_member, overall_confidence
483
+
484
+ return is_member
485
+
486
+ def find_similar_items(self, query_item, top_k=5):
487
+ """Find items similar to query using learned associations (vector + coactivation)."""
488
+ query_vector = item_to_vector(query_item, self.vector_dim)
489
+
490
+ # Precompute query activations and coactivation weights for each hash function
491
+ coact_weights = []
492
+ for hash_func in self.hash_functions:
493
+ q_act, _ = hash_func.compute_hash_activation(query_vector)
494
+ # act_q^T M act_i = dot(M^T act_q, act_i)
495
+ q_weight = torch.matmul(hash_func.coactivation_matrix.t(), q_act)
496
+ coact_weights.append((q_act, q_weight))
497
+
498
+ similarities = []
499
+ for item_key, item_vector in self.item_vectors.items():
500
+ # Base cosine similarity in item space
501
+ base_sim = safe_cosine_similarity(
502
+ query_vector.unsqueeze(0),
503
+ item_vector.unsqueeze(0)
504
+ ).squeeze().item()
505
+
506
+ # Coactivation similarity averaged over hash functions
507
+ co_sim_sum = 0.0
508
+ for (hash_func, (q_act, q_weight)) in zip(self.hash_functions, coact_weights):
509
+ i_act, _ = hash_func.compute_hash_activation(item_vector)
510
+ co_sim_sum += torch.dot(q_weight, i_act).item() / max(1, len(i_act))
511
+ co_sim = co_sim_sum / max(1, len(self.hash_functions))
512
+
513
+ # Blend scores (alpha vector, beta coactivation)
514
+ alpha, beta = 0.6, 0.4
515
+ score = alpha * base_sim + beta * co_sim
516
+ similarities.append((self.stored_items[item_key], score))
517
+
518
+ similarities.sort(key=lambda x: x[1], reverse=True)
519
+ return similarities[:top_k]
520
+
521
+ def get_hash_statistics(self):
522
+ """Get statistics about hash function learning."""
523
+ stats = {
524
+ 'total_items': int(self.total_items_added.item()),
525
+ 'bit_array_utilization': (self.bit_array > 0).float().mean().item(),
526
+ 'average_confidence': self.confidence_array.mean().item(),
527
+ 'hash_function_stats': []
528
+ }
529
+
530
+ for i, hash_func in enumerate(self.hash_functions):
531
+ hash_stats = {
532
+ 'function_id': i,
533
+ 'hebbian_weights_mean': hash_func.hebbian_weights.mean().item(),
534
+ 'plasticity_rate': hash_func.plasticity_rate.item(),
535
+ 'activation_threshold_mean': hash_func.activation_threshold.mean().item()
536
+ }
537
+ stats['hash_function_stats'].append(hash_stats)
538
+
539
+ return stats
540
+
541
+ def apply_temporal_decay(self):
542
+ """Apply temporal decay to implement forgetting."""
543
+ decay_rate = torch.clamp(self.decay_rate, 0.9, 0.999)
544
+
545
+ with torch.no_grad():
546
+ self.confidence_array.mul_(decay_rate)
547
+ self.access_counts.mul_(decay_rate)
548
+
549
+ # Remove bits with very low confidence
550
+ low_confidence_mask = self.confidence_array < 0.1
551
+ self.bit_array[low_confidence_mask] = 0.0
552
+ self.confidence_array[low_confidence_mask] = 0.0
553
+
554
+ # Apply forgetting to hash functions
555
+ for hash_func in self.hash_functions:
556
+ hash_func.apply_forgetting(float(decay_rate.item()))
557
+
558
+ def optimize_structure(self):
559
+ """Optimize the filter structure based on usage patterns."""
560
+ with torch.no_grad():
561
+ # Adjust thresholds based on access patterns (coarse global heuristic)
562
+ high_access_ratio = (self.access_counts > self.access_counts.mean()).float().mean().item()
563
+ adjustment = -0.01 * high_access_ratio
564
+ for hash_func in self.hash_functions:
565
+ hash_func.activation_threshold.data.add_(adjustment)
566
+ hash_func.activation_threshold.data.clamp_(-1.0, 1.0)
567
+
568
+ ###########################################################################################################################################
569
+ ############################################- - - ASSOCIATIVE HEBBIAN BLOOM SYSTEM - - -###############################################
570
+
571
+ class AssociativeHebbianBloomSystem(nn.Module):
572
+ """Ensemble of Hebbian Bloom filters with meta-learning.
573
+
574
+ Combines multiple Hebbian Bloom filters with learned routing to create
575
+ a robust, scalable associative memory system with ensemble decision making.
576
+
577
+ Features:
578
+ - Multiple specialized filters with learned routing
579
+ - Ensemble voting for robust membership decisions
580
+ - Global association learning across filters
581
+ - Automatic system maintenance and optimization
582
+ """
583
+ def __init__(self, capacity=10000, vector_dim=64, num_filters=3):
584
+ super().__init__()
585
+ self.capacity = capacity
586
+ self.vector_dim = vector_dim
587
+ self.num_filters = num_filters
588
+
589
+ # Multiple Hebbian Bloom filters for ensemble behavior
590
+ self.filters = nn.ModuleList([
591
+ HebbianBloomFilter(
592
+ capacity=capacity // num_filters,
593
+ error_rate=0.01,
594
+ vector_dim=vector_dim,
595
+ num_hash_functions=6
596
+ ) for _ in range(num_filters)
597
+ ])
598
+
599
+ # Meta-learning for filter selection
600
+ self.filter_selector = nn.Sequential(
601
+ nn.Linear(vector_dim, vector_dim // 2),
602
+ nn.ReLU(),
603
+ nn.Linear(vector_dim // 2, num_filters),
604
+ nn.Softmax(dim=-1)
605
+ )
606
+
607
+ # Global association learning
608
+ self.global_association_net = nn.Sequential(
609
+ nn.Linear(vector_dim * 2, vector_dim),
610
+ nn.Tanh(),
611
+ nn.Linear(vector_dim, 1),
612
+ nn.Sigmoid()
613
+ )
614
+
615
+ # System statistics
616
+ self.register_buffer('global_access_count', torch.tensor(0, dtype=torch.long))
617
+
618
+ def add_item(self, item, category=None, associated_items=None):
619
+ """Add item to the most appropriate filter(s)."""
620
+ item_vector = item_to_vector(item, self.vector_dim)
621
+
622
+ # Determine which filter(s) to use
623
+ filter_weights = self.filter_selector(item_vector.unsqueeze(0)).squeeze(0)
624
+
625
+ # Light load-balancing penalty to avoid starving filters
626
+ with torch.no_grad():
627
+ loads = torch.tensor([f.total_items_added.item() / max(1, f.capacity) for f in self.filters], dtype=filter_weights.dtype, device=filter_weights.device)
628
+ filter_weights = filter_weights - 0.1 * loads
629
+
630
+ # Add to filters based on weights (top-k selection)
631
+ top_k_filters = min(2, self.num_filters) # Use top 2 filters
632
+ _, top_indices = torch.topk(filter_weights, top_k_filters)
633
+
634
+ added_to_filters = []
635
+ for filter_idx in top_indices:
636
+ filter_obj = self.filters[filter_idx.item()]
637
+ indices = filter_obj.add(item, associated_items)
638
+ added_to_filters.append((filter_idx.item(), indices))
639
+
640
+ # Update global statistics
641
+ with torch.no_grad():
642
+ self.global_access_count.add_(1)
643
+
644
+ return added_to_filters
645
+
646
+ def query_item(self, item, return_detailed=False):
647
+ """Query item across all filters with ensemble confidence."""
648
+ item_vector = item_to_vector(item, self.vector_dim)
649
+
650
+ results = []
651
+ confidences = []
652
+
653
+ for i, filter_obj in enumerate(self.filters):
654
+ is_member, confidence = filter_obj.query(item, return_confidence=True)
655
+ results.append(is_member)
656
+ confidences.append(confidence)
657
+
658
+ # Ensemble decision
659
+ positive_votes = sum(results)
660
+ avg_confidence = np.mean(confidences)
661
+
662
+ # Final decision based on majority vote and confidence
663
+ ensemble_decision = positive_votes > len(self.filters) // 2
664
+
665
+ if return_detailed:
666
+ return {
667
+ 'is_member': ensemble_decision,
668
+ 'confidence': avg_confidence,
669
+ 'individual_results': list(zip(results, confidences)),
670
+ 'positive_votes': positive_votes,
671
+ 'total_filters': len(self.filters)
672
+ }
673
+
674
+ return ensemble_decision
675
+
676
+ def find_associations(self, query_item, top_k=10):
677
+ """Find associated items across all filters."""
678
+ all_similarities = []
679
+
680
+ for filter_obj in self.filters:
681
+ similarities = filter_obj.find_similar_items(query_item, top_k)
682
+ all_similarities.extend(similarities)
683
+
684
+ # Remove duplicates and re-rank
685
+ unique_items = {}
686
+ for item, similarity in all_similarities:
687
+ item_key = str(item)
688
+ if item_key in unique_items:
689
+ unique_items[item_key] = max(unique_items[item_key], similarity)
690
+ else:
691
+ unique_items[item_key] = similarity
692
+
693
+ # Sort by similarity
694
+ ranked_items = sorted(unique_items.items(), key=lambda x: x[1], reverse=True)
695
+
696
+ return ranked_items[:top_k]
697
+
698
+ def system_maintenance(self):
699
+ # Apply temporal decay to all filters
700
+ for filter_obj in self.filters:
701
+ filter_obj.apply_temporal_decay()
702
+ filter_obj.optimize_structure()
703
+
704
+ # System-level optimization every 1000 accesses
705
+ if self.global_access_count % 1000 == 0:
706
+ self._global_optimization()
707
+
708
+ def _global_optimization(self):
709
+ print("Performing global Hebbian Bloom system optimization...")
710
+
711
+ # Rebalance filter usage if needed
712
+ filter_utilizations = []
713
+ for filter_obj in self.filters:
714
+ stats = filter_obj.get_hash_statistics()
715
+ utilization = stats['bit_array_utilization']
716
+ filter_utilizations.append(utilization)
717
+
718
+ # Could implement filter rebalancing here if needed
719
+
720
+ def get_system_statistics(self):
721
+ stats = {
722
+ 'global_access_count': int(self.global_access_count.item()),
723
+ 'num_filters': self.num_filters,
724
+ 'filter_statistics': []
725
+ }
726
+
727
+ for i, filter_obj in enumerate(self.filters):
728
+ filter_stats = filter_obj.get_hash_statistics()
729
+ filter_stats['filter_id'] = i
730
+ stats['filter_statistics'].append(filter_stats)
731
+
732
+ return stats
733
+
734
+ ###########################################################################################################################################
735
+ ####################################################- - - DEMO AND TESTING - - -#######################################################
736
+
737
+ def test_hebbian_bloom():
738
+ print("Testing Hebbian Bloom Filter - Self-Organizing Probabilistic Memory")
739
+ print("=" * 85)
740
+
741
+ # Create Hebbian Bloom Filter system
742
+ system = AssociativeHebbianBloomSystem(
743
+ capacity=1000,
744
+ vector_dim=32,
745
+ num_filters=3
746
+ )
747
+
748
+ print(f"Created Hebbian Bloom System:")
749
+ print(f" - Capacity: 1000 items")
750
+ print(f" - Vector dimension: 32")
751
+ print(f" - Number of filters: 3")
752
+ print(f" - Hash functions per filter: 6")
753
+
754
+ # Test with related items to demonstrate Hebbian learning
755
+ print("\nAdding related items to demonstrate associative learning...")
756
+
757
+ # Add some related items
758
+ fruits = ["apple", "banana", "orange", "grape", "strawberry"]
759
+ colors = ["red", "yellow", "orange", "purple", "red"]
760
+
761
+ for fruit, color in zip(fruits, colors):
762
+ system.add_item(fruit, associated_items=[color, "fruit"])
763
+ system.add_item(color, associated_items=[fruit, "color"])
764
+
765
+ # Add some numbers
766
+ numbers = [1, 2, 3, 4, 5]
767
+ for num in numbers:
768
+ system.add_item(num, associated_items=["number", "digit"])
769
+
770
+ print(f"Added {len(fruits)} fruits with colors and {len(numbers)} numbers")
771
+
772
+ # Test membership queries
773
+ print("\nTesting membership queries...")
774
+
775
+ test_items = ["apple", "banana", "pineapple", 1, 3, 7, "red", "blue"]
776
+
777
+ for item in test_items:
778
+ result = system.query_item(item, return_detailed=True)
779
+ print(f" '{item}': {result['is_member']} (confidence: {result['confidence']:.3f}, votes: {result['positive_votes']}/{result['total_filters']})")
780
+
781
+ # Test associative retrieval
782
+ print("\nTesting associative retrieval...")
783
+
784
+ query_items = ["apple", "red", 2]
785
+ for query in query_items:
786
+ associations = system.find_associations(query, top_k=5)
787
+ print(f"\nItems associated with '{query}':")
788
+ for i, (item, similarity) in enumerate(associations[:3]):
789
+ print(f" {i+1}. {item} (similarity: {similarity:.3f})")
790
+
791
+ # Test Hebbian adaptation
792
+ print("\nTesting Hebbian adaptation with repeated associations...")
793
+
794
+ # Repeatedly associate "apple" with "healthy"
795
+ for _ in range(5):
796
+ system.add_item("apple", associated_items=["healthy", "nutrition"])
797
+
798
+ # Check if "healthy" becomes more associated with "apple"
799
+ updated_associations = system.find_associations("apple", top_k=5)
800
+ print("Updated associations for 'apple' after repeated 'healthy' associations:")
801
+ for item, similarity in updated_associations[:3]:
802
+ print(f" {item}: {similarity:.3f}")
803
+
804
+ # System statistics
805
+ stats = system.get_system_statistics()
806
+ print(f"\nSystem Statistics:")
807
+ print(f" - Total accesses: {stats['global_access_count']}")
808
+
809
+ for filter_stats in stats['filter_statistics']:
810
+ print(f" Filter {filter_stats['filter_id']}:")
811
+ print(f" - Items added: {filter_stats['total_items']}")
812
+ print(f" - Bit utilization: {filter_stats['bit_array_utilization']:.3f}")
813
+ print(f" - Average confidence: {filter_stats['average_confidence']:.3f}")
814
+
815
+ # Test temporal decay
816
+ print("\nApplying temporal decay...")
817
+ system.system_maintenance()
818
+
819
+ print("\nHebbian Bloom Filter test completed!")
820
+ print("✓ Self-organizing hash functions with Hebbian learning")
821
+ print("✓ Associative memory formation")
822
+ print("✓ Adaptive confidence estimation")
823
+ print("✓ Temporal decay and forgetting mechanisms")
824
+ print("✓ Ensemble filtering for robust membership testing")
825
+
826
+ return True
827
+
828
+ def hebbian_learning_demo():
829
+ """Demonstrate Hebbian learning in action."""
830
+ print("\n" + "="*60)
831
+ print("HEBBIAN LEARNING DEMONSTRATION")
832
+ print("="*60)
833
+
834
+ # Create simple single filter for clear demonstration
835
+ hb_filter = HebbianBloomFilter(capacity=100, vector_dim=16, num_hash_functions=4)
836
+
837
+ # Add items with strong associations
838
+ print("Phase 1: Adding animal-habitat associations")
839
+
840
+ animals_habitats = [
841
+ ("lion", ["savanna", "africa", "predator"]),
842
+ ("tiger", ["jungle", "asia", "predator"]),
843
+ ("penguin", ["antarctica", "ice", "bird"]),
844
+ ("shark", ["ocean", "water", "predator"]),
845
+ ("eagle", ["mountain", "sky", "bird"])
846
+ ]
847
+
848
+ for animal, habitats in animals_habitats:
849
+ hb_filter.add(animal, associated_items=habitats)
850
+ for habitat in habitats:
851
+ hb_filter.add(habitat, associated_items=[animal])
852
+
853
+ # Test initial associations
854
+ print("\nInitial associations:")
855
+ similar_to_lion = hb_filter.find_similar_items("lion", top_k=3)
856
+ for item, similarity in similar_to_lion:
857
+ print(f" lion -> {item}: {similarity:.3f}")
858
+
859
+ # Strengthen specific associations through repetition
860
+ print("\nPhase 2: Strengthening lion-savanna association through repetition")
861
+
862
+ for _ in range(10):
863
+ hb_filter.add("lion", associated_items=["savanna"])
864
+ hb_filter.add("savanna", associated_items=["lion"])
865
+
866
+ # Test strengthened associations
867
+ print("\nStrengthened associations:")
868
+ similar_to_lion = hb_filter.find_similar_items("lion", top_k=3)
869
+ for item, similarity in similar_to_lion:
870
+ print(f" lion -> {item}: {similarity:.3f}")
871
+
872
+ # Show hash function adaptation
873
+ stats = hb_filter.get_hash_statistics()
874
+ print(f"\nHash function adaptation statistics:")
875
+ for hash_stat in stats['hash_function_stats'][:2]: # Show first 2
876
+ print(f" Hash function {hash_stat['function_id']}:")
877
+ print(f" - Hebbian weights mean: {hash_stat['hebbian_weights_mean']:.4f}")
878
+ print(f" - Plasticity rate: {hash_stat['plasticity_rate']:.4f}")
879
+
880
+ print("\n Hebbian learning successfully demonstrated")
881
+ print(" Repeated associations strengthen neural pathways in hash functions")
882
+
883
+ if __name__ == "__main__":
884
+ test_hebbian_bloom()
885
+ hebbian_learning_demo()
886
+
887
+ ###########################################################################################################################################
888
+ ###########################################################################################################################################