Spaces:

domdp
/

Anonimizzazione

Running

App Files Files Community

domdp committed on May 20

Commit

8d470cb

verified ·

1 Parent(s): 0a7ed84

Upload 3 files

Browse files

Initial deployment from Colab

Files changed (3) hide show

readme.md +55 -0
requirements.txt +15 -0
stan+regex.py +1584 -0

readme.md ADDED Viewed

	@@ -0,0 +1,55 @@

+---
+title: Sistema Ibrido di Anonimizzazione Dati
+emoji: 🔒
+colorFrom: blue
+colorTo: red
+sdk: gradio
+sdk_version: 4.44.0
+app_file: app.py
+pinned: false
+license: mit
+---
+# 🔒 Sistema Ibrido di Anonimizzazione Dati
+Un sistema avanzato per l'identificazione e anonimizzazione di dati sensibili in testi italiani, che combina:
+- **Stanford Deidentifier**: Modello transformer specializzato
+- **Microsoft Presidio**: Framework enterprise per data privacy
+- **Regex personalizzate**: Pattern specifici per formati italiani
+## 🎯 Caratteristiche
+- ✅ Riconoscimento di **10+ tipi di entità** (persone, CF, P.IVA, IBAN, telefoni, email, ecc.)
+- 🎛️ **Controllo granulare** per ogni tipo di entità
+- 🔄 **3 modalità di anonimizzazione**: sostituzione, oscuramento, pseudonimizzazione
+- ⚙️ **Parametri avanzati** personalizzabili
+- 📊 **Statistiche dettagliate** di rilevamento
+## 🚀 Utilizzo
+1. Inserisci il testo contenente dati sensibili
+2. Seleziona i tipi di entità da anonimizzare
+3. Configura il metodo di anonimizzazione
+4. Visualizza i risultati con entità evidenziate
+## 🔬 Tecnologie
+- **Gradio** per l'interfaccia web
+- **spaCy** per il processamento NLP
+- **Transformers** per i modelli deep learning
+- **Presidio** per la privacy dei dati
+## 📋 Esempi supportati
+- **Persone**: Mario Rossi, Dott. Giovanni Bianchi
+- **Codici Fiscali**: RSSMRC80D15H501V
+- **Partite IVA**: IT12345678901
+- **IBAN**: IT60X0542811101000000123456
+- **Telefoni**: +39 333-123-4567
+- **Email**: mario.rossi@example.com
+- **Targhe**: AB123CD
+---
+Basato sulla ricerca comparativa di Small Language Models per l'anonimizzazione di dati in lingua italiana.

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+gradio>=4.0.0
+presidio-analyzer[transformers]
+presidio-anonymizer
+spacy>=3.8.0
+spacy-transformers
+spacy-alignments
+transformers>=4.21.0
+torch>=1.12.0
+accelerate>=0.20.0
+pandas>=1.3.0
+numpy>=1.21.0
+# Modelli spaCy (installati automaticamente)
+https://github.com/explosion/spacy-models/releases/download/it_core_news_sm-3.8.0/it_core_news_sm-3.8.0-py3-none-any.whl
+https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl

stan+regex.py ADDED Viewed

	@@ -0,0 +1,1584 @@

+# =========================================================
+# CELLA 1: IMPORT E SETUP INIZIALE
+# =========================================================
+import os
+import re
+import gradio as gr
+import pandas as pd
+import json
+from typing import List, Dict, Tuple, Any
+import spacy
+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+# Import Presidio
+from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, PatternRecognizer
+from presidio_analyzer.pattern_recognizer import Pattern
+from presidio_analyzer.nlp_engine import NlpEngine, NlpEngineProvider
+from presidio_analyzer.context_aware_enhancers import LemmaContextAwareEnhancer
+from presidio_anonymizer import AnonymizerEngine
+from presidio_anonymizer.entities import OperatorConfig
+# Basic setup: report on stdout that all imports above completed
+print("✅ Import completati!")
+# =========================================================
+# CELLA 2: CONFIGURAZIONE RICONOSCITORI PERSONALIZZATI (CORRETTA)
+# =========================================================
+def create_italian_recognizers():
+    """
+    Build the custom Presidio pattern recognizers for Italian identifiers.
+    Each recognizer combines one or more scored regex patterns with a list of
+    context words passed to Presidio as the recognizer's `context`.
+    Every recognizer is declared for language "en" because the analyzer in
+    this file is queried with language="en".
+    Returns:
+        list[PatternRecognizer]: one recognizer per entity, in the order
+        CODICE_FISCALE, PARTITA_IVA, IBAN_CODE, TARGA, PHONE_NUMBER, DATE_TIME.
+    """
+    # One entry per entity: (entity name, context words, [(pattern name, regex, score), ...])
+    recognizer_specs = [
+        (
+            "CODICE_FISCALE",
+            ["codice", "fiscale", "cf", "c.f.", "cod.fisc.", "codice fiscale"],
+            [("codice fiscale", r"\b[A-Z]{6}[0-9]{2}[A-Z][0-9]{2}[A-Z][0-9]{3}[A-Z]\b", 0.9)],
+        ),
+        (
+            "PARTITA_IVA",
+            ["partita", "iva", "p.iva", "p. iva", "piva", "partita iva"],
+            [("partita iva", r"\b(IT)?[0-9]{11}\b", 0.85)],
+        ),
+        (
+            "IBAN_CODE",
+            ["iban", "bonifico", "bancario", "conto", "pagamento", "IBAN"],
+            # [A-Z0-9]{0,16} accepts the same strings as the former ([A-Z0-9]?){0,16}
+            # without the nested optional quantifier
+            [("iban", r"\b[A-Z]{2}[0-9]{2}[A-Z0-9]{4}[0-9]{7}[A-Z0-9]{0,16}\b", 0.9)],
+        ),
+        (
+            "TARGA",
+            ["targa", "auto", "veicolo", "automobile", "macchina"],
+            [("targa", r"\b[A-Z]{2}[0-9]{3}[A-Z]{2}\b", 0.85)],
+        ),
+        (
+            "PHONE_NUMBER",
+            ["telefono", "cellulare", "tel", "chiamare", "contattare", "mobile"],
+            [
+                # "\b" in front of "+" only matches when a word character precedes the
+                # plus sign, so numbers after a space or at the start of the text were
+                # missed. A look-behind anchors the prefix instead, and optional
+                # space/hyphen separators are accepted between the digit groups.
+                ("telefono (con prefisso)", r"(?<!\w)\+39[\s-]?[0-9]{3}[\s-]?[0-9]{3}[\s-]?[0-9]{4}\b", 0.9),
+                ("telefono (cellulare)", r"\b[3][0-9]{9}\b", 0.8),
+                ("telefono (fisso)", r"\b0[0-9]{1,3}[-\s]?[0-9]{7}\b", 0.7),
+                ("telefono (generico)", r"\b[0-9]{10}\b", 0.6),
+            ],
+        ),
+        (
+            "DATE_TIME",
+            ["nato", "nata", "data di nascita", "nasce", "data", "nascita"],
+            [
+                ("data (dd/mm/yyyy)", r"\b[0-3][0-9]/[0-1][0-9]/[1-2][0-9]{3}\b", 0.9),
+                ("data (dd-mm-yyyy)", r"\b[0-3][0-9]-[0-1][0-9]-[1-2][0-9]{3}\b", 0.9),
+                ("data (d/m/yyyy)", r"\b[1-9]/[1-9]/[1-2][0-9]{3}\b", 0.8),
+                ("data (dd/mm/yy)", r"\b[0-3][0-9]/[0-1][0-9]/[0-9]{2}\b", 0.8),
+            ],
+        ),
+    ]
+    recognizers = []
+    for entity_name, context_words, pattern_specs in recognizer_specs:
+        patterns = [
+            Pattern(name=pattern_name, regex=regex, score=score)
+            for pattern_name, regex, score in pattern_specs
+        ]
+        recognizers.append(
+            PatternRecognizer(
+                supported_entity=entity_name,
+                patterns=patterns,
+                context=context_words,
+                supported_language="en",
+            )
+        )
+    print(f"✅ Creati {len(recognizers)} riconoscitori personalizzati")
+    return recognizers
+# Build the recognizers once at import time; setup_presidio_simple() looks this name up in globals()
+italian_recognizers = create_italian_recognizers()
+# =========================================================
+# CELLA 3: STANFORD COME RECOGNIZER SEPARATO
+# =========================================================
+from presidio_analyzer import EntityRecognizer, RecognizerResult
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+import torch
+class StanfordRecognizer(EntityRecognizer):
+    """
+    Presidio recognizer backed by the StanfordAIMI/stanford-deidentifier-base model.
+    The model is run through a Hugging Face token-classification pipeline and
+    its labels are translated to Presidio entity names. When the model cannot
+    be loaded the recognizer stays usable but returns no results.
+    """
+    # Model label -> Presidio entity name; labels missing here pass through unchanged
+    LABEL_TO_PRESIDIO = {
+        "PATIENT": "PERSON",
+        "STAFF": "PERSON",
+        "HOSP": "ORGANIZATION",
+        "HOSPITAL": "ORGANIZATION",
+        "AGE": "AGE",
+        "DATE": "DATE_TIME",
+        "PHONE": "PHONE_NUMBER",
+        "PER": "PERSON",
+        "LOC": "LOCATION",
+        "ORG": "ORGANIZATION",
+        "PERSON": "PERSON",
+        "LOCATION": "LOCATION",
+        "ORGANIZATION": "ORGANIZATION",
+    }
+    def __init__(self):
+        """Load tokenizer, model and pipeline; fall back to a disabled recognizer on failure."""
+        self.supported_entities = ["PERSON", "ORGANIZATION", "LOCATION", "DATE_TIME", "AGE", "PHONE_NUMBER", "EMAIL"]
+        self.supported_language = "en"
+        # Stays None when loading fails, which makes analyze() return an empty list
+        self.pipeline = None
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained("StanfordAIMI/stanford-deidentifier-base")
+            self.model = AutoModelForTokenClassification.from_pretrained("StanfordAIMI/stanford-deidentifier-base")
+            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+            self.model.to(self.device)
+            self.model.eval()
+            # The pipeline takes care of tokenisation and of grouping tokens into entities
+            self.pipeline = pipeline(
+                "token-classification",
+                model=self.model,
+                tokenizer=self.tokenizer,
+                device=0 if torch.cuda.is_available() else -1,
+                aggregation_strategy="max"
+            )
+            print("✅ Modello Stanford caricato con successo!")
+        except Exception as e:
+            print(f"⚠️ Errore nel caricamento del modello Stanford: {e}")
+            self.pipeline = None
+        super().__init__(supported_entities=self.supported_entities, supported_language=self.supported_language)
+    def analyze(self, text, entities, nlp_artifacts):
+        """
+        Run the model on `text` and return the hits as RecognizerResult objects.
+        Args:
+            text: the text to analyse.
+            entities: entity names requested by the caller; when non-empty,
+                only these types are returned.
+            nlp_artifacts: unused by this recognizer.
+        Returns:
+            list[RecognizerResult]: possibly empty. Errors raised while
+            running the pipeline are printed and the results collected so far
+            are returned.
+        """
+        results = []
+        if self.pipeline is None:
+            return results
+        try:
+            for output in self.pipeline(text):
+                label = output.get("entity_group", "")
+                # Drop a B-/I- tagging prefix if the label still carries one
+                if label.startswith(("B-", "I-")):
+                    label = label[2:]
+                presidio_entity = self.LABEL_TO_PRESIDIO.get(label, label)
+                if presidio_entity not in self.supported_entities:
+                    continue
+                # Honour an explicit entity filter supplied by the caller
+                if entities and presidio_entity not in entities:
+                    continue
+                results.append(
+                    RecognizerResult(
+                        entity_type=presidio_entity,
+                        start=int(output["start"]),
+                        end=int(output["end"]),
+                        # Convert to a plain float so later comparisons and formatting get a builtin type
+                        score=float(output["score"])
+                    )
+                )
+        except Exception as e:
+            print(f"Errore nell'analisi Stanford: {e}")
+        return results
+    def load(self):
+        """No-op: the model is loaded in the constructor."""
+        pass
+# Create the Stanford recognizer instance (the constructor loads the model)
+stanford_recognizer = StanfordRecognizer()
+# Register it only if an `analyzer` global already exists. In this file `analyzer`
+# is first assigned in Cell 5 further down, so on a top-to-bottom run this branch is skipped.
+if 'analyzer' in globals():
+    try:
+        analyzer.registry.add_recognizer(stanford_recognizer)
+        print("✅ Stanford recognizer aggiunto a Presidio")
+    except Exception as e:
+        print(f"⚠️ Errore nell'aggiunta di Stanford recognizer: {e}")
+# =========================================================
+# CELLA 4: SISTEMA DI REGEX FALLBACK
+# =========================================================
+class RegexFallbackEngine:
+    """
+    Regex-only entity detector used alongside the Presidio analyzer.
+    `patterns` maps an entity type to the list of regular expressions that
+    detect it; every match is reported with the same fixed score.
+    """
+    # Confidence assigned to every regex hit
+    DEFAULT_SCORE = 0.9
+    def __init__(self):
+        self.patterns = {
+            "PERSON": [
+                r"\b[A-Z][a-z]+\s+[A-Z][a-z]+\b",  # First name + surname
+                r"\b(?:Sig\.|Dott\.|Dr\.|Ing\.)\s+[A-Z][a-z]+\s+[A-Z][a-z]+\b",  # Title + first name + surname
+            ],
+            "CODICE_FISCALE": [
+                r"\b[A-Z]{6}\d{2}[A-Z]\d{2}[A-Z]\d{3}[A-Z]\b",
+            ],
+            "PARTITA_IVA": [
+                r"\b(?:IT)?\d{11}\b",
+            ],
+            "DATE_TIME": [
+                r"\b\d{1,2}[/\-\.]\d{1,2}[/\-\.]\d{2,4}\b",
+                r"\b\d{1,2}\s+(?:gennaio|febbraio|marzo|aprile|maggio|giugno|luglio|agosto|settembre|ottobre|novembre|dicembre)\s+\d{4}\b",
+            ],
+            "PHONE_NUMBER": [
+                # "\b" in front of "+" only matches when a word character precedes the
+                # plus sign, so "+39 ..." after a space or at the start of the text was
+                # never found. A look-behind anchors the prefix instead, and optional
+                # space/hyphen separators are accepted between the digit groups.
+                r"(?<!\w)\+39[\s-]?\d{3}[\s-]?\d{3}[\s-]?\d{4}\b",
+                r"\b\d{10}\b",
+                r"\b0\d{1,3}[-\.\s]?\d{7}\b",
+            ],
+            "EMAIL": [
+                # The former class [A-Z|a-z] also accepted a literal "|" in the top-level domain
+                r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
+            ],
+            "IBAN_CODE": [
+                r"\b[A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}[A-Z0-9]{12}\b",
+            ],
+            "TARGA": [
+                r"\b[A-Z]{2}\d{3}[A-Z]{2}\b",
+            ]
+        }
+    def analyze(self, text):
+        """
+        Scan `text` with every configured pattern.
+        Returns:
+            list[dict]: one dict per match with the keys "entity_type",
+            "start", "end", "text" and "score". Overlapping matches from
+            different patterns are all reported; no de-duplication is done here.
+        """
+        return [
+            {
+                "entity_type": entity_type,
+                "start": match.start(),
+                "end": match.end(),
+                "text": match.group(),
+                "score": self.DEFAULT_SCORE,
+            }
+            for entity_type, patterns in self.patterns.items()
+            for pattern in patterns
+            for match in re.finditer(pattern, text)
+        ]
+# Module-level regex fallback engine
+regex_engine = RegexFallbackEngine()
+# =========================================================
+# CELLA 5: CONFIGURAZIONE PRESIDIO SEMPLIFICATA
+# =========================================================
+# Per ora, creiamo una configurazione base senza Stanford, che possiamo aggiungere come recognizer separato
+from presidio_analyzer import AnalyzerEngine
+from presidio_analyzer.predefined_recognizers import (
+    PhoneRecognizer, EmailRecognizer, CreditCardRecognizer, IbanRecognizer
+)
+def setup_presidio_simple():
+    """
+    Build the Presidio analyzer/anonymizer pair used by the app.
+    A default AnalyzerEngine is created, the predefined phone, email,
+    credit-card and IBAN recognizers are added to its registry, then the
+    custom recognizers from the module-level `italian_recognizers` list are
+    added when that name is defined. A recognizer that fails to register is
+    reported on stdout and skipped.
+    Returns:
+        tuple: (AnalyzerEngine, AnonymizerEngine). If the configuration as a
+        whole fails, a pair of default engines is built and returned instead.
+    """
+    try:
+        analyzer = AnalyzerEngine()
+        # NOTE(review): a default AnalyzerEngine presumably already loads these predefined
+        # recognizers; they are re-added here to keep the original behaviour - confirm.
+        predefined = (PhoneRecognizer, EmailRecognizer, CreditCardRecognizer, IbanRecognizer)
+        for recognizer_cls in predefined:
+            try:
+                analyzer.registry.add_recognizer(recognizer_cls())
+            except Exception as e:
+                # Best effort: report the failure instead of swallowing it silently
+                print(f"Errore aggiungendo recognizer {recognizer_cls.__name__}: {e}")
+        # Custom recognizers are optional: the global may not exist yet
+        for recognizer in globals().get("italian_recognizers", []):
+            try:
+                analyzer.registry.add_recognizer(recognizer)
+            except Exception as e:
+                print(f"Errore aggiungendo recognizer: {e}")
+        anonymizer = AnonymizerEngine()
+        print("✅ Presidio configurato con successo!")
+        return analyzer, anonymizer
+    except Exception as e:
+        print(f"❌ Errore nella configurazione di Presidio: {e}")
+        # Fall back to a minimal default configuration
+        analyzer = AnalyzerEngine()
+        anonymizer = AnonymizerEngine()
+        print("⚠️ Usando configurazione di default")
+        return analyzer, anonymizer
+# Initialise Presidio
+analyzer, anonymizer = setup_presidio_simple()
+# Cell 3 registers the Stanford recognizer only when `analyzer` already exists, which is
+# not the case on a top-to-bottom run of this file, so register it here once the
+# analyzer has been created. The membership test avoids a double registration.
+_stanford = globals().get("stanford_recognizer")
+if _stanford is not None and _stanford not in analyzer.registry.recognizers:
+    try:
+        analyzer.registry.add_recognizer(_stanford)
+        print("✅ Stanford recognizer aggiunto a Presidio")
+    except Exception as e:
+        print(f"⚠️ Errore nell'aggiunta di Stanford recognizer: {e}")
+# =========================================================
+# CELLA 6: SISTEMA DI ANONIMIZZAZIONE IBRIDO (CORRETTO)
+# =========================================================
+class HybridAnonymizer:
+    """
+    Detects entities with Presidio plus a regex engine and anonymizes text.
+    Entities are exchanged as dicts with the keys "entity_type", "start",
+    "end", "text", "score" and "source".
+    """
+    # Tag written by the "replace" mode for each known entity type
+    _REPLACE_TAGS = {
+        "PERSON": "<PERSON>",
+        "LOCATION": "<LOCATION>",
+        "ORGANIZATION": "<ORGANIZATION>",
+        "DATE_TIME": "<DATE>",
+        "PHONE_NUMBER": "<PHONE>",
+        "EMAIL_ADDRESS": "<EMAIL>",
+        "IBAN_CODE": "<IBAN>",
+        "CODICE_FISCALE": "<CF>",
+        "PARTITA_IVA": "<PIVA>",
+        "TARGA": "<TARGA>",
+        "AGE": "<AGE>",
+    }
+    # Pseudonym templates; "{n}" is the per-type running number. PHONE_NUMBER is
+    # handled separately because its number is offset by 1000.
+    _PSEUDONYM_TEMPLATES = {
+        "PERSON": "Persona{n}",
+        "LOCATION": "Luogo{n}",
+        "ORGANIZATION": "Organizzazione{n}",
+        "DATE_TIME": "Data{n}",
+        "EMAIL_ADDRESS": "email{n}@esempio.com",
+        "EMAIL": "email{n}@esempio.com",
+        "IBAN_CODE": "IT00X0000000000000{n}",
+        "CODICE_FISCALE": "ABCDEF00G00H000{n}",
+        "CF": "ABCDEF00G00H000{n}",
+        "PARTITA_IVA": "IT0000000000{n}",
+        "PIVA": "IT0000000000{n}",
+        "TARGA": "XX000{n}",
+        "AGE": "XX",
+    }
+    def __init__(self, presidio_analyzer, regex_engine, anonymizer):
+        self.presidio_analyzer = presidio_analyzer
+        self.regex_engine = regex_engine
+        self.anonymizer = anonymizer
+    def analyze_text(self, text, enable_stanford=True, enable_regex=True):
+        """
+        Detect entities in `text` with Presidio and, optionally, the regex engine.
+        Args:
+            text: the text to analyse.
+            enable_stanford: accepted for interface compatibility; this method
+                does not consult it - whatever recognizers are registered on
+                the Presidio analyzer are always used.
+            enable_regex: when true, regex matches are added to the Presidio results.
+        Returns:
+            list[dict]: non-overlapping entities ordered by start offset.
+        """
+        presidio_results = self.presidio_analyzer.analyze(
+            text=text,
+            language="en",
+            entities=None,  # use every registered recognizer
+            allow_list=None
+        )
+        all_entities = [
+            {
+                "entity_type": result.entity_type,
+                "start": result.start,
+                "end": result.end,
+                "text": text[result.start:result.end],
+                "score": result.score,
+                "source": "presidio"
+            }
+            for result in presidio_results
+        ]
+        if enable_regex:
+            try:
+                for entity in self.regex_engine.analyze(text):
+                    all_entities.append({
+                        "entity_type": entity["entity_type"],
+                        "start": entity["start"],
+                        "end": entity["end"],
+                        "text": entity["text"],
+                        "score": entity["score"],
+                        "source": "regex"
+                    })
+            except Exception as e:
+                # A regex failure must not discard the Presidio results
+                print(f"Errore in Regex: {e}")
+        return self._merge_overlapping_entities(all_entities)
+    def _merge_overlapping_entities(self, entities):
+        """
+        Resolve overlaps, keeping for each overlapping run the entity with the highest score.
+        Entities are visited by start offset (higher score first on ties). An
+        entity overlapping the last kept one replaces it only when its score is
+        strictly higher; otherwise it is dropped. The input list is not modified.
+        """
+        if not entities:
+            return []
+        ordered = sorted(entities, key=lambda x: (x["start"], -x["score"]))
+        merged = []
+        for entity in ordered:
+            if not merged or merged[-1]["end"] <= entity["start"]:
+                merged.append(entity)
+            elif entity["score"] > merged[-1]["score"]:
+                merged[-1] = entity
+        return merged
+    def anonymize_text(self, text, entities, anonymization_type="replace"):
+        """
+        Anonymize `text` according to the detected `entities`.
+        Supported values of `anonymization_type`:
+        - "replace": substitute each entity with a tag (e.g. <PERSON>)
+        - "redact": overwrite each entity with asterisks of the same length
+        - "pseudonymize": substitute each entity with a numbered placeholder (e.g. Persona1)
+        Any other value is reported on stdout and handled as "replace".
+        Returns:
+            str: the anonymized text; `text` unchanged when `entities` is empty.
+        """
+        if not entities:
+            return text
+        if anonymization_type == "redact":
+            return self._redact(text, entities)
+        if anonymization_type == "pseudonymize":
+            return self._pseudonymize(text, entities)
+        if anonymization_type != "replace":
+            print(f"Tipo di anonimizzazione non supportato: {anonymization_type}, usando 'replace'")
+        return self._replace_with_tags(text, entities)
+    def _replace_with_tags(self, text, entities):
+        """Replace every entity with its tag through the Presidio anonymizer."""
+        from presidio_analyzer import RecognizerResult
+        analyzer_results = [
+            RecognizerResult(
+                entity_type=entity["entity_type"],
+                start=entity["start"],
+                end=entity["end"],
+                score=entity["score"]
+            )
+            for entity in entities
+        ]
+        operators = {
+            entity_type: OperatorConfig("replace", {"new_value": tag})
+            for entity_type, tag in self._REPLACE_TAGS.items()
+        }
+        anonymized_result = self.anonymizer.anonymize(
+            text=text,
+            analyzer_results=analyzer_results,
+            operators=operators
+        )
+        return anonymized_result.text
+    @staticmethod
+    def _redact(text, entities):
+        """Overwrite every entity span with asterisks of the same length."""
+        anonymized = text
+        # Work from the last entity to the first so earlier offsets stay valid
+        for entity in sorted(entities, key=lambda x: x["start"], reverse=True):
+            asterisks = "*" * (entity["end"] - entity["start"])
+            anonymized = anonymized[:entity["start"]] + asterisks + anonymized[entity["end"]:]
+        return anonymized
+    def _make_pseudonym(self, entity_type, number):
+        """Return the placeholder for the `number`-th distinct entity of `entity_type`."""
+        if entity_type == "PHONE_NUMBER":
+            return f"+39-XXX-XXX-{1000+number}"
+        template = self._PSEUDONYM_TEMPLATES.get(entity_type)
+        if template is None:
+            return f"{entity_type}{number}"
+        return template.format(n=number)
+    def _pseudonymize(self, text, entities):
+        """
+        Replace every entity with a numbered placeholder.
+        Numbers are assigned in reading order, so the first person in the text
+        becomes Persona1. The same (type, text) pair always gets the same placeholder.
+        """
+        pseudonyms = {}
+        type_counts = {}
+        # Pass 1: hand out placeholders front to back
+        for entity in sorted(entities, key=lambda x: x["start"]):
+            key = (entity["entity_type"], entity["text"])
+            if key not in pseudonyms:
+                number = type_counts.get(entity["entity_type"], 0) + 1
+                type_counts[entity["entity_type"]] = number
+                pseudonyms[key] = self._make_pseudonym(entity["entity_type"], number)
+        # Pass 2: splice back to front so earlier offsets stay valid
+        anonymized = text
+        for entity in sorted(entities, key=lambda x: x["start"], reverse=True):
+            new_value = pseudonyms[(entity["entity_type"], entity["text"])]
+            anonymized = anonymized[:entity["start"]] + new_value + anonymized[entity["end"]:]
+        return anonymized
+# Build the hybrid system from the analyzer, regex engine and anonymizer created above
+hybrid_anonymizer = HybridAnonymizer(analyzer, regex_engine, anonymizer)
+# =========================================================
+# CELLA 7: UTILITÀ DI VISUALIZZAZIONE
+# =========================================================
+# Background colour used to highlight each entity type
+ENTITY_COLORS = {
+    "PERSON": "#ff7f50",       # coral
+    "LOCATION": "#6495ed",     # cornflower blue
+    "ORGANIZATION": "#9acd32", # yellow green
+    "DATE_TIME": "#ffa500",    # orange
+    "PHONE_NUMBER": "#da70d6", # orchid
+    "EMAIL_ADDRESS": "#dda0dd", # plum
+    "EMAIL": "#dda0dd",        # plum (label used by the regex engine for e-mail addresses)
+    "IBAN_CODE": "#1e90ff",    # dodger blue
+    "CODICE_FISCALE": "#ff69b4", # hot pink
+    "PARTITA_IVA": "#ff69b4",  # hot pink
+    "TARGA": "#bdb76b"         # dark khaki
+}
+def highlight_entities_html(text, entities):
+    """
+    Return `text` as HTML with every entity wrapped in a coloured <span>.
+    The text and the tooltip are HTML-escaped, so markup contained in the
+    input is shown literally instead of being interpreted.
+    Args:
+        text: the original text.
+        entities: dicts with "entity_type", "start", "end", "score" and
+            optionally "source"; entities with an empty span are ignored.
+    Returns:
+        str: an HTML fragment.
+    """
+    import html
+    if not entities:
+        return html.escape(text)
+    # Each marker is (offset, order, tag). Closing tags use order 0 and opening tags
+    # order 1, so at a shared offset the previous span is closed before the next opens.
+    markers = []
+    for entity in entities:
+        start, end = entity["start"], entity["end"]
+        if end <= start:
+            continue
+        entity_type = entity["entity_type"]
+        source = entity.get("source", "unknown")
+        color = ENTITY_COLORS.get(entity_type, "#cccccc")
+        score = int(entity["score"] * 100)
+        tooltip = html.escape(f"{entity_type} ({score}%) - detected by {source}", quote=True)
+        markers.append((
+            start,
+            1,
+            f'<span style="background-color: {color}; padding: 2px; border-radius: 3px;" title="{tooltip}">',
+        ))
+        markers.append((end, 0, '</span>'))
+    markers.sort(key=lambda marker: (marker[0], marker[1]))
+    # Emit the escaped text between consecutive markers, then the marker itself
+    pieces = []
+    cursor = 0
+    for offset, _, tag in markers:
+        pieces.append(html.escape(text[cursor:offset]))
+        pieces.append(tag)
+        cursor = max(cursor, offset)
+    pieces.append(html.escape(text[cursor:]))
+    return "".join(pieces)
+def generate_statistics(entities):
+    """
+    Summarise a list of detected entities.
+    Returns:
+        dict with the keys "total_entities" (count), "by_type" and
+        "by_source" (occurrence counts; a missing "source" is counted as
+        "unknown"), "avg_confidence" (mean score, 0 for an empty list) and
+        "all_detected_types" (list of the distinct entity types).
+    """
+    per_type = {}
+    per_source = {}
+    seen_types = set()
+    score_total = 0
+    for item in entities:
+        kind = item["entity_type"]
+        origin = item.get("source", "unknown")
+        confidence = item["score"]
+        per_type[kind] = per_type.get(kind, 0) + 1
+        per_source[origin] = per_source.get(origin, 0) + 1
+        seen_types.add(kind)
+        score_total += confidence
+    # Avoid dividing by zero: an empty input keeps the initial 0
+    mean_score = score_total / len(entities) if entities else score_total
+    return {
+        "total_entities": len(entities),
+        "by_type": per_type,
+        "by_source": per_source,
+        "avg_confidence": mean_score,
+        "all_detected_types": list(seen_types),
+    }
+# =========================================================
+# CELLA 8: INTERFACCIA GRADIO (MODIFICHE)
+# =========================================================
+def process_text_gradio(text, anonymization_type, use_stanford, use_regex, confidence_threshold):
+    """
+    Gradio callback: detect, highlight and anonymize the entities in `text`.
+    Returns:
+        tuple[str, str, str]: (highlighted HTML, anonymized text, statistics
+        in Markdown). Invalid or empty input yields fixed messages; any
+        exception raised while processing is returned as text together with
+        its traceback instead of propagating.
+    """
+    # Guard clauses for unusable input
+    if not isinstance(text, str):
+        return "Errore: input deve essere una stringa", "", "Errore: tipo di input non valido"
+    if not text.strip():
+        return "", "", "Nessun testo fornito"
+    try:
+        detected = hybrid_anonymizer.analyze_text(
+            text,
+            enable_stanford=use_stanford,
+            enable_regex=use_regex
+        )
+        # Keep only the entities at or above the requested confidence
+        kept = [item for item in detected if item["score"] >= confidence_threshold]
+        highlighted_html = highlight_entities_html(text, kept)
+        anonymized_text = hybrid_anonymizer.anonymize_text(text, kept, anonymization_type)
+        stats = generate_statistics(kept)
+        # Pre-render the variable parts of the Markdown report
+        detected_types = ', '.join(sorted(stats['all_detected_types']))
+        type_lines = "\n".join(f"- {k}: {v}" for k, v in stats['by_type'].items())
+        source_lines = "\n".join(f"- {k}: {v}" for k, v in stats['by_source'].items())
+        stats_str = f"""
+**Statistiche rilevamento:**
+- Entità totali trovate: {stats['total_entities']}
+- Confidenza media: {stats['avg_confidence']:.2%}
+- Tipi di entità rilevati: {detected_types}
+**Per tipo:**
+{type_lines}
+**Per sorgente:**
+{source_lines}
+"""
+        return highlighted_html, anonymized_text, stats_str
+    except Exception as e:
+        import traceback
+        error_msg = f"Errore: {str(e)}\n{traceback.format_exc()}"
+        return error_msg, "", error_msg
+# =========================================================
+# CELLA 9: INTERFACCIA DI CONTROLLO ENTITÀ (VERSIONE COMPLETA)
+# =========================================================
+def process_text_with_entity_control(
+    text,
+    anonymization_type,
+    use_stanford,
+    use_regex,
+    confidence_threshold,
+    person_enabled,
+    location_enabled,
+    organization_enabled,
+    date_time_enabled,
+    phone_number_enabled,
+    email_enabled,
+    iban_enabled,
+    codice_fiscale_enabled,
+    partita_iva_enabled,
+    targa_enabled,
+    # Nuovi parametri di anonimizzazione
+    tag_format="<TAG>",
+    redact_char="*",
+    preserve_length=False,
+    # Anonimizzazione per tipo specifico
+    person_anon_method=None,
+    location_anon_method=None,
+    organization_anon_method=None,
+    date_time_anon_method=None,
+    phone_anon_method=None,
+    email_anon_method=None,
+    iban_anon_method=None,
+    cf_anon_method=None,
+    piva_anon_method=None,
+    targa_anon_method=None,
+    # Soglie di confidenza specifiche per tipo
+    person_threshold=None,
+    location_threshold=None,
+    organization_threshold=None,
+    date_time_threshold=None,
+    phone_threshold=None,
+    email_threshold=None,
+    iban_threshold=None,
+    cf_threshold=None,
+    piva_threshold=None,
+    targa_threshold=None,
+    # Formati dei pseudonimi
+    person_pseudo_format="Persona{num}",
+    location_pseudo_format="Luogo{num}",
+    organization_pseudo_format="Organizzazione{num}",
+    date_pseudo_format="Data{num}",
+    phone_pseudo_format="+39-XXX-XXX-{num}",
+    email_pseudo_format="email{num}@esempio.com",
+    iban_pseudo_format="IT00X0000000000000{num}",
+    cf_pseudo_format="ABCDEF00G00H000{num}",
+    piva_pseudo_format="IT0000000000{num}",
+    targa_pseudo_format="XX000{num}"
+):
+    """
+    Processa il testo con controllo sulle entità da estrarre/anonimizzare
+    e con parametri avanzati di anonimizzazione
+    """
+    # Verifica che il testo sia una stringa
+    if not isinstance(text, str):
+        return "Errore: input deve essere una stringa", "", "Errore: tipo di input non valido"
+    if not text.strip():
+        return "", "", "Nessun testo fornito"
+    try:
+        # Crea una lista di entità abilitate e mappa dei metodi per tipo
+        enabled_entities = []
+        entity_anon_methods = {}
+        entity_thresholds = {}
+        entity_pseudo_formats = {}
+        # Mappa degli entity types, abilitazione, metodi, soglie e formati
+        entity_config = [
+            ("PERSON", person_enabled, person_anon_method, person_threshold, person_pseudo_format),
+            ("LOCATION", location_enabled, location_anon_method, location_threshold, location_pseudo_format),
+            ("ORGANIZATION", organization_enabled, organization_anon_method, organization_threshold, organization_pseudo_format),
+            ("DATE_TIME", date_time_enabled, date_time_anon_method, date_time_threshold, date_pseudo_format),
+            ("PHONE_NUMBER", phone_number_enabled, phone_anon_method, phone_threshold, phone_pseudo_format),
+            ("EMAIL", email_enabled, email_anon_method, email_threshold, email_pseudo_format),
+            ("EMAIL_ADDRESS", email_enabled, email_anon_method, email_threshold, email_pseudo_format),
+            ("IBAN_CODE", iban_enabled, iban_anon_method, iban_threshold, iban_pseudo_format),
+            ("CODICE_FISCALE", codice_fiscale_enabled, cf_anon_method, cf_threshold, cf_pseudo_format),
+            ("PARTITA_IVA", partita_iva_enabled, piva_anon_method, piva_threshold, piva_pseudo_format),
+            ("TARGA", targa_enabled, targa_anon_method, targa_threshold, targa_pseudo_format)
+        ]
+        # Popola gli array basandosi sulla configurazione
+        for entity_type, is_enabled, anon_method, threshold, pseudo_format in entity_config:
+            if is_enabled:
+                enabled_entities.append(entity_type)
+                # Se è specificato un metodo specifico per questo tipo, usalo
+                if anon_method:
+                    entity_anon_methods[entity_type] = anon_method
+                # Se è specificata una soglia specifica per questo tipo, usala
+                if threshold is not None:
+                    entity_thresholds[entity_type] = threshold
+                # Salva il formato del pseudonimo per questo tipo
+                entity_pseudo_formats[entity_type] = pseudo_format
+        # Se nessuna entità è abilitata, mostra il testo originale
+        if not enabled_entities:
+            return text, text, "Nessuna entità selezionata per l'anonimizzazione"
+        # Analizza il testo
+        entities = hybrid_anonymizer.analyze_text(
+            text,
+            enable_stanford=use_stanford,
+            enable_regex=use_regex
+        )
+        # Filtra per confidenza e per tipo di entità abilitato, usando soglie specifiche per tipo se disponibili
+        filtered_entities = []
+        for e in entities:
+            if e["entity_type"] in enabled_entities:
+                # Determina la soglia da usare
+                entity_threshold = entity_thresholds.get(e["entity_type"], confidence_threshold)
+                if e["score"] >= entity_threshold:
+                    filtered_entities.append(e)
+        # Genera HTML evidenziato
+        highlighted_html = highlight_entities_html(text, filtered_entities)
+        # Anonimizza il testo con i parametri avanzati
+        anonymized_text = advanced_anonymize_text(
+            text,
+            filtered_entities,
+            anonymization_type,
+            tag_format=tag_format,
+            redact_char=redact_char,
+            preserve_length=preserve_length,
+            entity_anon_methods=entity_anon_methods,
+            entity_pseudo_formats=entity_pseudo_formats
+        )
+        # Genera statistiche
+        stats = generate_statistics(filtered_entities)
+        # Formatta le statistiche per Gradio
+        stats_str = f"""
+**Statistiche rilevamento:**
+- Entità totali trovate: {stats['total_entities']}
+- Confidenza media: {stats['avg_confidence']:.2%}
+- Tipi di entità rilevati: {', '.join(sorted(stats['all_detected_types']))}
+**Per tipo:**
+{chr(10).join([f"- {k}: {v}" for k, v in stats['by_type'].items()])}
+**Per sorgente:**
+{chr(10).join([f"- {k}: {v}" for k, v in stats['by_source'].items()])}
+**Parametri di anonimizzazione:**
+- Metodo globale: {anonymization_type}
+- Formato tag: {tag_format}
+- Preserva lunghezza: {"Sì" if preserve_length else "No"}
+"""
+        # Aggiungi informazioni sui metodi specifici
+        if entity_anon_methods:
+            stats_str += "\n**Metodi specifici per tipo:**\n"
+            stats_str += chr(10).join([f"- {k}: {v}" for k, v in entity_anon_methods.items()])
+        # Aggiungi informazioni sulle soglie specifiche
+        if entity_thresholds:
+            stats_str += "\n\n**Soglie di confidenza specifiche:**\n"
+            stats_str += chr(10).join([f"- {k}: {v}" for k, v in entity_thresholds.items()])
+        return highlighted_html, anonymized_text, stats_str
+    except Exception as e:
+        import traceback
+        error_msg = f"Errore: {str(e)}\n{traceback.format_exc()}"
+        return error_msg, "", error_msg
+def advanced_anonymize_text(text, entities, global_anon_type, tag_format="<TAG>", redact_char="*",
+                           preserve_length=False, entity_anon_methods={}, entity_pseudo_formats={}):
+    """
+    Versione avanzata dell'anonimizzazione che supporta più parametri
+    """
+    if not entities:
+        return text
+    # Ordina le entità per posizione (dall'ultima alla prima) per non alterare gli indici
+    sorted_entities = sorted(entities, key=lambda x: x["start"], reverse=True)
+    anonymized = text
+    # Dizionario per tenere traccia dei valori sostituiti
+    pseudonyms = {}
+    type_counts = {}
+    for entity in sorted_entities:
+        entity_type = entity["entity_type"]
+        entity_text = entity["text"]
+        entity_start = entity["start"]
+        entity_end = entity["end"]
+        # Determina il metodo di anonimizzazione per questa entità specifica
+        anon_type = entity_anon_methods.get(entity_type, global_anon_type)
+        if anon_type == "replace":
+            # Formatta il tag in base al formato scelto
+            if tag_format == "<TAG>":
+                new_value = f"<{entity_type}>"
+            elif tag_format == "[TAG]":
+                new_value = f"[{entity_type}]"
+            elif tag_format == "{TAG}":
+                new_value = f"{{{entity_type}}}"
+            elif tag_format == "TAG_":
+                new_value = f"{entity_type}_"
+            else:
+                new_value = f"<{entity_type}>"
+        elif anon_type == "redact":
+            # Redact con il carattere scelto, mantenendo o meno la lunghezza originale
+            if preserve_length:
+                new_value = redact_char * (entity_end - entity_start)
+            else:
+                new_value = redact_char * 5  # Lunghezza fissa
+        elif anon_type == "pseudonymize":
+            # Pseudonimizzazione con nomi fittizi
+            if entity_text in pseudonyms:
+                new_value = pseudonyms[entity_text]
+            else:
+                # Inizializza il contatore se non esiste
+                if entity_type not in type_counts:
+                    type_counts[entity_type] = 0
+                # Incrementa il contatore
+                type_counts[entity_type] += 1
+                # Ottieni il formato del pseudonimo per questo tipo di entità
+                pseudo_format = entity_pseudo_formats.get(entity_type, "")
+                # Genera un valore fittizio basato sul tipo e formato
+                if pseudo_format:
+                    try:
+                        # Prova a formattare usando il formato specificato
+                        new_value = pseudo_format.format(
+                            num=type_counts[entity_type],
+                            type=entity_type,
+                            orig=entity_text[:1] if entity_text else "X"
+                        )
+                    except Exception:
+                        # Fallback in caso di errore di formattazione
+                        new_value = f"{entity_type}{type_counts[entity_type]}"
+                else:
+                    # Formati predefiniti per ogni tipo se non specificato
+                    if entity_type == "PERSON":
+                        new_value = f"Persona{type_counts[entity_type]}"
+                    elif entity_type == "LOCATION":
+                        new_value = f"Luogo{type_counts[entity_type]}"
+                    elif entity_type == "ORGANIZATION":
+                        new_value = f"Organizzazione{type_counts[entity_type]}"
+                    elif entity_type == "DATE_TIME":
+                        new_value = f"Data{type_counts[entity_type]}"
+                    elif entity_type == "PHONE_NUMBER":
+                        new_value = f"+39-XXX-XXX-{1000+type_counts[entity_type]}"
+                    elif entity_type == "EMAIL_ADDRESS" or entity_type == "EMAIL":
+                        new_value = f"email{type_counts[entity_type]}@esempio.com"
+                    elif entity_type == "IBAN_CODE":
+                        new_value = f"IT00X0000000000000{type_counts[entity_type]}"
+                    elif entity_type == "CODICE_FISCALE" or entity_type == "CF":
+                        new_value = f"ABCDEF00G00H000{type_counts[entity_type]}"
+                    elif entity_type == "PARTITA_IVA" or entity_type == "PIVA":
+                        new_value = f"IT0000000000{type_counts[entity_type]}"
+                    elif entity_type == "TARGA":
+                        new_value = f"XX000{type_counts[entity_type]}"
+                    else:
+                        new_value = f"{entity_type}{type_counts[entity_type]}"
+                # Memorizza il valore per riusi futuri
+                pseudonyms[entity_text] = new_value
+                # Adatta la lunghezza se necessario
+                if preserve_length and len(new_value) < (entity_end - entity_start):
+                    new_value = new_value.ljust(entity_end - entity_start)
+                elif preserve_length and len(new_value) > (entity_end - entity_start):
+                    # Troncamento con ellipsis
+                    new_value = new_value[:entity_end - entity_start - 1] + "…"
+        else:
+            # Tipo sconosciuto, usa il metodo replace come fallback
+            new_value = f"<{entity_type}>"
+        # Sostituisci il testo
+        anonymized = anonymized[:entity_start] + new_value + anonymized[entity_end:]
+    return anonymized
+# Esempi per la nuova interfaccia
+entity_control_examples = [
+    [
+        "Il signor Mario Rossi, nato il 15/04/1980, CF: RSSMRC80D15H501V, residente in Via Roma 123, Milano, possiede la partita IVA IT12345678901.",
+        "replace",
+        True,
+        False,
+        0.5,
+        True,  # person_enabled
+        True,  # location_enabled
+        True,  # organization_enabled
+        True,  # date_time_enabled
+        True,  # phone_number_enabled
+        True,  # email_enabled
+        True,  # iban_enabled
+        True,  # codice_fiscale_enabled
+        True,  # partita_iva_enabled
+        True,  # targa_enabled
+    ],
+    [
+        "Per contattare il cliente Giovanni Bianchi utilizzare l'email [email protected] o il numero +39 333-123-4567.",
+        "replace",
+        False,
+        True,
+        0.6,
+        True,  # person_enabled
+        False,  # location_enabled
+        False,  # organization_enabled
+        False,  # date_time_enabled
+        True,  # phone_number_enabled
+        True,  # email_enabled
+        False,  # iban_enabled
+        False,  # codice_fiscale_enabled
+        False,  # partita_iva_enabled
+        False,  # targa_enabled
+    ],
+    [
+        "Il veicolo targato AB123CD appartiene a Maria Verdi, titolare del conto bancario IT12K1234567890123456789012.",
+        "replace",
+        True,
+        True,
+        0.7,
+        True,  # person_enabled
+        False,  # location_enabled
+        False,  # organization_enabled
+        False,  # date_time_enabled
+        False,  # phone_number_enabled
+        False,  # email_enabled
+        True,  # iban_enabled
+        False,  # codice_fiscale_enabled
+        False,  # partita_iva_enabled
+        True,  # targa_enabled
+    ]
+]
+def process_text_with_entity_control_wrapper(
+    text,
+    anonymization_type,
+    use_stanford,
+    use_regex,
+    confidence_threshold,
+    person_enabled,
+    location_enabled,
+    organization_enabled,
+    date_time_enabled,
+    phone_number_enabled,
+    email_enabled,
+    iban_enabled,
+    codice_fiscale_enabled,
+    partita_iva_enabled,
+    targa_enabled
+):
+    """
+    Funzione wrapper che passa i parametri predefiniti ai nuovi parametri della funzione originale
+    """
+    return process_text_with_entity_control(
+        text=text,
+        anonymization_type=anonymization_type,
+        use_stanford=use_stanford,
+        use_regex=use_regex,
+        confidence_threshold=confidence_threshold,
+        person_enabled=person_enabled,
+        location_enabled=location_enabled,
+        organization_enabled=organization_enabled,
+        date_time_enabled=date_time_enabled,
+        phone_number_enabled=phone_number_enabled,
+        email_enabled=email_enabled,
+        iban_enabled=iban_enabled,
+        codice_fiscale_enabled=codice_fiscale_enabled,
+        partita_iva_enabled=partita_iva_enabled,
+        targa_enabled=targa_enabled,
+        # Valori predefiniti per i nuovi parametri
+        tag_format="<TAG>",
+        redact_char="*",
+        preserve_length=False,
+        # Metodi specifici per tipo (tutti None = usa metodo globale)
+        person_anon_method=None,
+        location_anon_method=None,
+        organization_anon_method=None,
+        date_time_anon_method=None,
+        phone_anon_method=None,
+        email_anon_method=None,
+        iban_anon_method=None,
+        cf_anon_method=None,
+        piva_anon_method=None,
+        targa_anon_method=None,
+        # Soglie specifiche (tutte None = usa soglia globale)
+        person_threshold=None,
+        location_threshold=None,
+        organization_threshold=None,
+        date_time_threshold=None,
+        phone_threshold=None,
+        email_threshold=None,
+        iban_threshold=None,
+        cf_threshold=None,
+        piva_threshold=None,
+        targa_threshold=None,
+        # Formati predefiniti per i pseudonimi
+        person_pseudo_format="Persona{num}",
+        location_pseudo_format="Luogo{num}",
+        organization_pseudo_format="Organizzazione{num}",
+        date_pseudo_format="Data{num}",
+        phone_pseudo_format="+39-XXX-XXX-{num}",
+        email_pseudo_format="email{num}@esempio.com",
+        iban_pseudo_format="IT00X0000000000000{num}",
+        cf_pseudo_format="ABCDEF00G00H000{num}",
+        piva_pseudo_format="IT0000000000{num}",
+        targa_pseudo_format="XX000{num}"
+    )
+# Crea l'interfaccia Gradio con controllo entità
+demo_advanced = gr.Interface(
+    fn=process_text_with_entity_control_wrapper,  # Usa la funzione wrapper
+    inputs=[
+        gr.Textbox(
+            label="Testo da analizzare",
+            lines=5,
+            placeholder="Inserisci il testo contenente dati sensibili...",
+            value="Il signor Marco Rossi, nato il 15/04/1978, CF: RSSMRC78D15H501T, può essere contattato al numero +39 333-1234567 o all'email [email protected]."
+        ),
+        gr.Radio(
+            ["replace", "redact", "pseudonymize"],
+            label="Tipo di anonimizzazione",
+            value="replace"
+        ),
+        gr.Checkbox(
+            label="Usa modello Stanford",
+            value=True
+        ),
+        gr.Checkbox(
+            label="Usa Regex Fallback",
+            value=True
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.5,
+            step=0.05,
+            label="Soglia di confidenza minima"
+        ),
+        # Controlli per i tipi di entità
+        gr.Checkbox(label="Persone (PERSON)", value=True),
+        gr.Checkbox(label="Luoghi (LOCATION)", value=True),
+        gr.Checkbox(label="Organizzazioni (ORGANIZATION)", value=True),
+        gr.Checkbox(label="Date (DATE_TIME)", value=True),
+        gr.Checkbox(label="Numeri di telefono (PHONE_NUMBER)", value=True),
+        gr.Checkbox(label="Email (EMAIL)", value=True),
+        gr.Checkbox(label="IBAN (IBAN_CODE)", value=True),
+        gr.Checkbox(label="Codici Fiscali (CODICE_FISCALE)", value=True),
+        gr.Checkbox(label="Partite IVA (PARTITA_IVA)", value=True),
+        gr.Checkbox(label="Targhe (TARGA)", value=True)
+    ],
+    outputs=[
+        gr.HTML(label="Testo con entità evidenziate"),
+        gr.Textbox(label="Testo anonimizzato", lines=5),
+        gr.Markdown(label="Statistiche di rilevamento")
+    ],
+    title="🔒 Sistema Ibrido di Anonimizzazione Dati - Controllo Entità",
+    description="Analizza e anonimizza testi selezionando i tipi di entità da processare.\n"
+                "I diversi colori indicano i tipi di entità rilevate.",
+    examples=entity_control_examples,
+    theme=gr.themes.Soft(),
+    allow_flagging="never"
+)
+# Avvia l'interfaccia migliorata
+# demo_advanced.launch(share=True, debug=True)
+# =========================================================
+# CELLA 10: INTERFACCIA AVANZATA CON PARAMETRI DI ANONIMIZZAZIONE
+# =========================================================
+with gr.Blocks(theme=gr.themes.Soft()) as demo_blocks:
+    gr.Markdown("# 🔒 Sistema Ibrido di Anonimizzazione Dati")
+    gr.Markdown("Analizza e anonimizza testi in italiano con controllo avanzato dei parametri.")
+    with gr.Row():
+        with gr.Column(scale=2):
+            text_input = gr.Textbox(
+                label="Testo da analizzare",
+                lines=6,
+                placeholder="Inserisci il testo contenente dati sensibili...",
+                value="Il signor Marco Rossi, nato il 15/04/1978, CF: RSSMRC78D15H501T, può essere contattato al numero +39 333-1234567 o all'email [email protected]."
+            )
+            with gr.Tabs():
+                with gr.TabItem("Impostazioni Base"):
+                    with gr.Row():
+                        with gr.Column():
+                            anon_type = gr.Radio(
+                                ["replace", "redact", "pseudonymize"],
+                                label="Tipo di anonimizzazione globale",
+                                value="replace"
+                            )
+                            confidence = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                value=0.5,
+                                step=0.05,
+                                label="Soglia di confidenza globale"
+                            )
+                        with gr.Column():
+                            use_stanford = gr.Checkbox(label="Usa modello Stanford", value=True)
+                            use_regex = gr.Checkbox(label="Usa Regex Fallback", value=True)
+                with gr.TabItem("Parametri di Anonimizzazione"):
+                    with gr.Row():
+                        with gr.Column():
+                            tag_format = gr.Radio(
+                                ["<TAG>", "[TAG]", "{TAG}", "TAG_"],
+                                label="Formato dei tag di sostituzione",
+                                value="<TAG>"
+                            )
+                            redact_char = gr.Radio(
+                                ["*", "X", "#", "_"],
+                                label="Carattere di oscuramento (per redact)",
+                                value="*"
+                            )
+                        with gr.Column():
+                            preserve_length = gr.Checkbox(
+                                label="Preserva lunghezza originale nelle sostituzioni",
+                                value=False
+                            )
+                            gr.Markdown("### Anteprima formati di tag")
+                            anteprima_html = gr.HTML(value="<div style='padding: 10px; background-color: #f0f0f0; border-radius: 5px;'><p><b>Replace:</b> &lt;PERSON&gt;, [PERSON], {PERSON}, PERSON_</p><p><b>Redact:</b> *****, XXXXX, #####, _____</p><p><b>Pseudonymize:</b> Persona1, Luogo1, Data1...</p></div>")
+                    with gr.Accordion("Metodi specifici per tipo di entità", open=False):
+                        gr.Markdown("Seleziona un metodo specifico per ogni tipo di entità, o lascia 'Globale' per usare il metodo globale")
+                        with gr.Row():
+                            with gr.Column():
+                                person_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per PERSON",
+                                    value=None
+                                )
+                                location_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per LOCATION",
+                                    value=None
+                                )
+                                organization_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per ORGANIZATION",
+                                    value=None
+                                )
+                                date_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per DATE_TIME",
+                                    value=None
+                                )
+                                phone_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per PHONE_NUMBER",
+                                    value=None
+                                )
+                            with gr.Column():
+                                email_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per EMAIL",
+                                    value=None
+                                )
+                                iban_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per IBAN_CODE",
+                                    value=None
+                                )
+                                cf_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per CODICE_FISCALE",
+                                    value=None
+                                )
+                                piva_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per PARTITA_IVA",
+                                    value=None
+                                )
+                                targa_method = gr.Dropdown(
+                                    [None, "replace", "redact", "pseudonymize"],
+                                    label="Metodo per TARGA",
+                                    value=None
+                                )
+                with gr.TabItem("Soglie di Confidenza"):
+                    gr.Markdown("### Imposta soglie di confidenza specifiche per tipo di entità")
+                    gr.Markdown("Lascia vuoto per usare la soglia globale")
+                    with gr.Row():
+                        with gr.Column():
+                            person_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per PERSON",
+                                value=None
+                            )
+                            location_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per LOCATION",
+                                value=None
+                            )
+                            organization_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per ORGANIZATION",
+                                value=None
+                            )
+                            date_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per DATE_TIME",
+                                value=None
+                            )
+                            phone_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per PHONE_NUMBER",
+                                value=None
+                            )
+                        with gr.Column():
+                            email_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per EMAIL",
+                                value=None
+                            )
+                            iban_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per IBAN_CODE",
+                                value=None
+                            )
+                            cf_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per CODICE_FISCALE",
+                                value=None
+                            )
+                            piva_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per PARTITA_IVA",
+                                value=None
+                            )
+                            targa_threshold = gr.Slider(
+                                minimum=0.1,
+                                maximum=1.0,
+                                step=0.05,
+                                label="Soglia per TARGA",
+                                value=None
+                            )
+                with gr.TabItem("Formati Pseudonimi"):
+                    gr.Markdown("### Personalizza i formati dei pseudonimi")
+                    gr.Markdown("Usa {num} per inserire il numero progressivo, {type} per il tipo di entità, {orig} per l'iniziale dell'originale")
+                    with gr.Row():
+                        with gr.Column():
+                            person_format = gr.Textbox(
+                                label="Formato per PERSON",
+                                value="Persona{num}",
+                                placeholder="es. Persona{num}, P{num}, {orig}..."
+                            )
+                            location_format = gr.Textbox(
+                                label="Formato per LOCATION",
+                                value="Luogo{num}",
+                                placeholder="es. Luogo{num}, L{num}..."
+                            )
+                            organization_format = gr.Textbox(
+                                label="Formato per ORGANIZATION",
+                                value="Organizzazione{num}",
+                                placeholder="es. Org{num}, Azienda{num}..."
+                            )
+                            date_format = gr.Textbox(
+                                label="Formato per DATE_TIME",
+                                value="Data{num}",
+                                placeholder="es. GG/MM/AAAA, Data{num}..."
+                            )
+                            phone_format = gr.Textbox(
+                                label="Formato per PHONE_NUMBER",
+                                value="+39-XXX-XXX-{num}",
+                                placeholder="es. +39-XXX-XXX-{num}..."
+                            )
+                        with gr.Column():
+                            email_format = gr.Textbox(
+                                label="Formato per EMAIL",
+                                value="email{num}@esempio.com",
+                                placeholder="es. user{num}@domain.com..."
+                            )
+                            iban_format = gr.Textbox(
+                                label="Formato per IBAN_CODE",
+                                value="IT00X0000000000000{num}",
+                                placeholder="es. IT00X0000..."
+                            )
+                            cf_format = gr.Textbox(
+                                label="Formato per CODICE_FISCALE",
+                                value="ABCDEF00G00H000{num}",
+                                placeholder="es. ABCDEF00G00H000{num}..."
+                            )
+                            piva_format = gr.Textbox(
+                                label="Formato per PARTITA_IVA",
+                                value="IT0000000000{num}",
+                                placeholder="es. IT0000000000{num}..."
+                            )
+                            targa_format = gr.Textbox(
+                                label="Formato per TARGA",
+                                value="XX000{num}",
+                                placeholder="es. XX000{num}..."
+                            )
+            process_btn = gr.Button("Analizza e Anonimizza", variant="primary")
+        with gr.Column(scale=1):
+            gr.Markdown("### Seleziona i tipi di entità da anonimizzare")
+            with gr.Group():
+                person_enabled = gr.Checkbox(label="👤 Persone (PERSON)", value=True)
+                location_enabled = gr.Checkbox(label="📍 Luoghi (LOCATION)", value=True)
+                organization_enabled = gr.Checkbox(label="🏢 Organizzazioni (ORGANIZATION)", value=True)
+                date_time_enabled = gr.Checkbox(label="📅 Date (DATE_TIME)", value=True)
+                phone_number_enabled = gr.Checkbox(label="📞 Numeri di telefono (PHONE_NUMBER)", value=True)
+                email_enabled = gr.Checkbox(label="📧 Email (EMAIL)", value=True)
+                iban_enabled = gr.Checkbox(label="💳 IBAN (IBAN_CODE)", value=True)
+                codice_fiscale_enabled = gr.Checkbox(label="🪪 Codici Fiscali (CODICE_FISCALE)", value=True)
+                partita_iva_enabled = gr.Checkbox(label="🏷️ Partite IVA (PARTITA_IVA)", value=True)
+                targa_enabled = gr.Checkbox(label="🚗 Targhe (TARGA)", value=True)
+            with gr.Row():
+                select_all_btn = gr.Button("Seleziona tutti")
+                clear_all_btn = gr.Button("Deseleziona tutti")
+            with gr.Accordion("Guida rapida", open=False):
+                gr.Markdown("""
+                **Tipi di anonimizzazione:**
+                - **Replace**: sostituisce l'entità con un tag (es. <PERSON>)
+                - **Redact**: oscura l'entità con caratteri (es. *****)
+                - **Pseudonymize**: sostituisce con valori fittizi (es. Persona1)
+                **Formato tag:**
+                - `<TAG>`: usa tag HTML (es. <PERSON>)
+                - `[TAG]`: usa parentesi quadre (es. [PERSON])
+                - `{TAG}`: usa parentesi graffe (es. {PERSON})
+                - `TAG_`: usa underscore (es. PERSON_)
+                **Preserva lunghezza:**
+                - Se attivo, mantiene la lunghezza originale dell'entità
+                - Utile per mantenere il formato del documento
+                """)
+    # Results area: these three components are the outputs of the process_btn click handler.
+    with gr.Tabs():
+        with gr.TabItem("Risultati"):
+            # Original text rendered as HTML with the detected entities highlighted.
+            html_output = gr.HTML(label="Testo con entità evidenziate")
+            # Anonymized version of the input text.
+            anon_output = gr.Textbox(label="Testo anonimizzato", lines=5)
+            # Detection statistics, rendered as Markdown.
+            stats_output = gr.Markdown(label="Statistiche di rilevamento")
+    # Funzione per aggiornare l'anteprima dei formati di tag
+    def update_preview(tag_format, redact_char, preserve_length):
+        replace_examples = {
+            "<TAG>": "&lt;PERSON&gt;",
+            "[TAG]": "[PERSON]",
+            "{TAG}": "{PERSON}",
+            "TAG_": "PERSON_"
+        }
+        redact_example = redact_char * 5
+        if preserve_length:
+            redact_note = " (mantenendo lunghezza originale)"
+        else:
+            redact_note = " (lunghezza fissa)"
+        return f"""
+        <div style='padding: 10px; background-color: #f0f0f0; border-radius: 5px;'>
+            <p><b>Replace:</b> {replace_examples[tag_format]}</p>
+            <p><b>Redact:</b> {redact_example}{redact_note}</p>
+            <p><b>Pseudonymize:</b> Persona1, Luogo1, Data1...</p>
+        </div>
+        """
+    # Aggiorna l'anteprima quando cambiano i parametri
+    tag_format.change(
+        update_preview,
+        inputs=[tag_format, redact_char, preserve_length],
+        outputs=anteprima_html
+    )
+    redact_char.change(
+        update_preview,
+        inputs=[tag_format, redact_char, preserve_length],
+        outputs=anteprima_html
+    )
+    preserve_length.change(
+        update_preview,
+        inputs=[tag_format, redact_char, preserve_length],
+        outputs=anteprima_html
+    )
+    # Logica per i pulsanti di selezione
+    def select_all():
+        return [True] * 10
+    def clear_all():
+        return [False] * 10
+    select_all_btn.click(
+        select_all,
+        inputs=None,
+        outputs=[
+            person_enabled, location_enabled, organization_enabled, date_time_enabled,
+            phone_number_enabled, email_enabled, iban_enabled, codice_fiscale_enabled,
+            partita_iva_enabled, targa_enabled
+        ]
+    )
+    clear_all_btn.click(
+        clear_all,
+        inputs=None,
+        outputs=[
+            person_enabled, location_enabled, organization_enabled, date_time_enabled,
+            phone_number_enabled, email_enabled, iban_enabled, codice_fiscale_enabled,
+            partita_iva_enabled, targa_enabled
+        ]
+    )
+    # Callback for the process button: runs process_text_with_entity_control on the current
+    # values of every control below and writes its results to the three output components.
+    # NOTE(review): the handler receives these values positionally, so the order of `inputs`
+    # presumably mirrors the handler's parameter order; confirm before reordering.
+    process_btn.click(
+        process_text_with_entity_control,
+        inputs=[
+            # Input text, global anonymization type, detector switches and confidence
+            text_input, anon_type, use_stanford, use_regex, confidence,
+            # Per-entity enable checkboxes
+            person_enabled, location_enabled, organization_enabled, date_time_enabled,
+            phone_number_enabled, email_enabled, iban_enabled, codice_fiscale_enabled,
+            partita_iva_enabled, targa_enabled,
+            # Advanced anonymization parameters
+            tag_format, redact_char, preserve_length,
+            # Per-entity-type methods
+            person_method, location_method, organization_method, date_method,
+            phone_method, email_method, iban_method, cf_method, piva_method, targa_method,
+            # Per-entity-type thresholds
+            person_threshold, location_threshold, organization_threshold, date_threshold,
+            phone_threshold, email_threshold, iban_threshold, cf_threshold, piva_threshold, targa_threshold,
+            # Pseudonym formats
+            person_format, location_format, organization_format, date_format, phone_format,
+            email_format, iban_format, cf_format, piva_format, targa_format
+        ],
+        outputs=[html_output, anon_output, stats_output]
+    )
+# Avvia l'interfaccia a blocchi (commenta la linea launch della cella 11 se la usi)
+if __name__ == "__main__":
+    demo_blocks.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )