Spaces:

domdp
/

Anonimizzazione

Running

App Files Files Community

Anonimizzazione / app.py

domdp

Rename stan+regex.py to app.py

c83de77 verified 2 months ago

raw

history blame contribute delete

64.7 kB


	# =========================================================
	# CELLA 1: IMPORT E SETUP INIZIALE
	# =========================================================

	import os
	import re
	import gradio as gr
	import pandas as pd
	import json
	from typing import List, Dict, Tuple, Any
	import spacy
	import torch
	from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

	# Import Presidio
	from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, PatternRecognizer
	from presidio_analyzer.pattern_recognizer import Pattern
	from presidio_analyzer.nlp_engine import NlpEngine, NlpEngineProvider
	from presidio_analyzer.context_aware_enhancers import LemmaContextAwareEnhancer
	from presidio_anonymizer import AnonymizerEngine
	from presidio_anonymizer.entities import OperatorConfig

	# Configurazione base
	print("✅ Import completati!")

	# =========================================================
	# CELLA 2: CONFIGURAZIONE RICONOSCITORI PERSONALIZZATI (CORRETTA)
	# =========================================================

	def create_italian_recognizers():
	    """
	    Build the custom Presidio pattern recognizers for Italian identifiers.

	    One ``PatternRecognizer`` is created for each of CODICE_FISCALE,
	    PARTITA_IVA, IBAN_CODE, TARGA, PHONE_NUMBER and DATE_TIME. Every
	    recognizer is registered under language "en" (the hybrid analyzer in
	    this file queries Presidio with ``language="en"``) and carries Italian
	    context words.

	    Returns:
	        list: the recognizers, in the order listed above.
	    """
	    # Each entry: (entity name, [(pattern name, regex, score), ...], context words)
	    specs = [
	        (
	            "CODICE_FISCALE",
	            [("codice fiscale", r"\b[A-Z]{6}[0-9]{2}[A-Z][0-9]{2}[A-Z][0-9]{3}[A-Z]\b", 0.9)],
	            ["codice", "fiscale", "cf", "c.f.", "cod.fisc.", "codice fiscale"],
	        ),
	        (
	            "PARTITA_IVA",
	            [("partita iva", r"\b(IT)?[0-9]{11}\b", 0.85)],
	            ["partita", "iva", "p.iva", "p. iva", "piva", "partita iva"],
	        ),
	        (
	            "IBAN_CODE",
	            # The optional tail is written as a plain bounded class; it matches
	            # the same strings as the former nested "([A-Z0-9]?){0,16}".
	            [("iban", r"\b[A-Z]{2}[0-9]{2}[A-Z0-9]{4}[0-9]{7}[A-Z0-9]{0,16}\b", 0.9)],
	            ["iban", "bonifico", "bancario", "conto", "pagamento", "IBAN"],
	        ),
	        (
	            "TARGA",
	            [("targa", r"\b[A-Z]{2}[0-9]{3}[A-Z]{2}\b", 0.85)],
	            ["targa", "auto", "veicolo", "automobile", "macchina"],
	        ),
	        (
	            "PHONE_NUMBER",
	            [
	                # "\b" before "+" only matches when "+" follows a word character,
	                # so a lookbehind is used to accept "+39" after a space or at the
	                # start of the text.
	                ("telefono (con prefisso)", r"(?<!\w)\+39\s?[0-9]{10}\b", 0.9),
	                ("telefono (cellulare)", r"\b[3][0-9]{9}\b", 0.8),
	                ("telefono (fisso)", r"\b0[0-9]{1,3}[-\s]?[0-9]{7}\b", 0.7),
	                ("telefono (generico)", r"\b[0-9]{10}\b", 0.6),
	            ],
	            ["telefono", "cellulare", "tel", "chiamare", "contattare", "mobile"],
	        ),
	        (
	            "DATE_TIME",
	            [
	                ("data (dd/mm/yyyy)", r"\b[0-3][0-9]/[0-1][0-9]/[1-2][0-9]{3}\b", 0.9),
	                ("data (dd-mm-yyyy)", r"\b[0-3][0-9]-[0-1][0-9]-[1-2][0-9]{3}\b", 0.9),
	                ("data (d/m/yyyy)", r"\b[1-9]/[1-9]/[1-2][0-9]{3}\b", 0.8),
	                ("data (dd/mm/yy)", r"\b[0-3][0-9]/[0-1][0-9]/[0-9]{2}\b", 0.8),
	            ],
	            ["nato", "nata", "data di nascita", "nasce", "data", "nascita"],
	        ),
	    ]

	    recognizers = [
	        PatternRecognizer(
	            supported_entity=entity,
	            patterns=[
	                Pattern(name=name, regex=regex, score=score)
	                for name, regex, score in patterns
	            ],
	            context=context,
	            supported_language="en",
	        )
	        for entity, patterns, context in specs
	    ]

	    print(f"✅ Creati {len(recognizers)} riconoscitori personalizzati")
	    return recognizers

	# Build the custom Italian recognizers once, at module import time
	italian_recognizers = create_italian_recognizers()

	# =========================================================
	# CELLA 3: STANFORD COME RECOGNIZER SEPARATO
	# =========================================================

	from presidio_analyzer import EntityRecognizer, RecognizerResult
	from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
	import torch

	class StanfordRecognizer(EntityRecognizer):
	    """
	    Presidio recognizer backed by the "StanfordAIMI/stanford-deidentifier-base"
	    token-classification model.

	    The model is loaded in the constructor. If loading fails, the error is
	    printed, ``self.pipeline`` is set to ``None`` and ``analyze`` returns an
	    empty list.
	    """

	    # Model label -> Presidio entity name. Labels missing from this table are
	    # passed through unchanged and then checked against supported_entities.
	    LABEL_TO_ENTITY = {
	        "PATIENT": "PERSON",
	        "STAFF": "PERSON",
	        "HOSP": "ORGANIZATION",
	        "HOSPITAL": "ORGANIZATION",
	        "AGE": "AGE",
	        "DATE": "DATE_TIME",
	        "PHONE": "PHONE_NUMBER",
	        "PER": "PERSON",
	        "LOC": "LOCATION",
	        "ORG": "ORGANIZATION",
	        "PERSON": "PERSON",
	        "LOCATION": "LOCATION",
	        "ORGANIZATION": "ORGANIZATION"
	    }

	    def __init__(self):
	        self.supported_entities = ["PERSON", "ORGANIZATION", "LOCATION", "DATE_TIME", "AGE", "PHONE_NUMBER", "EMAIL"]
	        self.supported_language = "en"

	        # Load the Stanford model; failure is tolerated (best effort)
	        try:
	            self.tokenizer = AutoTokenizer.from_pretrained("StanfordAIMI/stanford-deidentifier-base")
	            self.model = AutoModelForTokenClassification.from_pretrained("StanfordAIMI/stanford-deidentifier-base")
	            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	            self.model.to(self.device)
	            self.model.eval()

	            # Wrap model and tokenizer in a pipeline for easier inference
	            self.pipeline = pipeline(
	                "token-classification",
	                model=self.model,
	                tokenizer=self.tokenizer,
	                device=0 if torch.cuda.is_available() else -1,
	                aggregation_strategy="max"
	            )
	            print("✅ Modello Stanford caricato con successo!")
	        except Exception as e:
	            print(f"⚠️ Errore nel caricamento del modello Stanford: {e}")
	            self.pipeline = None

	        super().__init__(supported_entities=self.supported_entities, supported_language=self.supported_language)

	    def analyze(self, text, entities, nlp_artifacts):
	        """
	        Run the model on ``text`` and return a list of ``RecognizerResult``.

	        Args:
	            text: the text to analyze.
	            entities: entity names requested by the caller. When non-empty,
	                only results of these types are returned; when empty or
	                ``None``, every supported type is returned.
	            nlp_artifacts: unused by this recognizer.

	        Returns:
	            list of RecognizerResult. Empty when the model is unavailable.
	            If inference raises, the error is printed and whatever was
	            collected so far is returned.
	        """
	        results = []

	        if self.pipeline is None:
	            return results

	        try:
	            for output in self.pipeline(text):
	                label = output.get("entity_group", "")
	                # Strip B-/I- prefixes if present
	                if label.startswith(("B-", "I-")):
	                    label = label[2:]

	                presidio_entity = self.LABEL_TO_ENTITY.get(label, label)

	                if presidio_entity not in self.supported_entities:
	                    continue
	                # Honor the caller's entity filter
	                if entities and presidio_entity not in entities:
	                    continue

	                results.append(
	                    RecognizerResult(
	                        entity_type=presidio_entity,
	                        start=output["start"],
	                        end=output["end"],
	                        # Cast to a plain float; the pipeline may return a
	                        # numpy scalar here
	                        score=float(output["score"])
	                    )
	                )

	        except Exception as e:
	            print(f"Errore nell'analisi Stanford: {e}")

	        return results

	    def load(self):
	        pass  # Loading is done in the constructor

	# Create the Stanford recognizer instance (the model is loaded in its constructor)
	stanford_recognizer = StanfordRecognizer()

	# If an analyzer already exists in the module globals, register the Stanford
	# recognizer with it.
	# NOTE(review): nothing above this point in the file defines `analyzer`, so on
	# a fresh top-to-bottom run this branch appears to be skipped — confirm the
	# recognizer is registered elsewhere.
	if 'analyzer' in globals():
	    try:
	        analyzer.registry.add_recognizer(stanford_recognizer)
	        print("✅ Stanford recognizer aggiunto a Presidio")
	    except Exception as e:
	        print(f"⚠️ Errore nell'aggiunta di Stanford recognizer: {e}")

	# =========================================================
	# CELLA 4: SISTEMA DI REGEX FALLBACK
	# =========================================================

	class RegexFallbackEngine:
	    """
	    Pure-regex entity detector used alongside Presidio.

	    ``self.patterns`` maps an entity type to a list of regular expressions.
	    ``analyze`` reports every match of every pattern with a fixed score.
	    """

	    def __init__(self):
	        self.patterns = {
	            "PERSON": [
	                r"\b[A-Z][a-z]+\s+[A-Z][a-z]+\b",  # First name + last name
	                # Title + first name + last name. The alternation uses a bare
	                # "|"; an escaped "\|" would match a literal pipe character.
	                r"\b(?:Sig\.|Dott\.|Dr\.|Ing\.)\s+[A-Z][a-z]+\s+[A-Z][a-z]+\b",
	            ],
	            "CODICE_FISCALE": [
	                r"\b[A-Z]{6}\d{2}[A-Z]\d{2}[A-Z]\d{3}[A-Z]\b",
	            ],
	            "PARTITA_IVA": [
	                r"\b(?:IT)?\d{11}\b",
	            ],
	            "DATE_TIME": [
	                r"\b\d{1,2}[/\-\.]\d{1,2}[/\-\.]\d{2,4}\b",
	                r"\b\d{1,2}\s+(?:gennaio|febbraio|marzo|aprile|maggio|giugno|luglio|agosto|settembre|ottobre|novembre|dicembre)\s+\d{4}\b",
	            ],
	            "PHONE_NUMBER": [
	                # "\b" before "+" would require a word character in front of
	                # the plus sign; the lookbehind accepts a space or text start.
	                r"(?<!\w)\+39\s?\d{10}\b",
	                r"\b\d{10}\b",
	                r"\b0\d{1,3}[-\.\s]?\d{7}\b",
	            ],
	            "EMAIL": [
	                r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
	            ],
	            "IBAN_CODE": [
	                r"\b[A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}[A-Z0-9]{12}\b",
	            ],
	            "TARGA": [
	                r"\b[A-Z]{2}\d{3}[A-Z]{2}\b",
	            ]
	        }

	    def analyze(self, text):
	        """
	        Scan ``text`` with every configured pattern.

	        Returns:
	            list of dicts with keys "entity_type", "start", "end", "text"
	            and "score" (always 0.9). Matches from different patterns may
	            overlap; no de-duplication is done here.
	        """
	        results = []

	        for entity_type, patterns in self.patterns.items():
	            for pattern in patterns:
	                for match in re.finditer(pattern, text):
	                    results.append({
	                        "entity_type": entity_type,
	                        "start": match.start(),
	                        "end": match.end(),
	                        "text": match.group(),
	                        "score": 0.9  # Fixed score for regex matches
	                    })

	        return results

	# Create the module-level regex fallback engine
	regex_engine = RegexFallbackEngine()

	# =========================================================
	# CELLA 5: CONFIGURAZIONE PRESIDIO SEMPLIFICATA
	# =========================================================

	# Per ora, creiamo una configurazione base senza Stanford, che possiamo aggiungere come recognizer separato
	from presidio_analyzer import AnalyzerEngine
	from presidio_analyzer.predefined_recognizers import (
	PhoneRecognizer, EmailRecognizer, CreditCardRecognizer, IbanRecognizer
	)

	def setup_presidio_simple():
	    """
	    Create the Presidio analyzer and anonymizer engines.

	    A default ``AnalyzerEngine`` is created, the Phone/Email/CreditCard/IBAN
	    predefined recognizers are added to its registry, followed by the
	    module-level ``italian_recognizers`` when that name is defined. A
	    recognizer that fails to register is reported and skipped.

	    Returns:
	        tuple: ``(analyzer, anonymizer)``. If the main setup raises, the
	        error is printed and a plain default pair is returned instead.
	    """
	    try:
	        # Analyzer engine with the default configuration
	        analyzer = AnalyzerEngine()

	        # Predefined recognizers: registration is best effort, but a failure
	        # is reported rather than silently swallowed
	        for recognizer_cls in (PhoneRecognizer, EmailRecognizer, CreditCardRecognizer, IbanRecognizer):
	            try:
	                analyzer.registry.add_recognizer(recognizer_cls())
	            except Exception as e:
	                print(f"Errore aggiungendo {recognizer_cls.__name__}: {e}")

	        # Custom recognizers, if they have been defined
	        if 'italian_recognizers' in globals():
	            for recognizer in italian_recognizers:
	                try:
	                    analyzer.registry.add_recognizer(recognizer)
	                except Exception as e:
	                    print(f"Errore aggiungendo recognizer: {e}")

	        anonymizer = AnonymizerEngine()

	        print("✅ Presidio configurato con successo!")
	        return analyzer, anonymizer

	    except Exception as e:
	        print(f"❌ Errore nella configurazione di Presidio: {e}")
	        # Fall back to a minimal configuration
	        analyzer = AnalyzerEngine()
	        anonymizer = AnonymizerEngine()
	        print("⚠️ Usando configurazione di default")
	        return analyzer, anonymizer

	# Initialise Presidio
	analyzer, anonymizer = setup_presidio_simple()

	# The Stanford recognizer is instantiated before `analyzer` exists, so the
	# registration attempted at that point is skipped on a fresh top-to-bottom
	# run. Register it here, unless it is already in the registry.
	if 'stanford_recognizer' in globals() and stanford_recognizer not in analyzer.registry.recognizers:
	    try:
	        analyzer.registry.add_recognizer(stanford_recognizer)
	        print("✅ Stanford recognizer aggiunto a Presidio")
	    except Exception as e:
	        print(f"⚠️ Errore nell'aggiunta di Stanford recognizer: {e}")

	# =========================================================
	# CELLA 6: SISTEMA DI ANONIMIZZAZIONE IBRIDO (CORRETTO)
	# =========================================================

	class HybridAnonymizer:
	    """
	    Combine Presidio detection with the regex fallback engine and apply one
	    of three anonymization strategies: replace, redact or pseudonymize.
	    """

	    # Default pseudonym templates; "{n}" receives the per-type counter.
	    # PHONE_NUMBER and AGE are handled separately in _make_pseudonym.
	    _PSEUDONYM_TEMPLATES = {
	        "PERSON": "Persona{n}",
	        "LOCATION": "Luogo{n}",
	        "ORGANIZATION": "Organizzazione{n}",
	        "DATE_TIME": "Data{n}",
	        "EMAIL_ADDRESS": "email{n}@esempio.com",
	        "EMAIL": "email{n}@esempio.com",
	        "IBAN_CODE": "IT00X0000000000000{n}",
	        "CODICE_FISCALE": "ABCDEF00G00H000{n}",
	        "CF": "ABCDEF00G00H000{n}",
	        "PARTITA_IVA": "IT0000000000{n}",
	        "PIVA": "IT0000000000{n}",
	        "TARGA": "XX000{n}",
	    }

	    def __init__(self, presidio_analyzer, regex_engine, anonymizer):
	        self.presidio_analyzer = presidio_analyzer
	        self.regex_engine = regex_engine
	        self.anonymizer = anonymizer

	    @staticmethod
	    def _is_stanford_result(result):
	        """Return True when a Presidio result is tagged as coming from StanfordRecognizer."""
	        # Results without recognition metadata are never treated as Stanford's
	        metadata = getattr(result, "recognition_metadata", None) or {}
	        return metadata.get("recognizer_name") == "StanfordRecognizer"

	    @classmethod
	    def _make_pseudonym(cls, entity_type, count):
	        """Return the fictitious value for the ``count``-th entity of ``entity_type``."""
	        if entity_type == "PHONE_NUMBER":
	            return f"+39-XXX-XXX-{1000 + count}"
	        if entity_type == "AGE":
	            return "XX"
	        template = cls._PSEUDONYM_TEMPLATES.get(entity_type)
	        if template is None:
	            return f"{entity_type}{count}"
	        return template.format(n=count)

	    def analyze_text(self, text, enable_stanford=True, enable_regex=True):
	        """
	        Detect entities in ``text`` with Presidio and, optionally, the regex engine.

	        Args:
	            text: text to analyze.
	            enable_stanford: when False, Presidio results attributed to the
	                StanfordRecognizer are dropped.
	            enable_regex: when True, regex fallback matches are added. A
	                regex failure is printed and ignored.

	        Returns:
	            list of dicts (entity_type, start, end, text, score, source),
	            with overlapping spans resolved by _merge_overlapping_entities.
	        """
	        all_entities = []

	        presidio_results = self.presidio_analyzer.analyze(
	            text=text,
	            language="en",
	            entities=None,  # Use every available recognizer
	            allow_list=None
	        )

	        if not enable_stanford:
	            presidio_results = [r for r in presidio_results if not self._is_stanford_result(r)]

	        # Convert Presidio results to plain dicts
	        for result in presidio_results:
	            all_entities.append({
	                "entity_type": result.entity_type,
	                "start": result.start,
	                "end": result.end,
	                "text": text[result.start:result.end],
	                "score": result.score,
	                "source": "presidio"
	            })

	        if enable_regex:
	            try:
	                for entity in self.regex_engine.analyze(text):
	                    all_entities.append({
	                        "entity_type": entity["entity_type"],
	                        "start": entity["start"],
	                        "end": entity["end"],
	                        "text": entity["text"],
	                        "score": entity["score"],
	                        "source": "regex"
	                    })
	            except Exception as e:
	                print(f"Errore in Regex: {e}")

	        return self._merge_overlapping_entities(all_entities)

	    def _merge_overlapping_entities(self, entities):
	        """
	        Resolve overlapping spans.

	        Entities are visited by start position (higher score first on ties).
	        An entity that overlaps the last kept one replaces it only when its
	        score is strictly higher; otherwise it is dropped. The input list is
	        not modified.
	        """
	        if not entities:
	            return []

	        ordered = sorted(entities, key=lambda x: (x["start"], -x["score"]))
	        merged = []

	        for entity in ordered:
	            if not merged or merged[-1]["end"] <= entity["start"]:
	                merged.append(entity)
	            elif entity["score"] > merged[-1]["score"]:
	                merged[-1] = entity

	        return merged

	    def anonymize_text(self, text, entities, anonymization_type="replace"):
	        """
	        Anonymize ``text`` using the given entities.

	        Anonymization types:
	        - replace: substitute a tag through Presidio (e.g. <PERSON>)
	        - redact: overwrite with asterisks of the same length
	        - pseudonymize: substitute fictitious values (e.g. Persona1); equal
	          entity texts get the same value. Counters are assigned while
	          walking the entities from last to first.

	        Any other type prints a warning and falls back to "replace".
	        Returns ``text`` unchanged when ``entities`` is empty.
	        """
	        if not entities:
	            return text

	        if anonymization_type == "replace":
	            from presidio_analyzer import RecognizerResult

	            presidio_results = [
	                RecognizerResult(
	                    entity_type=entity["entity_type"],
	                    start=entity["start"],
	                    end=entity["end"],
	                    score=entity["score"]
	                )
	                for entity in entities
	            ]

	            # Tag used for each entity type
	            operators = {
	                "PERSON": OperatorConfig("replace", {"new_value": "<PERSON>"}),
	                "LOCATION": OperatorConfig("replace", {"new_value": "<LOCATION>"}),
	                "ORGANIZATION": OperatorConfig("replace", {"new_value": "<ORGANIZATION>"}),
	                "DATE_TIME": OperatorConfig("replace", {"new_value": "<DATE>"}),
	                "PHONE_NUMBER": OperatorConfig("replace", {"new_value": "<PHONE>"}),
	                "EMAIL_ADDRESS": OperatorConfig("replace", {"new_value": "<EMAIL>"}),
	                "IBAN_CODE": OperatorConfig("replace", {"new_value": "<IBAN>"}),
	                "CODICE_FISCALE": OperatorConfig("replace", {"new_value": "<CF>"}),
	                "PARTITA_IVA": OperatorConfig("replace", {"new_value": "<PIVA>"}),
	                "TARGA": OperatorConfig("replace", {"new_value": "<TARGA>"}),
	                "AGE": OperatorConfig("replace", {"new_value": "<AGE>"})
	            }

	            anonymized_result = self.anonymizer.anonymize(
	                text=text,
	                analyzer_results=presidio_results,
	                operators=operators
	            )

	            return anonymized_result.text

	        elif anonymization_type == "redact":
	            anonymized = text
	            # Walk from the last entity to the first so earlier offsets stay valid
	            for entity in sorted(entities, key=lambda x: x["start"], reverse=True):
	                asterisks = "*" * (entity["end"] - entity["start"])
	                anonymized = anonymized[:entity["start"]] + asterisks + anonymized[entity["end"]:]

	            return anonymized

	        elif anonymization_type == "pseudonymize":
	            anonymized = text
	            pseudonyms = {}   # entity text -> generated value
	            type_counts = {}  # entity type -> how many values generated so far

	            # Walk from the last entity to the first so earlier offsets stay valid
	            for entity in sorted(entities, key=lambda x: x["start"], reverse=True):
	                entity_type = entity["entity_type"]
	                entity_text = entity["text"]

	                if entity_text not in pseudonyms:
	                    type_counts[entity_type] = type_counts.get(entity_type, 0) + 1
	                    pseudonyms[entity_text] = self._make_pseudonym(entity_type, type_counts[entity_type])

	                new_value = pseudonyms[entity_text]
	                anonymized = anonymized[:entity["start"]] + new_value + anonymized[entity["end"]:]

	            return anonymized

	        else:
	            # Unsupported type: fall back to replace
	            print(f"Tipo di anonimizzazione non supportato: {anonymization_type}, usando 'replace'")
	            return self.anonymize_text(text, entities, "replace")

	# Create the module-level hybrid anonymizer from the engines built above
	hybrid_anonymizer = HybridAnonymizer(analyzer, regex_engine, anonymizer)

	# =========================================================
	# CELLA 7: UTILITÀ DI VISUALIZZAZIONE
	# =========================================================

	# Colori per i diversi tipi di entità
	# Background color used to highlight each entity type
	ENTITY_COLORS = {
	    "PERSON": "#ff7f50",  # Coral
	    "LOCATION": "#6495ed",  # Light blue
	    "ORGANIZATION": "#9acd32",  # Green
	    "DATE_TIME": "#ffa500",  # Orange
	    "PHONE_NUMBER": "#da70d6",  # Orchid
	    "EMAIL_ADDRESS": "#dda0dd",  # Plum
	    # The regex engine and the Stanford recognizer label emails "EMAIL";
	    # give that label the same color as EMAIL_ADDRESS
	    "EMAIL": "#dda0dd",  # Plum
	    "IBAN_CODE": "#1e90ff",  # Blue
	    "CODICE_FISCALE": "#ff69b4",  # Pink
	    "PARTITA_IVA": "#ff69b4",  # Pink
	    "TARGA": "#bdb76b"  # Khaki
	}

	def highlight_entities_html(text, entities):
	    """
	    Return ``text`` as HTML with each entity wrapped in a colored <span>.

	    The text and the tooltip are HTML-escaped, so markup in the input is
	    shown literally instead of being interpreted.

	    Args:
	        text: the original text.
	        entities: dicts with "entity_type", "start", "end", "score" and an
	            optional "source". Offsets refer to the unescaped ``text``.
	            Entities with ``end <= start`` are ignored.

	    Returns:
	        str: the HTML fragment. With no entities, the escaped text.
	    """
	    import html  # local import so the function does not depend on a file-level import

	    if not entities:
	        return html.escape(text)

	    # One list element per original character, so entity offsets remain
	    # valid even when a character expands to an HTML entity
	    pieces = [html.escape(ch) for ch in text]
	    markers = []

	    for entity in entities:
	        if entity["end"] <= entity["start"]:
	            continue

	        entity_type = entity["entity_type"]
	        source = entity.get("source", "unknown")
	        color = ENTITY_COLORS.get(entity_type, "#cccccc")
	        score = int(entity["score"] * 100)

	        # Tooltip with detection details
	        tooltip = html.escape(f"{entity_type} ({score}%) - detected by {source}", quote=True)

	        markers.append((
	            entity["start"],
	            True,
	            f'<span style="background-color: {color}; padding: 2px; border-radius: 3px;" title="{tooltip}">'
	        ))
	        markers.append((entity["end"], False, '</span>'))

	    # At the same index a closing tag (False) sorts before an opening tag
	    # (True), so adjacent entities do not nest inside each other
	    markers.sort(key=lambda m: (m[0], m[1]))

	    # Each insertion adds exactly one list element, hence the running offset
	    for offset, (index, _, tag) in enumerate(markers):
	        pieces.insert(index + offset, tag)

	    return "".join(pieces)

	def generate_statistics(entities):
	    """
	    Summarize a list of detected entities.

	    Returns a dict with:
	    - "total_entities": number of entities
	    - "by_type": count per entity type
	    - "by_source": count per source ("unknown" when an entity has none)
	    - "avg_confidence": mean score (0 for an empty list)
	    - "all_detected_types": list of the distinct entity types
	    """
	    type_tally = {}
	    source_tally = {}
	    seen_types = set()
	    score_total = 0

	    for item in entities:
	        kind = item["entity_type"]
	        origin = item.get("source", "unknown")
	        confidence = item["score"]

	        # Per-type and per-source counters
	        type_tally[kind] = type_tally.get(kind, 0) + 1
	        source_tally[origin] = source_tally.get(origin, 0) + 1

	        seen_types.add(kind)
	        score_total += confidence

	    # Turn the running total into a mean, unless there is nothing to average
	    if entities:
	        score_total /= len(entities)

	    return {
	        "total_entities": len(entities),
	        "by_type": type_tally,
	        "by_source": source_tally,
	        "avg_confidence": score_total,
	        "all_detected_types": list(seen_types)
	    }

	# =========================================================
	# CELLA 8: INTERFACCIA GRADIO (MODIFICHE)
	# =========================================================

	def process_text_gradio(text, anonymization_type, use_stanford, use_regex, confidence_threshold):
	    """
	    Gradio callback: detect, highlight and anonymize the entities in ``text``.

	    Returns a 3-tuple ``(highlighted_html, anonymized_text, stats_text)``.
	    Non-string or blank input yields a fixed message tuple. Any exception
	    raised while processing is caught and returned as an error message that
	    includes the traceback.
	    """
	    # Reject anything that is not a string
	    if not isinstance(text, str):
	        return "Errore: input deve essere una stringa", "", "Errore: tipo di input non valido"

	    # Nothing to do for blank input
	    if not text.strip():
	        return "", "", "Nessun testo fornito"

	    try:
	        detected = hybrid_anonymizer.analyze_text(
	            text,
	            enable_stanford=use_stanford,
	            enable_regex=use_regex
	        )

	        # Keep only the entities at or above the confidence threshold
	        kept = [item for item in detected if item["score"] >= confidence_threshold]

	        highlighted_html = highlight_entities_html(text, kept)
	        anonymized_text = hybrid_anonymizer.anonymize_text(text, kept, anonymization_type)
	        stats = generate_statistics(kept)

	        # Pre-render the list sections of the report
	        detected_types = ', '.join(sorted(stats['all_detected_types']))
	        type_lines = "\n".join(f"- {k}: {v}" for k, v in stats['by_type'].items())
	        source_lines = "\n".join(f"- {k}: {v}" for k, v in stats['by_source'].items())

	        stats_str = f"""
	Statistiche rilevamento:
	- Entità totali trovate: {stats['total_entities']}
	- Confidenza media: {stats['avg_confidence']:.2%}
	- Tipi di entità rilevati: {detected_types}

	Per tipo:
	{type_lines}

	Per sorgente:
	{source_lines}
	"""

	        return highlighted_html, anonymized_text, stats_str

	    except Exception as e:
	        import traceback
	        error_msg = f"Errore: {str(e)}\n{traceback.format_exc()}"
	        return error_msg, "", error_msg

	# =========================================================
	# CELLA 9: INTERFACCIA DI CONTROLLO ENTITÀ (VERSIONE COMPLETA)
	# =========================================================

	def process_text_with_entity_control(
	    text,
	    anonymization_type,
	    use_stanford,
	    use_regex,
	    confidence_threshold,
	    person_enabled,
	    location_enabled,
	    organization_enabled,
	    date_time_enabled,
	    phone_number_enabled,
	    email_enabled,
	    iban_enabled,
	    codice_fiscale_enabled,
	    partita_iva_enabled,
	    targa_enabled,
	    # Additional anonymization parameters
	    tag_format="<TAG>",
	    redact_char="*",
	    preserve_length=False,
	    # Per-type anonymization method
	    person_anon_method=None,
	    location_anon_method=None,
	    organization_anon_method=None,
	    date_time_anon_method=None,
	    phone_anon_method=None,
	    email_anon_method=None,
	    iban_anon_method=None,
	    cf_anon_method=None,
	    piva_anon_method=None,
	    targa_anon_method=None,
	    # Per-type confidence thresholds
	    person_threshold=None,
	    location_threshold=None,
	    organization_threshold=None,
	    date_time_threshold=None,
	    phone_threshold=None,
	    email_threshold=None,
	    iban_threshold=None,
	    cf_threshold=None,
	    piva_threshold=None,
	    targa_threshold=None,
	    # Pseudonym formats
	    person_pseudo_format="Persona{num}",
	    location_pseudo_format="Luogo{num}",
	    organization_pseudo_format="Organizzazione{num}",
	    date_pseudo_format="Data{num}",
	    phone_pseudo_format="+39-XXX-XXX-{num}",
	    email_pseudo_format="email{num}@esempio.com",
	    iban_pseudo_format="IT00X0000000000000{num}",
	    cf_pseudo_format="ABCDEF00G00H000{num}",
	    piva_pseudo_format="IT0000000000{num}",
	    targa_pseudo_format="XX000{num}"
	):
	    """
	    Detect and anonymize only the entity types the caller enabled.

	    Each ``*_enabled`` flag switches an entity type on. For an enabled type,
	    the matching ``*_anon_method`` (when truthy) overrides
	    ``anonymization_type``, the matching ``*_threshold`` (when not None)
	    overrides ``confidence_threshold``, and the matching ``*_pseudo_format``
	    is passed on for pseudonymization. The email settings apply to both the
	    "EMAIL" and "EMAIL_ADDRESS" entity types.

	    Returns a 3-tuple ``(highlighted_html, anonymized_text, stats_text)``.
	    Non-string or blank input yields a fixed message tuple; when no type is
	    enabled the text is returned unchanged. Any exception raised while
	    processing is caught and returned as an error message that includes the
	    traceback.
	    """
	    # Reject anything that is not a string
	    if not isinstance(text, str):
	        return "Errore: input deve essere una stringa", "", "Errore: tipo di input non valido"

	    # Nothing to do for blank input
	    if not text.strip():
	        return "", "", "Nessun testo fornito"

	    try:
	        # One row per entity type: (name, enabled, method, threshold, pseudonym format)
	        per_type_settings = (
	            ("PERSON", person_enabled, person_anon_method, person_threshold, person_pseudo_format),
	            ("LOCATION", location_enabled, location_anon_method, location_threshold, location_pseudo_format),
	            ("ORGANIZATION", organization_enabled, organization_anon_method, organization_threshold, organization_pseudo_format),
	            ("DATE_TIME", date_time_enabled, date_time_anon_method, date_time_threshold, date_pseudo_format),
	            ("PHONE_NUMBER", phone_number_enabled, phone_anon_method, phone_threshold, phone_pseudo_format),
	            ("EMAIL", email_enabled, email_anon_method, email_threshold, email_pseudo_format),
	            ("EMAIL_ADDRESS", email_enabled, email_anon_method, email_threshold, email_pseudo_format),
	            ("IBAN_CODE", iban_enabled, iban_anon_method, iban_threshold, iban_pseudo_format),
	            ("CODICE_FISCALE", codice_fiscale_enabled, cf_anon_method, cf_threshold, cf_pseudo_format),
	            ("PARTITA_IVA", partita_iva_enabled, piva_anon_method, piva_threshold, piva_pseudo_format),
	            ("TARGA", targa_enabled, targa_anon_method, targa_threshold, targa_pseudo_format),
	        )

	        enabled_entities = []
	        entity_anon_methods = {}
	        entity_thresholds = {}
	        entity_pseudo_formats = {}

	        for name, active, method, min_score, pseudo_fmt in per_type_settings:
	            if not active:
	                continue
	            enabled_entities.append(name)
	            if method:
	                entity_anon_methods[name] = method
	            if min_score is not None:
	                entity_thresholds[name] = min_score
	            entity_pseudo_formats[name] = pseudo_fmt

	        # With no entity type enabled, hand the original text back
	        if not enabled_entities:
	            return text, text, "Nessuna entità selezionata per l'anonimizzazione"

	        detected = hybrid_anonymizer.analyze_text(
	            text,
	            enable_stanford=use_stanford,
	            enable_regex=use_regex
	        )

	        # Keep enabled types whose score reaches the per-type threshold,
	        # or the global one when no per-type threshold was given
	        filtered_entities = [
	            item for item in detected
	            if item["entity_type"] in enabled_entities
	            and item["score"] >= entity_thresholds.get(item["entity_type"], confidence_threshold)
	        ]

	        highlighted_html = highlight_entities_html(text, filtered_entities)

	        anonymized_text = advanced_anonymize_text(
	            text,
	            filtered_entities,
	            anonymization_type,
	            tag_format=tag_format,
	            redact_char=redact_char,
	            preserve_length=preserve_length,
	            entity_anon_methods=entity_anon_methods,
	            entity_pseudo_formats=entity_pseudo_formats
	        )

	        stats = generate_statistics(filtered_entities)

	        # Pre-render the variable sections of the report
	        detected_types = ', '.join(sorted(stats['all_detected_types']))
	        type_lines = "\n".join(f"- {k}: {v}" for k, v in stats['by_type'].items())
	        source_lines = "\n".join(f"- {k}: {v}" for k, v in stats['by_source'].items())
	        length_flag = "Sì" if preserve_length else "No"

	        stats_str = f"""
	Statistiche rilevamento:
	- Entità totali trovate: {stats['total_entities']}
	- Confidenza media: {stats['avg_confidence']:.2%}
	- Tipi di entità rilevati: {detected_types}

	Per tipo:
	{type_lines}

	Per sorgente:
	{source_lines}

	Parametri di anonimizzazione:
	- Metodo globale: {anonymization_type}
	- Formato tag: {tag_format}
	- Preserva lunghezza: {length_flag}
	"""
	        # Append the per-type methods, if any
	        if entity_anon_methods:
	            stats_str += "\nMetodi specifici per tipo:\n"
	            stats_str += "\n".join(f"- {k}: {v}" for k, v in entity_anon_methods.items())

	        # Append the per-type thresholds, if any
	        if entity_thresholds:
	            stats_str += "\n\nSoglie di confidenza specifiche:\n"
	            stats_str += "\n".join(f"- {k}: {v}" for k, v in entity_thresholds.items())

	        return highlighted_html, anonymized_text, stats_str

	    except Exception as e:
	        import traceback
	        error_msg = f"Errore: {str(e)}\n{traceback.format_exc()}"
	        return error_msg, "", error_msg

	def advanced_anonymize_text(text, entities, global_anon_type, tag_format="<TAG>", redact_char="*",
	                            preserve_length=False, entity_anon_methods=None, entity_pseudo_formats=None):
	    """
	    Anonymize ``text`` with per-entity-type control over method and format.

	    Args:
	        text: the original text.
	        entities: dicts with "entity_type", "text", "start" and "end".
	        global_anon_type: "replace", "redact" or "pseudonymize". Any other
	            value produces the tag ``<ENTITY_TYPE>``.
	        tag_format: one of "<TAG>", "[TAG]", "{TAG}", "TAG_". Any other
	            value is treated as "<TAG>".
	        redact_char: character used by "redact".
	        preserve_length: for "redact", repeat ``redact_char`` over the whole
	            entity instead of 5 times; for "pseudonymize", pad or truncate
	            the pseudonym to the entity length. Tags are not adjusted.
	        entity_anon_methods: optional ``{entity_type: method}`` overriding
	            ``global_anon_type``.
	        entity_pseudo_formats: optional ``{entity_type: format}``. The
	            format may use ``{num}``, ``{type}`` and ``{orig}`` (first
	            character of the entity text). A format that fails to render
	            falls back to ``<type><num>``.

	    Returns:
	        str: the anonymized text, or ``text`` unchanged when ``entities``
	        is empty.

	    Entities are processed from last to first so earlier offsets stay
	    valid. As a consequence the pseudonym counters are assigned in that
	    order too: the last entity of a type in the text gets number 1.
	    """
	    if not entities:
	        return text

	    # None instead of a shared mutable default
	    entity_anon_methods = entity_anon_methods or {}
	    entity_pseudo_formats = entity_pseudo_formats or {}

	    # Tag layout for each supported tag_format
	    tag_templates = {
	        "<TAG>": "<{}>",
	        "[TAG]": "[{}]",
	        "{TAG}": "{{{}}}",
	        "TAG_": "{}_",
	    }
	    # Default pseudonyms, used when no format is given for an entity type
	    default_pseudonyms = {
	        "PERSON": "Persona{num}",
	        "LOCATION": "Luogo{num}",
	        "ORGANIZATION": "Organizzazione{num}",
	        "DATE_TIME": "Data{num}",
	        "EMAIL_ADDRESS": "email{num}@esempio.com",
	        "EMAIL": "email{num}@esempio.com",
	        "IBAN_CODE": "IT00X0000000000000{num}",
	        "CODICE_FISCALE": "ABCDEF00G00H000{num}",
	        "CF": "ABCDEF00G00H000{num}",
	        "PARTITA_IVA": "IT0000000000{num}",
	        "PIVA": "IT0000000000{num}",
	        "TARGA": "XX000{num}",
	    }

	    anonymized = text
	    pseudonyms = {}   # entity text -> generated value, before length adjustment
	    type_counts = {}  # entity type -> how many values generated so far

	    for entity in sorted(entities, key=lambda x: x["start"], reverse=True):
	        entity_type = entity["entity_type"]
	        entity_text = entity["text"]
	        entity_start = entity["start"]
	        entity_end = entity["end"]
	        span_len = entity_end - entity_start

	        # A per-type method wins over the global one
	        anon_type = entity_anon_methods.get(entity_type, global_anon_type)

	        if anon_type == "replace":
	            new_value = tag_templates.get(tag_format, "<{}>").format(entity_type)

	        elif anon_type == "redact":
	            # Fixed length of 5 unless the original length must be kept
	            new_value = redact_char * (span_len if preserve_length else 5)

	        elif anon_type == "pseudonymize":
	            if entity_text not in pseudonyms:
	                count = type_counts.get(entity_type, 0) + 1
	                type_counts[entity_type] = count

	                pseudo_format = entity_pseudo_formats.get(entity_type, "")
	                if pseudo_format:
	                    try:
	                        generated = pseudo_format.format(
	                            num=count,
	                            type=entity_type,
	                            orig=entity_text[:1] if entity_text else "X"
	                        )
	                    except Exception:
	                        # Malformed user format: fall back to a generic value
	                        generated = f"{entity_type}{count}"
	                elif entity_type == "PHONE_NUMBER":
	                    generated = f"+39-XXX-XXX-{1000 + count}"
	                elif entity_type in default_pseudonyms:
	                    generated = default_pseudonyms[entity_type].format(num=count)
	                else:
	                    generated = f"{entity_type}{count}"

	                pseudonyms[entity_text] = generated

	            new_value = pseudonyms[entity_text]

	            # Fit the pseudonym to the original length if requested. This
	            # runs for reused pseudonyms too.
	            if preserve_length:
	                if len(new_value) < span_len:
	                    new_value = new_value.ljust(span_len)
	                elif len(new_value) > span_len:
	                    # Truncate with an ellipsis; a zero-length span gets nothing
	                    new_value = new_value[:span_len - 1] + "…" if span_len > 0 else ""

	        else:
	            # Unknown method: fall back to a plain tag
	            new_value = f"<{entity_type}>"

	        anonymized = anonymized[:entity_start] + new_value + anonymized[entity_end:]

	    return anonymized

	# Esempi per la nuova interfaccia
	# Each example row: text, anonymization type, use_stanford, use_regex,
	# confidence threshold, then the ten per-entity-type enable flags.
	entity_control_examples = [
	    [
	        "Il signor Mario Rossi, nato il 15/04/1980, CF: RSSMRC80D15H501V, residente in Via Roma 123, Milano, possiede la partita IVA IT12345678901.",
	        "replace",
	        True,
	        False,
	        0.5,
	        True,  # person_enabled
	        True,  # location_enabled
	        True,  # organization_enabled
	        True,  # date_time_enabled
	        True,  # phone_number_enabled
	        True,  # email_enabled
	        True,  # iban_enabled
	        True,  # codice_fiscale_enabled
	        True,  # partita_iva_enabled
	        True,  # targa_enabled
	    ],
	    [
	        # The address was an obfuscation placeholder ("[email protected]")
	        # containing no email at all; a reserved example.com address is
	        # used so the example exercises email detection.
	        "Per contattare il cliente Giovanni Bianchi utilizzare l'email giovanni.bianchi@example.com o il numero +39 333-123-4567.",
	        "replace",
	        False,
	        True,
	        0.6,
	        True,  # person_enabled
	        False,  # location_enabled
	        False,  # organization_enabled
	        False,  # date_time_enabled
	        True,  # phone_number_enabled
	        True,  # email_enabled
	        False,  # iban_enabled
	        False,  # codice_fiscale_enabled
	        False,  # partita_iva_enabled
	        False,  # targa_enabled
	    ],
	    [
	        "Il veicolo targato AB123CD appartiene a Maria Verdi, titolare del conto bancario IT12K1234567890123456789012.",
	        "replace",
	        True,
	        True,
	        0.7,
	        True,  # person_enabled
	        False,  # location_enabled
	        False,  # organization_enabled
	        False,  # date_time_enabled
	        False,  # phone_number_enabled
	        False,  # email_enabled
	        True,  # iban_enabled
	        False,  # codice_fiscale_enabled
	        False,  # partita_iva_enabled
	        True,  # targa_enabled
	    ]
	]

	def process_text_with_entity_control_wrapper(
	    text,
	    anonymization_type,
	    use_stanford,
	    use_regex,
	    confidence_threshold,
	    person_enabled,
	    location_enabled,
	    organization_enabled,
	    date_time_enabled,
	    phone_number_enabled,
	    email_enabled,
	    iban_enabled,
	    codice_fiscale_enabled,
	    partita_iva_enabled,
	    targa_enabled
	):
	    """
	    Call process_text_with_entity_control with the fifteen basic settings and
	    fixed defaults for every advanced parameter.

	    The fifteen arguments are forwarded unchanged under the same keyword names;
	    the result of process_text_with_entity_control is returned as-is.
	    """
	    # Values supplied by the caller, passed through untouched.
	    caller_values = {
	        "text": text,
	        "anonymization_type": anonymization_type,
	        "use_stanford": use_stanford,
	        "use_regex": use_regex,
	        "confidence_threshold": confidence_threshold,
	        "person_enabled": person_enabled,
	        "location_enabled": location_enabled,
	        "organization_enabled": organization_enabled,
	        "date_time_enabled": date_time_enabled,
	        "phone_number_enabled": phone_number_enabled,
	        "email_enabled": email_enabled,
	        "iban_enabled": iban_enabled,
	        "codice_fiscale_enabled": codice_fiscale_enabled,
	        "partita_iva_enabled": partita_iva_enabled,
	        "targa_enabled": targa_enabled,
	    }

	    # Fixed defaults for the replacement / redaction options.
	    rendering_defaults = {
	        "tag_format": "<TAG>",
	        "redact_char": "*",
	        "preserve_length": False,
	    }

	    # Per-entity method and threshold overrides are all sent as None
	    # (intended meaning, per the original notes: fall back to the global setting).
	    unset_overrides = dict.fromkeys((
	        "person_anon_method",
	        "location_anon_method",
	        "organization_anon_method",
	        "date_time_anon_method",
	        "phone_anon_method",
	        "email_anon_method",
	        "iban_anon_method",
	        "cf_anon_method",
	        "piva_anon_method",
	        "targa_anon_method",
	        "person_threshold",
	        "location_threshold",
	        "organization_threshold",
	        "date_time_threshold",
	        "phone_threshold",
	        "email_threshold",
	        "iban_threshold",
	        "cf_threshold",
	        "piva_threshold",
	        "targa_threshold",
	    ))

	    # Default pseudonym templates, one per entity type.
	    pseudonym_templates = {
	        "person_pseudo_format": "Persona{num}",
	        "location_pseudo_format": "Luogo{num}",
	        "organization_pseudo_format": "Organizzazione{num}",
	        "date_pseudo_format": "Data{num}",
	        "phone_pseudo_format": "+39-XXX-XXX-{num}",
	        "email_pseudo_format": "email{num}@esempio.com",
	        "iban_pseudo_format": "IT00X0000000000000{num}",
	        "cf_pseudo_format": "ABCDEF00G00H000{num}",
	        "piva_pseudo_format": "IT0000000000{num}",
	        "targa_pseudo_format": "XX000{num}",
	    }

	    return process_text_with_entity_control(
	        **caller_values,
	        **rendering_defaults,
	        **unset_overrides,
	        **pseudonym_templates,
	    )

	# Build the simple Gradio Interface: basic settings plus one on/off switch per
	# entity type. The wrapper fills in defaults for all advanced parameters.
	demo_advanced = gr.Interface(
	    fn=process_text_with_entity_control_wrapper,
	    inputs=[
	        gr.Textbox(
	            label="Testo da analizzare",
	            lines=5,
	            placeholder="Inserisci il testo contenente dati sensibili...",
	            # The sample must contain a well-formed e-mail address, otherwise
	            # the default text has no e-mail entity to detect.
	            value="Il signor Marco Rossi, nato il 15/04/1978, CF: RSSMRC78D15H501T, può essere contattato al numero +39 333-1234567 o all'email marco.rossi@example.com.",
	        ),
	        gr.Radio(
	            ["replace", "redact", "pseudonymize"],
	            label="Tipo di anonimizzazione",
	            value="replace",
	        ),
	        gr.Checkbox(label="Usa modello Stanford", value=True),
	        gr.Checkbox(label="Usa Regex Fallback", value=True),
	        gr.Slider(
	            minimum=0.1,
	            maximum=1.0,
	            value=0.5,
	            step=0.05,
	            label="Soglia di confidenza minima",
	        ),
	        # One switch per entity type, in the positional order of the wrapper's
	        # *_enabled parameters.
	        *[
	            gr.Checkbox(label=entity_label, value=True)
	            for entity_label in (
	                "Persone (PERSON)",
	                "Luoghi (LOCATION)",
	                "Organizzazioni (ORGANIZATION)",
	                "Date (DATE_TIME)",
	                "Numeri di telefono (PHONE_NUMBER)",
	                "Email (EMAIL)",
	                "IBAN (IBAN_CODE)",
	                "Codici Fiscali (CODICE_FISCALE)",
	                "Partite IVA (PARTITA_IVA)",
	                "Targhe (TARGA)",
	            )
	        ],
	    ],
	    outputs=[
	        gr.HTML(label="Testo con entità evidenziate"),
	        gr.Textbox(label="Testo anonimizzato", lines=5),
	        gr.Markdown(label="Statistiche di rilevamento"),
	    ],
	    title="🔒 Sistema Ibrido di Anonimizzazione Dati - Controllo Entità",
	    description="Analizza e anonimizza testi selezionando i tipi di entità da processare.\n"
	    "I diversi colori indicano i tipi di entità rilevate.",
	    examples=entity_control_examples,
	    theme=gr.themes.Soft(),
	    allow_flagging="never",
	)

	# Launching the Interface-based demo is intentionally left disabled; only
	# the Blocks UI defined below is started by this file.
	# demo_advanced.launch(share=True, debug=True)

	# =========================================================
	# CELLA 10: INTERFACCIA AVANZATA CON PARAMETRI DI ANONIMIZZAZIONE
	# =========================================================

	def _method_dropdown(entity_name):
	    """Create the dropdown that selects the anonymization method for one entity type.

	    The first choice and the initial value are None, i.e. no per-entity method.
	    """
	    return gr.Dropdown(
	        [None, "replace", "redact", "pseudonymize"],
	        label=f"Metodo per {entity_name}",
	        value=None,
	    )


	def _threshold_input(entity_name):
	    """Create the optional per-entity confidence-threshold field.

	    A Number box is used rather than a Slider: the UI tells the user to leave
	    the field empty to keep the global threshold, and an empty Number box is
	    submitted as None, whereas a Slider created with value=None starts at its
	    minimum and therefore always submits a number.
	    """
	    return gr.Number(
	        label=f"Soglia per {entity_name}",
	        value=None,
	        minimum=0.1,
	        maximum=1.0,
	        step=0.05,
	    )


	def _pseudonym_format_box(entity_name, default_format, hint):
	    """Create the textbox holding the pseudonym template for one entity type."""
	    return gr.Textbox(
	        label=f"Formato per {entity_name}",
	        value=default_format,
	        placeholder=hint,
	    )


	with gr.Blocks(theme=gr.themes.Soft()) as demo_blocks:
	    # NOTE(review): the nesting of the layout containers below was reconstructed
	    # from statement order; confirm it against the intended layout.
	    gr.Markdown("# 🔒 Sistema Ibrido di Anonimizzazione Dati")
	    gr.Markdown("Analizza e anonimizza testi in italiano con controllo avanzato dei parametri.")

	    with gr.Row():
	        # ---- Left column: input text, settings tabs, run button ----
	        with gr.Column(scale=2):
	            text_input = gr.Textbox(
	                label="Testo da analizzare",
	                lines=6,
	                placeholder="Inserisci il testo contenente dati sensibili...",
	                # The sample must contain a well-formed e-mail address, otherwise
	                # the default text has no e-mail entity to detect.
	                value="Il signor Marco Rossi, nato il 15/04/1978, CF: RSSMRC78D15H501T, può essere contattato al numero +39 333-1234567 o all'email marco.rossi@example.com.",
	            )

	            with gr.Tabs():
	                with gr.TabItem("Impostazioni Base"):
	                    with gr.Row():
	                        with gr.Column():
	                            anon_type = gr.Radio(
	                                ["replace", "redact", "pseudonymize"],
	                                label="Tipo di anonimizzazione globale",
	                                value="replace",
	                            )
	                            confidence = gr.Slider(
	                                minimum=0.1,
	                                maximum=1.0,
	                                value=0.5,
	                                step=0.05,
	                                label="Soglia di confidenza globale",
	                            )

	                        with gr.Column():
	                            use_stanford = gr.Checkbox(label="Usa modello Stanford", value=True)
	                            use_regex = gr.Checkbox(label="Usa Regex Fallback", value=True)

	                with gr.TabItem("Parametri di Anonimizzazione"):
	                    with gr.Row():
	                        with gr.Column():
	                            tag_format = gr.Radio(
	                                ["<TAG>", "[TAG]", "{TAG}", "TAG_"],
	                                label="Formato dei tag di sostituzione",
	                                value="<TAG>",
	                            )
	                            redact_char = gr.Radio(
	                                ["*", "X", "#", "_"],
	                                label="Carattere di oscuramento (per redact)",
	                                value="*",
	                            )

	                        with gr.Column():
	                            preserve_length = gr.Checkbox(
	                                label="Preserva lunghezza originale nelle sostituzioni",
	                                value=False,
	                            )

	                            gr.Markdown("### Anteprima formati di tag")
	                            # "<PERSON>" is written as &lt;PERSON&gt;: unescaped, the
	                            # browser treats it as an unknown tag and shows nothing.
	                            anteprima_html = gr.HTML(value="<div style='padding: 10px; background-color: #f0f0f0; border-radius: 5px;'><p><b>Replace:</b> &lt;PERSON&gt;, [PERSON], {PERSON}, PERSON_</p><p><b>Redact:</b> *****, XXXXX, #####, _____</p><p><b>Pseudonymize:</b> Persona1, Luogo1, Data1...</p></div>")

	                    with gr.Accordion("Metodi specifici per tipo di entità", open=False):
	                        # NOTE(review): the text mentions a 'Globale' choice, but the
	                        # dropdowns offer None instead; confirm the intended wording.
	                        gr.Markdown("Seleziona un metodo specifico per ogni tipo di entità, o lascia 'Globale' per usare il metodo globale")

	                        with gr.Row():
	                            with gr.Column():
	                                person_method = _method_dropdown("PERSON")
	                                location_method = _method_dropdown("LOCATION")
	                                organization_method = _method_dropdown("ORGANIZATION")
	                                date_method = _method_dropdown("DATE_TIME")
	                                phone_method = _method_dropdown("PHONE_NUMBER")

	                            with gr.Column():
	                                email_method = _method_dropdown("EMAIL")
	                                iban_method = _method_dropdown("IBAN_CODE")
	                                cf_method = _method_dropdown("CODICE_FISCALE")
	                                piva_method = _method_dropdown("PARTITA_IVA")
	                                targa_method = _method_dropdown("TARGA")

	                with gr.TabItem("Soglie di Confidenza"):
	                    gr.Markdown("### Imposta soglie di confidenza specifiche per tipo di entità")
	                    gr.Markdown("Lascia vuoto per usare la soglia globale")

	                    with gr.Row():
	                        with gr.Column():
	                            person_threshold = _threshold_input("PERSON")
	                            location_threshold = _threshold_input("LOCATION")
	                            organization_threshold = _threshold_input("ORGANIZATION")
	                            date_threshold = _threshold_input("DATE_TIME")
	                            phone_threshold = _threshold_input("PHONE_NUMBER")

	                        with gr.Column():
	                            email_threshold = _threshold_input("EMAIL")
	                            iban_threshold = _threshold_input("IBAN_CODE")
	                            cf_threshold = _threshold_input("CODICE_FISCALE")
	                            piva_threshold = _threshold_input("PARTITA_IVA")
	                            targa_threshold = _threshold_input("TARGA")

	                with gr.TabItem("Formati Pseudonimi"):
	                    gr.Markdown("### Personalizza i formati dei pseudonimi")
	                    gr.Markdown("Usa {num} per inserire il numero progressivo, {type} per il tipo di entità, {orig} per l'iniziale dell'originale")

	                    with gr.Row():
	                        with gr.Column():
	                            person_format = _pseudonym_format_box(
	                                "PERSON", "Persona{num}", "es. Persona{num}, P{num}, {orig}..."
	                            )
	                            location_format = _pseudonym_format_box(
	                                "LOCATION", "Luogo{num}", "es. Luogo{num}, L{num}..."
	                            )
	                            organization_format = _pseudonym_format_box(
	                                "ORGANIZATION", "Organizzazione{num}", "es. Org{num}, Azienda{num}..."
	                            )
	                            date_format = _pseudonym_format_box(
	                                "DATE_TIME", "Data{num}", "es. GG/MM/AAAA, Data{num}..."
	                            )
	                            phone_format = _pseudonym_format_box(
	                                "PHONE_NUMBER", "+39-XXX-XXX-{num}", "es. +39-XXX-XXX-{num}..."
	                            )

	                        with gr.Column():
	                            email_format = _pseudonym_format_box(
	                                "EMAIL", "email{num}@esempio.com", "es. user{num}@domain.com..."
	                            )
	                            iban_format = _pseudonym_format_box(
	                                "IBAN_CODE", "IT00X0000000000000{num}", "es. IT00X0000..."
	                            )
	                            cf_format = _pseudonym_format_box(
	                                "CODICE_FISCALE", "ABCDEF00G00H000{num}", "es. ABCDEF00G00H000{num}..."
	                            )
	                            piva_format = _pseudonym_format_box(
	                                "PARTITA_IVA", "IT0000000000{num}", "es. IT0000000000{num}..."
	                            )
	                            targa_format = _pseudonym_format_box(
	                                "TARGA", "XX000{num}", "es. XX000{num}..."
	                            )

	            process_btn = gr.Button("Analizza e Anonimizza", variant="primary")

	        # ---- Right column: entity switches, bulk buttons, quick guide ----
	        with gr.Column(scale=1):
	            gr.Markdown("### Seleziona i tipi di entità da anonimizzare")

	            with gr.Group():
	                person_enabled = gr.Checkbox(label="👤 Persone (PERSON)", value=True)
	                location_enabled = gr.Checkbox(label="📍 Luoghi (LOCATION)", value=True)
	                organization_enabled = gr.Checkbox(label="🏢 Organizzazioni (ORGANIZATION)", value=True)
	                date_time_enabled = gr.Checkbox(label="📅 Date (DATE_TIME)", value=True)
	                phone_number_enabled = gr.Checkbox(label="📞 Numeri di telefono (PHONE_NUMBER)", value=True)
	                email_enabled = gr.Checkbox(label="📧 Email (EMAIL)", value=True)
	                iban_enabled = gr.Checkbox(label="💳 IBAN (IBAN_CODE)", value=True)
	                codice_fiscale_enabled = gr.Checkbox(label="🪪 Codici Fiscali (CODICE_FISCALE)", value=True)
	                partita_iva_enabled = gr.Checkbox(label="🏷️ Partite IVA (PARTITA_IVA)", value=True)
	                targa_enabled = gr.Checkbox(label="🚗 Targhe (TARGA)", value=True)

	            with gr.Row():
	                select_all_btn = gr.Button("Seleziona tutti")
	                clear_all_btn = gr.Button("Deseleziona tutti")

	            with gr.Accordion("Guida rapida", open=False):
	                # "<PERSON>" is wrapped in a code span so the Markdown renderer
	                # prints it literally instead of treating it as an HTML tag.
	                gr.Markdown("\n".join([
	                    "Tipi di anonimizzazione:",
	                    "- Replace: sostituisce l'entità con un tag (es. `<PERSON>`)",
	                    "- Redact: oscura l'entità con caratteri (es. *****)",
	                    "- Pseudonymize: sostituisce con valori fittizi (es. Persona1)",
	                    "",
	                    "Formato tag:",
	                    "- `<TAG>`: usa tag HTML (es. `<PERSON>`)",
	                    "- `[TAG]`: usa parentesi quadre (es. [PERSON])",
	                    "- `{TAG}`: usa parentesi graffe (es. {PERSON})",
	                    "- `TAG_`: usa underscore (es. PERSON_)",
	                    "",
	                    "Preserva lunghezza:",
	                    "- Se attivo, mantiene la lunghezza originale dell'entità",
	                    "- Utile per mantenere il formato del documento",
	                ]))

	    # ---- Results area ----
	    with gr.Tabs():
	        with gr.TabItem("Risultati"):
	            html_output = gr.HTML(label="Testo con entità evidenziate")
	            anon_output = gr.Textbox(label="Testo anonimizzato", lines=5)
	            stats_output = gr.Markdown(label="Statistiche di rilevamento")

	    def update_preview(tag_format, redact_char, preserve_length):
	        """Return the HTML preview for the selected tag format and redaction character.

	        Args:
	            tag_format: one of the tag templates offered by the radio
	                ("<TAG>", "[TAG]", "{TAG}", "TAG_").
	            redact_char: the single character used for redaction.
	            preserve_length: whether the length-preserving option is ticked.

	        Returns:
	            An HTML snippet with one example line per anonymization type.
	        """
	        import html  # local import keeps this section independent of the file header

	        # Every template contains the token "TAG"; substituting it gives the
	        # example. Escaping keeps "<PERSON>" visible instead of being parsed as a tag.
	        replace_example = html.escape(str(tag_format or "<TAG>").replace("TAG", "PERSON"))
	        redact_example = html.escape(str(redact_char or "*") * 5)
	        redact_note = (
	            " (mantenendo lunghezza originale)" if preserve_length else " (lunghezza fissa)"
	        )

	        return f"""
	        <div style='padding: 10px; background-color: #f0f0f0; border-radius: 5px;'>
	        <p><b>Replace:</b> {replace_example}</p>
	        <p><b>Redact:</b> {redact_example}{redact_note}</p>
	        <p><b>Pseudonymize:</b> Persona1, Luogo1, Data1...</p>
	        </div>
	        """

	    # Refresh the preview whenever any of the three controlling widgets changes.
	    preview_controls = [tag_format, redact_char, preserve_length]
	    for preview_control in preview_controls:
	        preview_control.change(
	            update_preview,
	            inputs=preview_controls,
	            outputs=anteprima_html,
	        )

	    # Entity switches in the positional order expected by the processing function.
	    entity_checkboxes = [
	        person_enabled, location_enabled, organization_enabled, date_time_enabled,
	        phone_number_enabled, email_enabled, iban_enabled, codice_fiscale_enabled,
	        partita_iva_enabled, targa_enabled,
	    ]

	    def select_all():
	        """Return True for every entity checkbox."""
	        return [True] * len(entity_checkboxes)

	    def clear_all():
	        """Return False for every entity checkbox."""
	        return [False] * len(entity_checkboxes)

	    select_all_btn.click(select_all, inputs=None, outputs=entity_checkboxes)
	    clear_all_btn.click(clear_all, inputs=None, outputs=entity_checkboxes)

	    # Run the analysis. Inputs are passed positionally, so the order of this
	    # list must stay aligned with process_text_with_entity_control's parameters.
	    process_btn.click(
	        process_text_with_entity_control,
	        inputs=[
	            text_input, anon_type, use_stanford, use_regex, confidence,
	            *entity_checkboxes,
	            # Advanced anonymization parameters
	            tag_format, redact_char, preserve_length,
	            # Per-entity methods
	            person_method, location_method, organization_method, date_method,
	            phone_method, email_method, iban_method, cf_method, piva_method, targa_method,
	            # Per-entity thresholds
	            person_threshold, location_threshold, organization_threshold, date_threshold,
	            phone_threshold, email_threshold, iban_threshold, cf_threshold, piva_threshold, targa_threshold,
	            # Pseudonym templates
	            person_format, location_format, organization_format, date_format, phone_format,
	            email_format, iban_format, cf_format, piva_format, targa_format,
	        ],
	        outputs=[html_output, anon_output, stats_output],
	    )

	# Start the Blocks UI when the file is run as a script
	# (original note: comment out the launch line of cell 11 if you use this one).
	if __name__ == "__main__":
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	        "show_error": True,
	    }
	    demo_blocks.launch(**launch_options)