NextGenC committed on
Commit
98c2b46
·
verified ·
1 Parent(s): 4abf400

Upload 10 files

Browse files
argument_analyzer.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # argument_analyzer.py (V2 - Adım 2.2 Düzeltilmiş Hali - Tekrar)
2
+ import spacy
3
+ from spacy.tokens import Doc, Span, Token
4
+ from typing import List, Tuple, Dict, Set
5
+ import data_models # Düz yapı importu
6
+ from rich.console import Console
7
+
8
+ console = Console()
9
+
10
+ # --- Göstergeler ---
11
# Verb lemmas that commonly introduce a claim.
CLAIM_INDICATORS_VERBS = {
    "believe", "think", "argue", "claim", "suggest",
    "conclude", "state", "assert", "maintain", "propose",
    "insist", "contend", "show", "demonstrate", "indicate",
}

# Lower-case phrases whose presence in a sentence signals a claim.
CLAIM_INDICATORS_PHRASES = {
    "in my opinion", "it seems that", "the main point is",
    "it is clear that", "clearly", "the conclusion is",
    "we must", "it is necessary", "it is evident that",
}

# Strong modal expressions.
# NOTE: "has to" and "needs to" span two tokens, so a comparison against a
# single token's lemma cannot match them.
MODAL_VERBS_STRONG = {
    "should", "must", "ought", "has to", "needs to",
}

# Lower-case words/phrases that introduce a premise (reason).
PREMISE_INDICATORS = {
    "because", "since", "as", "for", "given that",
    "due to", "owing to", "assuming that", "the reason is", "if",
}
15
+
16
+ # --- Dependency Parsing Yardımcı Fonksiyonları ---
17
def find_root_verb(sent: Span) -> Token | None:
    """Return the ROOT token of ``sent`` when it is a verb or an auxiliary.

    A ROOT token tagged ``VERB`` is preferred; only when none exists is a ROOT
    token tagged ``AUX`` returned. ``None`` means neither was found.
    """
    for wanted_pos in ("VERB", "AUX"):
        candidate = next(
            (tok for tok in sent if tok.dep_ == "ROOT" and tok.pos_ == wanted_pos),
            None,
        )
        if candidate is not None:
            return candidate
    return None
23
+
24
def has_subject(verb: Token) -> bool:
    """Tell whether any direct child of ``verb`` is a (passive) nominal subject."""
    subject_deps = {"nsubj", "nsubjpass"}
    for child in verb.children:
        if child.dep_ in subject_deps:
            return True
    return False
26
+
27
def has_complement_or_object(verb: Token) -> bool:
    """Tell whether any direct child of ``verb`` is an object or a complement.

    The accepted dependency labels are ``dobj``, ``attr``, ``acomp``,
    ``ccomp``, ``xcomp`` and ``pobj``.
    """
    complement_deps = {"dobj", "attr", "acomp", "ccomp", "xcomp", "pobj"}
    for child in verb.children:
        if child.dep_ in complement_deps:
            return True
    return False
29
+
30
def get_clause_starting_with(indicator: str, sent: Span) -> Span | None:
    """Return the part of ``sent`` that starts at the given indicator word.

    The first token whose lower-cased text equals ``indicator`` and whose
    dependency label is ``mark``, ``prep`` or ``agent`` marks the clause start.
    The clause runs from that token's character offset to the end of the
    sentence. If the character span cannot be built (or is empty), the whole
    sentence is returned instead. ``None`` means no such token was found.
    """
    marker = next(
        (
            tok for tok in sent
            if tok.text.lower() == indicator and tok.dep_ in {"mark", "prep", "agent"}
        ),
        None,
    )
    if not marker:
        return None

    clause = sent.doc.char_span(marker.idx, sent.end_char)
    if clause:
        return clause
    return sent
40
+
41
+ # --- Gelişmiş Analiz Fonksiyonu (Düzeltilmiş Hali) ---
42
def enhanced_component_analyzer(doc: Doc) -> List[data_models.ArgumentComponent]:
    """Detect potential claims and premises in a spaCy ``Doc`` (V2, corrected).

    Each sentence of ``doc`` yields at most one component, always covering the
    whole sentence:

    * A sentence containing any ``PREMISE_INDICATORS`` entry as a
      space-delimited phrase becomes a ``"Premise"`` with confidence 0.65, and
      no claim checks are run for it.
    * Otherwise the sentence becomes a ``"Claim"`` when it contains a claim
      phrase (0.7), a strong modal (0.6) or a claim verb (0.5); the highest
      applicable confidence wins.
    * If none of those indicators is present, a sentence whose root verb has
      both a subject and an object/complement becomes a ``"Claim"`` with
      confidence 0.4.

    Args:
        doc: Parsed document; its sentences are read from ``doc.sents``.

    Returns:
        The detected components in sentence order. ``span_start``/``span_end``
        are the sentence's ``start_char``/``end_char``.
    """
    components = []

    for i, sent in enumerate(doc.sents):
        sent_text_lower = sent.text.lower()
        # Padding with spaces lets a whole-phrase check also match at the very
        # start or end of the sentence.
        padded_text = f" {sent_text_lower} "

        # --- 1. Premise check: a hit ends the analysis of this sentence ---
        if any(f" {indicator} " in padded_text for indicator in PREMISE_INDICATORS):
            components.append(data_models.ArgumentComponent(
                component_type="Premise", text=sent.text, sentence_index=i,
                span_start=sent.start_char, span_end=sent.end_char, confidence=0.65
            ))
            continue  # move on to the next sentence

        # --- 2. No premise found: claim checks ---
        has_claim_verb = any(
            token.lemma_ in CLAIM_INDICATORS_VERBS and token.pos_ == "VERB"
            for token in sent
        )
        # Single-word modals are matched on the token lemma. Multi-word entries
        # ("has to", "needs to") can never equal one token's lemma, so they are
        # matched as whole phrases against the sentence text instead.
        has_modal = any(
            token.lemma_ in MODAL_VERBS_STRONG and token.pos_ == "AUX"
            for token in sent
        ) or any(
            " " in modal and f" {modal} " in padded_text
            for modal in MODAL_VERBS_STRONG
        )
        has_claim_phrase = any(phrase in sent_text_lower for phrase in CLAIM_INDICATORS_PHRASES)

        # The strongest indicator present decides the confidence.
        if has_claim_phrase:
            claim_confidence = 0.7
        elif has_modal:
            claim_confidence = 0.6
        elif has_claim_verb:
            claim_confidence = 0.5
        else:
            claim_confidence = 0.0

        # Structural fallback: subject + root verb + object/complement.
        if claim_confidence == 0.0:
            root_verb = find_root_verb(sent)
            if root_verb and has_subject(root_verb) and has_complement_or_object(root_verb):
                claim_confidence = 0.4

        if claim_confidence > 0:
            components.append(data_models.ArgumentComponent(
                component_type="Claim", text=sent.text, sentence_index=i,
                span_start=sent.start_char, span_end=sent.end_char, confidence=claim_confidence
            ))

    console.print(f" -> Enhanced Analyzer (Corrected Version) found {len(components)} potential components.", style="dim")
    return components
argument_visualizer.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # argument_visualizer.py (V2/V3 - Adım 3.1: Düşük Eşik Denemesi)
2
+ import networkx as nx
3
+ from typing import List, Dict
4
+ import data_models
5
+ from rich.console import Console
6
+ import textwrap
7
+ import torch
8
+ import torch.nn.functional as F
9
+
10
+ console = Console()
11
+
12
# Semantic similarity threshold has been LOWERED!
LINKING_SIMILARITY_THRESHOLD = 0.55  # Was 0.65 before; trying 0.55 now
14
+
15
+ # --- Yardımcı Fonksiyon: Benzerlik Hesaplama (Aynı) ---
16
+ def calculate_similarity(emb1: torch.Tensor | None, emb2: torch.Tensor | None) -> float | None:
17
+ if emb1 is None or emb2 is None: return None
18
+ emb1 = emb1.cpu(); emb2 = emb2.cpu()
19
+ if emb1.dim() == 1: emb1 = emb1.unsqueeze(0)
20
+ if emb2.dim() == 1: emb2 = emb2.unsqueeze(0)
21
+ try: return F.cosine_similarity(emb1, emb2).item()
22
+ except Exception as e: console.print(f"[yellow]Warn: Cosine similarity failed: {e}[/yellow]"); return None
23
+
24
+ # --- Ana Grafik Oluşturma Fonksiyonu (Aynı) ---
25
def build_argument_graph(
    components: List[data_models.ArgumentComponent],
    sentence_embeddings: List[torch.Tensor | None]
) -> nx.DiGraph | None:
    """Build a directed premise -> claim graph linked by semantic similarity.

    Every component becomes a node whose id is the first letter of its type
    followed by its 1-based position (for example ``C1``, ``P2``). Any
    component whose type is not ``"Claim"`` is treated as a premise. A
    ``supports`` edge is added from a premise to a claim when the cosine
    similarity of their sentence embeddings is at least
    ``LINKING_SIMILARITY_THRESHOLD``.

    Args:
        components: Detected argument components.
        sentence_embeddings: One embedding (or ``None``) per sentence, indexed
            by each component's ``sentence_index``.

    Returns:
        ``None`` when ``components`` is empty; otherwise the graph, which has
        no edges if claims or premises are missing or nothing passes the
        threshold.
    """
    if not components:
        return None

    G = nx.DiGraph()
    claims = []
    premises = []
    for i, comp in enumerate(components):
        is_claim = comp.component_type == "Claim"
        node_id = f"{comp.component_type[0]}{i+1}"
        conf_str = "" if comp.confidence is None else f" (Conf: {comp.confidence:.2f})"
        snippet = textwrap.shorten(comp.text, width=35, placeholder='...')
        G.add_node(
            node_id,
            label=f"\"{snippet}\"{conf_str}",
            type=comp.component_type,
            shape="()" if is_claim else "[]",
            component_index=i,
            sentence_index=comp.sentence_index,
        )
        bucket = claims if is_claim else premises
        bucket.append((i, node_id, comp.sentence_index))

    if not (claims and premises):
        console.print(" -> Argument graph requires at least one claim and one premise to show structure.", style="dim")
        return G

    def _embedding_for(sent_idx):
        # Out-of-range indices are treated like missing embeddings.
        if 0 <= sent_idx < len(sentence_embeddings):
            return sentence_embeddings[sent_idx]
        return None

    edges_added = 0
    for _p_comp_idx, p_node_id, p_sent_idx in premises:
        premise_embedding = _embedding_for(p_sent_idx)
        if premise_embedding is None:
            continue
        for _c_comp_idx, c_node_id, c_sent_idx in claims:
            claim_embedding = _embedding_for(c_sent_idx)
            if claim_embedding is None:
                continue
            similarity = calculate_similarity(premise_embedding, claim_embedding)
            # Threshold check
            if similarity is not None and similarity >= LINKING_SIMILARITY_THRESHOLD:
                console.print(f" -> Linking {p_node_id} to {c_node_id} (Similarity: {similarity:.2f})", style="dim")
                G.add_edge(p_node_id, c_node_id, relation="supports", similarity=similarity)
                edges_added += 1

    if edges_added == 0:
        console.print(f" -> Could not link any premises to claims based on similarity threshold ({LINKING_SIMILARITY_THRESHOLD}).", style="dim")
    console.print(f" -> Built argument graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges (Semantic Linking).", style="dim")
    return G
64
+
65
+
66
+ # Grafik Formatlama Fonksiyonu (Aynı)
67
def format_graph_text(graph: nx.DiGraph | None) -> str:
    """Render the argument graph as a multi-line text block.

    The output lists every edge as a "premise --Supports--> claim" pair and
    then every node that has no edges at all. A fixed message is returned when
    ``graph`` is ``None`` or has no nodes. The last line is a Rich-markup note
    stating the similarity threshold.
    """
    if graph is None:
        return " Argument graph could not be built."
    if graph.number_of_nodes() == 0:
        return " No argument components identified to build a graph."

    has_edges = graph.number_of_edges() != 0
    # Nodes that take part in no edge, in sorted id order.
    isolated = sorted(
        node for node, degree in graph.degree()
        if degree == 0 and graph.in_degree(node) == 0 and graph.out_degree(node) == 0
    )

    output_lines = [" Argument Structure (Linked by Semantic Similarity):"]

    if has_edges:
        output_lines.append(" Detected Links (Premise --Supports (Similarity)--> Claim):")
        for u, v, data in graph.edges(data=True):
            u_data = graph.nodes[u]
            v_data = graph.nodes[v]
            u_shape = u_data.get('shape', '[]')
            v_shape = v_data.get('shape', '()')
            u_label = u_data.get('label', u)
            v_label = v_data.get('label', v)
            similarity = data.get('similarity')
            sim_str = "" if similarity is None else f"(Sim: {similarity:.2f})"
            output_lines.append(f" {u_shape[0]}{u}{u_shape[1]}: {u_label}")
            output_lines.append(f" {' ' * len(u)} --Supports {sim_str}--> {v_shape[0]}{v}{v_shape[1]}: {v_label}")
            output_lines.append("")
    elif not isolated:
        output_lines.append(" No argument structure links could be determined.")

    if isolated:
        # The heading depends on whether any links were listed above.
        if has_edges:
            output_lines.append(" Nodes without determined links:")
        else:
            output_lines.append(" Nodes (No links determined):")
        for node_id in isolated:
            node_data = graph.nodes[node_id]
            shape = node_data.get('shape', '??')
            label = node_data.get('label', node_id)
            output_lines.append(f" {shape[0]}{node_id}{shape[1]}: {label}")

    output_lines.append(f" [dim](Links based on similarity >= {LINKING_SIMILARITY_THRESHOLD})[/dim]")
    return "\n".join(output_lines)
cli.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # cli.py (V2/V3 - Adım 3.1: Anlamsal Görselleştirme Entegrasyonu)
2
+ import typer
3
+ from rich.console import Console
4
+ from rich.panel import Panel
5
+ from rich.table import Table
6
+ from rich.text import Text
7
+ from rich.padding import Padding
8
+ import data_models
9
+ import nlp_utils
10
+ import argument_analyzer
11
+ import logic_analyzer
12
+ # import evidence_analyzer # Kaldırıldı
13
+ import rhetoric_analyzer
14
+ import synthesis_engine
15
+ import argument_visualizer # Güncellenmiş versiyonu çağıracak
16
+ import networkx as nx
17
+ from typing import Optional, List
18
+ import sys
19
+ import textwrap
20
+
21
+ console = Console()
22
+ sentences = []
23
+
24
+ # --- Yardımcı Raporlama Fonksiyonları (Aynı) ---
25
def format_sentence_with_highlights(
    sentence_idx: int, all_sentences: List[data_models.SentenceInfo],
    findings: List[data_models.Finding], components: List[data_models.ArgumentComponent]
) -> Text:
    """Return one sentence as a Rich ``Text`` with highlights and finding tags.

    Components that belong to the sentence are styled in magenta (bold for
    claims). For every finding whose span equals the sentence span, a tag is
    placed in front of the text: ``[F] `` for fallacies, ``[R] `` for
    rhetorical devices, ``[?] `` for anything else.

    Args:
        sentence_idx: Index into ``all_sentences``.
        all_sentences: Sentence records of the analysed text.
        findings: Findings to tag the sentence with.
        components: Argument components to highlight.

    Returns:
        The styled text, or an error placeholder when ``sentence_idx`` is out
        of range.
    """
    if sentence_idx < 0 or sentence_idx >= len(all_sentences):
        return Text("(Error: Invalid sentence index)")

    sentence = all_sentences[sentence_idx]
    text = Text(sentence.text)

    for comp in components:
        if comp.sentence_index != sentence_idx:
            continue
        style = "bold magenta" if comp.component_type == "Claim" else "magenta"
        # Component offsets are converted to offsets inside this sentence.
        start = comp.span_start - sentence.start_char
        end = comp.span_end - sentence.start_char
        if 0 <= start < end <= len(sentence.text):
            try:
                text.stylize(style, start, end)
            except Exception as e:
                # The closing tag must name the same style as the opening tag,
                # otherwise Rich raises a MarkupError while printing the warning.
                console.print(f"[dim yellow]Warn: Styling component ({start}-{end}) in sent {sentence_idx}: {e}[/dim yellow]")

    for finding in findings:
        if finding.span_start == sentence.start_char and finding.span_end == sentence.end_char:
            if finding.finding_type == "Fallacy":
                prefix, prefix_style = "[F] ", "bold red"
            elif finding.finding_type == "RhetoricalDevice":
                prefix, prefix_style = "[R] ", "bold yellow"
            else:
                prefix, prefix_style = "[?] ", ""
            # rich.text.Text has no insert(); build a new Text with the tag in front.
            text = Text.assemble((prefix, prefix_style), text)
    return text
46
+
47
+ # Ana CLI Fonksiyonu
48
def main(
    text: Optional[str] = typer.Option(None, "--text", "-t", help="Text to analyze directly."),
    file_path: Optional[str] = typer.Option(None, "--file", "-f", help="Path to a text file to analyze."),
    max_findings_display: int = typer.Option(5, "--max-findings", "-m", help="Max number of each finding type to display in detail.")
):
    """
    ETHOS: The AI Arbiter of Rational Discourse (CLI) - v2.2
    Semantic Argument Linking & AI Fallacy Integration.
    """
    # Flow: validate input -> load models and embeddings -> run the argument,
    # logic and rhetoric analyzers -> synthesize ratings -> build the argument
    # graph -> print the report. Exits with code 1 on any input/model error.
    #
    # Function-scope import: user-derived text is interpolated into Rich markup
    # below and must be escaped, otherwise square brackets in the analysed text
    # are parsed as style tags (dropped from output or raising MarkupError).
    from rich.markup import escape

    console.print(Panel("[bold cyan]ETHOS Analysis Engine v2.2 Starting...[/bold cyan]", expand=False, border_style="cyan"))

    # --- Input validation and loading ---
    if text and file_path:
        console.print("[bold red]Error: Use --text OR --file, not both.[/bold red]")
        raise typer.Exit(code=1)
    if not text and not file_path:
        console.print("[bold red]Error: Use --text '...' OR --file '...'[/bold red]")
        raise typer.Exit(code=1)
    text_to_analyze = ""
    input_source_msg = ""
    if file_path:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                text_to_analyze = f.read()
            input_source_msg = f"File: [yellow]'{escape(file_path)}'[/yellow]"
        except Exception as e:
            console.print(f"[bold red]Error reading file '{escape(file_path)}': {escape(str(e))}[/bold red]")
            raise typer.Exit(code=1)
    elif text:
        text_to_analyze = text
        input_source_msg = f"Input Text ({len(text_to_analyze)} chars)"
    if not text_to_analyze.strip():
        console.print("[bold red]Error: Input text is empty.[/bold red]")
        raise typer.Exit(code=1)
    console.print(Padding(f"Analyzing: {input_source_msg}", (0, 1)))

    # --- Analysis steps ---
    console.print("\n[bold blue]--- Initializing Analyzers & Embeddings ---[/bold blue]")
    try:
        nlp_utils.load_spacy_model()
        nlp_utils.load_bert()
        spacy_doc = nlp_utils.process_text_spacy(text_to_analyze)
        # Compute all sentence embeddings up front
        sentence_embeddings = nlp_utils.get_all_sentence_embeddings(spacy_doc)
    except Exception as e:
        console.print(f"[bold red]Error during model loading/processing/embedding: {escape(str(e))}[/bold red]")
        raise typer.Exit(code=1)
    if not spacy_doc:
        console.print("[bold red]Error: spaCy doc creation failed.[/bold red]")
        raise typer.Exit(code=1)

    # --- Run the analyzers ---
    console.print("\n[bold blue]--- Running Analysis Modules ---[/bold blue]")
    global sentences
    sentences = [
        data_models.SentenceInfo(
            text=s.text, start_char=s.start_char, end_char=s.end_char,
            tokens=[t.text for t in s]
        )
        for s in spacy_doc.sents
    ]

    console.print("[cyan]Running Argument Analyzer (Enhanced)...[/cyan]")
    argument_components = argument_analyzer.enhanced_component_analyzer(spacy_doc)

    console.print("[cyan]Running Logic Analyzer (Enhanced - Rules + ML)...[/cyan]")
    fallacy_findings = logic_analyzer.enhanced_fallacy_analyzer(spacy_doc)

    # Evidence analysis has been removed from the pipeline.

    console.print("[cyan]Running Rhetoric Analyzer...[/cyan]")
    rhetoric_findings = rhetoric_analyzer.simple_rhetoric_analyzer(spacy_doc)

    console.print("[cyan]Running Synthesis Engine (Evidence Excluded)...[/cyan]")
    all_findings: List[data_models.Finding] = fallacy_findings + rhetoric_findings
    analysis_summary = synthesis_engine.generate_summary_ratings(argument_components, all_findings)

    # --- Build the argument graph (using the embeddings) ---
    console.print("[cyan]Building Argument Graph (Semantic Linking)...[/cyan]")
    argument_graph = argument_visualizer.build_argument_graph(argument_components, sentence_embeddings)
    graph_text_representation = argument_visualizer.format_graph_text(argument_graph)

    # --- Assemble the result object ---
    analysis_result = data_models.AnalyzedText(
        original_text=text_to_analyze, sentences=sentences,
        argument_components=argument_components, findings=all_findings,
        analysis_summary=analysis_summary
    )

    # --- Reporting ---
    console.rule("[bold green]ETHOS Analysis Report[/bold green]", style="green")

    # Section 1: summary
    summary_table = Table(title="Analysis Summary", show_header=False, box=None, padding=(0, 1))
    if analysis_result.analysis_summary:
        for category, rating in analysis_result.analysis_summary.items():
            style = "red" if rating.startswith(("Low", "Weak", "Questionable")) else "yellow" if rating.startswith(("Medium", "Moderate", "Mixed")) else "green"
            if rating == "Not Evaluated":
                style = "dim"
            summary_table.add_row(category, f"[{style}]{rating}[/{style}]")
    else:
        summary_table.add_row("Summary", "[dim]Not generated.[/dim]")
    console.print(Padding(summary_table, (1, 0)))

    # Section 2: detected findings
    console.print("\n[bold underline]Detected Findings:[/bold underline]")
    if not analysis_result.findings:
        console.print(Padding(" No significant findings detected.", (0, 2)))
    else:
        grouped_findings = {}
        for f in analysis_result.findings:
            grouped_findings.setdefault(f.finding_type, []).append(f)
        # Evidence-related findings are never reported.
        grouped_findings.pop("EvidenceIndicator", None)
        grouped_findings.pop("EvidenceStatus", None)
        if not grouped_findings:
            console.print(Padding(" No significant findings detected.", (0, 2)))
        else:
            for f_type, findings_list in grouped_findings.items():
                color = "red" if f_type=="Fallacy" else "yellow" if f_type=="RhetoricalDevice" else "white"
                console.print(Padding(f"[bold {color}]{f_type} Indicators ({len(findings_list)} found):[/bold {color}]", (1, 1)))
                for i, finding in enumerate(findings_list[:max_findings_display]):
                    details_dict = finding.details if finding.details else {}
                    details_text = details_dict.get('fallacy_type') or details_dict.get('device_type') or 'Details N/A'
                    trigger_text = details_dict.get('trigger') or details_dict.get('words')
                    confidence = details_dict.get('confidence')
                    model_used = details_dict.get('model_used')
                    details_str = f"({details_text}"
                    if trigger_text and not model_used:
                        trigger_snippet = escape(textwrap.shorten(str(trigger_text), width=25, placeholder='...'))
                        details_str += f", Trigger: '{trigger_snippet}'"
                    if confidence is not None:
                        details_str += f", Score: {confidence:.2f}"
                    if model_used:
                        details_str += f", Model: '{model_used.split('/')[-1]}'"
                    details_str += ")"
                    console.print(Padding(f"{i+1}. {finding.description} {details_str}", (0, 3)))
                    try:
                        # A finding is attributed to the sentence that starts at its span start.
                        sentence_idx = next(idx for idx, s in enumerate(analysis_result.sentences) if s.start_char == finding.span_start)
                        related_sentence_text = analysis_result.sentences[sentence_idx].text
                        sentence_snippet = escape(textwrap.shorten(related_sentence_text, width=90, placeholder='...'))
                        console.print(Padding(f"[dim] In Sent {sentence_idx+1}: \"{sentence_snippet}\"[/dim]", (0, 5)))
                    except StopIteration:
                        console.print(Padding(f"[dim] (Could not pinpoint exact sentence for span starting at char {finding.span_start})[/dim]", (0,5)))
                if len(findings_list) > max_findings_display:
                    console.print(Padding(f"... and {len(findings_list) - max_findings_display} more.", (0, 3)))

    # Section 3: argument components
    console.print("\n[bold underline]Identified Argument Components:[/bold underline]")
    if not analysis_result.argument_components:
        console.print(Padding(" No argument components identified.", (0, 2)))
    else:
        comp_table = Table(title=None, show_header=True, header_style="bold magenta", box=None, padding=(0,1))
        comp_table.add_column("Type", style="magenta", width=10)
        comp_table.add_column("Text Snippet (Confidence)")
        for comp in analysis_result.argument_components:
            conf_str = f"({comp.confidence:.2f})" if comp.confidence is not None else ""
            comp_snippet = escape(textwrap.shorten(comp.text, width=90, placeholder='...'))
            comp_table.add_row(comp.component_type, f"\"{comp_snippet}\" {conf_str}")
        console.print(Padding(comp_table, (1, 1)))

    # Section 4: argument structure visualization
    console.print("\n[bold underline]Argument Structure Visualization (Semantic):[/bold underline]")
    console.print(Padding(graph_text_representation, (0, 1)))
    console.print(Padding(f"[dim](Note: Links based on semantic similarity >= {argument_visualizer.LINKING_SIMILARITY_THRESHOLD})[/dim]", (0,1)))

    console.rule(style="green")
    console.print(f"(V2 Semantic Argument Linking. Total potential findings: {len(analysis_result.findings)})")
183
+
184
+
185
+ # When the script is executed directly, hand the main function to typer.run
186
+ if __name__ == "__main__":
187
+ typer.run(main)
data_models.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data_models.py
2
+ from typing import List, Optional, Any
3
+ from pydantic import BaseModel, Field
4
+
5
class Finding(BaseModel):
    """Represents a generic finding produced during analysis."""

    # Kind of finding, e.g. 'Fallacy', 'RhetoricalDevice', 'EvidenceStatus'.
    finding_type: str = Field(
        ...,
        description="Bulgunun türü (örn. 'Fallacy', 'RhetoricalDevice', 'EvidenceStatus')",
    )
    # Short human-readable description.
    description: str = Field(
        ...,
        description="Bulgunun kısa açıklaması",
    )
    # Severity label, e.g. 'Low', 'Medium', 'High'.
    severity: Optional[str] = Field(
        None,
        description="Bulgunun ciddiyeti (örn. 'Low', 'Medium', 'High')",
    )
    # Character offsets of the finding in the text.
    span_start: Optional[int] = Field(
        None,
        description="Bulgunun metindeki başlangıç karakter indeksi",
    )
    span_end: Optional[int] = Field(
        None,
        description="Bulgunun metindeki bitiş karakter indeksi",
    )
    # Extra, finding-specific details.
    details: Optional[dict[str, Any]] = Field(
        None,
        description="Bulguya özel ek detaylar",
    )
13
+
14
class ArgumentComponent(BaseModel):
    """Represents a detected argument component (claim, premise, etc.)."""

    # Component type, e.g. 'Claim' or 'Premise'.
    component_type: str = Field(
        ...,
        description="Bileşenin türü (örn. 'Claim', 'Premise')",
    )
    # Full text of the component.
    text: str = Field(
        ...,
        description="Bileşenin tam metni",
    )
    # Index of the sentence the component was found in.
    sentence_index: int = Field(
        ...,
        description="Bileşenin bulunduğu cümlenin indeksi",
    )
    # Start/end character offsets of the component.
    # NOTE(review): the description strings say these offsets are relative to
    # the sentence, but the CLI subtracts the sentence's start_char from them,
    # which suggests they are document-level offsets - confirm.
    span_start: int = Field(
        ...,
        description="Bileşenin cümle içindeki başlangıç karakter indeksi",
    )
    span_end: int = Field(
        ...,
        description="Bileşenin cümle içindeki bitiş karakter indeksi",
    )
    # Detection confidence score (0.0 - 1.0).
    confidence: Optional[float] = Field(
        None,
        description="Tespitin güven skoru (0.0 - 1.0)",
    )
22
+
23
class SentenceInfo(BaseModel):
    """Holds the information recorded for a single sentence."""

    # Sentence text.
    text: str
    # Start and end character offsets of the sentence.
    start_char: int
    end_char: int
    # For now simply the token texts.
    tokens: List[str]
29
+
30
class AnalyzedText(BaseModel):
    """Top-level model holding the result of the whole analysis run."""

    original_text: str
    processed_text: Optional[str] = None
    language: str = "en"
    sentences: List[SentenceInfo] = []
    # All findings are collected here.
    findings: List[Finding] = []
    argument_components: List[ArgumentComponent] = []
    # Summary ratings of the analysis (newer field).
    analysis_summary: Optional[dict[str, str]] = Field(
        None,
        description="Analizin özet değerlendirmesi",
    )

    class Config:
        # pydantic model configuration: fields not declared above are forbidden.
        extra = 'forbid'
evidence_analyzer.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evidence_analyzer.py (V2 - Adım 2.4 Revizyonu: Basitleştirilmiş : GEREKLİYDİ ZAYN BAŞARAMADI :))
2
+ import spacy
3
+ from spacy.tokens import Doc, Span
4
+ from typing import List, Tuple
5
+ import data_models
6
+ # Artık nlp_utils, torch, F'e gerek yok bu basit versiyonda
7
+ from rich.console import Console
8
+ import re
9
+
10
+ console = Console()
11
+
12
+ # Sabitler (Aynı)
13
CITATION_PHRASES = {
    "according to", "study shows", "research indicates", "data suggests",
    "experts say", "report finds", "source:", "evidence shows", "demonstrates that",
    "reported by", "stated by", "cited in"
}
URL_REGEX = r"(?:https?://|www\.)[^\s/$.?#].[^\s]*"


# Helper function
def has_potential_evidence_indicator(sent: Span) -> Tuple[bool, str, str]:
    """Check a sentence for simple, surface-level evidence indicators.

    The checks run in this order and the first hit wins:

    1. a URL (``URL_REGEX``),
    2. numerical data (any standalone number, or a ``%`` sign),
    3. a citation phrase from ``CITATION_PHRASES`` (space-delimited match).

    Args:
        sent: Sentence to inspect; only its ``text`` is read.

    Returns:
        ``(found, indicator_type, trigger)``. ``indicator_type`` is ``"URL"``,
        ``"Numerical Data"`` or ``"Citation Phrase"``; ``trigger`` is the
        matched text. ``(False, "", "")`` when nothing is found or the sentence
        is blank.
    """
    sent_text = sent.text
    if not sent_text.strip():
        return False, "", ""

    url_match = re.search(URL_REGEX, sent_text)
    if url_match:
        return True, "URL", url_match.group(0)

    # A trailing '%' is consumed explicitly: requiring a word boundary *after*
    # the '%' would fail before whitespace/punctuation and drop the sign from
    # the reported trigger ("25%" would be reported as "25").
    number_match = re.search(r"\b\d+(?:\.\d+)?(?:%|\b)", sent_text)
    if number_match or "%" in sent_text:
        trigger_text = number_match.group(0) if number_match else "Number/Percentage"
        return True, "Numerical Data", trigger_text

    padded_lower = f" {sent_text.lower()} "
    # Sorted iteration keeps the reported phrase deterministic when several
    # phrases match (set order varies between runs).
    for phrase in sorted(CITATION_PHRASES):
        if f" {phrase} " in padded_lower:
            return True, "Citation Phrase", phrase
    return False, "", ""
34
+
35
+ # Ana Analiz Fonksiyonu (Basitleştirilmiş - Sadece aynı cümleyi kontrol eder)
36
def simplified_evidence_analyzer(
    doc: Doc,
    argument_components: List[data_models.ArgumentComponent]
) -> List[data_models.Finding]:
    """Check each detected claim for evidence indicators in its own sentence.

    Only the sentence that contains the claim is inspected (simple, V1-style
    check). Every claim produces exactly one finding:

    * ``EvidenceIndicator`` (severity ``Info``) when the sentence contains a
      URL, numerical data or a citation phrase;
    * ``EvidenceStatus`` (severity ``Medium``) otherwise.

    Claims whose ``sentence_index`` is out of range are skipped with a warning.

    Args:
        doc: Parsed document; its sentences are read from ``doc.sents``.
        argument_components: Components to scan; only those of type
            ``"Claim"`` are analysed.

    Returns:
        The findings, in claim order. Finding spans are the claim sentence's
        ``start_char``/``end_char``.
    """
    findings = []
    claims_data = [
        (idx, comp) for idx, comp in enumerate(argument_components)
        if comp.component_type == "Claim"
    ]
    sentences = list(doc.sents)
    num_sentences = len(sentences)

    if not claims_data:
        console.print(" -> No claims found to analyze for evidence.", style="dim")
        return findings

    console.print(f" -> Analyzing {len(claims_data)} claims for evidence indicators (Simplified: Same sentence only)...", style="dim")

    for claim_comp_idx, claim in claims_data:
        claim_sentence_idx = claim.sentence_index
        # Add the ellipsis only when the claim text was actually truncated.
        if len(claim.text) > 100:
            claim_text_snippet = claim.text[:100] + "..."
        else:
            claim_text_snippet = claim.text

        if not (0 <= claim_sentence_idx < num_sentences):
            console.print(f"[yellow]Warn: Invalid sentence index {claim_sentence_idx} for claim comp_idx {claim_comp_idx}, skipping.[/yellow]")
            continue

        claim_sentence_span = sentences[claim_sentence_idx]

        # Only the claim's own sentence is checked.
        has_indicator, indicator_type, indicator_text = has_potential_evidence_indicator(claim_sentence_span)

        if has_indicator:
            # An indicator is present: record an EvidenceIndicator finding.
            findings.append(data_models.Finding(
                finding_type="EvidenceIndicator",
                description=f"Potential evidence indicator ('{indicator_type}') found in the same sentence as the claim.",
                severity="Info",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={
                    "indicator_type": indicator_type, "indicator_trigger": indicator_text,
                    "location": "same_sentence", "linked_claim_index": claim_comp_idx,
                    "claim_text": claim_text_snippet
                }
            ))
        else:
            # No indicator: record an EvidenceStatus finding on the claim sentence.
            findings.append(data_models.Finding(
                finding_type="EvidenceStatus",
                description="Claim lacks explicit evidence indicator in the same sentence.",
                severity="Medium",
                span_start=claim_sentence_span.start_char,
                span_end=claim_sentence_span.end_char,
                details={"claim_text": claim_text_snippet}
            ))

    console.print(f" -> Simplified Evidence Analyzer generated {len(findings)} findings.", style="dim")
    return findings
logic_analyzer.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # logic_analyzer.py (V2 - Adım 2.4 Düzeltmesi - Fonksiyon Adı Hatası Giderildi)
2
+ import spacy
3
+ from spacy.tokens import Doc, Span
4
+ from typing import List, Dict, Tuple
5
+ import data_models # Düz yapı importu
6
+ import nlp_utils # Embedding fonksiyonları için
7
+ import torch
8
+ import torch.nn as nn
9
+ from rich.console import Console
10
+
11
+ console = Console()
12
+
13
+ # --- Basit Kural Tabanlı Safsata İpuçları (Aynı) ---
14
# Lower-case phrases that hint at an appeal to popularity.
POPULUM_INDICATORS = {
    "everybody knows",
    "everyone knows",
    "everyone agrees",
    "it is common sense",
    "most people think",
    "the majority believes",
}

# Absolute quantifiers used as a (context-free) hasty-generalization hint.
HASTY_GENERALIZATION_KEYWORDS = {
    "always", "never", "all", "none",
    "every", "everyone", "nobody",
}
19
def check_false_dichotomy(sent_text_lower: str) -> bool:
    """Tell whether the lower-cased sentence contains both "either" and "or".

    Both words must appear as space-delimited words ("either" may also start
    the sentence).
    """
    padded = f" {sent_text_lower} "
    if " or " not in padded:
        return False
    return " either " in padded or sent_text_lower.startswith("either ")
23
+
24
+ # Kural Tabanlı Tespit Fonksiyonları (Aynı)
25
def detect_ad_populum(sent: Span) -> List[data_models.Finding]:
    """Flag a sentence that contains an appeal-to-popularity phrase.

    Returns a one-element list with a ``Fallacy`` finding spanning the whole
    sentence when any ``POPULUM_INDICATORS`` phrase occurs in the lower-cased
    sentence text, otherwise an empty list.
    """
    lowered = sent.text.lower()
    trigger = next(
        (indicator for indicator in POPULUM_INDICATORS if indicator in lowered),
        None,
    )
    if trigger is None:
        return []
    return [
        data_models.Finding(
            finding_type="Fallacy",
            description="Potential 'Appeal to Popularity' (Ad Populum / Bandwagon) detected by rule.",
            severity="Medium",
            span_start=sent.start_char,
            span_end=sent.end_char,
            details={"fallacy_type": "Ad Populum (Rule)", "trigger": trigger},
        )
    ]
31
+
32
def detect_hasty_generalization(sent: Span) -> List[data_models.Finding]:
    """Flag a sentence that contains an absolute quantifier keyword.

    The first token whose lower-cased text is in
    ``HASTY_GENERALIZATION_KEYWORDS`` produces a single low-severity
    ``Fallacy`` finding spanning the whole sentence; the token's original text
    is recorded as the trigger. An empty list is returned when no token
    matches.
    """
    keyword_token = next(
        (tok for tok in sent if tok.text.lower() in HASTY_GENERALIZATION_KEYWORDS),
        None,
    )
    if keyword_token is None:
        return []
    return [
        data_models.Finding(
            finding_type="Fallacy",
            description="Potential 'Hasty Generalization' detected by keyword rule (needs context!).",
            severity="Low",
            span_start=sent.start_char,
            span_end=sent.end_char,
            details={"fallacy_type": "Hasty Generalization (Rule)", "trigger": keyword_token.text},
        )
    ]
38
+
39
def detect_false_dichotomy(sent: Span) -> List[data_models.Finding]:
    """Flag a sentence that matches the "either ... or" pattern.

    Returns a one-element list with a ``Fallacy`` finding spanning the whole
    sentence when ``check_false_dichotomy`` accepts the lower-cased sentence
    text, otherwise an empty list.
    """
    if not check_false_dichotomy(sent.text.lower()):
        return []
    return [
        data_models.Finding(
            finding_type="Fallacy",
            description="Potential 'False Dichotomy' (Either/Or Fallacy) detected by rule.",
            severity="Medium",
            span_start=sent.start_char,
            span_end=sent.end_char,
            details={"fallacy_type": "False Dichotomy (Rule)", "trigger": "either...or pattern"},
        )
    ]
44
+
45
# --- ML-based fallacy detection (placeholder) ---
# Output labels of the placeholder classifier; the index in this list is the
# class index produced by the model.
FALLACY_CLASSES = [
    "Ad Hominem",
    "Hasty Generalization",
    "Appeal to Popularity",
    "No Fallacy",
]
# Default input width of the classifier's linear layer.
BERT_HIDDEN_SIZE = 768
NUM_FALLACY_CLASSES = len(FALLACY_CLASSES)
48
+
49
class FallacyClassifierPlaceholder(nn.Module):
    """Untrained single-linear-layer stand-in for a real fallacy classifier.

    The weights are never trained in this module, so the logits it produces
    carry no meaning; it only exercises the surrounding pipeline.
    """

    def __init__(self, input_size=BERT_HIDDEN_SIZE, num_classes=NUM_FALLACY_CLASSES):
        """Create the linear layer and print a notice that the model is untrained."""
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)
        # The notice is printed on every instantiation; no once-only guard exists yet.
        console.print(
            "[yellow]Placeholder Fallacy Classifier initialized (UNTRAINED). Results will NOT be accurate.[/yellow]",
            style="dim",
        )

    def forward(self, sentence_embedding):
        """Return class logits; a 1-D input is given a leading batch dimension first."""
        batched = (
            sentence_embedding.unsqueeze(0)
            if sentence_embedding.dim() == 1
            else sentence_embedding
        )
        return self.linear(batched)
57
+
58
# Build the placeholder classifier once at import time. Any failure disables
# ML-based detection (via _ml_classifier_loaded) instead of aborting the import.
try:
    placeholder_classifier = FallacyClassifierPlaceholder()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    placeholder_classifier.to(device)
    placeholder_classifier.eval()
except Exception as e:
    console.print(f"[bold red]Error initializing ML Fallacy Classifier: {e}. ML detection disabled.[/bold red]")
    _ml_classifier_loaded = False
else:
    _ml_classifier_loaded = True
66
+
67
+
68
def ml_fallacy_detection_for_sentence(sentence_text: str, sent_span: Span) -> List[data_models.Finding]:  # Span added
    """Classify one sentence with the UNTRAINED placeholder model.

    The sentence is embedded through nlp_utils.get_sentence_embedding (mean
    strategy), passed to the placeholder classifier, and the highest-probability
    class is reported unless it is "No Fallacy". Because the classifier is
    untrained, the result is NOT reliable.

    Args:
        sentence_text: Text of the sentence to embed.
        sent_span: Span supplying the character offsets stored on the finding.

    Returns:
        A list with at most one low-severity "Fallacy" finding. The list is
        empty when the classifier is not loaded, when no embedding is
        available, when "No Fallacy" is predicted, or when any step raises
        (the error is printed as a warning and swallowed).
    """
    findings = []
    if not _ml_classifier_loaded:
        # Classifier could not be initialised at import time.
        return findings

    try:
        embedding = nlp_utils.get_sentence_embedding(sentence_text, strategy='mean')
        if embedding is None:
            # No embedding could be obtained for this sentence.
            return findings

        embedding = embedding.to(device)
        with torch.no_grad():
            raw_scores = placeholder_classifier(embedding)

        class_probs = torch.softmax(raw_scores.squeeze(), dim=0)
        best_prob, best_index = torch.max(class_probs, dim=0)
        predicted_class = FALLACY_CLASSES[best_index.item()]
        predicted_prob = best_prob.item()

        # Only report actual fallacy labels (with an unreliable score).
        if predicted_class != "No Fallacy":
            findings.append(data_models.Finding(
                finding_type="Fallacy",
                description=f"Potential '{predicted_class}' detected by ML Placeholder (Score: {predicted_prob:.2f} - UNRELIABLE).",
                severity="Low",
                span_start=sent_span.start_char,
                span_end=sent_span.end_char,
                details={"fallacy_type": f"{predicted_class} (ML Placeholder)", "confidence": predicted_prob}
            ))
    except Exception as e:
        console.print(f"[yellow]Warning: ML Fallacy prediction failed for sentence: {e}[/yellow]", style="dim")
    return findings
105
+
106
+
107
+ # --- Geliştirilmiş Ana Analiz Fonksiyonu (Düzeltilmiş Hali) ---
108
def enhanced_fallacy_analyzer(doc: Doc) -> List[data_models.Finding]:
    """Collect potential fallacies from every sentence of *doc*.

    All rule-based detectors run first, sentence by sentence; afterwards the
    ML placeholder runs over the same sentences if it was initialised.
    Findings are simply concatenated; no merging or prioritisation is done.

    Returns:
        Rule-based findings followed by ML placeholder findings.
    """
    all_findings = []
    rule_detectors = (
        detect_ad_populum,
        detect_hasty_generalization,
        detect_false_dichotomy,
    )

    console.print(" -> Running Rule-Based Fallacy Checks...", style="dim")
    sentences = list(doc.sents)
    for sentence in sentences:
        for detector in rule_detectors:
            all_findings.extend(detector(sentence))

    if not _ml_classifier_loaded:
        console.print(" -> Skipping ML Placeholder Fallacy Checks (Initialization failed).", style="dim")
    else:
        console.print(f" -> Running ML Placeholder Fallacy Checks ({len(sentences)} sentences)...", style="dim")
        for sentence in sentences:
            # The span is passed along so the finding carries correct offsets.
            all_findings.extend(ml_fallacy_detection_for_sentence(sentence.text, sentence))

    # TODO: merge / prioritise findings
    console.print(f" -> Enhanced Fallacy Analyzer found {len(all_findings)} potential indicators (Rules + ML Placeholder).", style="dim")
    return all_findings
nlp_utils.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # nlp_utils.py (V2 - Adım 2.4 Eklentisi)
2
+ import spacy
3
+ from spacy.tokens import Doc, Span # Span eklendi
4
+ from transformers import AutoTokenizer, AutoModel
5
+ import torch
6
+ import torch.nn.functional as F # Cosine Similarity için eklendi
7
+ from rich.console import Console
8
+ from typing import List, Tuple # List, Tuple eklendi
9
+ import os
10
+
11
+ # os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
12
+
13
+ console = Console()
14
# Select the CUDA device when torch reports one as available, else the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Announce the chosen device at import time (green for CUDA, yellow otherwise).
console.print(f"Using device: [bold {'green' if device.type == 'cuda' else 'yellow'}]{device}[/bold {'green' if device.type == 'cuda' else 'yellow'}]")
# On CUDA, additionally print the name of device index 0.
if device.type == 'cuda': console.print(f"CUDA Device Name: {torch.cuda.get_device_name(0)}")
17
+
18
+ # --- spaCy ---
19
SPACY_MODEL_NAME = "en_core_web_lg"
# Module-level cache for the loaded spaCy pipeline (filled on first use).
_nlp = None


def load_spacy_model(model_name: str = SPACY_MODEL_NAME) -> spacy.language.Language:
    """Return the cached spaCy pipeline, loading it on the first call.

    Note that the cache is not keyed by name: once a pipeline has been
    loaded, later calls return it regardless of *model_name*.

    Raises:
        OSError: Re-raised from spacy.load (after printing an error) when the
            model cannot be found.
    """
    global _nlp
    if _nlp is not None:
        return _nlp
    try:
        console.print(f"Loading spaCy model '{model_name}'...")
        _nlp = spacy.load(model_name)
        console.print(f"[green]spaCy model '{model_name}' loaded successfully (on CPU).[/green]")
    except OSError:
        console.print(f"[bold red]Error: spaCy model '{model_name}' not found.[/bold red]")
        raise
    return _nlp
32
+
33
def process_text_spacy(text: str) -> spacy.tokens.Doc:
    """Run *text* through the shared spaCy pipeline and return the result.

    Raises:
        RuntimeError: If load_spacy_model() returns a falsy value.
    """
    pipeline = load_spacy_model()
    if not pipeline:
        raise RuntimeError("spaCy model could not be loaded or process failed.")
    return pipeline(text)
37
+
38
+ # --- Transformers (BERT) ---
39
BERT_MODEL_NAME = "bert-base-uncased"
# Module-level caches, filled on first use by load_bert().
_tokenizer = None
_bert_model = None


def load_bert(model_name: str = BERT_MODEL_NAME) -> tuple[AutoTokenizer, AutoModel]:
    """Return the cached (tokenizer, model) pair, loading each on first use.

    The model is moved to the module-level ``device`` and switched to eval
    mode before it is cached. The caches are not keyed by name: once loaded,
    later calls return the cached objects regardless of *model_name*.

    Args:
        model_name: Name passed to ``from_pretrained`` on the first load.

    Returns:
        The tokenizer and the model.

    Raises:
        Exception: Any error raised while loading the tokenizer or the model
            is printed and re-raised unchanged.
    """
    global _tokenizer, _bert_model
    if _tokenizer is None:
        try:
            console.print(f"Loading BERT tokenizer for '{model_name}'...")
            _tokenizer = AutoTokenizer.from_pretrained(model_name)
            console.print(f"[green]BERT Tokenizer for '{model_name}' loaded successfully.[/green]")
        except Exception as e:
            console.print(f"[bold red]Error loading BERT tokenizer '{model_name}': {e}[/bold red]")
            raise
    if _bert_model is None:
        try:
            console.print(f"Loading full BERT model '{model_name}'...")
            # Prepare the model in a local first: publishing it to the global
            # cache before .to()/.eval() succeed would let a later call reuse
            # a model that is on the wrong device or still in training mode.
            model = AutoModel.from_pretrained(model_name)
            model.to(device)
            model.eval()
            _bert_model = model
            console.print(f"[green]Full BERT Model '{model_name}' loaded successfully to [bold]{device}[/bold].[/green]")
        except Exception as e:
            console.print(f"[bold red]Error loading full BERT model '{model_name}': {e}[/bold red]")
            raise
    return _tokenizer, _bert_model
59
+
60
def get_bert_embeddings(text: str, max_length: int = 512) -> tuple[torch.Tensor, torch.Tensor]:
    """Run *text* through BERT and return its raw outputs on the CPU.

    Args:
        text: Input text; it is truncated by the tokenizer to *max_length*.
        max_length: Maximum number of tokens passed to the tokenizer.

    Returns:
        ``(last_hidden_state, pooler_output)`` as produced by the model,
        moved to the CPU and detached.

    Raises:
        RuntimeError: If the model/tokenizer cannot be loaded or inference
            fails. The original exception is attached as ``__cause__``.
    """
    try:
        tokenizer, model = load_bert()
    except Exception as e:
        raise RuntimeError(f"Failed to load BERT model or tokenizer: {e}") from e
    inputs = tokenizer(text, return_tensors="pt", max_length=max_length, padding=True, truncation=True)
    # Move every input tensor to the device the model lives on.
    inputs_on_device = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        try:
            outputs = model(**inputs_on_device)
        except Exception as e:
            console.print(f"[bold red]Error during BERT inference: {e}[/bold red]")
            raise RuntimeError(f"BERT inference failed: {e}") from e
    last_hidden_state = outputs.last_hidden_state
    pooler_output = outputs.pooler_output
    return last_hidden_state.cpu().detach(), pooler_output.cpu().detach()
70
+
71
def get_sentence_embedding(sentence_text: str, strategy: str = 'mean', max_length: int = 512) -> torch.Tensor | None:
    """Compute a single embedding vector for one sentence.

    Note: this still performs one model call per sentence, which is
    inefficient. A better approach would process the whole text once and
    average over each sentence's span.

    Args:
        sentence_text: The sentence to embed.
        strategy: ``'cls'`` returns the hidden state of the first token;
            ``'mean'`` returns the average of all token hidden states.
        max_length: Maximum number of tokens passed to the tokenizer.

    Returns:
        The embedding as a 1-D tensor, or ``None`` when anything fails
        (including an unknown *strategy*); a warning is printed in that case.
    """
    try:
        # Reject an unknown strategy before paying for a model call. The
        # error is still reported through the warning/None path below.
        if strategy not in ('cls', 'mean'):
            raise ValueError(f"Unknown strategy: {strategy}")
        last_hidden_state, _ = get_bert_embeddings(sentence_text, max_length)
        if strategy == 'cls':
            return last_hidden_state[0, 0, :]
        # get_bert_embeddings tokenizes a single sequence, so padding=True
        # adds no pad tokens and the attention mask is all ones. A plain mean
        # over the token axis therefore equals the masked mean, without
        # tokenizing the sentence a second time.
        return last_hidden_state[0].mean(dim=0)
    except Exception as e:
        console.print(f"[yellow]Warning: Failed to get embedding for sentence '{sentence_text[:50]}...': {e}[/yellow]")
        # None (rather than a zero vector) signals failure to the caller.
        return None
95
+
96
+
97
+ # --- YENİ FONKSİYON ---
98
def get_all_sentence_embeddings(doc: Doc, strategy: str = 'mean', max_length: int = 512) -> List[torch.Tensor | None]:
    """Embed every sentence of a spaCy Doc with BERT.

    Each sentence triggers its own get_sentence_embedding() call, so this is
    not efficient for long documents.

    Returns:
        One entry per sentence, in document order; an entry is whatever
        get_sentence_embedding() returned for that sentence.
    """
    console.print(f" -> Calculating BERT sentence embeddings for {len(list(doc.sents))} sentences (Strategy: {strategy})...", style="dim")
    embeddings = [
        get_sentence_embedding(sentence.text, strategy=strategy, max_length=max_length)
        for sentence in doc.sents
    ]
    console.print(f" -> Sentence embeddings calculation complete.", style="dim")
    return embeddings
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+
3
+ # Temel NLP ve Model Altyapısı
4
+ spacy>=3.7.0,<4.0.0
5
+ transformers[torch]>=4.30.0,<5.0.0 # PyTorch tabanlı transformers
6
+ torch>=2.0.0,<3.0.0 # Veya sisteminize uygun PyTorch sürümü
7
+ numpy<2.0 # ÖNEMLİ: NumPy 2.x ile uyumsuzlukları önlemek için
8
+
9
+ # Veri Modelleme ve Doğrulama
10
+ pydantic>=2.0.0,<3.0.0
11
+
12
+ # Komut Satırı Arayüzü ve Güzel Çıktı
13
+ typer[all]>=0.9.0,<0.13.0 # 'all' ile rich gibi ekstraları da kurar
14
+ rich>=13.0.0,<14.0.0
15
+
16
+ # Retorik Analiz için Duygu Analizi
17
+ vaderSentiment>=3.3.0,<4.0.0
18
+
19
+ # Argüman Görselleştirme için Grafik Kütüphanesi
20
+ networkx>=3.0,<4.0
21
+
22
+ # İleride gerekebilecekler (kurulu olabilir veya olmayabilir)
23
+ # scikit-learn>=1.2.0,<1.5.0
24
+
25
+ # Dil modeli (spaCy için ayrıca indirilecek - komut: python -m spacy download en_core_web_lg)
26
+ # Bert modeli (transformers cache'i ile otomatik indirilecek)
rhetoric_analyzer.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # rhetoric_analyzer.py
2
+ import spacy
3
+ from spacy.tokens import Doc, Span, Token
4
+ from typing import List
5
+ import data_models # Düz yapı importu
6
+ from rich.console import Console
7
+ # VADER'ı import et
8
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
9
+
10
+ console = Console()
11
+ # VADER duygu analizcisini başlat
12
+ analyzer = SentimentIntensityAnalyzer()
13
+
14
+ # Basit Retorik İpuçları
15
+
16
# Strong-sentiment threshold for the VADER compound score.
# The compound score ranges from -1 (very negative) to +1 (very positive);
# |score| > 0.5 usually indicates a pronounced sentiment.
STRONG_SENTIMENT_THRESHOLD = 0.5

# Superlatives are recognised through spaCy's fine-grained tags:
# 'JJS' (superlative adjective) and 'RBS' (superlative adverb).
SUPERLATIVE_TAGS = {
    "JJS",
    "RBS",
}
24
+
25
+ # Retorik Sorular (Basit: Soru işareti var mı?)
26
+ # Daha gelişmiş analiz gerekir ama başlangıç için bu yeterli.
27
+
28
def analyze_sentence_sentiment(sent: Span) -> List[data_models.Finding]:
    """Report a sentence whose VADER compound score is strongly polarised.

    The sentence text is scored with the module-level VADER analyzer. A
    finding is produced only when the compound score reaches
    +STRONG_SENTIMENT_THRESHOLD or -STRONG_SENTIMENT_THRESHOLD.

    Returns:
        A list with one medium-severity "RhetoricalDevice" finding (carrying
        the full VADER score dict in its details), or an empty list.
    """
    # polarity_scores returns a dict with 'neg', 'neu', 'pos' and 'compound'.
    scores = analyzer.polarity_scores(sent.text)
    compound_score = scores['compound']

    if compound_score >= STRONG_SENTIMENT_THRESHOLD:
        description = f"Sentence potentially expresses strong positive sentiment (VADER score: {compound_score:.2f})."
    elif compound_score <= -STRONG_SENTIMENT_THRESHOLD:
        description = f"Sentence potentially expresses strong negative sentiment (VADER score: {compound_score:.2f})."
    else:
        # Neither bound reached: nothing to report.
        return []

    return [
        data_models.Finding(
            finding_type="RhetoricalDevice",
            description=description,
            severity="Medium",
            span_start=sent.start_char,
            span_end=sent.end_char,
            details={"device_type": "Strong Sentiment", "vader_score": scores},
        )
    ]
55
+
56
def detect_superlatives(sent: Span) -> List[data_models.Finding]:
    """Report the superlative words found in a sentence.

    A token counts as a superlative when its fine-grained tag is in
    SUPERLATIVE_TAGS.

    Returns:
        A list with one low-severity "RhetoricalDevice" finding listing all
        matching words in sentence order, or an empty list when there are none.
    """
    superlative_words = [tok.text for tok in sent if tok.tag_ in SUPERLATIVE_TAGS]
    if not superlative_words:
        return []
    return [
        data_models.Finding(
            finding_type="RhetoricalDevice",
            description=f"Use of superlative(s) detected: {', '.join(superlative_words)}.",
            severity="Low",  # A weak indicator on its own.
            span_start=sent.start_char,
            span_end=sent.end_char,
            details={"device_type": "Superlative", "words": superlative_words},
        )
    ]
74
+
75
def detect_rhetorical_questions(sent: Span) -> List[data_models.Finding]:
    """Report a sentence that ends with a question mark (very simple check).

    Surrounding whitespace is stripped before the check. No attempt is made
    to tell genuine questions from rhetorical ones.

    Returns:
        A list with one low-severity "RhetoricalDevice" finding, or an empty
        list when the stripped text does not end with "?".
    """
    is_question = sent.text.strip().endswith("?")
    if not is_question:
        return []
    return [
        data_models.Finding(
            finding_type="RhetoricalDevice",
            description="Sentence ends with a question mark (potential rhetorical question).",
            severity="Low",  # Hard to judge without context.
            span_start=sent.start_char,
            span_end=sent.end_char,
            details={"device_type": "Potential Question"},
        )
    ]
88
+
89
def simple_rhetoric_analyzer(doc: Doc) -> List[data_models.Finding]:
    """Run every simple rhetoric check on each sentence of *doc*.

    Returns:
        All findings, grouped by sentence in document order; within a
        sentence the order is sentiment, superlatives, question mark.
    """
    # Further rhetoric checks can be appended to this tuple.
    sentence_checks = (
        analyze_sentence_sentiment,
        detect_superlatives,
        detect_rhetorical_questions,
    )
    all_findings = []
    for sentence in doc.sents:
        for check in sentence_checks:
            all_findings.extend(check(sentence))

    console.print(f" -> Simple Rhetoric Analyzer found {len(all_findings)} potential rhetorical indicators.", style="dim")
    return all_findings
synthesis_engine.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # synthesis_engine.py (V2 - Adım 2.4 Revizyonu: Kanıt Değerlendirmesi Kaldırıldı)
2
+ from typing import List, Dict, Tuple
3
+ import data_models
4
+ from rich.console import Console
5
+
6
+ console = Console()
7
+
8
# Rating thresholds. The former evidence-related thresholds were removed
# together with the evidence evaluation.

# Fallacy counts compared against when rating logical soundness.
FALLACY_THRESHOLD_LOW = 1
FALLACY_THRESHOLD_MEDIUM = 0

# Rhetorical-device counts compared against when rating rhetorical clarity.
RHETORIC_THRESHOLD_HIGH = 5
RHETORIC_THRESHOLD_MEDIUM = 2
15
+
16
def generate_summary_ratings(
    components: List[data_models.ArgumentComponent],
    findings: List[data_models.Finding]
) -> Dict[str, str]:
    """Derive coarse summary ratings from the collected findings.

    Args:
        components: Argument components; currently unused because the
            evidence evaluation has been disabled.
        findings: Findings whose ``finding_type`` is counted per category.

    Returns:
        A dict with the keys "Logical Soundness", "Evidential Basis" (always
        "Not Evaluated") and "Rhetorical Clarity", in that order.
    """
    # --- Logical soundness ---
    # TODO: confidence scores from the ML model could be factored in here.
    fallacy_count = sum(1 for item in findings if item.finding_type == "Fallacy")
    if fallacy_count >= FALLACY_THRESHOLD_LOW:
        soundness = "Low (Potential fallacies detected)"
    elif fallacy_count == FALLACY_THRESHOLD_MEDIUM:
        soundness = "Medium (No obvious fallacies detected by current rules/model)"
    else:
        # With thresholds of 1 and 0 a non-negative count never reaches this
        # branch; it only becomes reachable if the thresholds are changed.
        soundness = "High (Potentially sound)"

    # --- Rhetorical clarity ---
    rhetoric_count = sum(1 for item in findings if item.finding_type == "RhetoricalDevice")
    if rhetoric_count >= RHETORIC_THRESHOLD_HIGH:
        clarity = "Questionable (High use of rhetorical devices detected)"
    elif rhetoric_count >= RHETORIC_THRESHOLD_MEDIUM:
        clarity = "Mixed (Some rhetorical devices detected)"
    else:
        clarity = "Appears Clear (Few rhetorical devices detected)"

    console.print(f" -> Synthesis engine generated summary ratings (Evidence analysis excluded).", style="dim")
    return {
        "Logical Soundness": soundness,
        # Evidence evaluation is temporarily disabled (no reliable analysis yet).
        "Evidential Basis": "Not Evaluated",
        "Rhetorical Clarity": clarity,
    }