|
|
--- |
|
|
license: mit |
|
|
pipeline_tag: text-classification |
|
|
library_name: transformers |
|
|
tags: |
|
|
- TEXT |
|
|
- MODEL |
|
|
- text-classification |
|
|
- ai-detection |
|
|
- xlm-roberta |
|
|
- multilingual |
|
|
- text-classification |
|
|
- human-vs-ai |
|
|
--- |
|
|
|
|
|
# Text Detector |
|
|
|
|
|
## 🧠 Model Description |
|
|
This model is designed to detect whether a text is AI-generated or human-written. |
|
|
It uses **XLM-RoBERTa** architecture for accurate **multilingual text classification**. |
|
|
|
|
|
--- |
|
|
|
|
|
## Model Usage |
|
|
|
|
|
### Python Usage |
|
|
```python |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
import torch |
|
|
|
|
|
# Fetch the tokenizer and the sequence-classification model from one checkpoint.
# Both are loaded from the same repository id, so it is named once here.
_CHECKPOINT = "yaya36095/text-detector"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
|
|
|
|
|
def detect_text(text):
    """Classify ``text`` as human-written or AI-generated.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the module-level ``model`` with gradient tracking disabled, and the
    logits are turned into probabilities with a softmax over the last
    dimension.

    Args:
        text: The input string to classify.

    Returns:
        A dict with three keys:

        * ``"prediction"`` - label (``"HUMAN"`` or ``"AI"``) of the class
          with the highest probability.
        * ``"confidence"`` - that class's probability formatted as a
          percentage string with two decimals, e.g. ``"97.31%"``.
        * ``"detailed_scores"`` - one ``"LABEL: xx.xx%"`` string per class,
          in the order HUMAN, AI.
    """
    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Get prediction; no gradients are needed for inference
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Process results: only the first (and only) item of the batch is used
    scores = predictions[0].tolist()
    # NOTE(review): assumes logit index 0 is HUMAN and index 1 is AI -
    # confirm against the checkpoint's label mapping.
    results = [
        {"label": "HUMAN", "score": scores[0]},
        {"label": "AI", "score": scores[1]},
    ]

    # Report the class with the highest probability. Reading results[0]
    # unconditionally would label every input as HUMAN. On an exact tie,
    # max() keeps the first entry, i.e. HUMAN.
    top = max(results, key=lambda r: r["score"])

    return {
        "prediction": top["label"],
        "confidence": f"{top['score']*100:.2f}%",
        "detailed_scores": [
            f"{r['label']}: {r['score']*100:.2f}%" for r in results
        ],
    }