"""Smart Paraphraser: Gradio app that paraphrases pasted text or an uploaded .docx,
returns the result as a downloadable .docx, and reports a simple AI detection score."""

import io
import re

import gradio as gr
from docx import Document
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the paraphrasing model and its tokenizer once at startup so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")


def paraphrase_text(text):
    # The model was fine-tuned with a "paraphrase: ..." task prefix on its inputs.
    input_text = f"paraphrase: {text} </s>"
    input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
    output_ids = model.generate(input_ids, max_length=256, do_sample=True, top_k=120, top_p=0.95, temperature=1.5)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def chunk_text(text, max_sentences=4):
    # Split on sentence-ending punctuation, then group sentences into fixed-size chunks.
    sentences = re.split(r'(?<=[.!?]) +', text.strip())
    return [' '.join(sentences[i:i+max_sentences]) for i in range(0, len(sentences), max_sentences)]


def full_article_paraphrase(text):
    # Paraphrase chunk by chunk and rejoin the results as separate paragraphs.
    chunks = chunk_text(text)
    return "\n\n".join(paraphrase_text(chunk.strip()) for chunk in chunks if chunk.strip())


def extract_text_from_docx(file_obj):
    # Gradio's File component may pass a filepath string, a file-like object, or raw bytes.
    if isinstance(file_obj, str):
        doc = Document(file_obj)
    else:
        file_bytes = file_obj.read() if hasattr(file_obj, "read") else file_obj
        doc = Document(io.BytesIO(file_bytes))
    return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])


def save_docx(text):
    # Write each paraphrased paragraph to a new .docx and return its path for download.
    doc = Document()
    for para in text.split("\n\n"):
        doc.add_paragraph(para.strip())
    filepath = "/tmp/paraphrased_output.docx"
    doc.save(filepath)
    return filepath


def get_ai_score(text):
    # Simple length-based placeholder; it does not run an actual AI-content detector.
    return "AI Detection Score: Likely Human" if len(text) > 100 else "AI Detection Score: Short Text"


def full_pipeline(input_text=None, file=None):
    # An uploaded .docx takes precedence over pasted text.
    if file is not None:
        input_text = extract_text_from_docx(file)
    if not input_text or len(input_text.strip()) < 10:
        return "Please enter or upload valid text.", None, "No text to analyze."
    result = full_article_paraphrase(input_text)
    docx_file = save_docx(result)
    ai_score = get_ai_score(result)
    return result, docx_file, ai_score


demo = gr.Interface(
    fn=full_pipeline,
    inputs=[
        gr.Textbox(label="Paste Text (optional)", lines=20, placeholder="Or upload a .docx file below..."),
        gr.File(label="Upload .docx File (optional)", file_types=[".docx"]),
    ],
    outputs=[
        gr.Textbox(label="Paraphrased Output"),
        gr.File(label="Download .docx File"),
        gr.Textbox(label="AI Detection Score"),
    ],
    title="Smart Paraphraser",
    description="Paste or upload your article. Get paraphrased output, download as .docx, and see an AI detection score.",
)


if __name__ == "__main__":
    demo.launch()