xMorion committed on
Commit
5aebb45
verified
1 Parent(s): 69e8269

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -23
app.py CHANGED
@@ -1,26 +1,63 @@
1
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
2
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- # Cargar el tokenizer y el modelo desde el repositorio del modelo preentrenado
5
- MODEL = 'cardiffnlp/twitter-roberta-base-sentiment'
6
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
7
 
8
- # Crear el pipeline de resumen utilizando los objetos cargados
9
- resumidor = pipeline("summarization", model=MODEL, tokenizer=tokenizer)
10
-
11
- def generar_resumen(texto):
12
- # Generar el resumen usando el pipeline
13
- resumen = resumidor(texto, max_length=130, min_length=30, do_sample=False)
14
- return resumen[0]["summary_text"]
15
-
16
- # Crear la interfaz de Gradio
17
- demo = gr.Interface(
18
- fn=generar_resumen,
19
- inputs=gr.Textbox(lines=10, placeholder="Pega aquí un texto largo en español..."),
20
- outputs="text",
21
- title="Generación de Resúmenes Automáticos",
22
- description="Introduce un texto largo en español y obtén un resumen generado automáticamente por un modelo preentrenado."
23
- )
24
-
25
- # Lanzar la aplicaci贸n
26
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSequenceClassification
2
+ from transformers import TFAutoModelForSequenceClassification
3
+ from transformers import AutoTokenizer
4
+ import numpy as np
5
+ from scipy.special import softmax
6
+ import csv
7
+ import urllib.request
8
+
9
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Mask Twitter handles as '@user' and URLs as 'http'.

    Tokens are split on single spaces (runs of spaces are preserved
    on rejoin), matching the tweeteval preprocessing convention.
    """
    masked = []
    for token in text.split(" "):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        masked.append(token)
    return " ".join(masked)
19
+
20
# Tasks:
# emoji, emotion, hate, irony, offensive, sentiment
# stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
task = 'sentiment'
# Hugging Face hub id of the tweeteval model for the selected task.
MODEL = "cardiffnlp/twitter-roberta-base-" + task
26
 
 
 
27
# Load the tokenizer matching the selected model.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Download the task's label mapping (class index -> human-readable label)
# from the tweeteval repository; each line is "<index>\t<label>".
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    mapping_lines = f.read().decode('utf-8').split("\n")
labels = [row[1] for row in csv.reader(mapping_lines, delimiter='\t') if len(row) > 1]
36
+
37
# PT (PyTorch) path: load the classifier, score one example,
# and turn the raw logits into probabilities.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.save_pretrained(MODEL)  # keep a local copy of the weights on disk

text = "Good night 馃槉"  # NOTE(review): emoji looks mojibake-encoded (likely U+1F60A 😊) — confirm source encoding
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = softmax(output[0][0].detach().numpy())

# # TF (TensorFlow) alternative, kept for reference:
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)
#
# text = "Good night 馃槉"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)
57
+
58
# Print every label with its probability, most confident first.
ranking = np.argsort(scores)
ranking = ranking[::-1]  # argsort is ascending; reverse for descending confidence
for rank, idx in enumerate(ranking, start=1):
    print(f"{rank}) {labels[idx]} {np.round(float(scores[idx]), 4)}")