Safetensors
Serbian
t5
procesaur commited on
Commit
d5be79d
·
verified ·
1 Parent(s): f2b2899

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +19 -26
README.md CHANGED
@@ -31,30 +31,22 @@ language:
31
  </table>
32
 
33
  ```python
34
- >>> from transformers import pipeline
35
- >>> unmasker = pipeline('fill-mask', model='te-sla/teslaXLM')
36
- >>> unmasker("Kada bi čovek znao gde će pasti on bi<mask>.")
 
 
 
 
 
 
 
 
 
37
  ```
38
 
39
  ```python
40
- >>> from transformers import AutoTokenizer, AutoModelForMaskedLM
41
- >>> from torch import LongTensor, no_grad
42
- >>> from scipy import spatial
43
- >>> tokenizer = AutoTokenizer.from_pretrained('te-sla/teslaXLM')
44
- >>> model = AutoModelForMaskedLM.from_pretrained('te-sla/teslaXLM', output_hidden_states=True)
45
- >>> x = " pas"
46
- >>> y = " mačka"
47
- >>> z = " svemir"
48
- >>> tensor_x = LongTensor(tokenizer.encode(x, add_special_tokens=False)).unsqueeze(0)
49
- >>> tensor_y = LongTensor(tokenizer.encode(y, add_special_tokens=False)).unsqueeze(0)
50
- >>> tensor_z = LongTensor(tokenizer.encode(z, add_special_tokens=False)).unsqueeze(0)
51
- >>> model.eval()
52
- >>> with no_grad():
53
- >>> vektor_x = model(input_ids=tensor_x).hidden_states[-1].squeeze()
54
- >>> vektor_y = model(input_ids=tensor_y).hidden_states[-1].squeeze()
55
- >>> vektor_z = model(input_ids=tensor_z).hidden_states[-1].squeeze()
56
- >>> print(spatial.distance.cosine(vektor_x, vektor_y))
57
- >>> print(spatial.distance.cosine(vektor_x, vektor_z))
58
  ```
59
 
60
  <div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px">
@@ -99,18 +91,19 @@ language:
99
  </div>
100
 
101
  ## Cit.
102
-
103
  ```bibtex
104
- @inproceedings{skoricxlm,
105
- author = {Mihailo Škorić, Saša Petalinkar},
106
- title = {New XLM-R-based language models for Serbian and Serbo-Croatian},
107
  booktitle = {ARTIFICIAL INTELLIGENCE CONFERENCE},
108
- year = {2024},
109
  address = {Belgrade},
110
  publisher = {SASA, Belgrade},
111
  url = {}
112
  }
113
  ```
 
114
  <br/>
115
  <div id="zastava">
116
  <div class="grb">
 
31
  </table>
32
 
33
  ```python
34
+ >>> from transformers import T5ForConditionalGeneration, T5TokenizerFast
35
+ >>> import torch
36
+
37
+ >>> model = T5ForConditionalGeneration.from_pretrained("te-sla/pilot5")
38
+ >>> tokenizer = T5TokenizerFast.from_pretrained("te-sla/pilot5")
39
+ >>> text = "ova sekcija sadrži ideje za prioritetne pravce/teme razvoja jezičkih tehnologija (NLP) za srpski jezik. Alternativni pravci razvoja su ukratko pobrojani u odeljku H2."
40
+ >>> input = tokenizer(text, return_tensors="pt")
41
+
42
+ >>> with torch.no_grad():
43
+ >>> output = model.generate(input_ids=input["input_ids"], attention_mask=input["attention_mask"], do_sample=False)
44
+ >>> decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
45
+ >>> print(decoded_output)
46
  ```
47
 
48
  ```python
49
+ >>> ova sekcija sadrži ideje za prioritetne pravce/teme razvoja jezičkih tehnologija (NLP) za srpski jezik. Alternativni pravci razvoja su ukratko pobrojani u odeljku H2.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ```
51
 
52
  <div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px">
 
91
  </div>
92
 
93
  ## Cit.
94
+ <!--div>
95
  ```bibtex
96
+ @inproceedings{skorict5,
97
+ author = {Mihailo Škorić},
98
+ title = {Pilot Text to Text Transfer Transformer Model for Serbian Language},
99
  booktitle = {ARTIFICIAL INTELLIGENCE CONFERENCE},
100
+ year = {2025},
101
  address = {Belgrade},
102
  publisher = {SASA, Belgrade},
103
  url = {}
104
  }
105
  ```
106
+ </div-->
107
  <br/>
108
  <div id="zastava">
109
  <div class="grb">