Update README.md
Browse files
README.md
CHANGED
@@ -31,30 +31,22 @@ language:
|
|
31 |
</table>
|
32 |
|
33 |
```python
|
34 |
-
>>> from transformers import
|
35 |
-
>>>
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
```
|
38 |
|
39 |
```python
|
40 |
-
>>>
|
41 |
-
>>> from torch import LongTensor, no_grad
|
42 |
-
>>> from scipy import spatial
|
43 |
-
>>> tokenizer = AutoTokenizer.from_pretrained('te-sla/teslaXLM')
|
44 |
-
>>> model = AutoModelForMaskedLM.from_pretrained('te-sla/teslaXLM', output_hidden_states=True)
|
45 |
-
>>> x = " pas"
|
46 |
-
>>> y = " mačka"
|
47 |
-
>>> z = " svemir"
|
48 |
-
>>> tensor_x = LongTensor(tokenizer.encode(x, add_special_tokens=False)).unsqueeze(0)
|
49 |
-
>>> tensor_y = LongTensor(tokenizer.encode(y, add_special_tokens=False)).unsqueeze(0)
|
50 |
-
>>> tensor_z = LongTensor(tokenizer.encode(z, add_special_tokens=False)).unsqueeze(0)
|
51 |
-
>>> model.eval()
|
52 |
-
>>> with no_grad():
|
53 |
-
>>> vektor_x = model(input_ids=tensor_x).hidden_states[-1].squeeze()
|
54 |
-
>>> vektor_y = model(input_ids=tensor_y).hidden_states[-1].squeeze()
|
55 |
-
>>> vektor_z = model(input_ids=tensor_z).hidden_states[-1].squeeze()
|
56 |
-
>>> print(spatial.distance.cosine(vektor_x, vektor_y))
|
57 |
-
>>> print(spatial.distance.cosine(vektor_x, vektor_z))
|
58 |
```
|
59 |
|
60 |
<div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px">
|
@@ -99,18 +91,19 @@ language:
|
|
99 |
</div>
|
100 |
|
101 |
## Cit.
|
102 |
-
|
103 |
```bibtex
|
104 |
-
@inproceedings{
|
105 |
-
author = {Mihailo
|
106 |
-
title = {
|
107 |
booktitle = {ARTIFICIAL INTELLIGENCE CONFERENCE},
|
108 |
-
year = {
|
109 |
address = {Belgrade}
|
110 |
publisher = {SASA, Belgrade},
|
111 |
url = {}
|
112 |
}
|
113 |
```
|
|
|
114 |
<br/>
|
115 |
<div id="zastava">
|
116 |
<div class="grb">
|
|
|
31 |
</table>
|
32 |
|
33 |
```python
|
34 |
+
>>> from transformers import T5ForConditionalGeneration, T5TokenizerFast
|
35 |
+
>>> import torch
|
36 |
+
|
37 |
+
>>> model = T5ForConditionalGeneration.from_pretrained("te-sla/pilot5")
|
38 |
+
>>> tokenizer = T5TokenizerFast.from_pretrained("te-sla/pilot5")
|
39 |
+
>>> text = "ova sekcija sadrži ideje za prioritetne pravce/teme razvoja jezičkih tehnologija (NLP) za srpski jezik. Alternativni pravci razvoja su ukratko pobrojani u odeljku H2."
|
40 |
+
>>> input = tokenizer(text, return_tensors="pt")
|
41 |
+
|
42 |
+
>>> with torch.no_grad():
|
43 |
+
>>> output = model.generate(input_ids=input["input_ids"], attention_mask=input["attention_mask"], do_sample=False)
|
44 |
+
>>> decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
|
45 |
+
>>> print(decoded_output)
|
46 |
```
|
47 |
|
48 |
```python
|
49 |
+
>>> ova sekcija sadrži ideje za prioritetne pravce/teme razvoja jezičkih tehnologija (NLP) za srpski jezik. Alternativni pravci razvoja su ukratko pobrojani u odeljku H2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
```
|
51 |
|
52 |
<div class="inline-flex flex-col" style="line-height: 1.5;padding-right:50px">
|
|
|
91 |
</div>
|
92 |
|
93 |
## Cit.
|
94 |
+
<!--div>
|
95 |
```bibtex
|
96 |
+
@inproceedings{skorict5,
|
97 |
+
author = {Mihailo Škorić},
|
98 |
+
title = {Pilot Text to Text Transfer Transformer Model for Serbian Language},
|
99 |
booktitle = {ARTIFICIAL INTELLIGENCE CONFERENCE},
|
100 |
+
year = {2025},
|
101 |
address = {Belgrade}
|
102 |
publisher = {SASA, Belgrade},
|
103 |
url = {}
|
104 |
}
|
105 |
```
|
106 |
+
</div-->
|
107 |
<br/>
|
108 |
<div id="zastava">
|
109 |
<div class="grb">
|