Commit 76e1a38 · updated README
Parent: b97e015

Files changed:
- README.md (+39 -4)
- perplexity.py (+6 -6)
- requirements.txt (+1 -0)
README.md CHANGED

````diff
@@ -29,11 +29,46 @@
 For more information, see https://huggingface.co/docs/transformers/perplexity
 
 ## How to Use
-At minimum, this metric requires the model and
+At minimum, this metric requires the model and data as inputs.
 ```python
->>>
+>>> import evaluate
+>>> perplexity = evaluate.load("d-matrix/perplexity", module_type="metric")
 >>> input_texts = ["lorem ipsum", "Happy Birthday!", "Bienvenue"]
->>> results = perplexity.compute(model='distilgpt2',
+>>> results = perplexity.compute(model='distilgpt2', data=input_texts)
 >>> print(results)
 {'loss': ..., 'perplexity': ...}
-```
+```
+
+### Inputs
+- **model** (`Union[str, AutoModelForCausalLM]`): the model used for calculating perplexity.
+- **data** (`list` of `str`): input text; each separate text snippet is one list entry.
+- **device** (`str`): device to run on; defaults to 'cuda' when available.
+- **max_length** (`int`): maximum sequence length; defaults to 2048.
+
+### Output Values
+- **loss** (`float`): the loss of the model predictions compared to the reference.
+- **perplexity** (`float`): the uncertainty of the model when predicting the text; lower perplexity indicates better performance.
+
+Output Example(s):
+```python
+{'loss': 3.8299286365509033, 'perplexity': 46.05925369262695}
+```
+This metric outputs a dictionary containing the loss and the perplexity score.
+
+### Examples
+```python
+>>> import evaluate
+>>> from datasets import load_dataset
+>>> perplexity = evaluate.load("d-matrix/perplexity", module_type="metric")
+>>> input_texts = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")["text"][:10]
+>>> results = perplexity.compute(model='distilgpt2', data=input_texts)
+>>> print(list(results.keys()))
+['loss', 'perplexity']
+>>> print(results['loss'])
+3.8299286365509033
+>>> print(results['perplexity'])
+46.05925369262695
+```
+
+## Citation(s)
+https://huggingface.co/docs/transformers/perplexity
````
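As a sanity check on the two values the updated README documents, the sketch below reproduces the relationship the metric relies on: perplexity is the exponential of the average negative log-likelihood (cross-entropy loss), so exp(3.8299286365509033) ≈ 46.05925369262695, matching the Examples section. This is an illustrative sketch that mirrors the README's distilgpt2 example; it is not part of the commit and not the metric's actual implementation.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative sketch only: not the metric's actual code.
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")
model.eval()

# The perplexity.py diff below (line 94) shows that text snippets are
# joined with blank lines before tokenization.
input_texts = ["lorem ipsum", "Happy Birthday!", "Bienvenue"]
encodings = tokenizer("\n\n".join(input_texts), return_tensors="pt")

with torch.no_grad():
    # Passing labels makes the model return the mean cross-entropy loss.
    out = model(encodings.input_ids, labels=encodings.input_ids)

print({"loss": out.loss.item(), "perplexity": torch.exp(out.loss).item()})
```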
perplexity.py CHANGED

```diff
@@ -20,7 +20,7 @@ Args:
         causal versions of t5, and more (the full list can be found
         in the AutoModelForCausalLM documentation here:
         https://huggingface.co/docs/transformers/master/en/model_doc/auto#transformers.AutoModelForCausalLM )
-
+    data (list of str): input text, each separate text snippet is one list entry.
     device (str): device to run on, defaults to 'cuda' when available.
     max_length (int): maximum sequence length, defaults to 2048.
 Returns:
@@ -31,7 +31,7 @@ Examples:
     >>> perplexity = evaluate.load("dmx_perplexity", module_type="metric")
     >>> input_texts = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")["text"][:10] # doctest: +SKIP
     >>> results = perplexity.compute(model='distilgpt2',
-    ...
+    ... data=input_texts)
     >>> print(list(results.keys()))
     ['loss', 'perplexity']
     >>> print(results['loss']) # doctest: +SKIP
@@ -40,8 +40,8 @@ Examples:
     46.05925369262695
 """
 
-
-class DmxPerplexity(evaluate.Metric):
+@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Perplexity(evaluate.Metric):
     def _info(self):
         return evaluate.MetricInfo(
             module_type="metric",
@@ -58,7 +58,7 @@ class DmxPerplexity(evaluate.Metric):
 
     def _compute(
         self,
-
+        data,
         model: Union[str, AutoModelForCausalLM],
         device=None,
         max_length=None,
@@ -91,7 +91,7 @@ class DmxPerplexity(evaluate.Metric):
             max_seq_len = 2048
 
         model = model.to(device)
-        encodings = tokenizer("\n\n".join(
+        encodings = tokenizer("\n\n".join(data), return_tensors="pt")
 
         stride = max_seq_len
         seq_len = encodings.input_ids.size(1)
```
requirements.txt CHANGED

```diff
@@ -1,3 +1,4 @@
+git+https://github.com/huggingface/evaluate@main
 evaluate
 transformers
 torch
```