Update summarize.py
Browse files- summarize.py +5 -3
summarize.py
CHANGED
|
@@ -63,8 +63,8 @@ def summarize_and_score(ids, mask, model, tokenizer, **kwargs):
|
|
| 63 |
remove_invalid_values=True,
|
| 64 |
)
|
| 65 |
score = round(summary_pred_ids.sequences_scores.cpu().numpy()[0], 4)
|
| 66 |
-
|
| 67 |
-
return summary, score
|
| 68 |
|
| 69 |
|
| 70 |
def summarize_via_tokenbatches(
|
|
@@ -111,7 +111,7 @@ def summarize_via_tokenbatches(
|
|
| 111 |
|
| 112 |
for _id, _mask in zip(in_id_arr, att_arr):
|
| 113 |
|
| 114 |
-
result, score = summarize_and_score(
|
| 115 |
ids=_id,
|
| 116 |
mask=_mask,
|
| 117 |
model=model,
|
|
@@ -119,10 +119,12 @@ def summarize_via_tokenbatches(
|
|
| 119 |
**kwargs,
|
| 120 |
)
|
| 121 |
score = round(float(score), 4)
|
|
|
|
| 122 |
_sum = {
|
| 123 |
"input_tokens": _id,
|
| 124 |
"summary": result,
|
| 125 |
"summary_score": score,
|
|
|
|
| 126 |
}
|
| 127 |
gen_summaries.append(_sum)
|
| 128 |
print(f"\t{result[0]}\nScore:\t{score}")
|
|
|
|
| 63 |
remove_invalid_values=True,
|
| 64 |
)
|
| 65 |
score = round(summary_pred_ids.sequences_scores.cpu().numpy()[0], 4)
|
| 66 |
+
len_res = len(summary_pred_ids.sequences.cpu().numpy()[0])
|
| 67 |
+
return summary, score, len_res
|
| 68 |
|
| 69 |
|
| 70 |
def summarize_via_tokenbatches(
|
|
|
|
| 111 |
|
| 112 |
for _id, _mask in zip(in_id_arr, att_arr):
|
| 113 |
|
| 114 |
+
result, score, l = summarize_and_score(
|
| 115 |
ids=_id,
|
| 116 |
mask=_mask,
|
| 117 |
model=model,
|
|
|
|
| 119 |
**kwargs,
|
| 120 |
)
|
| 121 |
score = round(float(score), 4)
|
| 122 |
+
rate = round(float(len(_id)-l)/len(_id),3)
|
| 123 |
_sum = {
|
| 124 |
"input_tokens": _id,
|
| 125 |
"summary": result,
|
| 126 |
"summary_score": score,
|
| 127 |
+
"compression_rate": rate,
|
| 128 |
}
|
| 129 |
gen_summaries.append(_sum)
|
| 130 |
print(f"\t{result[0]}\nScore:\t{score}")
|