Update README for small ONNX model
Browse files
README.md
CHANGED
|
@@ -51,32 +51,21 @@ a fast character-based sentence and paragraph boundary detection system optimize
|
|
| 51 |
> **Security Advantage:** This ONNX model format provides enhanced security compared to SKOPS models, as it doesn't require bypassing security measures with `trust_model=True`. ONNX models are the recommended option for security-sensitive environments.
|
| 52 |
|
| 53 |
```python
|
| 54 |
-
from huggingface_hub import hf_hub_download
|
| 55 |
-
from charboundary import TextSegmenter
|
| 56 |
-
from charboundary.onnx_support import enable_onnx
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
# Download the compressed model
|
| 62 |
-
model_path = hf_hub_download(repo_id="alea-institute/charboundary-small-onnx",
|
| 63 |
-
filename="model.onnx.xz")
|
| 64 |
-
|
| 65 |
-
# Load the model (handles .xz compression automatically)
|
| 66 |
-
segmenter = TextSegmenter.load(model_path)
|
| 67 |
|
| 68 |
# Use the model
|
| 69 |
text = "This is a test sentence. Here's another one!"
|
| 70 |
sentences = segmenter.segment_to_sentences(text)
|
| 71 |
print(sentences)
|
|
|
|
| 72 |
|
| 73 |
-
# Segment to
|
| 74 |
-
|
| 75 |
-
print(
|
| 76 |
-
|
| 77 |
-
# Get character-level spans
|
| 78 |
-
sentence_spans = segmenter.segment_to_sentence_spans(text)
|
| 79 |
-
print(sentence_spans) # [(0, 24), (25, 42)]
|
| 80 |
```
|
| 81 |
|
| 82 |
## Performance
|
|
|
|
| 51 |
> **Security Advantage:** This ONNX model format provides enhanced security compared to SKOPS models, as it doesn't require bypassing security measures with `trust_model=True`. ONNX models are the recommended option for security-sensitive environments.
|
| 52 |
|
| 53 |
```python
|
| 54 |
+
from charboundary import get_small_onnx_segmenter
|
|
|
|
|
|
|
| 55 |
|
| 56 |
+
# First load can be slow
|
| 57 |
+
segmenter = get_small_onnx_segmenter()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# Use the model
|
| 60 |
text = "This is a test sentence. Here's another one!"
|
| 61 |
sentences = segmenter.segment_to_sentences(text)
|
| 62 |
print(sentences)
|
| 63 |
+
# Output: ['This is a test sentence.', " Here's another one!"]
|
| 64 |
|
| 65 |
+
# Segment to spans
|
| 66 |
+
sentence_spans = segmenter.get_sentence_spans(text)
|
| 67 |
+
print(sentence_spans)
|
| 68 |
+
# Output: [(0, 24), (24, 44)]
|
|
|
|
|
|
|
|
|
|
| 69 |
```
|
| 70 |
|
| 71 |
## Performance
|