Upload inference.py
Browse files- inference.py +18 -0
inference.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# inference.py
|
2 |
+
from tokenizers import Tokenizer
|
3 |
+
from typing import List, Dict
|
4 |
+
|
5 |
+
# Load the tokenizer from a local file in the current working directory.
# Tokenizer.from_pretrained() interprets its argument as a Hugging Face Hub
# repository identifier, so passing "." does not read local files;
# Tokenizer.from_file() takes a filesystem path instead.
# NOTE(review): assumes the serialized tokenizer is named "tokenizer.json"
# (the conventional name) -- confirm against the files shipped with this repo.
tokenizer = Tokenizer.from_file("tokenizer.json")
|
7 |
+
|
8 |
+
def tokenize(text: str) -> Dict[str, List]:
    """Encode ``text`` with the module-level ``tokenizer``.

    Args:
        text: The raw string to encode.

    Returns:
        A dict with two entries read from the resulting encoding:
        ``"tokens"`` (``encoded.tokens``, the token strings) and ``"ids"``
        (``encoded.ids``, the corresponding integer ids). The value lists
        therefore do not share one element type, which is why the return
        annotation is not ``Dict[str, List[str]]``.
    """
    encoded = tokenizer.encode(text)
    return {
        "tokens": encoded.tokens,
        "ids": encoded.ids,
    }
|
15 |
+
|
16 |
+
# Test example (optional): runs only when this file is executed as a script.
if __name__ == "__main__":
    sample_result = tokenize("Hello, this is a test.")
    print(sample_result)
|