Max1798 committed on
Commit
a988268
·
verified ·
1 Parent(s): 96ad625

Upload inference.py

Browse files
Files changed (1) hide show
  1. inference.py +18 -0
inference.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # inference.py
2
from typing import Dict, List, Union

from tokenizers import Tokenizer
4
+
5
# Load the tokenizer from the serialized "tokenizer.json" in the current
# working directory. Tokenizer.from_pretrained() expects a Hugging Face Hub
# repository identifier rather than a local path, so a local file has to be
# loaded with Tokenizer.from_file() instead.
tokenizer = Tokenizer.from_file("tokenizer.json")
7
+
8
def tokenize(text: str) -> Dict[str, Union[List[str], List[int]]]:
    """Encode ``text`` with the module-level ``tokenizer``.

    Args:
        text: The string to tokenize.

    Returns:
        A dict with two entries taken from the same encoding:
        ``"tokens"`` holds ``encoded.tokens`` and ``"ids"`` holds
        ``encoded.ids``.
    """
    encoded = tokenizer.encode(text)
    return {
        "tokens": encoded.tokens,
        "ids": encoded.ids,
    }
15
+
16
# Optional smoke test: runs only when this file is executed as a script.
if __name__ == "__main__":
    sample_output = tokenize("Hello, this is a test.")
    print(sample_output)