naveed-stockmark committed on
Commit
cdc1b6c
·
verified ·
1 Parent(s): bf2f906

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +0 -38
utils.py CHANGED
@@ -1,41 +1,3 @@
1
- import json
2
-
3
- def dump_json(file, path):
4
- """Save json object"""
5
-
6
- with open(path, 'w', encoding='utf-8') as f:
7
- json.dump(file, f, indent=4, ensure_ascii=False)
8
- print("Saved json to path: " + str(path))
9
-
10
- def load_json(path):
11
- """load json object"""
12
- with open(path, 'rb') as f:
13
- data = json.load(f)
14
- print("Loaded json from path: " + str(path))
15
- return data
16
-
17
- def lead_k_sentences(text, k=50):
18
- """Select the first k sentences from a Japanese document"""
19
-
20
- DELIMITER = '。'
21
-
22
- if DELIMITER in text:
23
- segments = [seg for seg in text.split(DELIMITER)[:k] if len(seg) > 0]
24
- return DELIMITER.join(segments) + DELIMITER
25
- else:
26
- return text
27
-
28
- import jsonlines
29
-
30
- def read_jsonlines(path):
31
- with jsonlines.open(path) as reader:
32
- lines = [obj for obj in reader]
33
- return lines
34
-
35
- def write_jsonlines(file, path):
36
- with jsonlines.open(path, 'w') as writer:
37
- writer.write_all(file)
38
-
39
  import re
40
 
41
  def normalize_text(s):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
 
3
  def normalize_text(s):