KoichiYasuoka committed on
Commit
017da4d
·
1 Parent(s): 50651d9

model improved

Browse files
Files changed (2) hide show
  1. maker.py +14 -1
  2. pytorch_model.bin +1 -1
maker.py CHANGED
@@ -68,7 +68,7 @@ class UDTriangularDataset(object):
68
  ids=[self.tokenizer.cls_token_id]
69
  upos=["SYM|x"]
70
  for i,k in enumerate(v):
71
- if len(v)<128 or x[i]=="o":
72
  ids.append(k)
73
  upos.append(p[i]+"|"+d[i] if h[i]==i+1 else p[i])
74
  for j in range(i+1,len(v)):
@@ -76,6 +76,19 @@ class UDTriangularDataset(object):
76
  upos.append(p[j]+"|"+d[j] if h[j]==i+1 else p[i]+"|"+d[i] if h[i]==j+1 else p[j])
77
  ids.append(self.tokenizer.sep_token_id)
78
  upos.append("SYM|x")
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  return {"input_ids":ids[:8192],"labels":[self.label2id[p] for p in upos[:8192]]}
80
  from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
81
  tkz=AutoTokenizer.from_pretrained(src)
 
68
  ids=[self.tokenizer.cls_token_id]
69
  upos=["SYM|x"]
70
  for i,k in enumerate(v):
71
+ if len(v)<127 or x[i]=="o":
72
  ids.append(k)
73
  upos.append(p[i]+"|"+d[i] if h[i]==i+1 else p[i])
74
  for j in range(i+1,len(v)):
 
76
  upos.append(p[j]+"|"+d[j] if h[j]==i+1 else p[i]+"|"+d[i] if h[i]==j+1 else p[j])
77
  ids.append(self.tokenizer.sep_token_id)
78
  upos.append("SYM|x")
79
+ i=0
80
+ while len(ids)>8192:
81
+ try:
82
+ i=ids.index(self.tokenizer.sep_token_id,ids.index(self.tokenizer.sep_token_id,i+1)+1)-1
83
+ except:
84
+ break
85
+ while len(ids)>8192 and ids[i]!=self.tokenizer.sep_token_id:
86
+ if upos[i].endswith("|x"):
87
+ ids.pop(i)
88
+ upos.pop(i)
89
+ i-=1
90
+ else:
91
+ break
92
  return {"input_ids":ids[:8192],"labels":[self.label2id[p] for p in upos[:8192]]}
93
  from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
94
  tkz=AutoTokenizer.from_pretrained(src)
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07b563b8bff1160d679c663459a1b5ad87162917007bd0a6e787286a3e13affa
3
  size 1396387202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f755f0089a2ea760e0b50823ec2676e3ed4fb7d520a47fd38f9b279db57bafdb
3
  size 1396387202