AjayP13 committed
Commit 921e2f9 · verified · 1 Parent(s): bf6a2f3

Update README.md

Files changed (1)
  1. README.md +33 -2
README.md CHANGED
@@ -17,8 +17,33 @@ The output will be a JSON object.

```python
import json
+ import re
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

+ # Helper to expand excerpts in the answer
+ def expand(document, text):
+     excerpt_pattern = r"<excerpt>(.*?)<\.\.\.>(.*?)</excerpt>"
+     matches = re.findall(excerpt_pattern, text, flags=re.DOTALL)
+     replacements = {}
+     for prefix, suffix in matches:
+         match = re.search(
+             re.escape(prefix) + r" (.*?) " + re.escape(suffix),
+             document,
+             flags=re.DOTALL,
+         )
+         try:
+             if match:
+                 replacements[f"<excerpt>{prefix}<...>{suffix}</excerpt>"] = match.group(
+                     0
+                 )
+             else:
+                 return None
+         except Exception:
+             return None
+     for old, new in replacements.items():
+         text = text.replace(old, new)
+     return text
+
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('fineinstructions/template_instantiator', revision=None)
tokenizer.padding_side = 'left'
 
@@ -33,9 +58,15 @@ inputs = [json.dumps({
prompts = [tokenizer.apply_chat_template([{'role': 'user', 'content': i}], tokenize=False, add_generation_prompt=True) for i in inputs]
generations = pipe(prompts, max_length=131072, truncation=True, temperature=None, top_p=None, do_sample=False)
output = generations[0][0]['generated_text']
- print(output)
+ output_json = json.loads(output)
+
+ # Expand the answer (inputs[0] is a JSON string, so re-parse it to recover the document)
+ output_json["answer"] = expand(document=json.loads(inputs[0])["document"], text=output_json["answer"])
+
+ # Print the output JSON
+ print(output_json)

- ##### Output:
+ ##### Output JSON:
# {
# ..
# }
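
For clarity, here is a small sketch of what the new `expand` helper does: it re-inflates `<excerpt>prefix<...>suffix</excerpt>` markers in a generated answer back into the full passage from the source document. The `document` and `answer` strings below are invented for illustration (they are not from the README), and the snippet assumes the `expand` function defined in the updated README above is already in scope.

```python
# Hypothetical example: the document and answer are made up to illustrate
# excerpt expansion. Assumes `expand` from the README snippet is defined.
document = (
    "Solar panels convert sunlight into electricity using photovoltaic cells, "
    "which are typically made from silicon and arranged into large arrays."
)

# A generated answer that compresses a quotation into an excerpt marker.
answer = (
    "According to the document, "
    "<excerpt>Solar panels convert<...>made from silicon and arranged into large arrays.</excerpt>"
)

# The helper finds the prefix/suffix pair in the document and substitutes the
# full matched span back into the answer.
expanded = expand(document=document, text=answer)
print(expanded)
# According to the document, Solar panels convert sunlight into electricity using
# photovoltaic cells, which are typically made from silicon and arranged into large arrays.
```

Note that `expand` returns `None` when a prefix/suffix pair cannot be located in the document, so callers may want to fall back to the unexpanded answer in that case.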