Commit
·
2703586
1
Parent(s):
30b460c
df experiment
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import re
|
2 |
import gradio
|
3 |
import torch
|
|
|
4 |
|
5 |
from PIL import Image
|
6 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
@@ -38,9 +39,10 @@ def process_document(image):
|
|
38 |
# postprocess
|
39 |
sequence = processor.batch_decode(outputs.sequences)[0]
|
40 |
sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
|
41 |
-
sequence = re.sub(r"<.*?>", "", sequence).strip() # remove first task start token
|
42 |
|
43 |
-
|
|
|
44 |
|
45 |
demo = gradio.Interface(
|
46 |
fn=process_document,
|
|
|
1 |
import re
|
2 |
import gradio
|
3 |
import torch
|
4 |
+
import pandas as pd
|
5 |
|
6 |
from PIL import Image
|
7 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
|
|
39 |
# postprocess
|
40 |
sequence = processor.batch_decode(outputs.sequences)[0]
|
41 |
sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
|
42 |
+
sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
|
43 |
|
44 |
+
js = processor.token2json(sequence)
|
45 |
+
return pd.json_normalize(js)
|
46 |
|
47 |
demo = gradio.Interface(
|
48 |
fn=process_document,
|