Keisuke Yamanaka - CNC committed on
Commit
92ea1dc
·
1 Parent(s): 4e09058

update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -89,12 +89,15 @@ class CNC_QA:
89
  fpath, fname = split(fullpath)
90
  fpath += '/'
91
  # Get elements
 
92
  raw_pdf_elements = self.extract_pdf_elements(fpath, fname)
93
 
94
  # Get text, tables
 
95
  texts, tables = self.categorize_elements(raw_pdf_elements)
96
 
97
  # Optional: Enforce a specific token size for texts
 
98
  text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
99
  chunk_size=4000, chunk_overlap=0
100
  )
@@ -102,10 +105,12 @@ class CNC_QA:
102
  texts_4k_token = text_splitter.split_text(joined_texts)
103
 
104
  # Get text, table summaries
 
105
  text_summaries, table_summaries = self.generate_text_summaries(
106
  texts_4k_token, tables, summarize_texts=True
107
  )
108
 
 
109
  img_base64_list, image_summaries = self.generate_img_summaries(fpath)
110
  return text_summaries,texts,table_summaries,tables,image_summaries,img_base64_list
111
 
 
89
  fpath, fname = split(fullpath)
90
  fpath += '/'
91
  # Get elements
92
+ print('Get elements')
93
  raw_pdf_elements = self.extract_pdf_elements(fpath, fname)
94
 
95
  # Get text, tables
96
+ print('Get text, tables')
97
  texts, tables = self.categorize_elements(raw_pdf_elements)
98
 
99
  # Optional: Enforce a specific token size for texts
100
+ print('Optional: Enforce a specific token size for texts')
101
  text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
102
  chunk_size=4000, chunk_overlap=0
103
  )
 
105
  texts_4k_token = text_splitter.split_text(joined_texts)
106
 
107
  # Get text, table summaries
108
+ print('Get text, table summaries')
109
  text_summaries, table_summaries = self.generate_text_summaries(
110
  texts_4k_token, tables, summarize_texts=True
111
  )
112
 
113
+ print('Image summaries')
114
  img_base64_list, image_summaries = self.generate_img_summaries(fpath)
115
  return text_summaries,texts,table_summaries,tables,image_summaries,img_base64_list
116