from magic_pdf.libs.commons import fitz
from magic_pdf.libs.coordinate_transform import get_scale_ratio


# Layout category id that this module collects as a footer.
_FOOTER_CATEGORY_ID = 6
# Detections scoring below this value are ignored.
_MIN_FOOTER_SCORE = 0.3


def parse_footers(page_ID: int, page: fitz.Page, json_from_DocXchain_obj: dict):
    """Return the footer bounding boxes detected on one page.

    Every entry of ``json_from_DocXchain_obj['layout_dets']`` whose
    ``category_id`` is 6 and whose ``score`` is at least 0.3 is kept. Its
    ``poly`` coordinates are divided by the ratios returned by
    ``get_scale_ratio(json_from_DocXchain_obj, page)`` and normalised so that
    left <= right and top <= bottom.

    :param page_ID: index of the current page within the PDF document. It is
        not used by the computation and is kept so existing callers keep
        working.
    :param page: the current page as read by fitz; only passed through to
        ``get_scale_ratio``.
    :param json_from_DocXchain_obj: the DocXChain layout result for this page,
        already loaded as a dict. Must contain a ``'layout_dets'`` list whose
        items carry ``'poly'``, ``'category_id'`` and ``'score'``.
    :return: list of ``(L, U, R, D)`` tuples, sorted by ``U`` and then ``L``.
    """
    horizontal_scale_ratio, vertical_scale_ratio = get_scale_ratio(
        json_from_DocXchain_obj, page
    )

    footer_bboxes = []
    for detection in json_from_DocXchain_obj['layout_dets']:
        # Filter first so coordinates are only computed for footers.
        if detection['category_id'] != _FOOTER_CATEGORY_ID:
            continue
        if not detection['score'] >= _MIN_FOOTER_SCORE:
            continue

        poly = detection['poly']
        # poly[0], poly[1] and poly[2], poly[5] are used as two opposite
        # corners (presumably a flattened 4-point quadrilateral -- confirm
        # against the DocXChain output format).
        x_a = poly[0] / horizontal_scale_ratio
        y_a = poly[1] / vertical_scale_ratio
        x_b = poly[2] / horizontal_scale_ratio
        y_b = poly[5] / vertical_scale_ratio

        # Normalise so the tuple is always (left, top, right, bottom).
        footer_bboxes.append(
            (min(x_a, x_b), min(y_a, y_b), max(x_a, x_b), max(y_a, y_b))
        )

    # Sort by top edge, then left edge; the sort is stable, so boxes with
    # equal keys keep their detection order.
    return sorted(footer_bboxes, key=lambda bbox: (bbox[1], bbox[0]))