Upload preprocessor_config.json
Browse files - preprocessor_config.json (+60 -13)
preprocessor_config.json
CHANGED
|
@@ -1,39 +1,86 @@
|
|
| 1 |
{
|
| 2 |
-
"auto_map": {
|
| 3 |
-
"AutoProcessor": "processing_florence2.Florence2Processor"
|
| 4 |
-
},
|
| 5 |
"_valid_processor_keys": [
|
| 6 |
"images",
|
| 7 |
"do_resize",
|
| 8 |
"size",
|
| 9 |
"resample",
|
|
|
|
|
|
|
| 10 |
"do_rescale",
|
| 11 |
"rescale_factor",
|
| 12 |
"do_normalize",
|
| 13 |
"image_mean",
|
| 14 |
"image_std",
|
|
|
|
| 15 |
"return_tensors",
|
| 16 |
"data_format",
|
| 17 |
-
"input_data_format",
|
| 18 |
-
"do_convert_rgb"
|
| 19 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
"do_convert_rgb": null,
|
| 21 |
"do_normalize": true,
|
| 22 |
"do_rescale": true,
|
| 23 |
"do_resize": true,
|
| 24 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"image_processor_type": "CLIPImageProcessor",
|
| 26 |
"image_seq_length": 577,
|
| 27 |
-
"
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
| 29 |
"processor_class": "Florence2Processor",
|
| 30 |
"resample": 3,
|
|
|
|
| 31 |
"size": {
|
| 32 |
-
"height": 768,
|
| 33 |
-
"width":768
|
| 34 |
-
},
|
| 35 |
-
"crop_size": {
|
| 36 |
"height": 768,
|
| 37 |
"width": 768
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
-
}
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
| 2 |
"_valid_processor_keys": [
|
| 3 |
"images",
|
| 4 |
"do_resize",
|
| 5 |
"size",
|
| 6 |
"resample",
|
| 7 |
+
"do_center_crop",
|
| 8 |
+
"crop_size",
|
| 9 |
"do_rescale",
|
| 10 |
"rescale_factor",
|
| 11 |
"do_normalize",
|
| 12 |
"image_mean",
|
| 13 |
"image_std",
|
| 14 |
+
"do_convert_rgb",
|
| 15 |
"return_tensors",
|
| 16 |
"data_format",
|
| 17 |
+
"input_data_format"
|
|
|
|
| 18 |
],
|
| 19 |
+
"auto_map": {
|
| 20 |
+
"AutoProcessor": "processing_florence2.Florence2Processor"
|
| 21 |
+
},
|
| 22 |
+
"crop_size": {
|
| 23 |
+
"height": 768,
|
| 24 |
+
"width": 768
|
| 25 |
+
},
|
| 26 |
+
"do_center_crop": false,
|
| 27 |
"do_convert_rgb": null,
|
| 28 |
"do_normalize": true,
|
| 29 |
"do_rescale": true,
|
| 30 |
"do_resize": true,
|
| 31 |
+
"image_mean": [
|
| 32 |
+
0.485,
|
| 33 |
+
0.456,
|
| 34 |
+
0.406
|
| 35 |
+
],
|
| 36 |
"image_processor_type": "CLIPImageProcessor",
|
| 37 |
"image_seq_length": 577,
|
| 38 |
+
"image_std": [
|
| 39 |
+
0.229,
|
| 40 |
+
0.224,
|
| 41 |
+
0.225
|
| 42 |
+
],
|
| 43 |
"processor_class": "Florence2Processor",
|
| 44 |
"resample": 3,
|
| 45 |
+
"rescale_factor": 0.00392156862745098,
|
| 46 |
"size": {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
"height": 768,
|
| 48 |
"width": 768
|
| 49 |
+
},
|
| 50 |
+
"tasks_answer_post_processing_type": {
|
| 51 |
+
"<OCR>": "pure_text",
|
| 52 |
+
"<OCR_WITH_REGION>": "ocr",
|
| 53 |
+
"<CAPTION>": "pure_text",
|
| 54 |
+
"<DETAILED_CAPTION>": "pure_text",
|
| 55 |
+
"<MORE_DETAILED_CAPTION>": "pure_text",
|
| 56 |
+
"<OD>": "description_with_bboxes",
|
| 57 |
+
"<DENSE_REGION_CAPTION>": "description_with_bboxes",
|
| 58 |
+
"<CAPTION_TO_PHRASE_GROUNDING>": "phrase_grounding",
|
| 59 |
+
"<REFERRING_EXPRESSION_SEGMENTATION>": "polygons",
|
| 60 |
+
"<REGION_TO_SEGMENTATION>": "polygons",
|
| 61 |
+
"<OPEN_VOCABULARY_DETECTION>": "description_with_bboxes_or_polygons",
|
| 62 |
+
"<REGION_TO_CATEGORY>": "pure_text",
|
| 63 |
+
"<REGION_TO_DESCRIPTION>": "pure_text",
|
| 64 |
+
"<REGION_TO_OCR>": "pure_text",
|
| 65 |
+
"<REGION_PROPOSAL>": "bboxes"
|
| 66 |
+
},
|
| 67 |
+
"task_prompts_without_inputs": {
|
| 68 |
+
"<OCR>": "What is the text in the image?",
|
| 69 |
+
"<OCR_WITH_REGION>": "What is the text in the image, with regions?",
|
| 70 |
+
"<CAPTION>": "What does the image describe?",
|
| 71 |
+
"<DETAILED_CAPTION>": "Describe in detail what is shown in the image.",
|
| 72 |
+
"<MORE_DETAILED_CAPTION>": "Describe with a paragraph what is shown in the image.",
|
| 73 |
+
"<OD>": "Locate the objects with category name in the image.",
|
| 74 |
+
"<DENSE_REGION_CAPTION>": "Locate the objects in the image, with their descriptions.",
|
| 75 |
+
"<REGION_PROPOSAL>": "Locate the region proposals in the image."
|
| 76 |
+
},
|
| 77 |
+
"task_prompts_with_input": {
|
| 78 |
+
"<CAPTION_TO_PHRASE_GROUNDING>": "Locate the phrases in the caption: {input}",
|
| 79 |
+
"<REFERRING_EXPRESSION_SEGMENTATION>": "Locate {input} in the image with mask",
|
| 80 |
+
"<REGION_TO_SEGMENTATION>": "What is the polygon mask of region {input}",
|
| 81 |
+
"<OPEN_VOCABULARY_DETECTION>": "Locate {input} in the image.",
|
| 82 |
+
"<REGION_TO_CATEGORY>": "What is the region {input}?",
|
| 83 |
+
"<REGION_TO_DESCRIPTION>": "What does the region {input} describe?",
|
| 84 |
+
"<REGION_TO_OCR>": "What text is in the region {input}?"
|
| 85 |
}
|
| 86 |
+
}
|