Shak33l-UiRev committed on
Commit
5a29686
·
verified ·
1 Parent(s): 956f2af
Files changed (1) hide show
  1. app.py +114 -28
app.py CHANGED
@@ -15,6 +15,15 @@ import io
15
  import base64
16
  import json
17
  from datetime import datetime
 
 
 
 
 
 
 
 
 
18
 
19
  @st.cache_resource
20
  def load_model(model_name):
@@ -27,9 +36,72 @@ def load_model(model_name):
27
  dict: Dictionary containing model components
28
  """
29
  try:
30
- if model_name == "Donut":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base")
32
  model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base")
 
33
  # Configure Donut specific parameters
34
  model.config.decoder_start_token_id = processor.tokenizer.bos_token_id
35
  model.config.pad_token_id = processor.tokenizer.pad_token_id
@@ -42,34 +114,13 @@ def load_model(model_name):
42
  model = LayoutLMv3ForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
43
 
44
  return {'model': model, 'processor': processor}
45
-
46
- elif model_name == "OmniParser":
47
- # Load YOLO model for icon detection
48
- yolo_model = YOLO("microsoft/OmniParser-icon-detection")
49
-
50
- # Load Florence-2 processor and model for captioning
51
- processor = AutoProcessor.from_pretrained(
52
- "microsoft/OmniParser-caption",
53
- trust_remote_code=True
54
- )
55
-
56
- # Load the captioning model
57
- caption_model = AutoModelForCausalLM.from_pretrained(
58
- "microsoft/OmniParser-caption",
59
- trust_remote_code=True
60
- )
61
-
62
- return {
63
- 'yolo': yolo_model,
64
- 'processor': processor,
65
- 'model': caption_model
66
- }
67
 
68
  else:
69
  raise ValueError(f"Unknown model name: {model_name}")
70
 
71
  except Exception as e:
72
  st.error(f"Error loading model {model_name}: {str(e)}")
 
73
  return None
74
 
75
  @spaces.GPU
@@ -357,16 +408,20 @@ if uploaded_file is not None and selected_model:
357
  st.info("Loading model...")
358
 
359
  add_debug(f"Loading {selected_model} model and processor...")
360
- model, processor = load_model(selected_model)
361
 
362
- if model is None or processor is None:
363
  with result_col:
364
  st.error("Failed to load model. Please try again.")
365
  add_debug("Model loading failed!", "error")
366
  else:
367
  add_debug("Model loaded successfully", "success")
368
- add_debug(f"Model device: {next(model.parameters()).device}")
369
- add_debug(f"Model memory usage: {torch.cuda.max_memory_allocated()/1024**2:.2f}MB") if torch.cuda.is_available() else None
 
 
 
 
370
 
371
  # Update progress
372
  with result_col:
@@ -379,7 +434,7 @@ if uploaded_file is not None and selected_model:
379
 
380
  # Analyze document
381
  add_debug("Starting document analysis...")
382
- results = analyze_document(image, selected_model, model, processor)
383
  add_debug("Analysis completed", "success")
384
 
385
  # Update progress
@@ -425,6 +480,37 @@ if uploaded_file is not None and selected_model:
425
  add_debug("Traceback available in logs", "warning")
426
 
427
  # Add improved information about usage and limitations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  st.markdown("""
429
  ---
430
  ### Usage Notes:
 
15
  import base64
16
  import json
17
  from datetime import datetime
18
import os
import logging

# One-time logging setup so model-loading failures are captured with
# timestamps in the Space logs as well as surfaced in the Streamlit UI.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
27
 
28
  @st.cache_resource
29
  def load_model(model_name):
 
36
  dict: Dictionary containing model components
37
  """
38
  try:
39
+ if model_name == "OmniParser":
40
+ try:
41
+ # First try loading from HuggingFace Hub with correct repository structure
42
+ yolo_model = YOLO("microsoft/OmniParser/icon_detect") # Updated path
43
+
44
+ processor = AutoProcessor.from_pretrained(
45
+ "microsoft/OmniParser/icon_caption_florence", # Updated path
46
+ trust_remote_code=True
47
+ )
48
+
49
+ caption_model = AutoModelForCausalLM.from_pretrained(
50
+ "microsoft/OmniParser/icon_caption_florence", # Updated path
51
+ trust_remote_code=True,
52
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
53
+ )
54
+
55
+ if torch.cuda.is_available():
56
+ caption_model = caption_model.to("cuda")
57
+
58
+ st.success("Successfully loaded OmniParser models")
59
+ return {
60
+ 'yolo': yolo_model,
61
+ 'processor': processor,
62
+ 'model': caption_model
63
+ }
64
+
65
+ except Exception as e:
66
+ st.error(f"Failed to load OmniParser from HuggingFace Hub: {str(e)}")
67
+
68
+ # Try loading from local weights if available
69
+ weights_path = "weights"
70
+ if os.path.exists(os.path.join(weights_path, "icon_detect/model.safetensors")):
71
+ st.info("Attempting to load from local weights...")
72
+
73
+ yolo_model = YOLO(os.path.join(weights_path, "icon_detect/model.safetensors"))
74
+
75
+ processor = AutoProcessor.from_pretrained(
76
+ os.path.join(weights_path, "icon_caption_florence"),
77
+ trust_remote_code=True,
78
+ local_files_only=True
79
+ )
80
+
81
+ caption_model = AutoModelForCausalLM.from_pretrained(
82
+ os.path.join(weights_path, "icon_caption_florence"),
83
+ trust_remote_code=True,
84
+ local_files_only=True,
85
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
86
+ )
87
+
88
+ if torch.cuda.is_available():
89
+ caption_model = caption_model.to("cuda")
90
+
91
+ st.success("Successfully loaded OmniParser from local weights")
92
+ return {
93
+ 'yolo': yolo_model,
94
+ 'processor': processor,
95
+ 'model': caption_model
96
+ }
97
+ else:
98
+ st.error("Could not find local weights and HuggingFace Hub loading failed")
99
+ raise ValueError("No valid model weights found for OmniParser")
100
+
101
+ elif model_name == "Donut":
102
  processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base")
103
  model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base")
104
+
105
  # Configure Donut specific parameters
106
  model.config.decoder_start_token_id = processor.tokenizer.bos_token_id
107
  model.config.pad_token_id = processor.tokenizer.pad_token_id
 
114
  model = LayoutLMv3ForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
115
 
116
  return {'model': model, 'processor': processor}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  else:
119
  raise ValueError(f"Unknown model name: {model_name}")
120
 
121
  except Exception as e:
122
  st.error(f"Error loading model {model_name}: {str(e)}")
123
+ logger.error(f"Error details: {str(e)}", exc_info=True)
124
  return None
125
 
126
  @spaces.GPU
 
408
  st.info("Loading model...")
409
 
410
  add_debug(f"Loading {selected_model} model and processor...")
411
+ models_dict = load_model(selected_model)
412
 
413
+ if models_dict is None:
414
  with result_col:
415
  st.error("Failed to load model. Please try again.")
416
  add_debug("Model loading failed!", "error")
417
  else:
418
  add_debug("Model loaded successfully", "success")
419
# NOTE(review): the original branched on selected_model == "OmniParser",
# but both branches were byte-identical — every loader stores its network
# under the 'model' key, so a single lookup covers all cases.
model_device = next(models_dict['model'].parameters()).device
add_debug(f"Model device: {model_device}")
425
 
426
  # Update progress
427
  with result_col:
 
434
 
435
  # Analyze document
436
  add_debug("Starting document analysis...")
437
+ results = analyze_document(image, selected_model, models_dict)
438
  add_debug("Analysis completed", "success")
439
 
440
  # Update progress
 
480
  add_debug("Traceback available in logs", "warning")
481
 
482
  # Add improved information about usage and limitations
483
def verify_weights_directory(weights_path="weights"):
    """Verify that every file the OmniParser local-weights fallback needs exists.

    Args:
        weights_path (str): Root directory expected to contain the
            ``icon_detect`` and ``icon_caption_florence`` subdirectories.
            Defaults to ``"weights"``, the same path the loader falls back to.

    Returns:
        bool: True when all required files are present; otherwise False,
        after listing each missing file in the Streamlit UI.
    """
    # File path -> human-readable description; these are exactly the
    # artifacts the local-weights fallback expects to find on disk.
    required_files = {
        os.path.join(weights_path, "icon_detect", "model.safetensors"): "YOLO model weights",
        os.path.join(weights_path, "icon_detect", "model.yaml"): "YOLO model config",
        os.path.join(weights_path, "icon_caption_florence", "model.safetensors"): "Florence model weights",
        os.path.join(weights_path, "icon_caption_florence", "config.json"): "Florence model config",
        os.path.join(weights_path, "icon_caption_florence", "generation_config.json"): "Florence generation config",
    }

    missing_files = [
        f"{description} at {file_path}"
        for file_path, description in required_files.items()
        if not os.path.exists(file_path)
    ]

    if missing_files:
        # Surface the exact gaps so the user can fix the directory layout.
        st.warning("Missing required model files:")
        for missing in missing_files:
            st.write(f"- {missing}")
        return False

    return True
506
+
507
# Optional diagnostic: lets the user confirm the OmniParser weight files
# are present on disk before attempting to load the models.
if st.checkbox("Check Model Files"):
    if verify_weights_directory():
        st.success("All required model files are present")
    else:
        st.error("Some model files are missing. Please ensure all required files are in the weights directory")
513
+
514
  st.markdown("""
515
  ---
516
  ### Usage Notes: