Trying to Run on Windows: Is Triton Required?
#43 · opened by buckeye17-bah
I'm trying to run this model on Windows 11. I'm able to run several other HF models in the same Python virtual environment, but this model seems to have a special requirement for Triton, which doesn't officially support Windows. Can anyone confirm that Triton is required, or does anyone know a workaround? Reading the error message, the line that fails to execute is the `AutoModelForVision2Seq.from_pretrained` call. I'm simply trying to run the example code provided in the README for "Single page image inference using Transformers", and the error occurs whether `DEVICE` is set to `"cuda"` or `"cpu"`.
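For reference, here is the relevant part of the snippet I'm running, reconstructed from the failing cell shown in the traceback (the message construction and generation code is elided, and my `DEVICE` definition is an assumption about what the README uses):

```python
import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

# Assumption: the README defines DEVICE along these lines.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize processor and model. The from_pretrained call below is the line
# that raises the error, even when the "eager" branch is taken.
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
model = AutoModelForVision2Seq.from_pretrained(
    "ds4sd/SmolDocling-256M-preview",
    torch_dtype=torch.bfloat16,
    _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
).to(DEVICE)
```

Here's the error message I get: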
```
ImportError Traceback (most recent call last)
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\utils\import_utils.py:1967, in _LazyModule._get_module(self, module_name)
1966 try:
-> 1967 return importlib.import_module("." + module_name, self.__name__)
1968 except Exception as e:
File C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.12_3.12.2800.0_x64__qbz5n2kfra8p0\Lib\importlib\__init__.py:90, in import_module(name, package)
89 level += 1
---> 90 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1387, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1360, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1331, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:935, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:999, in exec_module(self, module)
File <frozen importlib._bootstrap>:488, in _call_with_frames_removed(f, *args, **kwds)
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\models\idefics3\modeling_idefics3.py:30
29 from ...modeling_outputs import BaseModelOutput, ModelOutput
---> 30 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
31 from ...utils import (
32 add_start_docstrings,
33 add_start_docstrings_to_model_forward,
34 logging,
35 replace_return_docstrings,
36 )
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\modeling_utils.py:62
61 from .integrations.deepspeed import _load_state_dict_into_zero3_model, is_deepspeed_available
---> 62 from .integrations.flash_attention import flash_attention_forward
63 from .integrations.flex_attention import flex_attention_forward
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\integrations\flash_attention.py:5
3 import torch
----> 5 from ..modeling_flash_attention_utils import _flash_attention_forward, flash_attn_supports_top_left_mask
8 _use_top_left_mask = flash_attn_supports_top_left_mask()
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\modeling_flash_attention_utils.py:38
37 from flash_attn import flash_attn_func, flash_attn_varlen_func
---> 38 from flash_attn.layers.rotary import apply_rotary_emb # noqa
41 # patch functions in package `flash-attn` when using flash-attention on Ascend NPU.
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\flash_attn\layers\rotary.py:8
7 from einops import rearrange, repeat
----> 8 from flash_attn.ops.triton.rotary import apply_rotary
11 def rotate_half(x, interleaved=False):
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\flash_attn\ops\triton\rotary.py:7
5 import torch
----> 7 import triton
8 import triton.language as tl
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\__init__.py:8
4 # ---------------------------------------
5 # Note: import order is significant here.
6
7 # submodules
----> 8 from .runtime import (
9 autotune,
10 Config,
11 heuristics,
12 JITFunction,
13 KernelInterface,
14 reinterpret,
15 TensorWrapper,
16 OutOfResources,
17 InterpreterError,
18 MockTensor,
19 )
20 from .runtime.jit import jit
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\runtime\__init__.py:1
----> 1 from .autotuner import (Autotuner, Config, Heuristics, autotune, heuristics)
2 from .cache import RedisRemoteCacheBackend, RemoteCacheBackend
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\runtime\autotuner.py:9
7 from typing import Dict, Tuple, List, Optional
----> 9 from .jit import KernelInterface
10 from .errors import OutOfResources, PTXASError
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\runtime\jit.py:12
11 from typing import Callable, Generic, Iterable, Optional, TypeVar, Union, overload, Dict, Any, Tuple
---> 12 from ..runtime.driver import driver
13 from types import ModuleType
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\runtime\driver.py:1
----> 1 from ..backends import backends
2 from ..backends import DriverBase
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\backends\__init__.py:50
47 return backends
---> 50 backends = _discover_backends()
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\backends\__init__.py:43, in _discover_backends()
42 continue
---> 43 compiler = _load_module(name, os.path.join(root, name, 'compiler.py'))
44 driver = _load_module(name, os.path.join(root, name, 'driver.py'))
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\backends\__init__.py:12, in _load_module(name, path)
11 module = importlib.util.module_from_spec(spec)
---> 12 spec.loader.exec_module(module)
13 return module
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\triton\backends\amd\compiler.py:2
1 from triton.backends.compiler import BaseBackend, GPUTarget
----> 2 from triton._C.libtriton import ir, passes, llvm, amd
3 from dataclasses import dataclass
ImportError: DLL load failed while importing libtriton: A dynamic link library (DLL) initialization routine failed.
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
Cell In[1], line 30
28 # Initialize processor and model
29 processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
---> 30 model = AutoModelForVision2Seq.from_pretrained(
31 "ds4sd/SmolDocling-256M-preview",
32 torch_dtype=torch.bfloat16,
33 _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
34 ).to(DEVICE)
36 # Create input messages
37 messages = [
38 {
39 "role": "user",
(...) 44 },
45 ]
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\models\auto\auto_factory.py:568, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
564 return model_class.from_pretrained(
565 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
566 )
567 elif type(config) in cls._model_mapping.keys():
--> 568 model_class = _get_model_class(config, cls._model_mapping)
569 if model_class.config_class == config.sub_configs.get("text_config", None):
570 config = config.get_text_config()
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\models\auto\auto_factory.py:388, in _get_model_class(config, model_mapping)
387 def _get_model_class(config, model_mapping):
--> 388 supported_models = model_mapping[type(config)]
389 if not isinstance(supported_models, (list, tuple)):
390 return supported_models
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\models\auto\auto_factory.py:770, in _LazyAutoMapping.__getitem__(self, key)
768 if model_type in self._model_mapping:
769 model_name = self._model_mapping[model_type]
--> 770 return self._load_attr_from_module(model_type, model_name)
772 # Maybe there was several model types associated with this config.
773 model_types = [k for k, v in self._config_mapping.items() if v == key.__name__]
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\models\auto\auto_factory.py:784, in _LazyAutoMapping._load_attr_from_module(self, model_type, attr)
782 if module_name not in self._modules:
783 self._modules[module_name] = importlib.import_module(f".{module_name}", "transformers.models")
--> 784 return getattribute_from_module(self._modules[module_name], attr)
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\models\auto\auto_factory.py:700, in getattribute_from_module(module, attr)
698 if isinstance(attr, tuple):
699 return tuple(getattribute_from_module(module, a) for a in attr)
--> 700 if hasattr(module, attr):
701 return getattr(module, attr)
702 # Some of the mappings have entries model_type -> object of another model type. In that case we try to grab the
703 # object at the top level.
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\utils\import_utils.py:1955, in _LazyModule.__getattr__(self, name)
1953 value = Placeholder
1954 elif name in self._class_to_module.keys():
-> 1955 module = self._get_module(self._class_to_module[name])
1956 value = getattr(module, name)
1957 elif name in self._modules:
File ~\git_repos\VLM-OCR-Pipeline\envs\smol_docling\Lib\site-packages\transformers\utils\import_utils.py:1969, in _LazyModule._get_module(self, module_name)
1967 return importlib.import_module("." + module_name, self.__name__)
1968 except Exception as e:
-> 1969 raise RuntimeError(
1970 f"Failed to import {self.__name__}.{module_name} because of the following error (look up to see its"
1971 f" traceback):\n{e}"
1972 ) from e
RuntimeError: Failed to import transformers.models.idefics3.modeling_idefics3 because of the following error (look up to see its traceback):
DLL load failed while importing libtriton: A dynamic link library (DLL) initialization routine failed.
```
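In case it helps anyone diagnose this: my reading of the traceback is that `transformers` imports its flash-attention integration at module load time, which imports `flash_attn`, which imports `triton`, whose Windows DLL fails to initialize. If that's right, Triton isn't a hard requirement of the model itself, just a consequence of having `flash-attn` installed in the environment. A minimal sketch of the two workarounds I intend to try, both untested assumptions on my part:

```python
# Workaround 1 (untested assumption): remove the packages that pull in Triton.
# The traceback suggests flash_attn is only imported because it's installed:
#
#     pip uninstall flash-attn triton
#
# Workaround 2 (untested assumption): request PyTorch's built-in SDPA kernels
# instead of flash-attention. Caveat: the traceback suggests the flash_attn
# import fires at module load regardless of the implementation chosen, so this
# alone may not help while a broken flash-attn/triton install is still present.
import torch
from transformers import AutoModelForVision2Seq

model = AutoModelForVision2Seq.from_pretrained(
    "ds4sd/SmolDocling-256M-preview",
    torch_dtype=torch.bfloat16,
    _attn_implementation="sdpa",  # avoids the flash-attn/triton code path
).to("cpu")
```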