Triton version

#23
by JiayiJennie - opened

May I know which Triton version is required to run the model? I am using triton 2.2.0.

When I pass input to the model as the demo code shows: hidden_states = model(inputs)[0]
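For reference, this is roughly what I run (based on the demo code from the model card, using the zhihan1996/DNABERT-S checkpoint):

import torch
import triton
from transformers import AutoTokenizer, AutoModel

print(triton.__version__)  # 2.2.0 in my environment

tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-S", trust_remote_code=True)
model = AutoModel.from_pretrained("zhihan1996/DNABERT-S", trust_remote_code=True)

dna = "ACGTAGCATCGGATCTATCTATCGAC"
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
inputs = tokenizer(dna, return_tensors="pt")["input_ids"].to(device)

hidden_states = model(inputs)[0]  # [1, sequence_length, 768]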
I get the error below:

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1228, in ast_to_ttir(fn, signature, specialization, constants, debug, target)
1227 try:
-> 1228 generator.visit(fn.parse())
1229 except CompilationError as e:

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1105, in CodeGenerator.visit(self, node)
1104 last_loc = self.builder.get_loc()
-> 1105 ret = super().visit(node)
1106 # Reset the location to the last one before the visit

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:418, in NodeVisitor.visit(self, node)
417 visitor = getattr(self, method, self.generic_visit)
--> 418 return visitor(node)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:303, in CodeGenerator.visit_Module(self, node)
302 def visit_Module(self, node):
--> 303 ast.NodeVisitor.generic_visit(self, node)

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:426, in NodeVisitor.generic_visit(self, node)
425 if isinstance(item, AST):
--> 426 self.visit(item)
427 elif isinstance(value, AST):

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1105, in CodeGenerator.visit(self, node)
1104 last_loc = self.builder.get_loc()
-> 1105 ret = super().visit(node)
1106 # Reset the location to the last one before the visit

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:418, in NodeVisitor.visit(self, node)
417 visitor = getattr(self, method, self.generic_visit)
--> 418 return visitor(node)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:376, in CodeGenerator.visit_FunctionDef(self, node)
375 # visit function body
--> 376 self.visit_compound_statement(node.body)
377 # finalize function

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:298, in CodeGenerator.visit_compound_statement(self, stmts)
297 for stmt in stmts:
--> 298 ret_type = self.visit(stmt)
299 if ret_type is not None and isinstance(stmt, ast.Return):

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1105, in CodeGenerator.visit(self, node)
1104 last_loc = self.builder.get_loc()
-> 1105 ret = super().visit(node)
1106 # Reset the location to the last one before the visit

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:418, in NodeVisitor.visit(self, node)
417 visitor = getattr(self, method, self.generic_visit)
--> 418 return visitor(node)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:885, in CodeGenerator.visit_For(self, node)
884 self.scf_stack.append(node)
--> 885 self.visit_compound_statement(node.body)
886 self.scf_stack.pop()

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:298, in CodeGenerator.visit_compound_statement(self, stmts)
297 for stmt in stmts:
--> 298 ret_type = self.visit(stmt)
299 if ret_type is not None and isinstance(stmt, ast.Return):

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1105, in CodeGenerator.visit(self, node)
1104 last_loc = self.builder.get_loc()
-> 1105 ret = super().visit(node)
1106 # Reset the location to the last one before the visit

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:418, in NodeVisitor.visit(self, node)
417 visitor = getattr(self, method, self.generic_visit)
--> 418 return visitor(node)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:448, in CodeGenerator.visit_AugAssign(self, node)
447 assign = ast.Assign(targets=[node.target], value=rhs)
--> 448 self.visit(assign)
449 return self.dereference_name(name)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1105, in CodeGenerator.visit(self, node)
1104 last_loc = self.builder.get_loc()
-> 1105 ret = super().visit(node)
1106 # Reset the location to the last one before the visit

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:418, in NodeVisitor.visit(self, node)
417 visitor = getattr(self, method, self.generic_visit)
--> 418 return visitor(node)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:428, in CodeGenerator.visit_Assign(self, node)
427 names = _names[0]
--> 428 values = self.visit(node.value)
429 if not _is_list_like(names):

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1105, in CodeGenerator.visit(self, node)
1104 last_loc = self.builder.get_loc()
-> 1105 ret = super().visit(node)
1106 # Reset the location to the last one before the visit

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:418, in NodeVisitor.visit(self, node)
417 visitor = getattr(self, method, self.generic_visit)
--> 418 return visitor(node)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:477, in CodeGenerator.visit_BinOp(self, node)
476 lhs = self.visit(node.left)
--> 477 rhs = self.visit(node.right)
478 method_name = self._method_name_for_bin_op.get(type(node.op))

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1105, in CodeGenerator.visit(self, node)
1104 last_loc = self.builder.get_loc()
-> 1105 ret = super().visit(node)
1106 # Reset the location to the last one before the visit

File ~/miniconda3/envs/DNABERT/lib/python3.10/ast.py:418, in NodeVisitor.visit(self, node)
417 visitor = getattr(self, method, self.generic_visit)
--> 418 return visitor(node)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1027, in CodeGenerator.visit_Call(self, node)
1026 extra_kwargs['_generator'] = self
-> 1027 return fn(*args, **extra_kwargs, **kws)
1028 if fn in self.builtin_namespace.values():

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/language/core.py:27, in builtin.<locals>.wrapper(*args, **kwargs)
25 raise ValueError("Did you forget to add @triton.jit ? "
26 "(_builder argument must be provided outside of JIT functions.)")
---> 27 return fn(*args, **kwargs)

TypeError: dot() got an unexpected keyword argument 'trans_b'

The above exception was the direct cause of the following exception:

CompilationError Traceback (most recent call last)
Cell In[2], line 8
5 model.to(device)
6 inputs = inputs.to(device)
----> 8 hidden_states = model(inputs)[0] # [1, sequence_length, 768]
10 # embedding with mean pooling
11 # embedding_mean = torch.mean(hidden_states[0], dim=0)
12 # print(embedding_mean.shape) # expect to be 768

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-S/1cdf84d992ace6f3e75c7356774b4da088c8dc7c/bert_layers.py:608, in BertModel.forward(self, input_ids, token_type_ids, attention_mask, position_ids, output_all_encoded_layers, masked_tokens_mask, **kwargs)
605 first_col_mask[:, 0] = True
606 subset_mask = masked_tokens_mask | first_col_mask
--> 608 encoder_outputs = self.encoder(
609 embedding_output,
610 attention_mask,
611 output_all_encoded_layers=output_all_encoded_layers,
612 subset_mask=subset_mask)
614 if masked_tokens_mask is None:
615 sequence_output = encoder_outputs[-1]

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-S/1cdf84d992ace6f3e75c7356774b4da088c8dc7c/bert_layers.py:446, in BertEncoder.forward(self, hidden_states, attention_mask, output_all_encoded_layers, subset_mask)
444 if subset_mask is None:
445 for layer_module in self.layer:
--> 446 hidden_states = layer_module(hidden_states,
447 cu_seqlens,
448 seqlen,
449 None,
450 indices,
451 attn_mask=attention_mask,
452 bias=alibi_attn_mask)
453 if output_all_encoded_layers:
454 all_encoder_layers.append(hidden_states)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-S/1cdf84d992ace6f3e75c7356774b4da088c8dc7c/bert_layers.py:327, in BertLayer.forward(self, hidden_states, cu_seqlens, seqlen, subset_idx, indices, attn_mask, bias)
305 def forward(
306 self,
307 hidden_states: torch.Tensor,
(...)
313 bias: Optional[torch.Tensor] = None,
314 ) -> torch.Tensor:
315 """Forward pass for a BERT layer, including both attention and MLP.
316
317 Args:
(...)
325 bias: None or (batch, heads, max_seqlen_in_batch, max_seqlen_in_batch)
326 """
--> 327 attention_output = self.attention(hidden_states, cu_seqlens, seqlen,
328 subset_idx, indices, attn_mask, bias)
329 layer_output = self.mlp(attention_output)
330 return layer_output

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-S/1cdf84d992ace6f3e75c7356774b4da088c8dc7c/bert_layers.py:240, in BertUnpadAttention.forward(self, input_tensor, cu_seqlens, max_s, subset_idx, indices, attn_mask, bias)
218 def forward(
219 self,
220 input_tensor: torch.Tensor,
(...)
226 bias: Optional[torch.Tensor] = None,
227 ) -> torch.Tensor:
228 """Forward pass for scaled self-attention without padding.
229
230 Arguments:
(...)
238 bias: None or (batch, heads, max_seqlen_in_batch, max_seqlen_in_batch)
239 """
--> 240 self_output = self.self(input_tensor, cu_seqlens, max_s, indices,
241 attn_mask, bias)
242 if subset_idx is not None:
243 return self.output(index_first_axis(self_output, subset_idx),
244 index_first_axis(input_tensor, subset_idx))

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-S/1cdf84d992ace6f3e75c7356774b4da088c8dc7c/bert_layers.py:181, in BertUnpadSelfAttention.forward(self, hidden_states, cu_seqlens, max_seqlen_in_batch, indices, attn_mask, bias)
179 bias_dtype = bias.dtype
180 bias = bias.to(torch.float16)
--> 181 attention = flash_attn_qkvpacked_func(qkv, bias)
182 attention = attention.to(orig_dtype)
183 bias = bias.to(bias_dtype)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/torch/autograd/function.py:553, in Function.apply(cls, *args, **kwargs)
550 if not torch._C._are_functorch_transforms_active():
551 # See NOTE: [functorch vjp and autograd interaction]
552 args = _functorch.utils.unwrap_dead_wrappers(args)
--> 553 return super().apply(*args, **kwargs) # type: ignore[misc]
555 if not is_setup_ctx_defined:
556 raise RuntimeError(
557 "In order to use an autograd.Function with functorch transforms "
558 "(vmap, grad, jvp, jacrev, ...), it must override the setup_context "
559 "staticmethod. For more details, please see "
560 "https://pytorch.org/docs/master/notes/extending.func.html"
561 )

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-S/1cdf84d992ace6f3e75c7356774b4da088c8dc7c/flash_attn_triton.py:1021, in _FlashAttnQKVPackedFunc.forward(ctx, qkv, bias, causal, softmax_scale)
1019 if qkv.stride(-1) != 1:
1020 qkv = qkv.contiguous()
-> 1021 o, lse, ctx.softmax_scale = _flash_attn_forward(
1022 qkv[:, :, 0],
1023 qkv[:, :, 1],
1024 qkv[:, :, 2],
1025 bias=bias,
1026 causal=causal,
1027 softmax_scale=softmax_scale)
1028 ctx.save_for_backward(qkv, o, lse, bias)
1029 ctx.causal = causal

File ~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-S/1cdf84d992ace6f3e75c7356774b4da088c8dc7c/flash_attn_triton.py:826, in _flash_attn_forward(q, k, v, bias, causal, softmax_scale)
823 # BLOCK = 128
824 # num_warps = 4 if d <= 64 else 8
825 grid = lambda META: (triton.cdiv(seqlen_q, META['BLOCK_M']), batch * nheads)
--> 826 _fwd_kernel[grid]( # type: ignore
827 q,
828 k,
829 v,
830 bias,
831 o,
832 lse,
833 tmp,
834 softmax_scale,
835 q.stride(0),
836 q.stride(2),
837 q.stride(1),
838 k.stride(0),
839 k.stride(2),
840 k.stride(1),
841 v.stride(0),
842 v.stride(2),
843 v.stride(1),
844 *bias_strides,
845 o.stride(0),
846 o.stride(2),
847 o.stride(1),
848 nheads,
849 seqlen_q,
850 seqlen_k,
851 seqlen_q_rounded,
852 d,
853 seqlen_q // 32,
854 seqlen_k // 32, # key for triton cache (limit number of compilations)
855 # Can't use kwargs here because triton autotune expects key to be args, not kwargs
856 # IS_CAUSAL=causal, BLOCK_HEADDIM=d,
857 bias_type,
858 causal,
859 BLOCK_HEADDIM,
860 # BLOCK_M=BLOCK, BLOCK_N=BLOCK,
861 # num_warps=num_warps,
862 # num_stages=1,
863 )
864 return o, lse, softmax_scale

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/runtime/autotuner.py:156, in Autotuner.run(self, *args, **kwargs)
154 if config.pre_hook is not None:
155 config.pre_hook(full_nargs)
--> 156 ret = self.fn.run(
157 *args,
158 num_warps=config.num_warps,
159 num_stages=config.num_stages,
160 num_ctas=config.num_ctas,
161 enable_warp_specialization=config.enable_warp_specialization,
162 **kwargs,
163 **config.kwargs,
164 )
165 self.nargs = None
166 return ret

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/runtime/autotuner.py:305, in Heuristics.run(self, *args, **kwargs)
303 for v, heur in self.values.items():
304 kwargs[v] = heur({**dict(zip(self.arg_names, args)), **kwargs})
--> 305 return self.fn.run(*args, **kwargs)

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/runtime/jit.py:532, in JITFunction.run(self, *args, **kwargs)
517 if self._call_hook(
518 key,
519 signature,
(...)
528 configs,
529 ):
530 return None
--> 532 self.cache[device][key] = compile(
533 self,
534 signature=signature,
535 device=device,
536 constants=constants,
537 num_warps=num_warps,
538 num_ctas=num_ctas,
539 num_stages=num_stages,
540 enable_warp_specialization=enable_warp_specialization,
541 enable_fp_fusion=enable_fp_fusion,
542 extern_libs=extern_libs,
543 configs=configs,
544 debug=self.debug,
545 device_type=device_type,
546 )
548 bin = self.cache[device][key]
549 if not warmup:

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/compiler.py:543, in compile(fn, **kwargs)
541 path = metadata_group.get(ir_filename)
542 if path is None:
--> 543 next_module = compile_kernel(module)
544 if ir_name == "amdgcn":
545 extra_file_name = f"{name}.hsaco_path"

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/compiler.py:435, in compile.<locals>.<lambda>(src)
432 stages = dict()
433 stages["ast"] = (lambda path: fn, None)
434 stages["ttir"] = (lambda path: parse_mlir_module(path, context), lambda src: optimize_ttir(
--> 435 ast_to_ttir(src, signature, configs[0], constants, debug=debug, target=target), target))
436 if is_cuda:
437 stages["ttgir"] = (lambda path: parse_mlir_module(path, context), lambda src: optimize_ttgir(
438 ttir_to_ttgir(src, num_warps, num_ctas, target), num_stages, num_warps, num_ctas, target, cluster_info,
439 enable_warp_specialization, enable_persistent, optimize_epilogue))

File /opt/local/stow/pip-3.10/local/lib/python3.10/dist-packages/triton/compiler/code_generator.py:1237, in ast_to_ttir(fn, signature, specialization, constants, debug, target)
1235 if node is None:
1236 raise
-> 1237 raise CompilationError(fn.src, node, repr(e)) from e
1238 ret = generator.module
1239 # module takes ownership of the context

CompilationError: at 114:24: else:
if EVEN_HEADDIM:
k = tl.load(k_ptrs + start_n * stride_kn,
mask=(start_n + offs_n)[:, None] < seqlen_k,
other=0.0)
else:
k = tl.load(k_ptrs + start_n * stride_kn,
mask=((start_n + offs_n)[:, None] < seqlen_k) &
(offs_d[None, :] < headdim),
other=0.0)
qk = tl.zeros([BLOCK_M, BLOCK_N], dtype=tl.float32)
qk += tl.dot(q, k, trans_b=True)
^
TypeError("dot() got an unexpected keyword argument 'trans_b'")

I solved the problem by using triton==2.0.0.dev20221202.
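For anyone reading later: that pre-release can be pinned with

pip install triton==2.0.0.dev20221202

The reason the pin works is that the trans_a/trans_b keywords were dropped from tl.dot() around the Triton 2.0 release, while the flash-attention kernel bundled with this model still calls tl.dot(q, k, trans_b=True), so it only compiles against the old API. On a current Triton the transpose has to be written explicitly with tl.trans(). A minimal, hypothetical sketch of the API difference (not the model's actual kernel; assumes Triton >= 2.1 and a CUDA GPU):

import torch
import triton
import triton.language as tl

@triton.jit
def dot_bt_kernel(a_ptr, b_ptr, c_ptr, M: tl.constexpr, N: tl.constexpr, K: tl.constexpr):
    # A is (M, K) row-major, B is (N, K) row-major; compute C = A @ B^T
    offs_m = tl.arange(0, M)
    offs_n = tl.arange(0, N)
    offs_k = tl.arange(0, K)
    a = tl.load(a_ptr + offs_m[:, None] * K + offs_k[None, :])
    b = tl.load(b_ptr + offs_n[:, None] * K + offs_k[None, :])
    c = tl.dot(a, tl.trans(b))  # the old API spelled this tl.dot(a, b, trans_b=True)
    tl.store(c_ptr + offs_m[:, None] * N + offs_n[None, :], c)

M, N, K = 16, 16, 16  # tl.dot needs block dimensions of at least 16
a = torch.randn(M, K, device="cuda", dtype=torch.float16)
b = torch.randn(N, K, device="cuda", dtype=torch.float16)
c = torch.empty(M, N, device="cuda", dtype=torch.float32)
dot_bt_kernel[(1,)](a, b, c, M, N, K)
print(torch.allclose(c, (a @ b.T).float(), atol=1e-2))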

I have a similar error

"TypeError("dot() got an unexpected keyword argument 'trans_b'")"

even after pinning triton==2.0.0.dev20221202, when I run:

dna = "ACGTAGCATCGGATCTATCTATCGAC"

if torch.cuda.is_available():
model.to('cuda')
inputs = tokenizer(dna, return_tensors='pt')['input_ids'].to('cuda')
else:
inputs = tokenizer(dna, return_tensors='pt')['input_ids']

hidden_states = model(inputs)[0] # [1, sequence_length, 768]

I have run into the same problem as @YiXW. Were you able to solve it?

@YiXW Thank you so much for your help. I really appreciate it. Unfortunately, it did not work out for me.

I tried triton==2.0.0.dev20221202 and it worked for me for a while. However, when I tried to reproduce the environment on my institutional cluster, I ran into this problem:
ERROR: Could not find a version that satisfies the requirement triton==2.0.0.dev20221202 (from versions: 0.4.1, 0.4.2, 1.0.0, 1.1.0, 1.1.1, 2.0.0, 2.1.0, 2.2.0, 2.3.0, 2.3.1, 3.0.0)
ERROR: No matching distribution found for triton==2.0.0.dev20221202
Could anyone suggest how to get around this, for example a repository where triton 2.0.0.dev20221202 can still be obtained and installed manually?

Thank you so much!

I was able to get it to work, and I am posting the .yml of the conda environment I created in case anyone else needs it. If you still run into the problem with this environment, follow the instructions on the DNABERT-2 GitHub to install triton from source and then remove it (the commands are below, after the .yml). Hope this is helpful.

yml file for conda:

name: dnabert2_20240725
channels:
  - conda-forge
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - ca-certificates=2024.3.11=h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.4.4=h6a678d5_1
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - ncurses=6.4=h6a678d5_0
  - openssl=3.0.13=h7f8727e_1
  - pip=24.0=py38h06a4308_0
  - python=3.8.19=h955ad1f_0
  - readline=8.2=h5eee18b_0
  - setuptools=69.5.1=py38h06a4308_0
  - sqlite=3.45.3=h5eee18b_0
  - tk=8.6.14=h39e8969_0
  - wheel=0.43.0=py38h06a4308_0
  - xz=5.4.6=h5eee18b_1
  - zlib=1.2.13=h5eee18b_1
  - pip:
    - accelerate==0.30.0
    - aiohttp==3.9.5
    - aiosignal==1.3.1
    - antlr4-python3-runtime==4.9.3
    - anyio==4.3.0
    - argon2-cffi==23.1.0
    - argon2-cffi-bindings==21.2.0
    - arrow==1.3.0
    - asttokens==2.4.1
    - async-lru==2.0.4
    - async-timeout==4.0.3
    - attrs==23.2.0
    - babel==2.15.0
    - backcall==0.2.0
    - beautifulsoup4==4.12.3
    - bleach==6.1.0
    - certifi==2024.2.2
    - cffi==1.16.0
    - charset-normalizer==3.3.2
    - comm==0.2.2
    - contourpy==1.1.1
    - cycler==0.12.1
    - datasets==2.19.1
    - debugpy==1.8.1
    - decorator==5.1.1
    - defusedxml==0.7.1
    - dill==0.3.8
    - einops==0.8.0
    - evaluate==0.4.2
    - exceptiongroup==1.2.1
    - executing==2.0.1
    - fastjsonschema==2.19.1
    - filelock==3.14.0
    - fonttools==4.53.0
    - fqdn==1.5.1
    - frozenlist==1.4.1
    - fsspec==2024.3.1
    - h11==0.14.0
    - httpcore==1.0.5
    - httpx==0.27.0
    - huggingface-hub==0.23.0
    - idna==3.7
    - importlib-metadata==7.1.0
    - importlib-resources==6.4.0
    - ipykernel==6.29.4
    - ipython==8.12.3
    - ipywidgets==8.1.2
    - isoduration==20.11.0
    - jedi==0.19.1
    - jinja2==3.1.4
    - joblib==1.4.2
    - json5==0.9.25
    - jsonpointer==2.4
    - jsonschema==4.22.0
    - jsonschema-specifications==2023.12.1
    - jupyter==1.0.0
    - jupyter-client==8.6.1
    - jupyter-console==6.6.3
    - jupyter-core==5.7.2
    - jupyter-events==0.10.0
    - jupyter-lsp==2.2.5
    - jupyter-server==2.14.0
    - jupyter-server-terminals==0.5.3
    - jupyterlab==4.1.8
    - jupyterlab-pygments==0.3.0
    - jupyterlab-server==2.27.1
    - jupyterlab-widgets==3.0.10
    - kiwisolver==1.4.5
    - markupsafe==2.1.5
    - matplotlib==3.7.5
    - matplotlib-inline==0.1.7
    - mistune==3.0.2
    - mpmath==1.3.0
    - multidict==6.0.5
    - multiprocess==0.70.16
    - nbclient==0.10.0
    - nbconvert==7.16.4
    - nbformat==5.10.4
    - nest-asyncio==1.6.0
    - networkx==3.1
    - notebook==7.1.3
    - notebook-shim==0.2.4
    - nvidia-cublas-cu12==12.1.3.1
    - nvidia-cuda-cupti-cu12==12.1.105
    - nvidia-cuda-nvrtc-cu12==12.1.105
    - nvidia-cuda-runtime-cu12==12.1.105
    - nvidia-cudnn-cu12==8.9.2.26
    - nvidia-cufft-cu12==11.0.2.54
    - nvidia-curand-cu12==10.3.2.106
    - nvidia-cusolver-cu12==11.4.5.107
    - nvidia-cusparse-cu12==12.1.0.106
    - nvidia-nccl-cu12==2.20.5
    - nvidia-nvjitlink-cu12==12.4.127
    - nvidia-nvtx-cu12==12.1.105
    - omegaconf==2.3.0
    - overrides==7.7.0
    - packaging==24.0
    - pandas==2.0.3
    - pandocfilters==1.5.1
    - parso==0.8.4
    - peft==0.10.0
    - pexpect==4.9.0
    - pickleshare==0.7.5
    - pillow==10.3.0
    - pkgutil-resolve-name==1.3.10
    - platformdirs==4.2.1
    - prometheus-client==0.20.0
    - prompt-toolkit==3.0.43
    - psutil==5.9.8
    - ptyprocess==0.7.0
    - pure-eval==0.2.2
    - pyarrow==16.0.0
    - pyarrow-hotfix==0.6
    - pycparser==2.22
    - pygments==2.18.0
    - pyparsing==3.1.2
    - python-dateutil==2.9.0.post0
    - python-json-logger==2.0.7
    - pytz==2024.1
    - pyyaml==6.0.1
    - pyzmq==26.0.3
    - qtconsole==5.5.2
    - qtpy==2.4.1
    - referencing==0.35.1
    - regex==2024.4.28
    - requests==2.31.0
    - rfc3339-validator==0.1.4
    - rfc3986-validator==0.1.1
    - rpds-py==0.18.1
    - safetensors==0.4.3
    - scikit-learn==1.3.2
    - scipy==1.10.1
    - seaborn==0.13.2
    - send2trash==1.8.3
    - six==1.16.0
    - sniffio==1.3.1
    - soupsieve==2.5
    - stack-data==0.6.3
    - sympy==1.12
    - terminado==0.18.1
    - threadpoolctl==3.5.0
    - tinycss2==1.3.0
    - tokenizers==0.13.3
    - tomli==2.0.1
    - torch==2.3.0
    - tornado==6.4
    - tqdm==4.66.4
    - traitlets==5.14.3
    - transformers==4.29.2
    - types-python-dateutil==2.9.0.20240316
    - typing-extensions==4.11.0
    - tzdata==2024.1
    - uri-template==1.3.0
    - urllib3==2.2.1
    - wcwidth==0.2.13
    - webcolors==1.13
    - webencodings==0.5.1
    - websocket-client==1.8.0
    - widgetsnbextension==4.0.10
    - xxhash==3.4.1
    - yarl==1.9.4
    - zipp==3.18.1
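
To recreate this environment from the file (assuming it is saved as dnabert2_20240725.yml):

conda env create -f dnabert2_20240725.yml
conda activate dnabert2_20240725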

Install triton from source, then uninstall it:

git clone https://github.com/openai/triton.git;
cd triton/python;
pip install cmake; # build-time dependency
pip install -e .
pip uninstall triton
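
After the uninstall you can double-check that triton is really gone:

python -c "import importlib.util; print(importlib.util.find_spec('triton'))"   # should print None

As far as I can tell from the remote code, the model's bert_layers.py then falls back to its regular (non-Triton) attention path instead of the Triton flash-attention kernel, so inference still works, just without that kernel.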

@anonymouscat Thank you so much for your help. I really appreciate it. But I still have a problem: I followed the instructions you gave and a new error occurred. Do you have any suggestions?

AttributeError Traceback (most recent call last)
Cell In[8], line 9
2 print(sum(param.numel() for param in model.parameters()))
3 trainer = Trainer(model=model,
4 args=train_Args,
5 train_dataset=train_dataset,
6 eval_dataset=test_dataset,
7 data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
8 compute_metrics=eval_predict)
----> 9 trainer.train()
10 print(trainer.evaluate())

File ~/miniconda3/lib/python3.10/site-packages/transformers/trainer.py:1664, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1659 self.model_wrapped = self.model
1661 inner_training_loop = find_executable_batch_size(
1662 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1663 )
-> 1664 return inner_training_loop(
1665 args=args,
1666 resume_from_checkpoint=resume_from_checkpoint,
1667 trial=trial,
1668 ignore_keys_for_eval=ignore_keys_for_eval,
1669 )

File ~/miniconda3/lib/python3.10/site-packages/transformers/trainer.py:1750, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1748 self.lr_scheduler = lr_scheduler
1749 elif not delay_optimizer_creation:
-> 1750 self.create_optimizer_and_scheduler(num_training_steps=max_steps)
1752 self.state = TrainerState()
1753 self.state.is_hyper_param_search = trial is not None

File ~/miniconda3/lib/python3.10/site-packages/transformers/trainer.py:1054, in Trainer.create_optimizer_and_scheduler(self, num_training_steps)
1046 def create_optimizer_and_scheduler(self, num_training_steps: int):
1047 """
1048 Setup the optimizer and the learning rate scheduler.
1049
(...)
1052 create_scheduler) in a subclass.
1053 """
-> 1054 self.create_optimizer()
1055 if IS_SAGEMAKER_MP_POST_1_10 and smp.state.cfg.fp16:
1056 # If smp >= 1.10 and fp16 is enabled, we unwrap the optimizer
1057 optimizer = self.optimizer.optimizer

File ~/miniconda3/lib/python3.10/site-packages/transformers/trainer.py:1098, in Trainer.create_optimizer(self)
1092 self.optimizer = OSS(
1093 params=optimizer_grouped_parameters,
1094 optim=optimizer_cls,
1095 **optimizer_kwargs,
1096 )
1097 else:
-> 1098 self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
1099 if optimizer_cls.__name__ == "Adam8bit":
1100 import bitsandbytes

File ~/miniconda3/lib/python3.10/site-packages/transformers/optimization.py:423, in AdamW.__init__(self, params, lr, betas, eps, weight_decay, correct_bias, no_deprecation_warning)
421 raise ValueError(f"Invalid epsilon value: {eps} - should be >= 0.0")
422 defaults = {"lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay, "correct_bias": correct_bias}
--> 423 super().__init__(params, defaults)

File ~/miniconda3/lib/python3.10/site-packages/torch/optim/optimizer.py:367, in Optimizer.__init__(self, params, defaults)
364 param_groups = [{"params": param_groups}]
366 for param_group in param_groups:
--> 367 self.add_param_group(cast(dict, param_group))
369 # Allows _cuda_graph_capture_health_check to rig a poor man's TORCH_WARN_ONCE in python,
370 # which I don't think exists
371 # https://github.com/pytorch/pytorch/issues/72948
372 self._warned_capturable_if_run_uncaptured = True

File ~/miniconda3/lib/python3.10/site-packages/torch/_compile.py:26, in _disable_dynamo.<locals>.inner(*args, **kwargs)
24 disable_fn = getattr(fn, "__dynamo_disable", None)
25 if disable_fn is None:
---> 26 import torch._dynamo
28 disable_fn = torch._dynamo.disable(fn, recursive)
29 fn.__dynamo_disable = disable_fn

File ~/miniconda3/lib/python3.10/site-packages/torch/_dynamo/__init__.py:2
1 import torch
----> 2 from . import convert_frame, eval_frame, resume_execution
3 from .backends.registry import list_backends, lookup_backend, register_backend
4 from .callback import callback_handler, on_compile_end, on_compile_start

File ~/miniconda3/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py:48
45 from torch.utils._python_dispatch import _disable_current_modes
46 from torch.utils._traceback import format_traceback_short
---> 48 from . import config, exc, trace_rules
49 from .backends.registry import CompilerFn
50 from .bytecode_analysis import remove_dead_code, remove_pointless_jumps

File ~/miniconda3/lib/python3.10/site-packages/torch/_dynamo/exc.py:12
8 import torch.guards
10 from . import config
---> 12 from .utils import counters
15 def exportdb_error_message(case_name):
16 return (
17 "For more information about this error, see: "
18 + "https://pytorch.org/docs/main/generated/exportdb/index.html#"
19 + case_name.replace("_", "-")
20 )

File ~/miniconda3/lib/python3.10/site-packages/torch/_dynamo/utils.py:1066
1063 if has_triton_package():
1064 import triton
-> 1066 common_constant_types.add(triton.language.dtype)
1069 def is_safe_constant(v):
1070 if istype(v, (tuple, frozenset)):

AttributeError: module 'triton' has no attribute 'language'
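
For what it's worth, the traceback shows that import triton itself succeeds and only triton.language is missing. One way to see which triton is actually being picked up:

python -c "import triton; print(triton.__file__); print(hasattr(triton, 'language'))"

My guess (unverified) is that a leftover or partial triton install is still on the path after the build-and-uninstall step.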

This bug has been resolved; please see: https://github.com/LucaOne/LucaOne/issues/28
