torch==2.5.1
accelerate
codetiming
datasets
dill
# flash-attn
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
hydra-core
liger-kernel
numpy
pandas
peft
pyarrow>=15.0.0
pybind11
pylatexenc
pylint==3.3.6
qwen_vl_utils
ray[default]
tensordict<=0.6.2
torchdata
transformers==4.51.0
vllm==0.7.3
wandb
word2number
math_verify
mathruler
tensorboard