htlou committed on
Commit
c864ad9
·
verified ·
1 Parent(s): 4c4d1a4

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/offline-run-20250126_124721-63zh6b52/run-63zh6b52.wandb filter=lfs diff=lfs merge=lfs -text
arguments.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_cfgs:
2
+ eval_data_files: null
3
+ eval_datasets: null
4
+ eval_optional_args: []
5
+ eval_size: null
6
+ eval_split: null
7
+ eval_subset: null
8
+ eval_template: null
9
+ train_data_files: pickapic_40k.pt
10
+ train_datasets: /data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs
11
+ train_optional_args: []
12
+ train_size: null
13
+ train_split: train
14
+ train_subset: null
15
+ train_template: Chameleon_preference
16
+ logger_cfgs:
17
+ cache_dir: null
18
+ log_project: align-anything
19
+ log_run_name: dpo
20
+ log_type: wandb
21
+ output_dir: ../outputs/dpo_pickapic_40k
22
+ save_interval: 400.0
23
+ model_cfgs:
24
+ model_max_length: 4096
25
+ model_name_or_path: /data/align-anything/hantao/models/0830_4k_sft_flux
26
+ trust_remote_code: true
27
+ special_tokens: null
28
+ train_cfgs:
29
+ adam_betas:
30
+ - 0.9
31
+ - 0.95
32
+ bf16: true
33
+ ds_cfgs: ds_z3_config.json
34
+ epochs: 3.0
35
+ eval_interval: 10
36
+ eval_strategy: epoch
37
+ fp16: false
38
+ freeze_language_model: true
39
+ freeze_mm_proj: true
40
+ freeze_vision_tower: false
41
+ gradient_accumulation_steps: 2.0
42
+ gradient_checkpointing: true
43
+ learning_rate: 5.0e-07
44
+ lr_scheduler_type: cosine
45
+ lr_warmup_ratio: 0.03
46
+ per_device_eval_batch_size: 4.0
47
+ per_device_train_batch_size: 4.0
48
+ regularization: 0.001
49
+ scale_coeff: 0.1
50
+ seed: 42
51
+ weight_decay: 0.01
config.json ADDED
The diff for this file is too large to render. See raw diff
 
environ.txt ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CONDA_DEFAULT_ENV=hantao_cham
2
+ CONDA_EXE=/data/align-anything/miniconda3/bin/conda
3
+ CONDA_PREFIX=/data/align-anything/miniconda3/envs/hantao_cham
4
+ CONDA_PREFIX_1=/data/align-anything/miniconda3
5
+ CONDA_PROMPT_MODIFIER=(hantao_cham)
6
+ CONDA_PYTHON_EXE=/data/align-anything/miniconda3/bin/python
7
+ CONDA_SHLVL=2
8
+ CROSS_RANK=0
9
+ CROSS_SIZE=1
10
+ CUDA_MODULE_LOADING=LAZY
11
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
12
+ DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/2000/bus
13
+ HOME=/home/align-anything
14
+ LANG=en_US.UTF-8
15
+ LD_LIBRARY_PATH=/data/align-anything/miniconda3/envs/hantao_cham/lib/python3.11/site-packages/cv2/../../lib64:
16
+ LESSCLOSE=/usr/bin/lesspipe %s %s
17
+ LESSOPEN=| /usr/bin/lesspipe %s
18
+ LOCAL_RANK=0
19
+ LOCAL_SIZE=8
20
+ LOGLEVEL=WARNING
21
+ LOGNAME=align-anything
22
+ LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:
23
+ MASTER_ADDR=127.0.0.1
24
+ MASTER_PORT=52085
25
+ MOTD_SHOWN=pam
26
+ OLDPWD=/home/align-anything
27
+ PATH=/data/align-anything/miniconda3/envs/hantao_cham/bin:/data/align-anything/miniconda3/condabin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
28
+ PWD=/data/align-anything/hantao/align-anything/scripts
29
+ PYGAME_HIDE_SUPPORT_PROMPT=1
30
+ PYTHONHASHSEED=42
31
+ PYTHONPATH=/data/align-anything/hantao/align-anything
32
+ QT_QPA_FONTDIR=/data/align-anything/miniconda3/envs/hantao_cham/lib/python3.11/site-packages/cv2/qt/fonts
33
+ QT_QPA_PLATFORM_PLUGIN_PATH=/data/align-anything/miniconda3/envs/hantao_cham/lib/python3.11/site-packages/cv2/qt/plugins
34
+ RANK=0
35
+ SHELL=/bin/bash
36
+ SHLVL=3
37
+ SSH_CLIENT=203.93.11.123 60580 30400
38
+ SSH_CONNECTION=39.173.142.74 12446 10.10.212.194 30400
39
+ SSH_TTY=/dev/pts/3
40
+ TERM=screen
41
+ TMUX=/tmp/tmux-2000/default,32165,6
42
+ TMUX_PANE=%6
43
+ USER=align-anything
44
+ WANDB_API_KEY=***REDACTED***
45
+ WANDB_MODE=offline
46
+ WANDB_SERVICE=2-849127-tcp-localhost-37123
47
+ WORLD_SIZE=8
48
+ XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
49
+ XDG_RUNTIME_DIR=/run/user/2000
50
+ XDG_SESSION_CLASS=user
51
+ XDG_SESSION_ID=12
52
+ XDG_SESSION_TYPE=tty
53
+ _=/data/align-anything/miniconda3/envs/hantao_cham/bin/deepspeed
54
+ _CE_CONDA=
55
+ _CE_M=
preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 512,
4
+ "width": 512
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 1.0,
13
+ 1.0,
14
+ 1.0
15
+ ],
16
+ "image_processor_type": "ChameleonImageProcessor",
17
+ "image_std": [
18
+ 1.0,
19
+ 1.0,
20
+ 1.0
21
+ ],
22
+ "processor_class": "ChameleonProcessor",
23
+ "resample": 1,
24
+ "rescale_factor": 0.0078,
25
+ "size": {
26
+ "shortest_edge": 512
27
+ }
28
+ }
processor_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "image_seq_length": 1024,
3
+ "image_token": "<image>",
4
+ "processor_class": "ChameleonProcessor"
5
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09edf08c05d71e0691a1ac9c5a04ec7962c0fc79490a3b5d6fe8aa79bcb87138
3
+ size 14165009930
script.sh ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Copyright 2024 PKU-Alignment Team. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ # ==============================================================================
17
+
18
+ # sleep 15m
19
+
20
+ # Initialize variables
21
+ MODEL_NAME_OR_PATH="/data/align-anything/hantao/models/0830_4k_sft_flux"
22
+ TRAIN_DATASETS="/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs"
23
+ OUTPUT_DIR="../outputs/dpo_pickapic_40k"
24
+ # For wandb online logging
25
+ export WANDB_API_KEY="***REDACTED***"
26
+ export WANDB_MODE=offline
27
+ # Source the setup script
28
+ source ./setup.sh
29
+
30
+ # Execute deepspeed command
31
+ deepspeed \
32
+ --master_port ${MASTER_PORT} \
33
+ --module align_anything.trainers.text_image_to_text_image.dpo \
34
+ --model_name_or_path ${MODEL_NAME_OR_PATH} \
35
+ --train_datasets ${TRAIN_DATASETS} \
36
+ --output_dir ${OUTPUT_DIR} \
37
+ --per_device_train_batch_size 4 \
38
+ --per_device_eval_batch_size 4 \
39
+ --gradient_accumulation_steps 2 \
40
+ --train_template Chameleon_preference \
41
+ --train_split train \
42
+ --train_data_files pickapic_40k.pt \
43
+ --learning_rate 5e-7 \
44
+ --epochs 3 \
45
+ --lr_scheduler_type cosine \
46
+ --save_interval 400
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "<reserved08706>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "<unk>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-01-26T12:47:21.807628078Z","level":"INFO","msg":"using version","core version":"0.18.3"}
2
+ {"time":"2025-01-26T12:47:21.807642964Z","level":"INFO","msg":"created symlink","path":"../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_124721-63zh6b52/logs/debug-core.log"}
3
+ {"time":"2025-01-26T12:47:21.818216674Z","level":"INFO","msg":"created new stream","id":"63zh6b52"}
4
+ {"time":"2025-01-26T12:47:21.818241372Z","level":"INFO","msg":"stream: started","id":"63zh6b52"}
5
+ {"time":"2025-01-26T12:47:21.818271255Z","level":"INFO","msg":"sender: started","stream_id":{"value":"63zh6b52"}}
6
+ {"time":"2025-01-26T12:47:21.818280379Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"63zh6b52"}}
7
+ {"time":"2025-01-26T12:47:21.818334818Z","level":"INFO","msg":"handler: started","stream_id":{"value":"63zh6b52"}}
8
+ {"time":"2025-01-26T12:47:21.834067258Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
9
+ {"time":"2025-01-26T12:47:21.837691453Z","level":"INFO","msg":"Starting system monitor"}
10
+ {"time":"2025-01-26T17:45:26.817146242Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-01-26T17:45:26.831287884Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-01-26T17:45:27.844713987Z","level":"INFO","msg":"stream: closing","id":"63zh6b52"}
13
+ {"time":"2025-01-26T17:45:27.84474064Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"63zh6b52"}}
14
+ {"time":"2025-01-26T17:45:27.844756097Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"63zh6b52"}}
15
+ {"time":"2025-01-26T17:45:27.844784263Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"63zh6b52"}}
16
+ {"time":"2025-01-26T17:45:27.847613043Z","level":"INFO","msg":"stream: closed","id":"63zh6b52"}
wandb/debug.log ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-01-26 12:47:21,735 INFO MainThread:849127 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
2
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Configure stats pid to 849127
3
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
4
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
5
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'offline'}
6
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'offline', '_disable_service': None}
7
+ 2025-01-26 12:47:21,736 WARNING MainThread:849127 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
8
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
9
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:_log_setup():532] Logging user logs to ../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_124721-63zh6b52/logs/debug.log
10
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:_log_setup():533] Logging internal logs to ../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_124721-63zh6b52/logs/debug-internal.log
11
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:init():617] calling init triggers
12
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
13
+ config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 4.0, 'per_device_eval_batch_size': 4.0, 'gradient_accumulation_steps': 2.0, 'gradient_checkpointing': True, 'learning_rate': 5e-07, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'pickapic_40k.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '../outputs/dpo_pickapic_40k', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/0830_4k_sft_flux', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
14
+ 2025-01-26 12:47:21,737 INFO MainThread:849127 [wandb_init.py:init():667] starting backend
15
+ 2025-01-26 12:47:21,737 INFO MainThread:849127 [wandb_init.py:init():671] sending inform_init request
16
+ 2025-01-26 12:47:21,744 INFO MainThread:849127 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-01-26 12:47:21,745 INFO MainThread:849127 [wandb_init.py:init():684] backend started and connected
18
+ 2025-01-26 12:47:21,747 INFO MainThread:849127 [wandb_init.py:init():779] updated telemetry
19
+ 2025-01-26 12:47:21,758 INFO MainThread:849127 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
20
+ 2025-01-26 12:47:21,829 INFO MainThread:849127 [wandb_init.py:init():863] starting run threads in backend
21
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_console_start():2465] atexit reg
22
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_redirect():2313] redirect: wrap_raw
23
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_redirect():2378] Wrapping output streams.
24
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_redirect():2403] Redirects installed.
25
+ 2025-01-26 12:47:22,202 INFO MainThread:849127 [wandb_init.py:init():907] run started, returning control to user process
26
+ 2025-01-26 17:45:26,813 INFO MainThread:849127 [wandb_run.py:_finish():2164] finishing run align-anything/63zh6b52
27
+ 2025-01-26 17:45:26,815 INFO MainThread:849127 [wandb_run.py:_atexit_cleanup():2428] got exitcode: 0
28
+ 2025-01-26 17:45:26,816 INFO MainThread:849127 [wandb_run.py:_restore():2410] restore
29
+ 2025-01-26 17:45:26,816 INFO MainThread:849127 [wandb_run.py:_restore():2416] restore done
30
+ 2025-01-26 17:45:27,838 INFO MainThread:849127 [wandb_run.py:_footer_history_summary_info():4049] rendering history
31
+ 2025-01-26 17:45:27,841 INFO MainThread:849127 [wandb_run.py:_footer_history_summary_info():4081] rendering summary
wandb/offline-run-20250126_113111-ux2nx7b1/files/output.log ADDED
File without changes
wandb/offline-run-20250126_113111-ux2nx7b1/files/requirements.txt ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ align-anything==0.0.1.dev0
2
+ uvloop==0.20.0
3
+ absl-py==2.1.0
4
+ term-image==0.7.2
5
+ distro==1.9.0
6
+ bitsandbytes==0.44.1
7
+ uvicorn==0.31.1
8
+ safetensors==0.4.5
9
+ gradio_client==1.4.0
10
+ propcache==0.2.0
11
+ GitPython==3.1.43
12
+ pyzmq==26.2.0
13
+ pyzmq==26.0.0
14
+ graze==0.1.27
15
+ sae-lens==3.20.0
16
+ nvidia-nvtx-cu12==12.1.105
17
+ einops==0.8.0
18
+ plotly==5.24.1
19
+ nvidia-cusolver-cu12==11.4.5.107
20
+ Pygments==2.15.1
21
+ Pygments==2.18.0
22
+ traitlets==5.14.3
23
+ psutil==5.9.0
24
+ psutil==6.0.0
25
+ wcwidth==0.2.5
26
+ wcwidth==0.2.13
27
+ six==1.16.0
28
+ smmap==5.0.1
29
+ python-dotenv==1.0.1
30
+ multiprocess==0.70.16
31
+ matplotlib-inline==0.1.6
32
+ matplotlib-inline==0.1.7
33
+ pyarrow==17.0.0
34
+ jupyter_client==8.6.0
35
+ nbclient==0.10.2
36
+ outlines_core==0.1.14
37
+ pytest==7.2.0
38
+ ptyprocess==0.7.0
39
+ lazy_loader==0.4
40
+ debugpy==1.8.11
41
+ asttokens==2.0.5
42
+ contourpy==1.3.0
43
+ better-abc==0.0.3
44
+ aiofiles==23.2.1
45
+ torchlibrosa==0.1.0
46
+ patsy==1.0.1
47
+ protobuf==3.20.3
48
+ nvidia-cuda-runtime-cu12==12.1.105
49
+ pycparser==2.22
50
+ PyYAML==6.0.2
51
+ hjson==3.1.0
52
+ xxhash==3.5.0
53
+ evaluate==0.4.3
54
+ beautifulsoup4==4.12.3
55
+ platformdirs==4.3.6
56
+ platformdirs==3.10.0
57
+ diskcache==5.6.3
58
+ fairscale==0.4.13
59
+ certifi==2024.8.30
60
+ docker-pycreds==0.4.0
61
+ braceexpand==0.1.7
62
+ virtualenv==20.26.6
63
+ pandocfilters==1.5.1
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ grpcio==1.66.2
66
+ yarl==1.15.0
67
+ aiohttp==3.10.10
68
+ referencing==0.35.1
69
+ fsspec==2024.6.1
70
+ nvidia-nccl-cu12==2.20.5
71
+ executing==0.8.3
72
+ Jinja2==3.1.4
73
+ timm==0.6.13
74
+ opencv-python==4.6.0.66
75
+ mmsg==0.1.dev20+g585c63a.d20241111
76
+ tomlkit==0.12.0
77
+ nbconvert==7.16.5
78
+ py2store==0.1.20
79
+ pandas==2.2.3
80
+ prompt-toolkit==3.0.43
81
+ identify==2.6.1
82
+ deepspeed==0.15.2
83
+ ray==2.37.0
84
+ align-anything==0.0.1.dev0
85
+ nvidia-cufft-cu12==11.0.2.54
86
+ datasets==2.21.0
87
+ mistral_common==1.4.4
88
+ httptools==0.6.1
89
+ scipy==1.14.1
90
+ ipykernel==6.29.5
91
+ mdurl==0.1.2
92
+ clip==0.2.0
93
+ cycler==0.12.1
94
+ pyairports==2.1.1
95
+ charset-normalizer==3.4.0
96
+ torch==2.4.0
97
+ mpmath==1.3.0
98
+ tzdata==2024.2
99
+ tokenizers==0.19.1
100
+ dol==0.3.6
101
+ transformer-lens==0.0.0
102
+ nest-asyncio==1.6.0
103
+ nbformat==5.10.4
104
+ bleach==6.2.0
105
+ sentencepiece==0.2.0
106
+ statsmodels==0.14.4
107
+ aiohappyeyeballs==2.4.3
108
+ fastjsonschema==2.21.1
109
+ tornado==6.4.2
110
+ ffmpy==0.4.0
111
+ nvidia-curand-cu12==10.3.2.106
112
+ kiwisolver==1.4.7
113
+ tenacity==9.0.0
114
+ audioread==3.0.1
115
+ cffi==1.17.1
116
+ clint==0.5.1
117
+ partial-json-parser==0.2.1.1.post4
118
+ dill==0.3.8
119
+ ninja==1.11.1.1
120
+ tqdm==4.66.5
121
+ jaxtyping==0.2.36
122
+ gitdb==4.0.11
123
+ jedi==0.19.2
124
+ regex==2024.9.11
125
+ nvidia-cusparse-cu12==12.1.0.106
126
+ aiosignal==1.3.1
127
+ jsonschema-specifications==2024.10.1
128
+ yt-dlp==2024.8.6
129
+ triton==3.0.0
130
+ pydub==0.25.1
131
+ nodeenv==1.9.1
132
+ pooch==1.8.2
133
+ MarkupSafe==2.1.5
134
+ fastapi==0.115.0
135
+ setproctitle==1.3.3
136
+ pycountry==24.6.1
137
+ anyio==4.6.0
138
+ matplotlib==3.9.2
139
+ config2py==0.1.36
140
+ diffusers==0.30.3
141
+ jupyterlab_pygments==0.3.0
142
+ librosa==0.10.2.post1
143
+ tiktoken==0.6.0
144
+ filelock==3.16.1
145
+ jiter==0.6.1
146
+ sentry-sdk==2.16.0
147
+ starlette==0.38.6
148
+ py-cpuinfo==9.0.0
149
+ typer==0.12.5
150
+ zipp==3.20.2
151
+ args==0.1.0
152
+ jsonschema==4.23.0
153
+ llvmlite==0.43.0
154
+ lxml==4.9.4
155
+ interegular==0.3.3
156
+ wheel==0.44.0
157
+ blobfile==2.1.1
158
+ frechet-audio-distance==0.1.2
159
+ pytz==2024.2
160
+ pytorch-fid==0.3.0
161
+ optree==0.13.0
162
+ lark==1.2.2
163
+ pytest-profiling==1.8.1
164
+ beartype==0.14.1
165
+ msgpack==1.1.0
166
+ prometheus_client==0.21.0
167
+ typing_extensions==4.12.2
168
+ mutagen==1.47.0
169
+ pexpect==4.8.0
170
+ pycryptodomex==3.21.0
171
+ fonttools==4.54.1
172
+ ftfy==6.3.0
173
+ stack-data==0.2.0
174
+ orjson==3.10.7
175
+ vllm==0.6.2
176
+ watchfiles==0.24.0
177
+ iniconfig==2.0.0
178
+ idna==3.10
179
+ proglog==0.1.10
180
+ sniffio==1.3.1
181
+ pyparsing==3.1.4
182
+ h11==0.14.0
183
+ networkx==3.4.1
184
+ importlib_resources==6.5.2
185
+ xformers==0.0.27.post2
186
+ hpsv2==1.2.0
187
+ pluggy==1.5.0
188
+ gguf==0.10.0
189
+ imageio==2.35.1
190
+ pure-eval==0.2.2
191
+ importlib_metadata==8.5.0
192
+ urllib3==2.2.3
193
+ nvidia-nvjitlink-cu12==12.6.77
194
+ airportsdata==20241001
195
+ semantic-version==2.10.0
196
+ fancy-einsum==0.0.3
197
+ typeguard==4.4.1
198
+ decorator==4.4.2
199
+ decorator==5.1.1
200
+ attrs==24.2.0
201
+ Brotli==1.1.0
202
+ numpy==1.26.4
203
+ soxr==0.5.0.post1
204
+ requests==2.32.3
205
+ tinycss2==1.4.0
206
+ nltk==3.9.1
207
+ pytest-split==0.8.0
208
+ httpcore==1.0.6
209
+ webdataset==0.2.100
210
+ rpds-py==0.20.0
211
+ shellingham==1.5.4
212
+ annotated-types==0.7.0
213
+ plotly-express==0.4.1
214
+ transformers==4.44.0.dev0
215
+ pillow==10.4.0
216
+ nvidia-ml-py==12.560.30
217
+ packaging==24.1
218
+ packaging==24.2
219
+ peft==0.13.2
220
+ imageio-ffmpeg==0.5.1
221
+ outlines==0.1.3
222
+ setuptools==75.1.0
223
+ pydantic==2.9.2
224
+ zstandard==0.22.0
225
+ defusedxml==0.7.1
226
+ cloudpickle==3.1.0
227
+ torchvision==0.19.0
228
+ threadpoolctl==3.5.0
229
+ soupsieve==2.6
230
+ cfgv==3.4.0
231
+ tensorboard==2.18.0
232
+ moviepy==1.0.3
233
+ nvidia-cuda-cupti-cu12==12.1.105
234
+ msgspec==0.18.6
235
+ logger==1.4
236
+ comm==0.2.1
237
+ Markdown==3.7
238
+ huggingface-hub==0.25.2
239
+ scikit-learn==1.5.2
240
+ distlib==0.3.9
241
+ shortuuid==1.0.13
242
+ nvidia-cublas-cu12==12.1.3.1
243
+ pip==24.2
244
+ image-reward==1.5
245
+ gprof2dot==2024.6.6
246
+ click==8.1.7
247
+ lm-format-enforcer==0.10.6
248
+ joblib==1.4.2
249
+ torchaudio==2.4.0
250
+ rich==13.9.2
251
+ resampy==0.4.3
252
+ numba==0.60.0
253
+ gradio==5.0.2
254
+ tensorboard-data-server==0.7.2
255
+ automated-interpretability==0.0.6
256
+ soundfile==0.12.1
257
+ multidict==6.1.0
258
+ wandb==0.18.3
259
+ openai==1.51.2
260
+ nvidia-cudnn-cu12==9.1.0.70
261
+ boostedblob==0.15.6
262
+ python-dateutil==2.9.0.post0
263
+ frozenlist==1.4.1
264
+ jupyter_core==5.7.2
265
+ python-multipart==0.0.12
266
+ Werkzeug==3.0.4
267
+ markdown-it-py==3.0.0
268
+ parso==0.8.4
269
+ pydantic_core==2.23.4
270
+ i2==0.1.45
271
+ ipython==8.30.0
272
+ prometheus-fastapi-instrumentator==7.0.0
273
+ mistune==3.1.0
274
+ sympy==1.13.3
275
+ accelerate==1.0.1
276
+ babe==0.0.7
277
+ httpx==0.27.2
278
+ webencodings==0.5.1
279
+ ruff==0.6.9
280
+ pre_commit==4.0.1
281
+ websockets==12.0
282
+ importlib_resources==6.4.0
283
+ packaging==24.1
284
+ typing_extensions==4.12.2
285
+ wheel==0.43.0
286
+ zipp==3.19.2
287
+ jaraco.text==3.12.1
288
+ inflect==7.3.1
289
+ more-itertools==10.3.0
290
+ autocommand==2.2.2
291
+ platformdirs==4.2.2
292
+ typeguard==4.3.0
293
+ jaraco.functools==4.0.1
294
+ jaraco.context==5.3.0
295
+ tomli==2.0.1
296
+ jaraco.collections==5.1.0
297
+ importlib_metadata==8.0.0
298
+ backports.tarfile==1.2.0
wandb/offline-run-20250126_113111-ux2nx7b1/files/wandb-metadata.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-204-generic-x86_64-with-glibc2.31",
3
+ "python": "3.11.11",
4
+ "startedAt": "2025-01-26T11:31:11.908125Z",
5
+ "args": [
6
+ "--local_rank=0",
7
+ "--model_name_or_path",
8
+ "/data/align-anything/hantao/models/0830_4k_sft_flux",
9
+ "--train_datasets",
10
+ "/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs",
11
+ "--output_dir",
12
+ "../outputs/dpo_pickapic_40k",
13
+ "--per_device_train_batch_size",
14
+ "4",
15
+ "--per_device_eval_batch_size",
16
+ "4",
17
+ "--gradient_accumulation_steps",
18
+ "2",
19
+ "--train_template",
20
+ "Chameleon_preference",
21
+ "--train_split",
22
+ "train",
23
+ "--train_data_files",
24
+ "pickapic_40k.pt",
25
+ "--learning_rate",
26
+ "5e-7",
27
+ "--epochs",
28
+ "3",
29
+ "--lr_scheduler_type",
30
+ "cosine",
31
+ "--save_interval",
32
+ "400"
33
+ ],
34
+ "program": "-m align_anything.trainers.text_image_to_text_image.dpo",
35
+ "git": {
36
+ "remote": "https://github.com/PKU-Alignment/align-anything.git",
37
+ "commit": "6fde660afc9985323f147930eedf188a5699adc7"
38
+ },
39
+ "root": "../outputs/dpo_pickapic_40k",
40
+ "host": "lyg0194",
41
+ "username": "align-anything",
42
+ "executable": "/data/align-anything/miniconda3/envs/hantao_cham/bin/python",
43
+ "cpu_count": 64,
44
+ "cpu_count_logical": 128,
45
+ "gpu": "[NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB]",
46
+ "gpu_count": 8,
47
+ "disk": {
48
+ "/": {
49
+ "total": "939477946368",
50
+ "used": "783301410816"
51
+ }
52
+ },
53
+ "memory": {
54
+ "total": "1081823932416"
55
+ },
56
+ "cpu": {
57
+ "count": 64,
58
+ "countLogical": 128
59
+ },
60
+ "gpu_nvidia": [
61
+ {
62
+ "name": "NVIDIA A100-SXM4-80GB",
63
+ "memoryTotal": "85899345920",
64
+ "cudaCores": 6912,
65
+ "architecture": "Ampere"
66
+ },
67
+ {
68
+ "name": "NVIDIA A100-SXM4-80GB",
69
+ "memoryTotal": "85899345920",
70
+ "cudaCores": 6912,
71
+ "architecture": "Ampere"
72
+ },
73
+ {
74
+ "name": "NVIDIA A100-SXM4-80GB",
75
+ "memoryTotal": "85899345920",
76
+ "cudaCores": 6912,
77
+ "architecture": "Ampere"
78
+ },
79
+ {
80
+ "name": "NVIDIA A100-SXM4-80GB",
81
+ "memoryTotal": "85899345920",
82
+ "cudaCores": 6912,
83
+ "architecture": "Ampere"
84
+ },
85
+ {
86
+ "name": "NVIDIA A100-SXM4-80GB",
87
+ "memoryTotal": "85899345920",
88
+ "cudaCores": 6912,
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A100-SXM4-80GB",
93
+ "memoryTotal": "85899345920",
94
+ "cudaCores": 6912,
95
+ "architecture": "Ampere"
96
+ },
97
+ {
98
+ "name": "NVIDIA A100-SXM4-80GB",
99
+ "memoryTotal": "85899345920",
100
+ "cudaCores": 6912,
101
+ "architecture": "Ampere"
102
+ },
103
+ {
104
+ "name": "NVIDIA A100-SXM4-80GB",
105
+ "memoryTotal": "85899345920",
106
+ "cudaCores": 6912,
107
+ "architecture": "Ampere"
108
+ }
109
+ ],
110
+ "cudaVersion": "12.6"
111
+ }
wandb/offline-run-20250126_113111-ux2nx7b1/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-01-26T11:31:11.828607506Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpj1e4eh4p/port-828281.txt","pid":828281,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-01-26T11:31:11.828667125Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-01-26T11:31:11.829545106Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":828281}
4
+ {"time":"2025-01-26T11:31:11.829817195Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":40111,"Zone":""}}
5
+ {"time":"2025-01-26T11:31:11.871501641Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:44254"}
6
+ {"time":"2025-01-26T11:31:11.916569353Z","level":"INFO","msg":"handleInformInit: received","streamId":"ux2nx7b1","id":"127.0.0.1:44254"}
7
+ {"time":"2025-01-26T11:31:12.038491861Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ux2nx7b1","id":"127.0.0.1:44254"}
8
+ {"time":"2025-01-26T11:32:19.852145609Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"ux2nx7b1","id":"127.0.0.1:44254"}
9
+ {"time":"2025-01-26T11:32:19.85427083Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"ux2nx7b1","id":"127.0.0.1:44254"}
10
+ {"time":"2025-01-26T11:32:20.195957638Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:44254"}
11
+ {"time":"2025-01-26T11:32:20.196022797Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:44254"}
12
+ {"time":"2025-01-26T11:32:20.19603868Z","level":"INFO","msg":"server is shutting down"}
13
+ {"time":"2025-01-26T11:32:20.196045403Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:44254"}
14
+ {"time":"2025-01-26T11:32:20.196198581Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:44254"}
15
+ {"time":"2025-01-26T11:32:20.19621631Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:44254"}
16
+ {"time":"2025-01-26T11:32:20.19623621Z","level":"INFO","msg":"server is closed"}
wandb/offline-run-20250126_113111-ux2nx7b1/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-01-26T11:31:12.010650203Z","level":"INFO","msg":"using version","core version":"0.18.3"}
2
+ {"time":"2025-01-26T11:31:12.010688718Z","level":"INFO","msg":"created symlink","path":"../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_113111-ux2nx7b1/logs/debug-core.log"}
3
+ {"time":"2025-01-26T11:31:12.038462746Z","level":"INFO","msg":"created new stream","id":"ux2nx7b1"}
4
+ {"time":"2025-01-26T11:31:12.038487893Z","level":"INFO","msg":"stream: started","id":"ux2nx7b1"}
5
+ {"time":"2025-01-26T11:31:12.03851056Z","level":"INFO","msg":"sender: started","stream_id":{"value":"ux2nx7b1"}}
6
+ {"time":"2025-01-26T11:31:12.038513668Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"ux2nx7b1"}}
7
+ {"time":"2025-01-26T11:31:12.038533311Z","level":"INFO","msg":"handler: started","stream_id":{"value":"ux2nx7b1"}}
8
+ {"time":"2025-01-26T11:31:12.053735689Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
9
+ {"time":"2025-01-26T11:31:12.058521123Z","level":"INFO","msg":"Starting system monitor"}
10
+ {"time":"2025-01-26T11:32:18.829306067Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-01-26T11:32:18.842646605Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-01-26T11:32:19.852250402Z","level":"INFO","msg":"stream: closing","id":"ux2nx7b1"}
13
+ {"time":"2025-01-26T11:32:19.852289448Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"ux2nx7b1"}}
14
+ {"time":"2025-01-26T11:32:19.852328944Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"ux2nx7b1"}}
15
+ {"time":"2025-01-26T11:32:19.852402516Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"ux2nx7b1"}}
16
+ {"time":"2025-01-26T11:32:19.85425144Z","level":"INFO","msg":"stream: closed","id":"ux2nx7b1"}
wandb/offline-run-20250126_113111-ux2nx7b1/logs/debug.log ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-01-26 11:31:11,900 INFO MainThread:828281 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
2
+ 2025-01-26 11:31:11,900 INFO MainThread:828281 [wandb_setup.py:_flush():79] Configure stats pid to 828281
3
+ 2025-01-26 11:31:11,900 INFO MainThread:828281 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
4
+ 2025-01-26 11:31:11,900 INFO MainThread:828281 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
5
+ 2025-01-26 11:31:11,900 INFO MainThread:828281 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'offline'}
6
+ 2025-01-26 11:31:11,900 INFO MainThread:828281 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'offline', '_disable_service': None}
7
+ 2025-01-26 11:31:11,900 WARNING MainThread:828281 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
8
+ 2025-01-26 11:31:11,900 INFO MainThread:828281 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
9
+ 2025-01-26 11:31:11,901 INFO MainThread:828281 [wandb_init.py:_log_setup():532] Logging user logs to ../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_113111-ux2nx7b1/logs/debug.log
10
+ 2025-01-26 11:31:11,901 INFO MainThread:828281 [wandb_init.py:_log_setup():533] Logging internal logs to ../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_113111-ux2nx7b1/logs/debug-internal.log
11
+ 2025-01-26 11:31:11,901 INFO MainThread:828281 [wandb_init.py:init():617] calling init triggers
12
+ 2025-01-26 11:31:11,901 INFO MainThread:828281 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
13
+ config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 4.0, 'per_device_eval_batch_size': 4.0, 'gradient_accumulation_steps': 2.0, 'gradient_checkpointing': True, 'learning_rate': 5e-07, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'pickapic_40k.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '../outputs/dpo_pickapic_40k', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/0830_4k_sft_flux', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
14
+ 2025-01-26 11:31:11,901 INFO MainThread:828281 [wandb_init.py:init():667] starting backend
15
+ 2025-01-26 11:31:11,901 INFO MainThread:828281 [wandb_init.py:init():671] sending inform_init request
16
+ 2025-01-26 11:31:11,907 INFO MainThread:828281 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-01-26 11:31:11,907 INFO MainThread:828281 [wandb_init.py:init():684] backend started and connected
18
+ 2025-01-26 11:31:11,911 INFO MainThread:828281 [wandb_init.py:init():779] updated telemetry
19
+ 2025-01-26 11:31:11,927 INFO MainThread:828281 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
20
+ 2025-01-26 11:31:12,048 INFO MainThread:828281 [wandb_init.py:init():863] starting run threads in backend
21
+ 2025-01-26 11:31:12,828 INFO MainThread:828281 [wandb_run.py:_console_start():2465] atexit reg
22
+ 2025-01-26 11:31:12,828 INFO MainThread:828281 [wandb_run.py:_redirect():2313] redirect: wrap_raw
23
+ 2025-01-26 11:31:12,828 INFO MainThread:828281 [wandb_run.py:_redirect():2378] Wrapping output streams.
24
+ 2025-01-26 11:31:12,828 INFO MainThread:828281 [wandb_run.py:_redirect():2403] Redirects installed.
25
+ 2025-01-26 11:31:12,836 INFO MainThread:828281 [wandb_init.py:init():907] run started, returning control to user process
26
+ 2025-01-26 11:32:18,826 INFO MainThread:828281 [wandb_run.py:_finish():2164] finishing run align-anything/ux2nx7b1
27
+ 2025-01-26 11:32:18,828 INFO MainThread:828281 [wandb_run.py:_atexit_cleanup():2428] got exitcode: 0
28
+ 2025-01-26 11:32:18,828 INFO MainThread:828281 [wandb_run.py:_restore():2410] restore
29
+ 2025-01-26 11:32:18,828 INFO MainThread:828281 [wandb_run.py:_restore():2416] restore done
30
+ 2025-01-26 11:32:19,848 INFO MainThread:828281 [wandb_run.py:_footer_history_summary_info():4049] rendering history
31
+ 2025-01-26 11:32:19,849 INFO MainThread:828281 [wandb_run.py:_footer_history_summary_info():4081] rendering summary
wandb/offline-run-20250126_113111-ux2nx7b1/run-ux2nx7b1.wandb ADDED
Binary file (54.1 kB). View file
 
wandb/offline-run-20250126_124721-63zh6b52/files/output.log ADDED
File without changes
wandb/offline-run-20250126_124721-63zh6b52/files/requirements.txt ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ align-anything==0.0.1.dev0
2
+ uvloop==0.20.0
3
+ absl-py==2.1.0
4
+ term-image==0.7.2
5
+ distro==1.9.0
6
+ bitsandbytes==0.44.1
7
+ uvicorn==0.31.1
8
+ safetensors==0.4.5
9
+ gradio_client==1.4.0
10
+ propcache==0.2.0
11
+ GitPython==3.1.43
12
+ pyzmq==26.2.0
13
+ pyzmq==26.0.0
14
+ graze==0.1.27
15
+ sae-lens==3.20.0
16
+ nvidia-nvtx-cu12==12.1.105
17
+ einops==0.8.0
18
+ plotly==5.24.1
19
+ nvidia-cusolver-cu12==11.4.5.107
20
+ Pygments==2.15.1
21
+ Pygments==2.18.0
22
+ traitlets==5.14.3
23
+ psutil==5.9.0
24
+ psutil==6.0.0
25
+ wcwidth==0.2.5
26
+ wcwidth==0.2.13
27
+ six==1.16.0
28
+ smmap==5.0.1
29
+ python-dotenv==1.0.1
30
+ multiprocess==0.70.16
31
+ matplotlib-inline==0.1.6
32
+ matplotlib-inline==0.1.7
33
+ pyarrow==17.0.0
34
+ jupyter_client==8.6.0
35
+ nbclient==0.10.2
36
+ outlines_core==0.1.14
37
+ pytest==7.2.0
38
+ ptyprocess==0.7.0
39
+ lazy_loader==0.4
40
+ debugpy==1.8.11
41
+ asttokens==2.0.5
42
+ contourpy==1.3.0
43
+ better-abc==0.0.3
44
+ aiofiles==23.2.1
45
+ torchlibrosa==0.1.0
46
+ patsy==1.0.1
47
+ protobuf==3.20.3
48
+ nvidia-cuda-runtime-cu12==12.1.105
49
+ pycparser==2.22
50
+ PyYAML==6.0.2
51
+ hjson==3.1.0
52
+ xxhash==3.5.0
53
+ evaluate==0.4.3
54
+ beautifulsoup4==4.12.3
55
+ platformdirs==4.3.6
56
+ platformdirs==3.10.0
57
+ diskcache==5.6.3
58
+ fairscale==0.4.13
59
+ certifi==2024.8.30
60
+ docker-pycreds==0.4.0
61
+ braceexpand==0.1.7
62
+ virtualenv==20.26.6
63
+ pandocfilters==1.5.1
64
+ nvidia-cuda-nvrtc-cu12==12.1.105
65
+ grpcio==1.66.2
66
+ yarl==1.15.0
67
+ aiohttp==3.10.10
68
+ referencing==0.35.1
69
+ fsspec==2024.6.1
70
+ nvidia-nccl-cu12==2.20.5
71
+ executing==0.8.3
72
+ Jinja2==3.1.4
73
+ timm==0.6.13
74
+ opencv-python==4.6.0.66
75
+ mmsg==0.1.dev20+g585c63a.d20241111
76
+ tomlkit==0.12.0
77
+ nbconvert==7.16.5
78
+ py2store==0.1.20
79
+ pandas==2.2.3
80
+ prompt-toolkit==3.0.43
81
+ identify==2.6.1
82
+ deepspeed==0.15.2
83
+ ray==2.37.0
84
+ align-anything==0.0.1.dev0
85
+ nvidia-cufft-cu12==11.0.2.54
86
+ datasets==2.21.0
87
+ mistral_common==1.4.4
88
+ httptools==0.6.1
89
+ scipy==1.14.1
90
+ ipykernel==6.29.5
91
+ mdurl==0.1.2
92
+ clip==0.2.0
93
+ cycler==0.12.1
94
+ pyairports==2.1.1
95
+ charset-normalizer==3.4.0
96
+ torch==2.4.0
97
+ mpmath==1.3.0
98
+ tzdata==2024.2
99
+ tokenizers==0.19.1
100
+ dol==0.3.6
101
+ transformer-lens==0.0.0
102
+ nest-asyncio==1.6.0
103
+ nbformat==5.10.4
104
+ bleach==6.2.0
105
+ sentencepiece==0.2.0
106
+ statsmodels==0.14.4
107
+ aiohappyeyeballs==2.4.3
108
+ fastjsonschema==2.21.1
109
+ tornado==6.4.2
110
+ ffmpy==0.4.0
111
+ nvidia-curand-cu12==10.3.2.106
112
+ kiwisolver==1.4.7
113
+ tenacity==9.0.0
114
+ audioread==3.0.1
115
+ cffi==1.17.1
116
+ clint==0.5.1
117
+ partial-json-parser==0.2.1.1.post4
118
+ dill==0.3.8
119
+ ninja==1.11.1.1
120
+ tqdm==4.66.5
121
+ jaxtyping==0.2.36
122
+ gitdb==4.0.11
123
+ jedi==0.19.2
124
+ regex==2024.9.11
125
+ nvidia-cusparse-cu12==12.1.0.106
126
+ aiosignal==1.3.1
127
+ jsonschema-specifications==2024.10.1
128
+ yt-dlp==2024.8.6
129
+ triton==3.0.0
130
+ pydub==0.25.1
131
+ nodeenv==1.9.1
132
+ pooch==1.8.2
133
+ MarkupSafe==2.1.5
134
+ fastapi==0.115.0
135
+ setproctitle==1.3.3
136
+ pycountry==24.6.1
137
+ anyio==4.6.0
138
+ matplotlib==3.9.2
139
+ config2py==0.1.36
140
+ diffusers==0.30.3
141
+ jupyterlab_pygments==0.3.0
142
+ librosa==0.10.2.post1
143
+ tiktoken==0.6.0
144
+ filelock==3.16.1
145
+ jiter==0.6.1
146
+ sentry-sdk==2.16.0
147
+ starlette==0.38.6
148
+ py-cpuinfo==9.0.0
149
+ typer==0.12.5
150
+ zipp==3.20.2
151
+ args==0.1.0
152
+ jsonschema==4.23.0
153
+ llvmlite==0.43.0
154
+ lxml==4.9.4
155
+ interegular==0.3.3
156
+ wheel==0.44.0
157
+ blobfile==2.1.1
158
+ frechet-audio-distance==0.1.2
159
+ pytz==2024.2
160
+ pytorch-fid==0.3.0
161
+ optree==0.13.0
162
+ lark==1.2.2
163
+ pytest-profiling==1.8.1
164
+ beartype==0.14.1
165
+ msgpack==1.1.0
166
+ prometheus_client==0.21.0
167
+ typing_extensions==4.12.2
168
+ mutagen==1.47.0
169
+ pexpect==4.8.0
170
+ pycryptodomex==3.21.0
171
+ fonttools==4.54.1
172
+ ftfy==6.3.0
173
+ stack-data==0.2.0
174
+ orjson==3.10.7
175
+ vllm==0.6.2
176
+ watchfiles==0.24.0
177
+ iniconfig==2.0.0
178
+ idna==3.10
179
+ proglog==0.1.10
180
+ sniffio==1.3.1
181
+ pyparsing==3.1.4
182
+ h11==0.14.0
183
+ networkx==3.4.1
184
+ importlib_resources==6.5.2
185
+ xformers==0.0.27.post2
186
+ hpsv2==1.2.0
187
+ pluggy==1.5.0
188
+ gguf==0.10.0
189
+ imageio==2.35.1
190
+ pure-eval==0.2.2
191
+ importlib_metadata==8.5.0
192
+ urllib3==2.2.3
193
+ nvidia-nvjitlink-cu12==12.6.77
194
+ airportsdata==20241001
195
+ semantic-version==2.10.0
196
+ fancy-einsum==0.0.3
197
+ typeguard==4.4.1
198
+ decorator==4.4.2
199
+ decorator==5.1.1
200
+ attrs==24.2.0
201
+ Brotli==1.1.0
202
+ numpy==1.26.4
203
+ soxr==0.5.0.post1
204
+ requests==2.32.3
205
+ tinycss2==1.4.0
206
+ nltk==3.9.1
207
+ pytest-split==0.8.0
208
+ httpcore==1.0.6
209
+ webdataset==0.2.100
210
+ rpds-py==0.20.0
211
+ shellingham==1.5.4
212
+ annotated-types==0.7.0
213
+ plotly-express==0.4.1
214
+ transformers==4.44.0.dev0
215
+ pillow==10.4.0
216
+ nvidia-ml-py==12.560.30
217
+ packaging==24.1
218
+ packaging==24.2
219
+ peft==0.13.2
220
+ imageio-ffmpeg==0.5.1
221
+ outlines==0.1.3
222
+ setuptools==75.1.0
223
+ pydantic==2.9.2
224
+ zstandard==0.22.0
225
+ defusedxml==0.7.1
226
+ cloudpickle==3.1.0
227
+ torchvision==0.19.0
228
+ threadpoolctl==3.5.0
229
+ soupsieve==2.6
230
+ cfgv==3.4.0
231
+ tensorboard==2.18.0
232
+ moviepy==1.0.3
233
+ nvidia-cuda-cupti-cu12==12.1.105
234
+ msgspec==0.18.6
235
+ logger==1.4
236
+ comm==0.2.1
237
+ Markdown==3.7
238
+ huggingface-hub==0.25.2
239
+ scikit-learn==1.5.2
240
+ distlib==0.3.9
241
+ shortuuid==1.0.13
242
+ nvidia-cublas-cu12==12.1.3.1
243
+ pip==24.2
244
+ image-reward==1.5
245
+ gprof2dot==2024.6.6
246
+ click==8.1.7
247
+ lm-format-enforcer==0.10.6
248
+ joblib==1.4.2
249
+ torchaudio==2.4.0
250
+ rich==13.9.2
251
+ resampy==0.4.3
252
+ numba==0.60.0
253
+ gradio==5.0.2
254
+ tensorboard-data-server==0.7.2
255
+ automated-interpretability==0.0.6
256
+ soundfile==0.12.1
257
+ multidict==6.1.0
258
+ wandb==0.18.3
259
+ openai==1.51.2
260
+ nvidia-cudnn-cu12==9.1.0.70
261
+ boostedblob==0.15.6
262
+ python-dateutil==2.9.0.post0
263
+ frozenlist==1.4.1
264
+ jupyter_core==5.7.2
265
+ python-multipart==0.0.12
266
+ Werkzeug==3.0.4
267
+ markdown-it-py==3.0.0
268
+ parso==0.8.4
269
+ pydantic_core==2.23.4
270
+ i2==0.1.45
271
+ ipython==8.30.0
272
+ prometheus-fastapi-instrumentator==7.0.0
273
+ mistune==3.1.0
274
+ sympy==1.13.3
275
+ accelerate==1.0.1
276
+ babe==0.0.7
277
+ httpx==0.27.2
278
+ webencodings==0.5.1
279
+ ruff==0.6.9
280
+ pre_commit==4.0.1
281
+ websockets==12.0
282
+ importlib_resources==6.4.0
283
+ packaging==24.1
284
+ typing_extensions==4.12.2
285
+ wheel==0.43.0
286
+ zipp==3.19.2
287
+ jaraco.text==3.12.1
288
+ inflect==7.3.1
289
+ more-itertools==10.3.0
290
+ autocommand==2.2.2
291
+ platformdirs==4.2.2
292
+ typeguard==4.3.0
293
+ jaraco.functools==4.0.1
294
+ jaraco.context==5.3.0
295
+ tomli==2.0.1
296
+ jaraco.collections==5.1.0
297
+ importlib_metadata==8.0.0
298
+ backports.tarfile==1.2.0
wandb/offline-run-20250126_124721-63zh6b52/files/wandb-metadata.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.4.0-204-generic-x86_64-with-glibc2.31",
3
+ "python": "3.11.11",
4
+ "startedAt": "2025-01-26T12:47:21.745210Z",
5
+ "args": [
6
+ "--local_rank=0",
7
+ "--model_name_or_path",
8
+ "/data/align-anything/hantao/models/0830_4k_sft_flux",
9
+ "--train_datasets",
10
+ "/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs",
11
+ "--output_dir",
12
+ "../outputs/dpo_pickapic_40k",
13
+ "--per_device_train_batch_size",
14
+ "4",
15
+ "--per_device_eval_batch_size",
16
+ "4",
17
+ "--gradient_accumulation_steps",
18
+ "2",
19
+ "--train_template",
20
+ "Chameleon_preference",
21
+ "--train_split",
22
+ "train",
23
+ "--train_data_files",
24
+ "pickapic_40k.pt",
25
+ "--learning_rate",
26
+ "5e-7",
27
+ "--epochs",
28
+ "3",
29
+ "--lr_scheduler_type",
30
+ "cosine",
31
+ "--save_interval",
32
+ "400"
33
+ ],
34
+ "program": "-m align_anything.trainers.text_image_to_text_image.dpo",
35
+ "git": {
36
+ "remote": "https://github.com/PKU-Alignment/align-anything.git",
37
+ "commit": "6fde660afc9985323f147930eedf188a5699adc7"
38
+ },
39
+ "root": "../outputs/dpo_pickapic_40k",
40
+ "host": "lyg0194",
41
+ "username": "align-anything",
42
+ "executable": "/data/align-anything/miniconda3/envs/hantao_cham/bin/python",
43
+ "cpu_count": 64,
44
+ "cpu_count_logical": 128,
45
+ "gpu": "[NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB, NVIDIA A100-SXM4-80GB]",
46
+ "gpu_count": 8,
47
+ "disk": {
48
+ "/": {
49
+ "total": "939477946368",
50
+ "used": "783301562368"
51
+ }
52
+ },
53
+ "memory": {
54
+ "total": "1081823932416"
55
+ },
56
+ "cpu": {
57
+ "count": 64,
58
+ "countLogical": 128
59
+ },
60
+ "gpu_nvidia": [
61
+ {
62
+ "name": "NVIDIA A100-SXM4-80GB",
63
+ "memoryTotal": "85899345920",
64
+ "cudaCores": 6912,
65
+ "architecture": "Ampere"
66
+ },
67
+ {
68
+ "name": "NVIDIA A100-SXM4-80GB",
69
+ "memoryTotal": "85899345920",
70
+ "cudaCores": 6912,
71
+ "architecture": "Ampere"
72
+ },
73
+ {
74
+ "name": "NVIDIA A100-SXM4-80GB",
75
+ "memoryTotal": "85899345920",
76
+ "cudaCores": 6912,
77
+ "architecture": "Ampere"
78
+ },
79
+ {
80
+ "name": "NVIDIA A100-SXM4-80GB",
81
+ "memoryTotal": "85899345920",
82
+ "cudaCores": 6912,
83
+ "architecture": "Ampere"
84
+ },
85
+ {
86
+ "name": "NVIDIA A100-SXM4-80GB",
87
+ "memoryTotal": "85899345920",
88
+ "cudaCores": 6912,
89
+ "architecture": "Ampere"
90
+ },
91
+ {
92
+ "name": "NVIDIA A100-SXM4-80GB",
93
+ "memoryTotal": "85899345920",
94
+ "cudaCores": 6912,
95
+ "architecture": "Ampere"
96
+ },
97
+ {
98
+ "name": "NVIDIA A100-SXM4-80GB",
99
+ "memoryTotal": "85899345920",
100
+ "cudaCores": 6912,
101
+ "architecture": "Ampere"
102
+ },
103
+ {
104
+ "name": "NVIDIA A100-SXM4-80GB",
105
+ "memoryTotal": "85899345920",
106
+ "cudaCores": 6912,
107
+ "architecture": "Ampere"
108
+ }
109
+ ],
110
+ "cudaVersion": "12.6"
111
+ }
wandb/offline-run-20250126_124721-63zh6b52/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-01-26T12:47:21.51662411Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpvwlkiv2l/port-849127.txt","pid":849127,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-01-26T12:47:21.516663849Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-01-26T12:47:21.517359984Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":849127}
4
+ {"time":"2025-01-26T12:47:21.517346119Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37123,"Zone":""}}
5
+ {"time":"2025-01-26T12:47:21.707954103Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:45908"}
6
+ {"time":"2025-01-26T12:47:21.7462694Z","level":"INFO","msg":"handleInformInit: received","streamId":"63zh6b52","id":"127.0.0.1:45908"}
7
+ {"time":"2025-01-26T12:47:21.818245429Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"63zh6b52","id":"127.0.0.1:45908"}
8
+ {"time":"2025-01-26T17:45:27.844462641Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"63zh6b52","id":"127.0.0.1:45908"}
9
+ {"time":"2025-01-26T17:45:27.84762455Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"63zh6b52","id":"127.0.0.1:45908"}
10
+ {"time":"2025-01-26T17:45:28.170588957Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:45908"}
11
+ {"time":"2025-01-26T17:45:28.170611834Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:45908"}
12
+ {"time":"2025-01-26T17:45:28.170618277Z","level":"INFO","msg":"server is shutting down"}
13
+ {"time":"2025-01-26T17:45:28.170635524Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:45908"}
14
+ {"time":"2025-01-26T17:45:28.170765916Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:45908"}
15
+ {"time":"2025-01-26T17:45:28.170855331Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:45908"}
16
+ {"time":"2025-01-26T17:45:28.170876583Z","level":"INFO","msg":"server is closed"}
wandb/offline-run-20250126_124721-63zh6b52/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-01-26T12:47:21.807628078Z","level":"INFO","msg":"using version","core version":"0.18.3"}
2
+ {"time":"2025-01-26T12:47:21.807642964Z","level":"INFO","msg":"created symlink","path":"../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_124721-63zh6b52/logs/debug-core.log"}
3
+ {"time":"2025-01-26T12:47:21.818216674Z","level":"INFO","msg":"created new stream","id":"63zh6b52"}
4
+ {"time":"2025-01-26T12:47:21.818241372Z","level":"INFO","msg":"stream: started","id":"63zh6b52"}
5
+ {"time":"2025-01-26T12:47:21.818271255Z","level":"INFO","msg":"sender: started","stream_id":{"value":"63zh6b52"}}
6
+ {"time":"2025-01-26T12:47:21.818280379Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"63zh6b52"}}
7
+ {"time":"2025-01-26T12:47:21.818334818Z","level":"INFO","msg":"handler: started","stream_id":{"value":"63zh6b52"}}
8
+ {"time":"2025-01-26T12:47:21.834067258Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
9
+ {"time":"2025-01-26T12:47:21.837691453Z","level":"INFO","msg":"Starting system monitor"}
10
+ {"time":"2025-01-26T17:45:26.817146242Z","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-01-26T17:45:26.831287884Z","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-01-26T17:45:27.844713987Z","level":"INFO","msg":"stream: closing","id":"63zh6b52"}
13
+ {"time":"2025-01-26T17:45:27.84474064Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"63zh6b52"}}
14
+ {"time":"2025-01-26T17:45:27.844756097Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"63zh6b52"}}
15
+ {"time":"2025-01-26T17:45:27.844784263Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"63zh6b52"}}
16
+ {"time":"2025-01-26T17:45:27.847613043Z","level":"INFO","msg":"stream: closed","id":"63zh6b52"}
wandb/offline-run-20250126_124721-63zh6b52/logs/debug.log ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-01-26 12:47:21,735 INFO MainThread:849127 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
2
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Configure stats pid to 849127
3
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Loading settings from /home/align-anything/.config/wandb/settings
4
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Loading settings from /data/align-anything/hantao/align-anything/scripts/wandb/settings
5
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***', 'mode': 'offline'}
6
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'offline', '_disable_service': None}
7
+ 2025-01-26 12:47:21,736 WARNING MainThread:849127 [wandb_setup.py:_flush():79] Could not find program at -m align_anything.trainers.text_image_to_text_image.dpo
8
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m align_anything.trainers.text_image_to_text_image.dpo'}
9
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:_log_setup():532] Logging user logs to ../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_124721-63zh6b52/logs/debug.log
10
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:_log_setup():533] Logging internal logs to ../outputs/dpo_pickapic_40k/wandb/offline-run-20250126_124721-63zh6b52/logs/debug-internal.log
11
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:init():617] calling init triggers
12
+ 2025-01-26 12:47:21,736 INFO MainThread:849127 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
13
+ config: {'train_cfgs': {'ds_cfgs': 'ds_z3_config.json', 'epochs': 3.0, 'seed': 42, 'per_device_train_batch_size': 4.0, 'per_device_eval_batch_size': 4.0, 'gradient_accumulation_steps': 2.0, 'gradient_checkpointing': True, 'learning_rate': 5e-07, 'lr_scheduler_type': 'cosine', 'lr_warmup_ratio': 0.03, 'weight_decay': 0.01, 'adam_betas': [0.9, 0.95], 'bf16': True, 'fp16': False, 'eval_strategy': 'epoch', 'eval_interval': 10, 'regularization': 0.001, 'scale_coeff': 0.1, 'freeze_mm_proj': True, 'freeze_vision_tower': False, 'freeze_language_model': True}, 'data_cfgs': {'train_datasets': '/data/align-anything/hantao/align-anything/projects/text_image_to_text_image/outputs', 'train_template': 'Chameleon_preference', 'train_size': None, 'train_split': 'train', 'train_subset': None, 'train_data_files': 'pickapic_40k.pt', 'train_optional_args': [], 'eval_datasets': None, 'eval_template': None, 'eval_size': None, 'eval_split': None, 'eval_subset': None, 'eval_data_files': None, 'eval_optional_args': []}, 'logger_cfgs': {'log_type': 'wandb', 'log_project': 'align-anything', 'log_run_name': 'dpo', 'output_dir': '../outputs/dpo_pickapic_40k', 'cache_dir': None, 'save_interval': 400.0}, 'model_cfgs': {'model_name_or_path': '/data/align-anything/hantao/models/0830_4k_sft_flux', 'trust_remote_code': True, 'model_max_length': 4096}, 'special_tokens': None}
14
+ 2025-01-26 12:47:21,737 INFO MainThread:849127 [wandb_init.py:init():667] starting backend
15
+ 2025-01-26 12:47:21,737 INFO MainThread:849127 [wandb_init.py:init():671] sending inform_init request
16
+ 2025-01-26 12:47:21,744 INFO MainThread:849127 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-01-26 12:47:21,745 INFO MainThread:849127 [wandb_init.py:init():684] backend started and connected
18
+ 2025-01-26 12:47:21,747 INFO MainThread:849127 [wandb_init.py:init():779] updated telemetry
19
+ 2025-01-26 12:47:21,758 INFO MainThread:849127 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
20
+ 2025-01-26 12:47:21,829 INFO MainThread:849127 [wandb_init.py:init():863] starting run threads in backend
21
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_console_start():2465] atexit reg
22
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_redirect():2313] redirect: wrap_raw
23
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_redirect():2378] Wrapping output streams.
24
+ 2025-01-26 12:47:22,199 INFO MainThread:849127 [wandb_run.py:_redirect():2403] Redirects installed.
25
+ 2025-01-26 12:47:22,202 INFO MainThread:849127 [wandb_init.py:init():907] run started, returning control to user process
26
+ 2025-01-26 17:45:26,813 INFO MainThread:849127 [wandb_run.py:_finish():2164] finishing run align-anything/63zh6b52
27
+ 2025-01-26 17:45:26,815 INFO MainThread:849127 [wandb_run.py:_atexit_cleanup():2428] got exitcode: 0
28
+ 2025-01-26 17:45:26,816 INFO MainThread:849127 [wandb_run.py:_restore():2410] restore
29
+ 2025-01-26 17:45:26,816 INFO MainThread:849127 [wandb_run.py:_restore():2416] restore done
30
+ 2025-01-26 17:45:27,838 INFO MainThread:849127 [wandb_run.py:_footer_history_summary_info():4049] rendering history
31
+ 2025-01-26 17:45:27,841 INFO MainThread:849127 [wandb_run.py:_footer_history_summary_info():4081] rendering summary
wandb/offline-run-20250126_124721-63zh6b52/run-63zh6b52.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c8906c1bcf8f2447b0a973ae3a8cbce64961cc652d034560a502552c7b7dafd
3
+ size 20321740