eltorio
committed on
Commit
·
1392114
1
Parent(s):
2b6a5b3
refactor docker image
Browse files- Dockerfile +27 -10
- learn.py +2 -1
- preload.py +1 -0
- start.sh +19 -3
Dockerfile
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
-
# build with: docker build . --tag sctg/roco-idefics3:0.0.
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
COPY --chmod=777 start.sh /start.sh
|
| 9 |
COPY learn.py /learn.py
|
| 10 |
COPY preload.py /preload.py
|
|
@@ -12,6 +14,21 @@ COPY preload.py /preload.py
|
|
| 12 |
USER root
|
| 13 |
RUN chown -R 42420:42420 /workspace
|
| 14 |
USER 42420
|
| 15 |
-
RUN
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# build with: docker build . --tag sctg/roco-idefics3:0.0.4 --tag sctg/roco-idefics3:latest --push
|
| 2 |
+
# run with
|
| 3 |
+
# docker run --gpus all --user=42420:42420 -e HF_TOKEN=hf_TOKEN -it sctg/roco-idefics3:0.0.4 bash -i /start.sh hf_TOKEN
|
| 4 |
+
FROM nvidia/cuda:11.6.1-devel-ubuntu20.04
|
| 5 |
+
# FROM nvidia/cuda:11.0.3-devel-ubuntu20.04
|
| 6 |
+
# RUN mkdir -p /workspace
|
| 7 |
+
RUN /usr/sbin/addgroup --gid 42420 ovh
|
| 8 |
+
RUN /usr/sbin/useradd -u 42420 --gid 42420 -m -d /workspace -s /bin/bash ovh
|
| 9 |
+
# Install the OS-level tools the image needs (curl for downloads, git and git-lfs for
# repository clones, screen). apt-get is used for both steps because `apt` does not
# offer a stable interface for scripts. ca-certificates is listed explicitly since
# --no-install-recommends would otherwise leave HTTPS downloads without a CA bundle.
# The package lists are removed in the same layer so they do not stay in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        git-lfs \
        screen \
    && rm -rf /var/lib/apt/lists/*
|
| 10 |
# start.sh is the script launched at `docker run` time. Hand it to the runtime
# UID/GID 42420 and drop world-write (777 -> 755): the runtime user keeps full
# access, while other users can no longer alter the script that gets executed.
COPY --chown=42420:42420 --chmod=755 start.sh /start.sh
|
| 11 |
COPY learn.py /learn.py
|
| 12 |
COPY preload.py /preload.py
|
|
|
|
| 14 |
USER root
|
| 15 |
RUN chown -R 42420:42420 /workspace
|
| 16 |
USER 42420
|
| 17 |
+
# Download the Miniconda installer into /workspace.
# -f makes curl exit non-zero on an HTTP error instead of saving the server's error
# page as miniconda.sh (which would only fail later, when the installer is run);
# -sS hides the progress meter but still prints errors; -o writes the file directly.
RUN curl -fsSL https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o /workspace/miniconda.sh
|
| 18 |
+
RUN /bin/bash /workspace/miniconda.sh -b -p /workspace/.miniconda3
|
| 19 |
+
RUN . /workspace/.miniconda3/bin/activate && conda init --all
|
| 20 |
+
RUN . /workspace/.miniconda3/bin/activate \
|
| 21 |
+
&& pip install -U "safetensors>=0.4.5" \
|
| 22 |
+
&& pip install -U "https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-0.44.2.dev0-py3-none-manylinux_2_24_x86_64.whl" \
|
| 23 |
+
&& pip install -U git+https://github.com/huggingface/transformers.git\
|
| 24 |
+
&& pip install huggingface_hub[cli] accelerate datasets peft\
|
| 25 |
+
&& pip install -U Pillow
|
| 26 |
+
RUN . /workspace/.miniconda3/bin/activate && conda install -y jupyter
|
| 27 |
+
# Mandatory to run the jobs in rootless mode
|
| 28 |
+
# USER root
|
| 29 |
+
# RUN chown -R 42420:42420 /workspace
|
| 30 |
+
USER 42420
|
| 31 |
+
WORKDIR /workspace
|
| 32 |
+
# RUN export HOME=/workspace && cd /workspace && . /workspace/.miniconda3/bin/activate \
|
| 33 |
+
# && mkdir -p /workspace/data \
|
| 34 |
+
# && python /preload.py
|
learn.py
CHANGED
|
@@ -148,4 +148,5 @@ trainer = Trainer(
|
|
| 148 |
train_dataset = train_dataset,
|
| 149 |
)
|
| 150 |
|
| 151 |
-
trainer.train()
|
|
|
|
|
|
| 148 |
train_dataset = train_dataset,
|
| 149 |
)
|
| 150 |
|
| 151 |
+
trainer.train()
|
| 152 |
+
|
preload.py
CHANGED
|
@@ -3,4 +3,5 @@
|
|
| 3 |
# License: Apache License 2.0
|
| 4 |
from datasets import load_dataset
|
| 5 |
dataset_id = "eltorio/ROCO-radiology"
|
|
|
|
| 6 |
train_dataset = load_dataset(dataset_id, split="train", cache_dir=cache_dir)
|
|
|
|
| 3 |
# License: Apache License 2.0
|
| 4 |
from datasets import load_dataset
|
| 5 |
dataset_id = "eltorio/ROCO-radiology"
|
| 6 |
+
cache_dir = "/workspace/data"
|
| 7 |
train_dataset = load_dataset(dataset_id, split="train", cache_dir=cache_dir)
|
start.sh
CHANGED
|
@@ -1,10 +1,26 @@
|
|
| 1 |
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
cd /workspace
|
| 3 |
-
git config --global credential.helper store
|
| 4 |
git lfs install
|
| 5 |
export HF_TOKEN=$1
|
|
|
|
| 6 |
echo "HF_TOKEN: $HF_TOKEN"
|
| 7 |
-
|
| 8 |
-
git clone https://huggingface.co/eltorio/IDEFICS3_ROCO
|
| 9 |
. /workspace/.miniconda3/bin/activate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
python /learn.py
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
+
JOB_URL_SCHEME=${JOB_URL_SCHEME:-"http://"}
|
| 3 |
+
JOB_ID=${JOB_ID:-'localhost'}
|
| 4 |
+
JOB_HOST=${JOB_HOST:-'local'}
|
| 5 |
+
export HOME=/workspace
|
| 6 |
cd /workspace
|
|
|
|
| 7 |
git lfs install
|
| 8 |
# Take the Hugging Face token from the first script argument. When no argument is
# given, keep an HF_TOKEN already present in the environment (e.g. from
# `docker run -e HF_TOKEN=...`) instead of overwriting it with an empty string.
# Quoted so the value is never word-split or glob-expanded.
export HF_TOKEN="${1:-${HF_TOKEN:-}}"
|
| 9 |
+
# Drop the positional parameters now that the token is stored in HF_TOKEN.
# `unset $1` expanded to `unset <token value>`, i.e. it tried to remove a variable
# named after the token and left $1 itself untouched, so every file sourced later in
# this script still received the token as its first argument.
# NOTE(review): presumably the conda `activate` script forwards "$@" to
# `conda activate`, which would then look for an environment named after the token —
# confirm against the installed Miniconda version.
set --
|
| 10 |
# Report only whether a token is available; echoing the value itself would write
# the secret to the container's stdout and therefore to any captured job logs.
if [ -n "$HF_TOKEN" ]; then
  echo "HF_TOKEN: provided"
else
  echo "HF_TOKEN: missing" >&2
fi
|
| 11 |
+
. /workspace/.bashrc
|
|
|
|
| 12 |
. /workspace/.miniconda3/bin/activate
|
| 13 |
+
git clone https://huggingface.co/eltorio/IDEFICS3_ROCO
|
| 14 |
+
git config --global credential.helper store
|
| 15 |
+
|
| 16 |
+
huggingface-cli login --add-to-git-credential --token $HF_TOKEN
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
jupyter lab --ip=0.0.0.0 --port=8080 --no-browser --allow-root \
|
| 20 |
+
--notebook-dir=/workspace \
|
| 21 |
+
--LabApp.token='' \
|
| 22 |
+
--LabApp.custom_display_url=${JOB_URL_SCHEME}${JOB_ID}-8080.${JOB_HOST} \
|
| 23 |
+
--LabApp.allow_remote_access=True \
|
| 24 |
+
--LabApp.allow_origin='*' \
|
| 25 |
+
--LabApp.disable_check_xsrf=True &
|
| 26 |
python /learn.py
|