Initial commit
- Dockerfile +79 -0
- app.py +21 -0
- attempts.txt +0 -0
- change_hardware.py +48 -0
- entrypoint.sh +36 -0
- failed_attempts.txt +0 -0
- hardware.txt +1 -0
- models.txt +1 -0
- pause_space.py +7 -0
- requirements.txt +10 -0
- tasks.txt +1 -0
Dockerfile
ADDED
@@ -0,0 +1,79 @@
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

ARG PYTORCH_VERSION=2.4.0
ARG PYTHON_VERSION=3.9
ARG CUDA_VERSION=12.1
ARG MAMBA_VERSION=24.3.0-0
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# Automatically set by buildx
ARG TARGETPLATFORM

#ENV HOME=/home/user \
#    PATH=/home/user/.local/bin:/opt/conda/bin:$PATH

ENV PATH=/opt/conda/bin:$PATH

RUN mkdir -p .cache
#RUN mkdir -p data
# I'm not sure how to allow later python files used here to write to .cache without making it world-writable.
RUN chmod 777 -R .cache
#RUN chmod 777 -R data

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    ccache \
    curl \
    python3 \
    python3-pip \
    git && \
    rm -rf /var/lib/apt/lists/*

# Install conda
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
        "linux/arm64") MAMBA_ARCH=aarch64 ;; \
        *) MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -v -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

# Install pytorch
# On arm64 we exit with an error code
RUN case ${TARGETPLATFORM} in \
        "linux/arm64") exit 1 ;; \
        *) /opt/conda/bin/conda update -y conda && \
           /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=${PYTORCH_VERSION}" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

COPY ./requirements.txt requirements.txt
RUN pip install -r requirements.txt

RUN git clone -b energy_star_dev https://github.com/huggingface/optimum-benchmark.git /optimum-benchmark && \
    cd /optimum-benchmark && pip install -e .

COPY ./*.txt /
COPY ./.cache /.cache
COPY ./entrypoint.sh /entrypoint.sh
COPY ./pause_space.py /pause_space.py
COPY ./parse_requests.py /parse_requests.py
COPY ./process_runs.py /process_runs.py
COPY ./app/runs /app/runs

RUN chmod 777 *.py
RUN chmod 777 -R /app/runs
RUN chmod 777 -R /.cache
RUN chmod 777 /attempts.txt
RUN chmod 777 /failed_attempts.txt
RUN chmod +x /entrypoint.sh

# Expose the secret token at buildtime and use its value as git remote URL
RUN --mount=type=secret,id=BULK_ENERGY_TOKEN,mode=0444,required=true \
    git init && \
    git remote add origin $(cat /run/secrets/BULK_ENERGY_TOKEN)

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
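On the .cache comment above: one alternative to chmod 777 — a sketch, not what this image does — is to point the Hugging Face cache at a directory the runtime user already owns, via the HF_HOME environment variable that huggingface_hub honors. The path below is an assumed example:

import os

# Hypothetical alternative to `RUN chmod 777 -R .cache`: redirect the HF cache
# to a user-owned directory before anything imports huggingface_hub.
os.environ["HF_HOME"] = "/home/user/.cache/huggingface"  # assumed writable path
os.makedirs(os.environ["HF_HOME"], exist_ok=True)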
app.py
ADDED
@@ -0,0 +1,21 @@
import os

from fastapi import FastAPI
from huggingface_hub import HfApi

TOKEN = os.environ.get("BULK_ENERGY_TOKEN")
API = HfApi(token=TOKEN)

REPO_ID = "AIEnergyScore/BulkCalcSpace"
app = FastAPI()


@app.get("/")
def start_train():
    # get_space_variables returns a dict of SpaceVariable objects keyed by name,
    # so compare against the variable's .value, not the object itself.
    space_variables = API.get_space_variables(repo_id=REPO_ID)
    status = space_variables['STATUS'].value if 'STATUS' in space_variables else None
    if status is not None and status != 'COMPUTING':
        print("Beginning processing.")
        API.add_space_variable(repo_id=REPO_ID, key='STATUS', value='COMPUTING')
        os.system(f"./entrypoint.sh {REPO_ID}")
        API.add_space_variable(repo_id=REPO_ID, key='STATUS', value='NOT_COMPUTING')
        print("Pausing space")
        API.pause_space(REPO_ID)
    return {"Status": status}
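Because the container serves this app with uvicorn on port 7860 and start_train() pauses its own Space once a run finishes, something outside the Space has to wake it up and hit "/" to kick off the next run. A minimal sketch of such a trigger, assuming the default <owner>-<name>.hf.space URL scheme and an HF_TOKEN with write access (this script is not part of the commit):

import os
import time

import requests
from huggingface_hub import HfApi

api = HfApi(token=os.environ.get("HF_TOKEN"))
repo_id = "AIEnergyScore/BulkCalcSpace"

# Wake the Space; it paused itself at the end of the previous run.
api.restart_space(repo_id)
time.sleep(60)  # crude wait for the container to come up; adjust as needed

# start_train() runs the whole benchmark synchronously, so this request
# can take a very long time; no timeout is set here for that reason.
resp = requests.get("https://aienergyscore-bulkcalcspace.hf.space/")
print(resp.json())  # e.g. {"Status": "NOT_COMPUTING"}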
attempts.txt
ADDED
File without changes
change_hardware.py
ADDED
@@ -0,0 +1,48 @@
import os
import argparse
from huggingface_hub import HfApi

REPO_ID = "AIEnergyScore/benchmark-hugs-models"
TOKEN = os.environ.get("HF_TOKEN")
API = HfApi(token=TOKEN)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--hardware",
        default="a10g-large",
        type=str,
        required=False,
        help="Hardware to use for benchmarking.",
    )
    args = parser.parse_args()
    return args


# Based on huggingface_hub _space_api.py
# CPU_BASIC = "cpu-basic"
# CPU_UPGRADE = "cpu-upgrade"
# T4_SMALL = "t4-small"
# T4_MEDIUM = "t4-medium"
# L4X1 = "l4x1"
# L4X4 = "l4x4"
# ZERO_A10G = "zero-a10g"
# A10G_SMALL = "a10g-small"
# A10G_LARGE = "a10g-large"
# A10G_LARGEX2 = "a10g-largex2"
# A10G_LARGEX4 = "a10g-largex4"
# A100_LARGE = "a100-large"
# V5E_1X1 = "v5e-1x1"
# V5E_2X2 = "v5e-2x2"
# V5E_2X4 = "v5e-2x4"

#curr_runtime = API.get_space_runtime(repo_id=REPO_ID)
#print(curr_runtime)
#requested_hardware = curr_runtime.requested_hardware
#print(requested_hardware)
#hardware_idx = hardware_options.index(requested_hardware)
#next_hardware = hardware_options[hardware_idx + 1]

if __name__ == '__main__':
    args = parse_args()
    curr_runtime = API.get_space_runtime(repo_id=REPO_ID)
    curr_hardware = curr_runtime.requested_hardware
    if curr_hardware != args.hardware:
        API.request_space_hardware(repo_id=REPO_ID, hardware=args.hardware)
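request_space_hardware() only files the request; the target Space then rebuilds on the new flavor, so a caller may proceed before the switch has taken effect. If it needs to block until the new hardware is live, one option is to poll get_space_runtime() — a sketch with a hypothetical helper (wait_for_hardware is not part of this commit, and assumes stage and requested_hardware compare cleanly against their string values):

import time

from huggingface_hub import HfApi

def wait_for_hardware(api: HfApi, repo_id: str, hardware: str, poll_seconds: int = 30):
    # Block until the Space reports it is RUNNING on the requested hardware.
    while True:
        runtime = api.get_space_runtime(repo_id=repo_id)
        if runtime.stage == "RUNNING" and runtime.requested_hardware == hardware:
            return runtime
        time.sleep(poll_seconds)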
entrypoint.sh
ADDED
@@ -0,0 +1,36 @@
#!/bin/bash

config_dir="/optimum-benchmark/examples/energy_star/"

# This script is meant to be called from a python script
# that provides the REPO_ID as the first argument.
REPO_ID=$1
echo "Attempting to run."
cat /tasks.txt | while read -r task; do # Example alternative: for task in "text_generation" "question_answering"; do
    cat /hardware.txt | while read -r hardware; do # Example alternative: for hardware in "a100-large" "l4x1" "l40sx1"; do
        echo "Attempting to benchmark ${hardware}"
        python /change_hardware.py --hardware "${hardware}"
        # For each model
        cat /models.txt | while read -r model; do # Example alternative: for model in "NousResearch/Hermes-3-Llama-3.1-8B" "Qwen/Qwen2.5-7B-Instruct"; do
            # Read the name of the model and the experiment.
            echo "Benchmarking Model: ${model}, Task: ${task}, Hardware: ${hardware}"

            # Initialize the directory for output.
            now=$(date +%Y-%m-%d-%H-%M-%S)
            run_dir="/app/runs/${task}/${model}/${now}"
            mkdir -p "$run_dir"
            # Save the task/model run directory to a text file, for tracking purposes.
            echo "${run_dir}" >> /attempts.txt

            { # try
                # Let the benchmarking begin!
                optimum-benchmark --config-name "${task}" --config-dir="${config_dir}" backend.model="${model}" backend.processor="${model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log"
            } || { # catch
                echo "${run_dir}" >> /failed_attempts.txt
            }
        done
    done
done

echo "Finished"
# Update the requests dataset and results dataset:
#python /process_runs.py
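The two tracking files form a simple ledger: every task/hardware/model combination is appended to /attempts.txt before its benchmark starts, and only combinations whose optimum-benchmark invocation exits non-zero land in /failed_attempts.txt, so the set difference is the successful runs. A sketch of reading that ledger back (a hypothetical helper, not part of this commit):

def summarize(attempts_path="/attempts.txt", failed_path="/failed_attempts.txt"):
    # Each line in both files is a run directory written by entrypoint.sh.
    with open(attempts_path) as f:
        attempted = {line.strip() for line in f if line.strip()}
    with open(failed_path) as f:
        failed = {line.strip() for line in f if line.strip()}
    succeeded = attempted - failed
    print(f"{len(succeeded)}/{len(attempted)} runs succeeded")
    for run_dir in sorted(failed):
        # stderr of every run was redirected to <run_dir>/error.log
        print(f"FAILED: {run_dir} (see {run_dir}/error.log)")

if __name__ == "__main__":
    summarize()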
failed_attempts.txt
ADDED
File without changes
hardware.txt
ADDED
@@ -0,0 +1 @@
a100-large
models.txt
ADDED
@@ -0,0 +1 @@
NousResearch/Hermes-3-Llama-3.1-8B
pause_space.py
ADDED
@@ -0,0 +1,7 @@
from huggingface_hub import HfApi
import os

#REPO_ID =
#API.add_space_variable(repo_id=REPO_ID, key='STATUS', value='COMPUTING')
TOKEN = os.environ.get("BULK_COMPUTE_SPACE")
API = HfApi(token=TOKEN)
API.pause_space("AIEnergyScore/launch-computation-example")
requirements.txt
ADDED
@@ -0,0 +1,10 @@
accelerate==0.33.0
codecarbon==2.5.1
datasets==2.20.0
diffusers==0.30.0
huggingface-hub==0.24.5
librosa==0.10.1
omegaconf==2.3.0
# optimum-benchmark @ git+https://github.com/huggingface/optimum-benchmark@energy_star_dev
torch==2.4.0
transformers==4.44.0
tasks.txt
ADDED
@@ -0,0 +1 @@
text_generation