diff --git a/Dockerfile b/Dockerfile
index 16f0ab9a..9e48c612 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,13 +1,14 @@
-FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.02.09 AS base-deps
+FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.02.27 AS base-deps
+
+SHELL ["/bin/bash", "-c"]
 
 # Install system packages for HTSLib + SAMtools + curl and jq for workflows
 # OpenJDK is for running WOMtool/Cromwell
+# Then, bootstrap dependencies for setting up and running the Python application
 RUN apt-get update -y && \
     apt-get install -y samtools tabix bcftools curl jq openjdk-17-jre && \
-    rm -rf /var/lib/apt/lists/*
-
-# Boostrap dependencies for setting up and running the Python application
-RUN pip install --no-cache-dir poetry==1.3.2 gunicorn==20.1.0 "pysam>=0.20.0,<0.21.0"
+    rm -rf /var/lib/apt/lists/* && \
+    pip install --no-cache-dir poetry==1.3.2 gunicorn==20.1.0 "pysam>=0.20.0,<0.21.0"
 
 WORKDIR /
 ENV CROMWELL_VERSION=84
@@ -33,11 +34,13 @@ RUN poetry install --without dev --no-root
 # Manually copy only what's relevant
 # (Don't use .dockerignore, which allows us to have development containers too)
 COPY bento_wes bento_wes
-COPY entrypoint.bash entrypoint.bash
-COPY LICENSE LICENSE
-COPY README.md README.md
+COPY entrypoint.bash .
+COPY run.bash .
+COPY LICENSE .
+COPY README.md .
 
 # Install the module itself, locally (similar to `pip install -e .`)
 RUN poetry install --without dev
 
-CMD [ "bash", "./entrypoint.bash" ]
+ENTRYPOINT [ "bash", "./entrypoint.bash" ]
+CMD [ "bash", "./run.bash" ]
diff --git a/dev.Dockerfile b/dev.Dockerfile
index 09920b29..859a986d 100644
--- a/dev.Dockerfile
+++ b/dev.Dockerfile
@@ -1,13 +1,15 @@
-FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.02.09 AS base-deps
+FROM ghcr.io/bento-platform/bento_base_image:python-debian-2023.02.27 AS base-deps
+
+SHELL ["/bin/bash", "-c"]
 
 # Install system packages for HTSLib + SAMtools + curl and jq for workflows
 # OpenJDK is for running WOMtool/Cromwell
+# Then, bootstrap dependencies for setting up and running the Python application
 RUN apt-get update -y && \
     apt-get install -y samtools tabix bcftools curl jq openjdk-17-jre && \
-    rm -rf /var/lib/apt/lists/*
-
-# Boostrap dependencies for setting up and running the Python application
-RUN pip install --no-cache-dir poetry==1.3.2 gunicorn==20.1.0 "pysam>=0.20.0,<0.21.0"
+    rm -rf /var/lib/apt/lists/* && \
+    source /env/bin/activate && \
+    pip install --no-cache-dir gunicorn==20.1.0 "pysam>=0.20.0,<0.21.0"
 
 WORKDIR /
 ENV CROMWELL_VERSION=84
@@ -28,9 +30,11 @@ COPY poetry.lock .
 # Install production + development dependencies
 # Without --no-root, we get errors related to the code not being copied in yet.
 # But we don't want the code here, otherwise Docker cache doesn't work well.
-RUN poetry install --no-root
+RUN source /env/bin/activate && poetry install --no-root
 
-# Copy in the entrypoint so we have somewhere to start
-COPY entrypoint.dev.bash .
+# Copy in the entrypoint & run script so we have somewhere to start
+COPY entrypoint.bash .
+COPY run.dev.bash .
 
-CMD [ "bash", "./entrypoint.dev.bash" ]
+ENTRYPOINT [ "bash", "./entrypoint.bash" ]
+CMD [ "bash", "./run.dev.bash" ]
diff --git a/entrypoint.bash b/entrypoint.bash
index 060accca..5d8e5f10 100644
--- a/entrypoint.bash
+++ b/entrypoint.bash
@@ -1,38 +1,16 @@
 #!/bin/bash
 
-export FLASK_DEBUG=false
-if [ -z "${INTERNAL_PORT}" ]; then
-  # Set default internal port to 5000
-  export INTERNAL_PORT=5000
-fi
+cd /wes || exit
 
-# Clean up after any crashed previous container runs
-job_store_path="${SERVICE_TEMP:-tmp}/toil_job_store"
-if [ -d "${job_store_path}" ]; then
-  echo "[bento_wes] [entrypoint] Cleaning Toil job store"
-  toil clean "file:${SERVICE_TEMP:-tmp}/toil_job_store"
-fi
+# Create bento_user + home
+source /create_service_user.bash
 
-# Start Celery worker with log level dependent on BENTO_DEBUG
-echo "[bento_wes] [entrypoint] Starting celery worker"
-celery_log_level="INFO"
-if [[
-  "${BENTO_DEBUG}" == "true" ||
-  "${BENTO_DEBUG}" == "True" ||
-  "${BENTO_DEBUG}" == "1" ||
-  "${CHORD_DEBUG}" == "true" ||
-  "${CHORD_DEBUG}" == "True" ||
-  "${CHORD_DEBUG}" == "1"
-]]; then
-  celery_log_level="DEBUG"
+# Fix permissions on /wes
+chown -R bento_user:bento_user /wes
+chmod -R o-rwx /wes/tmp  # Remove all access from others for /wes/tmp
+if [[ -d /env ]]; then
+  chown -R bento_user:bento_user /env
 fi
-celery --app bento_wes.app worker --loglevel="${celery_log_level}" &
 
-# Start API server
-echo "[bento_wes] [entrypoint] Starting gunicorn"
-# using 1 worker, multiple threads
-# see https://stackoverflow.com/questions/38425620/gunicorn-workers-and-threads
-gunicorn bento_wes.app:application \
-  --workers 1 \
-  --threads "$(( 2 * $(nproc --all) + 1))" \
-  --bind "0.0.0.0:${INTERNAL_PORT}"
+# Drop into bento_user from root and execute the CMD specified for the image
+exec gosu bento_user "$@"
diff --git a/pyproject.toml b/pyproject.toml
index 647bc738..cbc4fed2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "bento_wes"
-version = "0.8.3"
+version = "0.8.4"
 description = "Workflow execution service for the Bento platform."
 authors = [
     "David Lougheed ",
diff --git a/run.bash b/run.bash
new file mode 100644
index 00000000..060accca
--- /dev/null
+++ b/run.bash
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+export FLASK_DEBUG=false
+if [ -z "${INTERNAL_PORT}" ]; then
+  # Set default internal port to 5000
+  export INTERNAL_PORT=5000
+fi
+
+# Clean up after any crashed previous container runs
+job_store_path="${SERVICE_TEMP:-tmp}/toil_job_store"
+if [ -d "${job_store_path}" ]; then
+  echo "[bento_wes] [entrypoint] Cleaning Toil job store"
+  toil clean "file:${SERVICE_TEMP:-tmp}/toil_job_store"
+fi
+
+# Start Celery worker with log level dependent on BENTO_DEBUG
+echo "[bento_wes] [entrypoint] Starting celery worker"
+celery_log_level="INFO"
+if [[
+  "${BENTO_DEBUG}" == "true" ||
+  "${BENTO_DEBUG}" == "True" ||
+  "${BENTO_DEBUG}" == "1" ||
+  "${CHORD_DEBUG}" == "true" ||
+  "${CHORD_DEBUG}" == "True" ||
+  "${CHORD_DEBUG}" == "1"
+]]; then
+  celery_log_level="DEBUG"
+fi
+celery --app bento_wes.app worker --loglevel="${celery_log_level}" &
+
+# Start API server
+echo "[bento_wes] [entrypoint] Starting gunicorn"
+# using 1 worker, multiple threads
+# see https://stackoverflow.com/questions/38425620/gunicorn-workers-and-threads
+gunicorn bento_wes.app:application \
+  --workers 1 \
+  --threads "$(( 2 * $(nproc --all) + 1))" \
+  --bind "0.0.0.0:${INTERNAL_PORT}"
diff --git a/entrypoint.dev.bash b/run.dev.bash
similarity index 82%
rename from entrypoint.dev.bash
rename to run.dev.bash
index 377ab2ac..99a4ed73 100644
--- a/entrypoint.dev.bash
+++ b/run.dev.bash
@@ -3,6 +3,12 @@
 # Set .gitconfig for development
 /set_gitconfig.bash
 
+# Source the development virtual environment
+source /env/bin/activate
+
+# Update dependencies and install module locally (similar to pip install -e: "editable mode")
+poetry install
+
 export FLASK_APP="bento_wes.app:application"
 
 if [ -z "${INTERNAL_PORT}" ]; then
@@ -10,8 +16,8 @@ if [ -z "${INTERNAL_PORT}" ]; then
   export INTERNAL_PORT=5000
 fi
 
-# Install any dependency changes if needed
-python -m poetry install
+# Create temporary directory if needed
+mkdir -p /wes/tmp
 
 # Clean up after any crashed previous container runs
 job_store_path="${SERVICE_TEMP:-tmp}/toil_job_store"