Skip to content

Commit

Permalink
docker: add some tweaks from Ken
Browse files Browse the repository at this point in the history
  • Loading branch information
mloubout committed Jul 27, 2023
1 parent 44ee680 commit 34cfdf4
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 60 deletions.
86 changes: 44 additions & 42 deletions .github/workflows/docker-bases.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,47 +162,6 @@ jobs:
build-args: 'arch=nvc-host'
tags: 'devitocodes/bases:cpu-nvc'

#######################################################
################### Nvidia clang ######################
#######################################################
deploy-nvidia-clang-base:
name: "nvidia-clang-base"
runs-on: ["self-hosted", "nvidiagpu"]
env:
DOCKER_BUILDKIT: "1"

steps:
- name: Checkout devito
uses: actions/checkout@v3

- name: Check event name
run: echo ${{ github.event_name }}

- name: Set up QEMU
uses: docker/setup-qemu-action@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Login to DockerHub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: cleanup
run: docker system prune -a -f

- name: Nvidia clang image
uses: docker/build-push-action@v3
with:
context: .
file: './docker/Dockerfile.nvidia'
push: true
target: 'clang'
build-args: 'arch=clang'
tags: 'devitocodes/bases:nvidia-clang'

#######################################################
##################### AMD #############################
#######################################################
Expand Down Expand Up @@ -252,4 +211,47 @@ jobs:
target: 'hip'
build-args: |
arch=hip
tags: devitocodes/bases:amd-hip
tags: devitocodes/bases:amd-hip

#######################################################
################### Nvidia clang ######################
# Currently disabled: nvc+OpenACC is the configuration of choice, and this job is time- and resource-consuming.
# Kept as legacy in case we want to reactivate it.
#######################################################
# deploy-nvidia-clang-base:
# name: "nvidia-clang-base"
# runs-on: ["self-hosted", "nvidiagpu"]
# env:
# DOCKER_BUILDKIT: "1"

# steps:
# - name: Checkout devito
# uses: actions/checkout@v3

# - name: Check event name
# run: echo ${{ github.event_name }}

# - name: Set up QEMU
# uses: docker/setup-qemu-action@v2

# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v2

# - name: Login to DockerHub
# uses: docker/login-action@v2
# with:
# username: ${{ secrets.DOCKER_USERNAME }}
# password: ${{ secrets.DOCKER_PASSWORD }}

# - name: cleanup
# run: docker system prune -a -f

# - name: Nvidia clang image
# uses: docker/build-push-action@v3
# with:
# context: .
# file: './docker/Dockerfile.nvidia'
# push: true
# target: 'clang'
# build-args: 'arch=clang'
# tags: 'devitocodes/bases:nvidia-clang'
6 changes: 0 additions & 6 deletions .github/workflows/docker-devito.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,6 @@ jobs:
test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py'
runner: ["self-hosted", "nvidiagpu"]

- base: 'bases:nvidia-clang'
tag: 'nvidia-clang'
flag: '--gpus all'
test: 'tests/test_gpu_openmp.py tests/test_gpu_common.py'
runner: ["self-hosted", "nvidiagpu"]

# Runtime gpu flags from https://hub.docker.com/r/rocm/tensorflow/
- base: 'bases:amd'
tag: 'amd'
Expand Down
8 changes: 0 additions & 8 deletions .github/workflows/pytest-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,12 @@ jobs:

matrix:
name: [
pytest-gpu-omp-nvidia,
pytest-gpu-acc-nvidia,
pytest-gpu-omp-amd
]
test_examples: ["examples/seismic/tti/tti_example.py examples/seismic/acoustic/acoustic_example.py examples/seismic/viscoacoustic/viscoacoustic_example.py examples/seismic/viscoelastic/viscoelastic_example.py examples/seismic/elastic/elastic_example.py"]

include:
- name: pytest-gpu-omp-nvidia
test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py"
base: "devitocodes/bases:nvidia-clang"
tags: ["self-hosted", "nvidiagpu"]
test_drive_cmd: "nvidia-smi"
flags: '--gpus all --rm --name testrun-clang-nvidia'

- name: pytest-gpu-acc-nvidia
test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py"
base: "devitocodes/bases:nvidia-nvc"
Expand Down
11 changes: 8 additions & 3 deletions docker/Dockerfile.nvidia
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,16 @@ ARG arch="nvc"
########################################################################
# Build base image with apt setup and common env
########################################################################
FROM ${pyversion}-slim-bullseye as sdk-base
FROM ubuntu:22.04 as sdk-base

ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update -y && apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl
RUN apt-get update -y && \
apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl

# Install Python
RUN apt-get update && \
apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip

# nodesource: nvdashboard requires nodejs>=10
RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
Expand Down Expand Up @@ -81,7 +86,7 @@ RUN export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/Linux_x86_64/ | grep '\.' | head -
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl

# Starting with nvhpc 23.5 and CUDA 12.1, hpcx and openmpi are inside the CUDA version folder; only the bin is in the comm_libs path
RUN export CUDA_V=$(ls /opt/nvhpc/${NVARCH}/cuda/ | grep '\.') && \
RUN export CUDA_V=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p') && \
ls /opt/nvhpc/comm_libs/${CUDA_V}/hpcx/ &&\
if [ -d /opt/nvhpc/comm_libs/${CUDA_V}/hpcx ]; then \
rm -rf /opt/nvhpc/comm_libs/hpcx && rm -rf /opt/nvhpc/comm_libs/openmpi4 && \
Expand Down
2 changes: 1 addition & 1 deletion requirements-nvidia.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cupy-cuda110
cupy-cuda12x
dask-cuda
jupyterlab>=3
jupyterlab-nvdashboard
Expand Down

0 comments on commit 34cfdf4

Please sign in to comment.