diff --git a/.github/workflows/docker-bases.yml b/.github/workflows/docker-bases.yml index d2bed47180..9ef5871386 100644 --- a/.github/workflows/docker-bases.yml +++ b/.github/workflows/docker-bases.yml @@ -162,47 +162,6 @@ jobs: build-args: 'arch=nvc-host' tags: 'devitocodes/bases:cpu-nvc' -####################################################### -################### Nvidia clang ###################### -####################################################### - deploy-nvidia-clang-base: - name: "nvidia-clang-base" - runs-on: ["self-hosted", "nvidiagpu"] - env: - DOCKER_BUILDKIT: "1" - - steps: - - name: Checkout devito - uses: actions/checkout@v3 - - - name: Check event name - run: echo ${{ github.event_name }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: cleanup - run: docker system prune -a -f - - - name: Nvidia clang image - uses: docker/build-push-action@v3 - with: - context: . 
- file: './docker/Dockerfile.nvidia' - push: true - target: 'clang' - build-args: 'arch=clang' - tags: 'devitocodes/bases:nvidia-clang' - ####################################################### ##################### AMD ############################# ####################################################### @@ -252,4 +211,47 @@ jobs: target: 'hip' build-args: | arch=hip - tags: devitocodes/bases:amd-hip \ No newline at end of file + tags: devitocodes/bases:amd-hip + +####################################################### +################### Nvidia clang ###################### +# Currently disabled: nvc+openacc is the configuration of choice, and this job is time- and resource-consuming. +# Kept as legacy in case we want to reactivate it. +####################################################### +# deploy-nvidia-clang-base: +# name: "nvidia-clang-base" +# runs-on: ["self-hosted", "nvidiagpu"] +# env: +# DOCKER_BUILDKIT: "1" + +# steps: +# - name: Checkout devito +# uses: actions/checkout@v3 + +# - name: Check event name +# run: echo ${{ github.event_name }} + +# - name: Set up QEMU +# uses: docker/setup-qemu-action@v2 + +# - name: Set up Docker Buildx +# uses: docker/setup-buildx-action@v2 + +# - name: Login to DockerHub +# uses: docker/login-action@v2 +# with: +# username: ${{ secrets.DOCKER_USERNAME }} +# password: ${{ secrets.DOCKER_PASSWORD }} + +# - name: cleanup +# run: docker system prune -a -f + +# - name: Nvidia clang image +# uses: docker/build-push-action@v3 +# with: +# context: . 
+# file: './docker/Dockerfile.nvidia' +# push: true +# target: 'clang' +# build-args: 'arch=clang' +# tags: 'devitocodes/bases:nvidia-clang' \ No newline at end of file diff --git a/.github/workflows/docker-devito.yml b/.github/workflows/docker-devito.yml index 459093ed6e..597eed6740 100644 --- a/.github/workflows/docker-devito.yml +++ b/.github/workflows/docker-devito.yml @@ -24,12 +24,6 @@ jobs: test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py' runner: ["self-hosted", "nvidiagpu"] - - base: 'bases:nvidia-clang' - tag: 'nvidia-clang' - flag: '--gpus all' - test: 'tests/test_gpu_openmp.py tests/test_gpu_common.py' - runner: ["self-hosted", "nvidiagpu"] - # Runtime gpu flags from https://hub.docker.com/r/rocm/tensorflow/ - base: 'bases:amd' tag: 'amd' diff --git a/.github/workflows/pytest-gpu.yml b/.github/workflows/pytest-gpu.yml index e52134add2..71f0033106 100644 --- a/.github/workflows/pytest-gpu.yml +++ b/.github/workflows/pytest-gpu.yml @@ -46,20 +46,12 @@ jobs: matrix: name: [ - pytest-gpu-omp-nvidia, pytest-gpu-acc-nvidia, pytest-gpu-omp-amd ] test_examples: ["examples/seismic/tti/tti_example.py examples/seismic/acoustic/acoustic_example.py examples/seismic/viscoacoustic/viscoacoustic_example.py examples/seismic/viscoelastic/viscoelastic_example.py examples/seismic/elastic/elastic_example.py"] include: - - name: pytest-gpu-omp-nvidia - test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py" - base: "devitocodes/bases:nvidia-clang" - tags: ["self-hosted", "nvidiagpu"] - test_drive_cmd: "nvidia-smi" - flags: '--gpus all --rm --name testrun-clang-nvidia' - - name: pytest-gpu-acc-nvidia test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py" base: "devitocodes/bases:nvidia-nvc" diff --git a/docker/Dockerfile.nvidia b/docker/Dockerfile.nvidia index b2bfb62fde..b82b5fcd1e 100644 --- a/docker/Dockerfile.nvidia +++ b/docker/Dockerfile.nvidia @@ -8,11 +8,16 @@ ARG arch="nvc" 
######################################################################## # Build base image with apt setup and common env ######################################################################## -FROM ${pyversion}-slim-bullseye as sdk-base +FROM ubuntu:22.04 as sdk-base ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update -y && apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl +RUN apt-get update -y && \ + apt-get install -y -q gpg apt-utils curl wget vim libnuma-dev tmux numactl + +#Install python +RUN apt-get update && \ + apt-get install -y dh-autoreconf python3-venv python3-dev python3-pip # nodesource: nvdashboard requires nodejs>=10 RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg @@ -81,7 +86,7 @@ RUN export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/Linux_x86_64/ | grep '\.' | head - ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl # Starting nvhpc 23.5 and cuda 12.1, hpcx and openmpi are inside the cuda version folder, only the bin is in the comm_libs path -RUN export CUDA_V=$(ls /opt/nvhpc/${NVARCH}/cuda/ | grep '\.') && \ +RUN export CUDA_V=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p') && \ ls /opt/nvhpc/comm_libs/${CUDA_V}/hpcx/ &&\ if [ -d /opt/nvhpc/comm_libs/${CUDA_V}/hpcx ]; then \ rm -rf /opt/nvhpc/comm_libs/hpcx && rm -rf /opt/nvhpc/comm_libs/openmpi4 && \ diff --git a/requirements-nvidia.txt b/requirements-nvidia.txt index 5f749fc61c..2e7534ea43 100644 --- a/requirements-nvidia.txt +++ b/requirements-nvidia.txt @@ -1,4 +1,4 @@ -cupy-cuda110 +cupy-cuda12x dask-cuda jupyterlab>=3 jupyterlab-nvdashboard