# Workflow file for the run "LUMI fixes #947".

# Self-hosted CI workflow.
#
# For every (config-name, fpmodel) combination of the matrix below the job
# checks out the sources and the rrtmgp-data repository, prepares the module
# environment, builds the libraries, examples and tests with ${FC}, runs
# `make tests` and finally runs `make check`.
name: Self-hosted CI
on:
  push:
    branches-ignore:
      - documentation
  pull_request:
    branches-ignore:
      - documentation

defaults:
  run:
    shell: bash

jobs:
  CI:
    runs-on:
      labels: cscs-ci
    # Configurations flagged as experimental may fail without failing the run:
    continue-on-error: ${{ matrix.experimental }}
    strategy:
      fail-fast: false
      matrix:
        config-name: [nvidia-gpu-openacc, cce-cpu-icon-production, cce-gpu-openmp]
        fpmodel: [DP, SP]
        # NOTE: the order of the entries below matters. The first entry adds
        # the default 'experimental' value to every combination; later entries
        # extend the combinations with a matching 'config-name' and may
        # override that default.
        include:
          # The tests are not experimental by default:
          - experimental: false
          - config-name: nvidia-gpu-openacc
            rte-kernels: accel
            compiler-modules: "PrgEnv-nvidia nvidia craype-accel-nvidia60 cdt-cuda/21.09 !cray-libsci_acc"
            # Generic accelerator flag
            fcflags: "-O3 -acc -Mallocatable=03 -gopt"
          - config-name: cce-cpu-icon-production
            rte-kernels: default
            compiler-modules: "PrgEnv-cray"
            # Production flags for Icon model
            fcflags: "-hadd_paren -r am -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -hnoacc -O1,cache0"
          - config-name: cce-gpu-openmp
            rte-kernels: accel
            compiler-modules: "PrgEnv-cray craype-accel-nvidia60 cdt-cuda/22.05 cudatoolkit/11.2.0_3.39-2.1__gf93aa1c"
            # OpenMP flags from Nichols Romero (Argonne)
            fcflags: "-hnoacc -homp -O0"
            experimental: true
    env:
      # Core variables:
      FC: ftn
      FCFLAGS: ${{ matrix.fcflags }} -DRTE_USE_${{ matrix.fpmodel }}
      # Make variables:
      RRTMGP_ROOT: ${{ github.workspace }}
      RRTMGP_DATA: ${{ github.workspace }}/rrtmgp-data
      RTE_KERNELS: ${{ matrix.rte-kernels }}
      RUN_CMD: "srun -C gpu -A d56 -p cscsci -t 15:00"
      # Quoted so that the value reaches the environment verbatim instead of
      # being re-serialised as a YAML float (overridden below for SP):
      FAILURE_THRESHOLD: "7.e-4"
    steps:
      #
      # Checks-out repository under $GITHUB_WORKSPACE
      #
      - uses: actions/checkout@v3
      #
      # Check out data
      #
      - name: Check out data
        uses: actions/checkout@v3
        with:
          repository: earth-system-radiation/rrtmgp-data
          path: rrtmgp-data
          ref: develop
      #
      # Finalize build environment
      #
      - name: Finalize build environment
        run: |
          # There are significant limitations on what can go into ${GITHUB_ENV},
          # therefore, we use ${BASH_ENV} but only when necessary:
          BASH_ENV="${GITHUB_WORKSPACE}/.bash"
          echo "source '${GITHUB_WORKSPACE}/.github/workflows/module_switcher'" >> "${BASH_ENV}"
          echo "switch_for_module daint-gpu ${{ matrix.compiler-modules }} cray-netcdf cray-hdf5" >> "${BASH_ENV}"
          # Use custom Python environment:
          #   The environment can be re-generated as follows:
          #     module load cray-python
          #     python3 -m venv /scratch/snx3000/rpincus/rte-rrtmgp-python
          #     /scratch/snx3000/rpincus/rte-rrtmgp-python/bin/pip3 install --upgrade pip
          #     /scratch/snx3000/rpincus/rte-rrtmgp-python/bin/pip3 install dask[array] netCDF4 numpy xarray
          echo 'PATH="/scratch/snx3000/rpincus/rte-rrtmgp-python/bin:${PATH}"' >> "${BASH_ENV}"
          # Make bash run the above on startup:
          echo "BASH_ENV=${BASH_ENV}" >> "${GITHUB_ENV}"
          # Compiler needs more temporary space than normally available
          # (-p: a directory left over on the runner must not prevent TMPDIR
          # from being exported):
          tmpdir='${{ github.workspace }}/tmp'
          mkdir -p "${tmpdir}" && echo "TMPDIR=${tmpdir}" >> "${GITHUB_ENV}"
          # We use the "non-default products" for the tests
          # (see https://support.hpe.com/hpesc/public/docDisplay?docId=a00113984en_us&page=Modify_Linking_Behavior_to_Use_Non-default_Libraries.html):
          echo 'LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"' >> "${BASH_ENV}"
          # SLURM jobs, user home directories and HDF5 file locking are
          # incompatible on Daint:
          echo 'HDF5_USE_FILE_LOCKING=FALSE' >> "${GITHUB_ENV}"
      #
      # Build libraries, examples and tests
      #
      - name: Build libraries, examples and tests
        run: |
          $FC --version
          make libs
          make -C build separate-libs
      #
      # Run examples and tests
      #
      - name: Run examples and tests
        run: make tests
      #
      # Relax failure thresholds for single precision
      # (must run before the comparison step so that the new value is in effect)
      #
      - name: Relax failure threshold for single precision
        if: matrix.fpmodel == 'SP'
        run: echo "FAILURE_THRESHOLD=3.5e-1" >> "${GITHUB_ENV}"
      #
      # Compare the results
      #
      - name: Compare the results
        run: make check