diff --git a/experiments/branson/experiment.py b/experiments/branson/experiment.py new file mode 100644 index 000000000..7a8ea3f7d --- /dev/null +++ b/experiments/branson/experiment.py @@ -0,0 +1,125 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: Apache-2.0 + +from benchpark.error import BenchparkError +from benchpark.directives import variant +from benchpark.experiment import Experiment +from benchpark.openmp import OpenMPExperiment +from benchpark.cuda import CudaExperiment +from benchpark.rocm import ROCmExperiment +from benchpark.scaling import StrongScaling +from benchpark.scaling import WeakScaling +from benchpark.expr.builtin.caliper import Caliper + + +class Branson( + Experiment, + OpenMPExperiment, + CudaExperiment, + ROCmExperiment, + StrongScaling, + WeakScaling, + Caliper, +): + variant( + "workload", + default="branson", + description="workload name", + ) + + variant( + "version", + default="develop", + description="app version", + ) + + variant( + "n_groups", + default="30", + values=int, + description="Number of groups", + ) + + def compute_applications_section(self): + # TODO: Replace with conflicts clause + scaling_modes = { + "strong": self.spec.satisfies("+strong"), + "weak": self.spec.satisfies("+weak"), + "single_node": self.spec.satisfies("+single_node"), + } + + scaling_mode_enabled = [key for key, value in scaling_modes.items() if value] + if len(scaling_mode_enabled) != 1: + raise BenchparkError( + f"Only one type of scaling per experiment is allowed for application package {self.name}" + ) + + # Number of processes in each dimension + num_nodes = {"n_nodes": 1} + + # Per-process size (in zones) in each dimension + num_particles = {"num_particles": 850000000} + + if self.spec.satisfies("+single_node"): + for pk, pv in num_nodes.items(): + self.add_experiment_variable(pk, pv, True) + for nk, nv in 
num_particles.items(): + self.add_experiment_variable(nk, nv, True) + elif self.spec.satisfies("+strong"): + scaled_variables = self.generate_strong_scaling_params( + {tuple(num_nodes.keys()): list(num_nodes.values())}, + int(self.spec.variants["scaling-factor"][0]), + int(self.spec.variants["scaling-iterations"][0]), + ) + for pk, pv in scaled_variables.items(): + self.add_experiment_variable(pk, pv, True) + for nk, nv in num_particles.items(): + self.add_experiment_variable(nk, nv, True) + elif self.spec.satisfies("+weak"): + scaled_variables = self.generate_weak_scaling_params( + {tuple(num_nodes.keys()): list(num_nodes.values())}, + {tuple(num_particles.keys()): list(num_particles.values())}, + int(self.spec.variants["scaling-factor"][0]), + int(self.spec.variants["scaling-iterations"][0]), + ) + for k, v in scaled_variables.items(): + self.add_experiment_variable(k, v, True) + + self.add_experiment_variable( + "use_gpu", + ( + "TRUE" + if self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm") + else "FALSE" + ), + ) + + self.add_experiment_variable("n_ranks", "{n_nodes}*{sys_cores_per_node}", True) + + def compute_spack_section(self): + # get package version + app_version = self.spec.variants["version"][0] + + # get system config options + # TODO: Get compiler/mpi/package handles directly from system.py + system_specs = {} + system_specs["compiler"] = "default-compiler" + system_specs["mpi"] = "default-mpi" + if self.spec.satisfies("+cuda"): + system_specs["cuda_version"] = "{default_cuda_version}" + system_specs["cuda_arch"] = "{cuda_arch}" + if self.spec.satisfies("+rocm"): + system_specs["rocm_arch"] = "{rocm_arch}" + + # set package spack specs + self.add_spack_spec(system_specs["mpi"]) + + self.add_spack_spec( + self.name, + [ + f"branson@{app_version} n_groups={self.spec.variants['n_groups'][0]} ", + system_specs["compiler"], + ], + ) diff --git a/legacy/experiments/branson/mpi-only/ramble.yaml b/legacy/experiments/branson/mpi-only/ramble.yaml new 
file mode 100644 index 000000000..c4cb4e929 --- /dev/null +++ b/legacy/experiments/branson/mpi-only/ramble.yaml @@ -0,0 +1,51 @@ +ramble: + applications: + branson: + workloads: + branson: + experiments: + branson_branson_weak_scaling_caliper_time_mpi_{n_nodes}_{num_particles}_{n_ranks}: + exclude: {} + matrix: [] + variables: + n_nodes: + - 1 + - 2 + - 4 + - 8 + n_ranks: '{n_nodes}*{sys_cores_per_node}' + num_particles: + - 850000000 + - 1700000000 + - 3400000000 + - 6800000000 + variants: + package_manager: spack + zips: {} + config: + deprecated: true + spack_flags: + concretize: -U -f + install: --add --keep-stage + include: + - ./configs + modifiers: + - name: allocation + - mode: mpi + name: caliper + - mode: time + name: caliper + software: + environments: + branson: + packages: + - caliper + - default-mpi + - branson + packages: + branson: + compiler: default-compiler + pkg_spec: branson@develop+caliper + caliper: + compiler: default-compiler + pkg_spec: caliper@master+adiak+mpi~libunwind~libdw~papi diff --git a/legacy/experiments/branson/openmp/ramble.yaml b/legacy/experiments/branson/openmp/ramble.yaml deleted file mode 100644 index e90dc280a..000000000 --- a/legacy/experiments/branson/openmp/ramble.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2023 Lawrence Livermore National Security, LLC and other -# Benchpark Project Developers. See the top-level COPYRIGHT file for details. 
-# -# SPDX-License-Identifier: Apache-2.0 -ramble: - include: - - ./configs/software.yaml - - ./configs/variables.yaml - - ./configs/modifier.yaml - config: - deprecated: true - spack_flags: - install: '--add --keep-stage' - concretize: '-U -f' -#Branson does not currently work on lassen - modifiers: - - name: allocation - applications: - branson: - workloads: - branson: - variables: - n_ranks: '{n_nodes}*{sys_cores_per_node}' - experiments: - branson_strong_{n_nodes}: - variants: - package_manager: spack - variables: - #looks like branson should use ~25% of memory per node - num_particles: '850000000' - n_nodes: ['1','2','4','8'] - - branson_weak_{n_nodes}: - variants: - package_manager: spack - variables: - num_particles: '850000000*{n_nodes}' - n_nodes: ['1','2','4','8'] - software: - packages: - branson: - pkg_spec: branson@develop - compiler: default-compiler - environments: - branson: - packages: - - default-mpi - - branson - - '{modifier_package_name}' diff --git a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml index 1c050e729..2528cb68e 100644 --- a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml +++ b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml @@ -302,6 +302,7 @@ packages: gtl_cutoff_size: 4096 fi_cxi_ats: 0 gtl_lib_path: /opt/cray/pe/mpich/8.1.26/gtl/lib + gtl_libs: ["libmpi_gtl_hsa"] ldflags: "-L/opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0/lib -lmpi -L/opt/cray/pe/mpich/8.1.26/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/8.1.26/gtl/lib -lmpi_gtl_hsa" - spec: cray-mpich@8.1.26%cce@16.0.0 ~gtl +wrappers prefix: /opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0 diff --git a/repo/branson/application.py b/repo/branson/application.py index 2103c1ca5..51a5e246a 100644 --- a/repo/branson/application.py +++ 
b/repo/branson/application.py @@ -19,14 +19,15 @@ class Branson(ExecutableApplication): executable('setup_experiment', template=[ 'cp {branson}/inputs/* {experiment_run_dir}/.', - 'sed -i "s|250000000|{num_particles}|g" {experiment_run_dir}/{input_file}' + 'sed -i "s|[0-9]*|{num_particles}|g" {experiment_run_dir}/{input_file}', + 'sed -i "s|.*|{use_gpu}|g" {experiment_run_dir}/{input_file}' ]) executable('p', '{branson}/bin/BRANSON {experiment_run_dir}/{input_file}', use_mpi=True) workload('branson', executables=['setup_experiment','p']) - workload_variable('input_file', default='3D_hohlraum_multi_node.xml', + workload_variable('input_file', default='3D_hohlraum_single_node.xml', description='input file name', workloads=['branson']) diff --git a/repo/branson/branson_cmake.patch b/repo/branson/branson_cmake.patch new file mode 100644 index 000000000..68af8bf73 --- /dev/null +++ b/repo/branson/branson_cmake.patch @@ -0,0 +1,485 @@ +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -9,65 +9,160 @@ + # Enbable debug mode by passing -DCMAKE_BUILD_TYPE=Debug to CMake, default is + # Release + +-cmake_minimum_required (VERSION 3.11) ++cmake_minimum_required(VERSION 3.21) + +-option(USE_GPU "user-set flag to compile in GPU code" FALSE) ++project (BRANSON ++ VERSION 0.8 ++ DESCRIPTION "Branson can be used to study different algorithms for parallel Monte Carlo transport. Currently it contains particle passing and mesh passing methods for domain decomposition." ++) ++ ++## Fail if someone tries to config an in-source build. ++if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") ++ message(FATAL_ERROR "In-source builds are not supported. 
Please remove " ++ "CMakeCache.txt from the 'src' dir and configure an " ++ "out-of-source build in another directory.") ++endif() ++ ++# Set the build type to Release by default if not set ++if(NOT CMAKE_BUILD_TYPE) ++ set(CMAKE_BUILD_TYPE "Release") ++endif() ++ ++set(CMAKE_CXX_STANDARD 17) ++set(CMAKE_CXX_STANDARD_REQUIRED ON) ++set(CMAKE_CXX_EXTENSIONS OFF) ++set(CMAKE_BRANSON_CXXFLAGS ${CMAKE_CXX_FLAGS}) ++ ++option(ENABLE_CUDA "Use CUDA" FALSE) ++option(ENABLE_HIP "Use HIP" FALSE) ++option(ENABLE_CALIPER "Enable Caliper" FALSE) ++option(ENABLE_OPENMP "Enable OpenMP" FALSE) ++ ++set(CMAKE_VERBOSE_MAKEFILE ON) ++ ++if(ENABLE_CUDA) ++ enable_language(CXX) + +-if(DEFINED ENV{CUDADIR} +- OR DEFINED ENV{CUDACXX} +- OR DEFINED ENV{CUDA_HOME}) +- if(USE_GPU) +- set(GPU_DBS_STRING "CUDA" CACHE STRING "If CUDA is available, this variable is 'CUDA'") +- enable_language(CUDA) ++ find_package(CUDAToolkit REQUIRED) ++ list(APPEND branson_deps ++ CUDA::cudart) ++ ++ include(CheckLanguage) ++ check_language(CUDA) ++ ++ set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) ++ ++ if(NOT CMAKE_CUDA_COMPILER) ++ message(FATAL_ERROR "Unable to find the nvcc compiler. 
Please use" ++ "CMAKE_CUDA_COMPILER to provide the nvcc compiler.") + endif() +-elseif( +- "$ENV{LOADEDMODULES}" MATCHES "rocmcc" +- OR DEFINED ENV{HIPCXX} +- OR DEFINED CMAKE_HIP_COMPILER +- OR DEFINED ENV{ROCM_PATH}) +- if(USE_GPU) +- set(GPU_DBS_STRING "HIP" CACHE STRING "If HIP is available, this variable is 'HIP'") +- #if(CMAKE_HIP_COMPILER MATCHES "CC") +- # set(CMAKE_HIP_FLAGS "-x hip") +- #endif() +- enable_language(HIP) ++ ++ enable_language(CUDA) ++ set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) ++ set(CMAKE_CUDA_STANDARD_REQUIRED ON) ++ set(CMAKE_CUDA_ARCHITECTURES "${CUDA_ARCH}") ++ set_source_files_properties("main.cc" PROPERTIES LANGUAGE CUDA) ++ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -v") ++ set(CMAKE_CUDA_FLAGS "-g --expt-relaxed-constexpr ${CMAKE_CUDA_FLAGS}") ++ set(CMAKE_CUDA_FLAGS "-expt-extended-lambda ${CMAKE_CUDA_FLAGS}") ++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DUSE_GPU) ++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DHAS_GPU) ++ ++ set(GPU_DBS_STRING "CUDA" CACHE STRING "If CUDA is available, this variable is 'CUDA'") ++ ++ message("Making GPU(CUDA) BRANSON") ++ ++elseif(ENABLE_HIP) ++ if(NOT DEFINED HIP_PATH) ++ if(NOT DEFINED ENV{HIP_PATH}) ++ set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") ++ else() ++ set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") ++ endif() + endif() +-endif() + +-message("GPU_DBS_STRING: ${GPU_DBS_STRING}") +-project (BRANSON +- VERSION 0.8 +- DESCRIPTION "Branson can be used to study different algorithms for parallel Monte Carlo transport. Currently it contains particle passing and mesh passing methods for domain decomposition." 
+-# HOMEPAGE URL "https://github.com/lanl/branson" # needs cmake 3.12+ +- LANGUAGES CXX C ${GPU_DBS_STRING}) +- +-get_property(_LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES) +- +-message("Languages: ${_LANGUAGES_}") +-if((_LANGUAGES_ MATCHES CUDA OR _LANGUAGES_ MATCHES HIP) AND USE_GPU) +- message("CUDA/HIP module found (CUDA/HIP environment variables set) and USE_GPU is on, making GPU BRANSON") +- add_compile_definitions(HAS_GPU) +-elseif(USE_GPU) +- message(FATAL_ERROR "CUDA/HIP module NOT found (CUDA/HIP environment variables set) but USE_GPU is on, reconfigure with USE_GPU off or fix modules") +-elseif(NOT USE_GPU) +- message("GPU mode not requested, making CPU only BRANSON") ++ if(NOT DEFINED ROCM_PATH) ++ if(DEFINED ENV{ROCM_PATH}) ++ set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed") ++ elseif(DEFINED ENV{HIP_PATH}) ++ set(ROCM_PATH "$ENV{HIP_PATH}/.." CACHE PATH "Path to which ROCM has been installed") ++ else() ++ set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed") ++ endif() ++ endif() ++ ++ if(NOT DEFINED HCC_PATH) ++ if(DEFINED ENV{HCC_PATH}) ++ set(HCC_PATH $ENV{HCC_PATH} CACHE PATH "Path to which HCC has been installed") ++ else() ++ set(HCC_PATH "${ROCM_PATH}/hcc" CACHE PATH "Path to which HCC has been installed") ++ endif() ++ set(HCC_HOME "${HCC_PATH}") ++ endif() ++ ++ if(NOT DEFINED HIP_CLANG_PATH) ++ if(NOT DEFINED ENV{HIP_CLANG_PATH}) ++ set(HIP_CLANG_PATH "${ROCM_PATH}/llvm/bin" CACHE PATH "Path to which HIP compatible clang binaries have been installed") ++ else() ++ set(HIP_CLANG_PATH $ENV{HIP_CLANG_PATH} CACHE PATH "Path to which HIP compatible clang binaries have been installed") ++ endif() ++ endif() ++ ++ set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) ++ list(APPEND CMAKE_PREFIX_PATH ++ "${HIP_PATH}/lib/cmake" ++ "${HIP_PATH}/../lib/cmake" # hopefully catches all extra HIP dependencies ++ ) ++ ++ find_package(HIP QUIET) ++ if(HIP_FOUND) ++ message(STATUS "Found 
HIP: " ${HIP_VERSION}) ++ else() ++ message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.") ++ endif() ++ find_package(hip REQUIRED) ++ ++ # For ROCm >=3.5, wipe hip-clang specific interface options which are propagated ++ set_target_properties(hip::device PROPERTIES INTERFACE_COMPILE_OPTIONS "-fPIC") ++ set_target_properties(hip::device PROPERTIES INTERFACE_LINK_LIBRARIES "hip::host") ++ ++ list(APPEND branson_deps ++ "-fgpu-rdc" ++ "--hip-link" ++ "--offload-arch=${HIP_ARCH}" ++ "--rocm-path=${ROCM_PATH}" ++ hip::host ++ hip::device) ++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DUSE_GPU) ++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DHAS_GPU) ++ list(APPEND CMAKE_BRANSON_CXXFLAGS -x hip) ++ list(APPEND CMAKE_BRANSON_CXXFLAGS "--offload-arch=${HIP_ARCH}") ++ list(APPEND CMAKE_BRANSON_CXXFLAGS "-fgpu-rdc") ++ list(APPEND CMAKE_BRANSON_CXXFLAGS "-Wno-unused-result") ++ ++ #set_source_files_properties("main.cc" PROPERTIES LANGUAGE HIP) ++ ++ set(GPU_DBS_STRING "HIP" CACHE STRING "If HIP is available, this variable is 'HIP'") ++ ++ message("Making GPU(HIP) BRANSON") ++ + else() +- message("CUDA/HIP module not found and GPU mode not requested, making CPU only BRANSON") ++ message("Making CPU BRANSON") ++ + endif() + ++message("GPU_DBS_STRING: ${GPU_DBS_STRING}") + + # Build system support files are located here. +-set( CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/config ) ++set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/config ${CMAKE_MODULE_PATH}) + +-message( " +-This is ${CMAKE_PROJECT_NAME} v. ${PROJECT_VERSION}. +-") ++message("This is ${CMAKE_PROJECT_NAME} v. 
${PROJECT_VERSION}.") + + #------------------------------------------------------------------------------# + # User configurable options + + set (BRANSON_N_GROUPS ${N_GROUPS}) +-if (NOT BRANSON_N_GROUPS) ++if(NOT BRANSON_N_GROUPS) + message(STATUS + "HARDCODED NUMBER OF GROUPS NOT SET, DEFAULTING TO ONE GROUP (GRAY) + use '-DN_GROUPS=' when running cmake to select multigroup.\n") +@@ -83,28 +178,10 @@ set(ENABLE_VERBOSE_GPU_TRANSPORT ${ENABLE_VERBOSE_GPU_TRANSPORT} CACHE STRING "F + #------------------------------------------------------------------------------# + # Record the site name + +-site_name( SITENAME ) +-string( REGEX REPLACE "([A-z0-9]+).*" "\\1" SITENAME ${SITENAME} ) ++site_name(SITENAME) ++string(REGEX REPLACE "([A-z0-9]+).*" "\\1" SITENAME ${SITENAME}) + include (lanl-setup) +-set( SITENAME ${SITENAME} CACHE "STRING" "Name of the current machine" FORCE) +- +-#------------------------------------------------------------------------------# +-# Setup compiler options +-set( CXX_STANDARD_REQUIRED ON ) +-set( CMAKE_CXX_STANDARD 17 ) +- +-# Do not enable extensions (e.g.: --std=gnu++11) +-set( CMAKE_CXX_EXTENSIONS OFF ) +-set( CMAKE_C_EXTENSIONS OFF ) +- +-#------------------------------------------------------------------------------# +-# Build type and custom compiler flags +- +-if ( "${CMAKE_BUILD_TYPE}notset" STREQUAL "notset" AND +- NOT DEFINED CMAKE_CONFIGURATION_TYPES ) +- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Release|Debug|RelWithDebInfo" +- FORCE) +-endif () ++set(SITENAME ${SITENAME} CACHE "STRING" "Name of the current machine" FORCE) + + # GCC options for address or undefined sanitizing + #set(GCC_SANITIZE_COMPILE_FLAGS "-fsanitize=address") +@@ -113,65 +190,68 @@ endif () + #set(GCC_SANITIZE_LINK_FLAGS "-fsanitize=undefined") + + # add compiler flags +-if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" ) +- set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -ffp-contract=off") +- set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0") 
+- #set( CMAKE_CXX_FLAGS_RELEASE "-march=native -O3 -funroll-loops -fno-var-tracking-assignments") +- set( CMAKE_CXX_FLAGS_RELEASE "-g -O3 -funroll-loops -fno-var-tracking-assignments") ++if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") ++ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -ffp-contract=off") ++ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0") ++ #set(CMAKE_CXX_FLAGS_RELEASE "-march=native -O3 -funroll-loops -fno-var-tracking-assignments") ++ set(CMAKE_CXX_FLAGS_RELEASE "-g -O3 -funroll-loops -fno-var-tracking-assignments") + # optionally add sanitize flags +- #set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${GCC_SANITIZE_COMPILE_FLAGS}") +- #set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_SANITIZE_LINK_FLAGS}") +-elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" ) +- set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0") ++ #set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${GCC_SANITIZE_COMPILE_FLAGS}") ++ #set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_SANITIZE_LINK_FLAGS}") ++elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") ++ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0") + # note that the "fp-model=precise" flag significantly limite vectorization and slows down + # branson but it makes it the hardware counter data match Jayenne more closely (Jayenne uses + # this flag) +- set( CMAKE_CXX_FLAGS_RELEASE "-g -O3 -fp-speculation=fast -xhost -qno-opt-dynamic-align") +-elseif (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") ++ set(CMAKE_CXX_FLAGS_RELEASE "-g -O3 -fp-speculation=fast -xhost -qno-opt-dynamic-align") ++elseif(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -Wall -O0 -xhost -qopenmp") + set(CMAKE_CXX_FLAGS_RELEASE "-g -O3 -xhost -qopenmp -qopt-zmm-usage=high -fp-speculation=fast -qopt-report=3 -qopt-report-file=runAVX512.optrpt") +-elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Cray" ) +- set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g 
-DR123_USE_GNU_UINT128=0") +- set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -DR123_USE_GNU_UINT128=0") ++elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Cray") ++ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -DR123_USE_GNU_UINT128=0") ++ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -DR123_USE_GNU_UINT128=0") + endif() + ++if(ENABLE_CUDA) ++ string(JOIN " " CMAKE_BRANSON_CUDAFLAGS ${CMAKE_BRANSON_CXXFLAGS}) ++ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${CMAKE_BRANSON_CUDAFLAGS}") ++else() ++ add_compile_options(${CMAKE_BRANSON_CXXFLAGS}) ++endif() + + #------------------------------------------------------------------------------# + # Look for Third Party Libraries (metis, etc.) + add_subdirectory(pugixml) + +-include(find_tpls) +-setupTPLs() +- + #------------------------------------------------------------------------------# + # Report build configuration + + # Summary of TPLs + include(FeatureSummary) + message(" ") +-feature_summary( WHAT ALL ++feature_summary(WHAT ALL + INCLUDE_QUIET_PACKAGES + FATAL_ON_MISSING_REQUIRED_PACKAGES +- QUIET_ON_EMPTY ) ++ QUIET_ON_EMPTY) + + # Summary of user-selectable build options +-message( "\nBuild Summary:\n") +-message( STATUS "Machine name : ${SITENAME}") ++message("\nBuild Summary:\n") ++message(STATUS "Machine name : ${SITENAME}") + report_lanl_hpc_features() +-if( ${BRANSON_N_GROUPS} GREATER 1 ) +- message( STATUS "Energy groups : ${BRANSON_N_GROUPS}") ++if(${BRANSON_N_GROUPS} GREATER 1) ++ message(STATUS "Energy groups : ${BRANSON_N_GROUPS}") + else() +- message( STATUS "Energy groups : Gray (1-group)") ++ message(STATUS "Energy groups : Gray (1-group)") + endif() +-message( STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") +-message( STATUS "Compiler : ${CMAKE_CXX_COMPILER}") +-if( CMAKE_CONFIGURATION_TYPES ) +- message( STATUS "Compiler Flags (All) : ${CMAKE_CXX_FLAGS}") +- message( STATUS "Compiler Flags (Debug) : ${CMAKE_CXX_FLAGS_DEBUG}") +- message( STATUS "Compiler Flags (Release): ${CMAKE_CXX_FLAGS_RELEASE}") 
++message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") ++message(STATUS "Compiler : ${CMAKE_CXX_COMPILER}") ++if(CMAKE_CONFIGURATION_TYPES) ++ message(STATUS "Compiler Flags (All) : ${CMAKE_CXX_FLAGS}") ++ message(STATUS "Compiler Flags (Debug) : ${CMAKE_CXX_FLAGS_DEBUG}") ++ message(STATUS "Compiler Flags (Release): ${CMAKE_CXX_FLAGS_RELEASE}") + else() +- string( TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER ) +- message( STATUS "Compiler Flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") ++ string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) ++ message(STATUS "Compiler Flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") + endif() + message("\n") + +@@ -180,68 +260,55 @@ message("\n") + + # set(DMAPP_DYNAMIC -Wl,--whole-archive,-ldmapp,--no-whole-archive) + ++find_package(MPI REQUIRED) ++list(APPEND branson_deps ++ MPI::MPI_CXX) + +-#------------------------------------------------------------------------------# +-# Targets +-file(GLOB headers *.h) +-add_executable(BRANSON main.cc ${headers}) +-target_include_directories( BRANSON PRIVATE +- $ ${PROJECT_SOURCE_DIR}/pugixml/src/) +-set( branson_deps +- MPI::MPI_CXX +- MPI::MPI_C +- pugixml) ++list(APPEND branson_deps pugixml) + +-if(OpenMP_FOUND) +- set(branson_deps "OpenMP::OpenMP_CXX;${branson_deps}") ++if(ENABLE_CALIPER) ++ message(STATUS "Looking for caliper...") ++ find_package(caliper REQUIRED) ++ if(caliper_FOUND) ++ message(STATUS "Looking for caliper.....found") ++ endif() ++ list(APPEND branson_deps ++ caliper) ++ find_package(adiak REQUIRED) ++ list(APPEND branson_deps ++ adiak::adiak) ++else() ++ add_custom_target(caliper) + endif() + +-if(METIS_FOUND) +- set( branson_deps "METIS::metis;${branson_deps}") ++if(ENABLE_OPENMP) ++ set(USE_OPENMP ON) ++else() ++ set(USE_OPENMP OFF) + endif() + +-if( VIZ_LIBRARIES_FOUND ) +- set( branson_deps "Silo::silo;${HDF5_LIBRARIES};${branson_deps}" ) +-endif() ++include(find_tpls) ++setupTPLs() + 
+-if(caliper_FOUND) +- set( branson_deps "caliper;${branson_deps}") ++if(OpenMP_FOUND) ++ list(APPEND branson_deps ++ OpenMP::OpenMP_CXX ++ ) + endif() + +-#------------------------------------------------------------------------------# +-# Generate config.h +- +-configure_file(config.h.in ${PROJECT_BINARY_DIR}/config.h) +-#------------------------------------------------------------------------------# +- +-if("${GPU_DBS_STRING}" STREQUAL "CUDA" ) +- message("Setting CUDA compiler options") +- #set_target_properties(BRANSON PROPERTIES CUDA_ARCHITECTURES "70") # V100 +- set_target_properties(BRANSON PROPERTIES CUDA_ARCHITECTURES "80") # A100 +- set_target_properties(BRANSON PROPERTIES CUDA_STANDARD 17) +- string(APPEND CMAKE_CUDA_FLAGS " -g --expt-relaxed-constexpr") +- string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda" ) +- set_source_files_properties("main.cc" PROPERTIES LANGUAGE CUDA) +-elseif("${GPU_DBS_STRING}" STREQUAL "HIP" ) +- message("Setting HIP compiler options") +- if(CMAKE_HIP_COMPILER MATCHES "CC") +- set(CMAKE_HIP_FLAGS "-x hip") +- endif() +- set_target_properties(BRANSON PROPERTIES HIP_ARCHITECTURES "gfx942") # MI300 +- set_target_properties(BRANSON PROPERTIES HIP_STANDARD 17) +- string(APPEND CMAKE_HIP_FLAGS " -g -mllvm=--disable-peephole") +- #string(APPEND CMAKE_HIP_FLAGS " --expt-extended-lambda" ) +- set_source_files_properties("main.cc" PROPERTIES LANGUAGE HIP) +-else() +- message("GPU Options: Not a GPU build or GPU_DBS_STRING not recognized") ++if(METIS_FOUND) ++ list(APPEND branson_deps ++ METIS::metis) + endif() + +-target_link_libraries( BRANSON PRIVATE ${branson_deps} ) +- +-#------------------------------------------------------------------------------# +-# Testing ++if(VIZ_LIBRARIES_FOUND) ++ list(APPEND branson_deps ++ Silo::silo ++ ${HDF5_LIBRARIES} ++ ) ++endif() + +-option( BUILD_TESTING "Should we compile the tests?" ON ) ++option(BUILD_TESTING "Should we compile the tests?" 
ON) + if(BUILD_TESTING) + enable_testing() + add_subdirectory(test) +@@ -252,11 +319,38 @@ else() + "Building tests disabled, set BUILD_TESTING=TRUE or don't set BUILD_TESTING to enable test builds") + endif() + ++#------------------------------------------------------------------------------# ++# Generate config.h ++ ++configure_file(config.h.in ${PROJECT_BINARY_DIR}/config.h) ++#------------------------------------------------------------------------------# ++ ++#------------------------------------------------------------------------------# ++# Targets ++file(GLOB headers *.h) ++add_executable(BRANSON main.cc ${headers}) ++target_link_libraries(BRANSON PUBLIC ${branson_deps}) ++target_include_directories(BRANSON PRIVATE ++ ${CMAKE_BINARY_DIR} ${PROJECT_SOURCE_DIR}/pugixml/src/ ${HIP_INCLUDE_DIRS}) ++ ++if(ENABLE_CUDA) ++ set_target_properties(BRANSON ++ PROPERTIES ++ CUDA_SEPARABLE_COMPILATION ON ++ CUDA_RESOLVE_DEVICE_SYMBOLS ON) ++endif() ++ ++if(ENABLE_HIP) ++ set_target_properties(BRANSON ++ PROPERTIES ++ HIP_SEPARABLE_COMPILATION ON ++ HIP_RESOLVE_DEVICE_SYMBOLS ON) ++endif() + + #------------------------------------------------------------------------------# + # Targets for installation + +-install(TARGETS BRANSON DESTINATION bin) ++install(TARGETS BRANSON DESTINATION ${CMAKE_INSTALL_BINDIR}) + + #------------------------------------------------------------------------------# + # End src/CMakeLists.txt diff --git a/repo/branson/branson_power9.patch b/repo/branson/branson_power9.patch new file mode 100644 index 000000000..ea3110173 --- /dev/null +++ b/repo/branson/branson_power9.patch @@ -0,0 +1,11 @@ +--- a/src/random123/features/gccfeatures.h ++++ b/src/random123/features/gccfeatures.h +@@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ #endif + + #ifdef __powerpc__ +-#include ++//#include + #endif + + #ifndef R123_STATIC_INLINE diff --git a/repo/branson/package.py b/repo/branson/package.py index 14e31c343..ac260e976 100644 --- a/repo/branson/package.py +++ b/repo/branson/package.py @@ -6,8 +6,10 @@ from spack.package import * from spack.pkg.builtin.boost import Boost +import os -class Branson(CMakePackage): + +class Branson(CMakePackage, CudaPackage, ROCmPackage): """Branson's purpose is to study different algorithms for parallel Monte Carlo transport. Currently it contains particle passing and mesh passing methods for domain decomposition.""" @@ -30,6 +32,11 @@ class Branson(CMakePackage): ) version("0.81", sha256="493f720904791f06b49ff48c17a681532c6a4d9fa59636522cf3f9700e77efe4") version("0.8", sha256="85ffee110f89be00c37798700508b66b0d15de1d98c54328b6d02a9eb2cf1cb8") + + variant("openmp", default=False, description="Enable OpenMP support") + variant("caliper", default=False, description="Enable Caliper monitoring") + variant("n_groups", default=30, values=int, description="Number of groups") + #depends_on("mpi") depends_on("mpi@2:") @@ -39,16 +46,70 @@ class Branson(CMakePackage): depends_on(Boost.with_default_variants, when="@:0.81") depends_on("metis") depends_on("parmetis", when="@:0.81") + depends_on("caliper", when="+caliper") + depends_on("adiak", when="+caliper") root_cmakelists_dir = "src" + flag_handler = build_system_flags + + patch("branson_cmake.patch") + patch("branson_power9.patch") + + def setup_build_environment(self, env): + if "+cuda" in self.spec: + env.set("NVCC_APPEND_FLAGS", "-allow-unsupported-compiler") + + def patch(self): + ppu_intrinsics_file = os.path.join(self.stage.source_path, "src", "random123", "features", "ppu_intrinsics.h") + with open(ppu_intrinsics_file , "w") as f: + pass + def cmake_args(self): spec = self.spec args = [] - #args.append("--enable-mpi") - args.append(f"-DCMAKE_C_COMPILER={spec['mpi'].mpicc}") - 
args.append(f"-DCMAKE_CXX_COMPILER={spec['mpi'].mpicxx}") + + args.append(f"-DMPI_C_COMPILER={spec['mpi'].mpicc}") + args.append(f"-DMPI_CXX_COMPILER={spec['mpi'].mpicxx}") args.append(f"-DCMAKE_Fortran_COMPILER={spec['mpi'].mpifc}") + + args.append(f"-DMETIS_ROOT_DIR={spec['metis'].prefix}") + + if '+cuda' in spec: + args.append("-DENABLE_CUDA=ON") + args.append(f"-DCMAKE_CUDA_COMPILER={spec['cuda'].prefix}/bin/nvcc") + cuda_arch_vals = spec.variants["cuda_arch"].value + if cuda_arch_vals: + cuda_arch_sorted = list(sorted(cuda_arch_vals, reverse=True)) + cuda_arch = cuda_arch_sorted[0] + args.append(f"-DCUDA_ARCH={cuda_arch}") + else: + args.append("-DENABLE_CUDA=OFF") + + if '+rocm' in spec: + args.append("-DENABLE_HIP=ON") + rocm_arch_vals = spec.variants["amdgpu_target"].value + args.append(f"-DROCM_PATH={spec['hip'].prefix}") + args.append(f"-DHIP_PATH={spec['hip'].prefix}/hip") + if rocm_arch_vals: + rocm_arch_sorted = list(sorted(rocm_arch_vals, reverse=True)) + rocm_arch = rocm_arch_sorted[0] + args.append(f"-DROCM_ARCH={rocm_arch}") + args.append(f"-DHIP_ARCH={rocm_arch}") + else: + args.append("-DENABLE_HIP=OFF") + + args.append(self.define_from_variant("ENABLE_OPENMP", "openmp")) + + if '+caliper' in spec: + args.append(self.define_from_variant("ENABLE_CALIPER", "caliper")) + args.append(f"-Dcaliper_DIR={spec['caliper'].prefix}") + + args.append("-DBUILD_TESTING=OFF") + args.append(f"-DN_GROUPS={self.spec.variants['n_groups'].value}") + + args.append(f"-DMPI_CXX_LINK_FLAGS={spec['mpi'].libs.ld_flags}") + return args def install(self, spec, prefix): diff --git a/repo/caliper/for_aarch64.patch b/repo/caliper/for_aarch64.patch new file mode 100644 index 000000000..d3fed9a09 --- /dev/null +++ b/repo/caliper/for_aarch64.patch @@ -0,0 +1,11 @@ +--- spack-src/src/services/callpath/Callpath.cpp.bak 2020-10-28 14:38:19.668122844 +0900 ++++ spack-src/src/services/callpath/Callpath.cpp 2020-10-28 15:03:12.258061188 +0900 +@@ -63,7 +63,7 @@ + unw_context_t unw_ctx; 
class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
    """Caliper is a program instrumentation and performance measurement
    framework. It is designed as a performance analysis toolbox in a
    library, allowing one to bake performance analysis capabilities
    directly into applications and activate them at runtime.
    """

    homepage = "https://github.com/LLNL/Caliper"
    git = "https://github.com/LLNL/Caliper.git"
    url = "https://github.com/LLNL/Caliper/archive/v2.12.1.tar.gz"
    tags = ["e4s", "radiuss"]

    maintainers("daboehme", "adrienbernede")

    test_requires_compiler = True

    license("BSD-3-Clause")

    # --- Versions (declaration order is kept exactly as in the original) ---
    version("master", branch="master")
    version("2.12.1", sha256="2b5a8f98382c94dc75cc3f4517c758eaf9a3f9cea0a8dbdc7b38506060d6955c")
    version("2.11.0", sha256="b86b733cbb73495d5f3fe06e6a9885ec77365c8aa9195e7654581180adc2217c")
    version("2.10.0", sha256="14c4fb5edd5e67808d581523b4f8f05ace8549698c0e90d84b53171a77f58565")
    version("2.9.1", sha256="4771d630de505eff9227e0ec498d0da33ae6f9c34df23cb201b56181b8759e9e")
    version("2.9.0", sha256="507ea74be64a2dfd111b292c24c4f55f459257528ba51a5242313fa50978371f")

    # Deprecated tarball releases.
    version(
        "2.8.0",
        sha256="17807b364b5ac4b05997ead41bd173e773f9a26ff573ff2fe61e0e70eab496e4",
        deprecated=True,
    )
    version(
        "2.7.0",
        sha256="b3bf290ec2692284c6b4f54cc0c507b5700c536571d3e1a66e56626618024b2b",
        deprecated=True,
    )
    version(
        "2.6.0",
        sha256="6efcd3e4845cc9a6169e0d934840766b12182c6d09aa3ceca4ae776e23b6360f",
        deprecated=True,
    )
    version(
        "2.5.0",
        sha256="d553e60697d61c53de369b9ca464eb30710bda90fba9671201543b64eeac943c",
        deprecated=True,
    )

    # Deprecated releases pinned to a git tag and commit.
    version(
        "2.4.0",
        tag="v2.4.0",
        commit="30577b4b8beae104b2b35ed487fec52590a99b3d",
        deprecated=True,
    )
    version(
        "2.3.0",
        tag="v2.3.0",
        commit="9fd89bb0120750d1f9dfe37bd963e24e478a2a20",
        deprecated=True,
    )
    version(
        "2.2.0",
        tag="v2.2.0",
        commit="c408e9b3642c7aa80eff37b0826d819c57e7bc04",
        deprecated=True,
    )
    version(
        "2.1.1",
        tag="v2.1.1",
        commit="0593b0e01c1d8d3e50c990399cc0fee403485599",
        deprecated=True,
    )
    version(
        "2.0.1",
        tag="v2.0.1",
        commit="4d7ff46381c53a461e62edd949e2d9dea9db7b08",
        deprecated=True,
    )
    version(
        "1.9.1",
        tag="v1.9.1",
        commit="cfc1defbbee20b50dd3e3477badd09a92b1df970",
        deprecated=True,
    )
    version(
        "1.9.0",
        tag="v1.9.0",
        commit="8356e747349b285aa621c5b74e71559f0babc4a1",
        deprecated=True,
    )
    version(
        "1.8.0",
        tag="v1.8.0",
        commit="117c1ef596b617dc71407b8b67eebef094a654f8",
        deprecated=True,
    )
    version(
        "1.7.0",
        tag="v1.7.0",
        commit="898277c93d884d4e7ca1ffcf3bbea81d22364f26",
        deprecated=True,
    )

    # The language dependencies below were left disabled in the original.
    # depends_on("c", type="build")  # generated
    # depends_on("cxx", type="build")  # generated
    # depends_on("fortran", type="build")  # generated

    # --- Variants ---
    is_linux = sys.platform.startswith("linux")
    variant("shared", default=True, description="Build shared libraries")
    variant("adiak", default=True, description="Enable Adiak support")
    variant("mpi", default=True, description="Enable MPI support")
    # libunwind has some issues on Mac
    variant(
        "libunwind",
        default=sys.platform != "darwin",
        description="Enable stack unwind support",
    )
    variant("libdw", default=is_linux, description="Enable DWARF symbol lookup")
    # pthread_self() signature is incompatible with PAPI_thread_init() on Mac
    variant(
        "papi",
        default=sys.platform != "darwin",
        description="Enable PAPI service",
    )
    variant("libpfm", default=False, description="Enable libpfm (perf_events) service")
    # Gotcha is Linux-only
    variant("gotcha", default=is_linux, description="Enable GOTCHA support")
    variant("sampler", default=is_linux, description="Enable sampling support on Linux")
    variant("sosflow", default=False, description="Enable SOSflow support")
    variant("fortran", default=False, description="Enable Fortran support")
    variant("variorum", default=False, description="Enable Variorum support")
    variant("vtune", default=False, description="Enable Intel Vtune support")
    variant(
        "kokkos",
        default=True,
        when="@2.3.0:",
        description="Enable Kokkos profiling support",
    )
    variant("tests", default=False, description="Enable tests")
    variant("tools", default=True, description="Enable tools")
    variant(
        "python",
        default=False,
        when="@v2.12:",
        description="Build Python bindings",
    )

    # --- Dependencies ---
    depends_on("adiak@0.1:0", when="@2.2:2.10 +adiak")
    depends_on("adiak@0.4:0", when="@2.11: +adiak")

    depends_on("papi@5.3:5", when="@:2.2 +papi")
    depends_on("papi@5.3:", when="@2.3: +papi")

    depends_on("libpfm4@4.8:4", when="+libpfm")

    depends_on("mpi", when="+mpi")
    depends_on("unwind@1.2:1", when="+libunwind")
    depends_on("elfutils", when="+libdw")
    depends_on("variorum", when="+variorum")
    depends_on("intel-oneapi-vtune", when="+vtune")

    depends_on("sosflow@spack", when="@1.0:1+sosflow")

    depends_on("cmake", type="build")
    depends_on("python", type="build")

    depends_on("python@3", when="+python", type=("build", "link", "run"))
    depends_on("py-pybind11", when="+python", type=("build", "link", "run"))

    # --- Conflicts ---
    # sosflow support not yet in 2.0
    conflicts("+sosflow", "@2:")
    conflicts("+adiak", "@:2.1")
    conflicts("+libdw", "@:2.4")
    conflicts("+rocm", "@:2.7")
    conflicts("+rocm+cuda")

    # --- Patches ---
    patch("for_aarch64.patch", when="@:2.11 target=aarch64:")
    patch(
        "sampler-service-missing-libunwind-include-dir.patch",
        when="@2.9.0:2.9.1 +libunwind +sampler",
    )

    def _get_sys_type(self, spec):
        # Prefer the "SYS_TYPE" entry of `env` when present; otherwise fall
        # back to the architecture recorded on the spec.
        fallback = spec.architecture
        return env["SYS_TYPE"] if "SYS_TYPE" in env else fallback

    def initconfig_compiler_entries(self):
        # Extend the parent's compiler entries with Caliper's own switches.
        spec = self.spec
        entries = super().initconfig_compiler_entries()

        if spec.satisfies("+rocm"):
            # hipcc is placed first in the list for ROCm builds.
            entries.insert(0, cmake_cache_path("CMAKE_CXX_COMPILER", spec["hip"].hipcc))

        variant_switches = (
            ("WITH_FORTRAN", "+fortran"),
            ("BUILD_SHARED_LIBS", "+shared"),
            ("BUILD_TESTING", "+tests"),
            ("WITH_TOOLS", "+tools"),
        )
        for cache_var, flag in variant_switches:
            entries.append(cmake_cache_option(cache_var, spec.satisfies(flag)))

        entries.append(cmake_cache_option("BUILD_DOCS", False))
        entries.append(cmake_cache_path("PYTHON_EXECUTABLE", spec["python"].command.path))

        return entries

    def initconfig_hardware_entries(self):
        # Extend the parent's hardware entries with the CUDA / ROCm profiling
        # service switches.
        spec = self.spec
        entries = super().initconfig_hardware_entries()

        with_cuda = spec.satisfies("+cuda")
        entries.append(cmake_cache_option("WITH_CUPTI", with_cuda))
        entries.append(cmake_cache_option("WITH_NVTX", with_cuda))
        if with_cuda:
            cuda_prefix = spec["cuda"].prefix
            entries.append(cmake_cache_path("CUDA_TOOLKIT_ROOT_DIR", cuda_prefix))
            entries.append(cmake_cache_path("CUPTI_PREFIX", cuda_prefix))

        with_rocm = spec.satisfies("+rocm")
        entries.append(cmake_cache_option("WITH_ROCTRACER", with_rocm))
        entries.append(cmake_cache_option("WITH_ROCTX", with_rocm))

        return entries

    def initconfig_mpi_entries(self):
        # Parent's MPI entries plus the WITH_MPI switch.
        entries = super().initconfig_mpi_entries()
        entries.append(cmake_cache_option("WITH_MPI", self.spec.satisfies("+mpi")))
        return entries

    def initconfig_package_entries(self):
        # Build the package-specific cache entries: dependency locations
        # first, then the feature switches.
        spec = self.spec
        rule = "#------------------" + "-" * 60
        entries = []

        # TPL locations
        entries += [rule, "# TPLs", rule + "\n"]

        # (variant, cache variable, dependency name, sub-directories appended
        # to the dependency prefix)
        tpl_locations = (
            ("+adiak", "adiak_DIR", "adiak", ()),
            ("+papi", "PAPI_PREFIX", "papi", ()),
            ("+libdw", "LIBDW_PREFIX", "elfutils", ()),
            ("+libpfm", "LIBPFM_INSTALL", "libpfm4", ()),
            ("+sosflow", "SOS_PREFIX", "sosflow", ()),
            ("+variorum", "VARIORUM_PREFIX", "variorum", ()),
            ("+vtune", "ITT_PREFIX", "intel-oneapi-vtune", ("vtune", "latest")),
            ("+libunwind", "LIBUNWIND_PREFIX", "unwind", ()),
        )
        for flag, cache_var, dep_name, subdirs in tpl_locations:
            if not spec.satisfies(flag):
                continue
            location = spec[dep_name].prefix
            if subdirs:
                location = join_path(location, *subdirs)
            entries.append(cmake_cache_path(cache_var, location))

        # Build options
        entries += [rule, "# Build Options", rule + "\n"]

        feature_switches = (
            ("WITH_ADIAK", "+adiak"),
            ("WITH_GOTCHA", "+gotcha"),
            ("WITH_SAMPLER", "+sampler"),
            ("WITH_PAPI", "+papi"),
            ("WITH_LIBDW", "+libdw"),
            ("WITH_LIBPFM", "+libpfm"),
            ("WITH_SOSFLOW", "+sosflow"),
            ("WITH_KOKKOS", "+kokkos"),
            ("WITH_VARIORUM", "+variorum"),
            ("WITH_VTUNE", "+vtune"),
            ("WITH_PYTHON_BINDINGS", "+python"),
        )
        for cache_var, flag in feature_switches:
            entries.append(cmake_cache_option(cache_var, spec.satisfies(flag)))

        # -DWITH_CALLPATH was renamed -DWITH_LIBUNWIND in 2.5
        if spec.satisfies("@2.5:"):
            unwind_switch = "WITH_LIBUNWIND"
        else:
            unwind_switch = "WITH_CALLPATH"
        entries.append(cmake_cache_option(unwind_switch, spec.satisfies("+libunwind")))

        return entries

    def cmake_args(self):
        # No extra command-line arguments are added here.
        return []

    def setup_run_environment(self, env):
        # PYTHONPATH only needs adjusting when the Python bindings are built.
        if not self.spec.satisfies("+python"):
            return
        install_prefix = self.spec.prefix
        env.prepend_path("PYTHONPATH", install_prefix.join(python_platlib))
        env.prepend_path("PYTHONPATH", install_prefix.join(python_purelib))

    @run_after("install")
    def cache_test_sources(self):
        """Copy the example source files after the package is installed to an
        install test subdirectory for use during `spack test run`."""
        cache_extra_test_sources(self, [join_path("examples", "apps")])

    def test_cxx_example(self):
        """build and run cxx-example"""

        exe = "cxx-example"

        source_path = find_required_file(
            self.test_suite.current_test_cache_dir,
            f"{exe}.cpp",
            expected=1,
            recursive=True,
        )

        # Use lib/ when it exists in the install prefix, lib64/ otherwise.
        if os.path.exists(self.prefix.lib):
            lib_dir = self.prefix.lib
        else:
            lib_dir = self.prefix.lib64

        cxx = which(os.environ["CXX"])
        compile_args = [
            f"-L{lib_dir}",
            f"-I{self.prefix.include}",
            source_path,
            "-o",
            exe,
            "-std=c++11",
            "-lcaliper",
            "-lstdc++",
        ]
        with working_dir(os.path.dirname(source_path)):
            cxx(*compile_args)

            cxx_example = which(exe)
            cxx_example()
spack-src/src/services/sampler/CMakeLists.txt 2022-11-30 13:52:42.000000000 -0500 ++++ spack-src-patched/src/services/sampler/CMakeLists.txt 2023-05-04 20:43:47.240310306 -0400 +@@ -17,6 +17,10 @@ + + include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + ++if (CALIPER_HAVE_LIBUNWIND) ++ include_directories(${LIBUNWIND_INCLUDE_DIRS}) ++endif() ++ + add_library(caliper-sampler OBJECT ${CALIPER_SAMPLER_SOURCES}) + + add_service_objlib("caliper-sampler") diff --git a/repo/cray-mpich/package.py b/repo/cray-mpich/package.py index b0f2e1561..36d80d64b 100644 --- a/repo/cray-mpich/package.py +++ b/repo/cray-mpich/package.py @@ -17,8 +17,8 @@ def libs(self): if self.spec.satisfies("+gtl"): gtl_lib_prefix = self.spec.extra_attributes["gtl_lib_path"] - libraries = ["libmpi_gtl_hsa"] - libs += find_libraries(libraries, root=gtl_lib_prefix, recursive=True) + gtl_libs = self.spec.extra_attributes["gtl_libs"] + libs += find_libraries(gtl_libs, root=gtl_lib_prefix, recursive=True) return libs @@ -29,8 +29,6 @@ def setup_run_environment(self, env): if self.spec.satisfies("+gtl"): env.set("MPICH_GPU_SUPPORT_ENABLED", "1") env.prepend_path("LD_LIBRARY_PATH", self.spec.extra_attributes["gtl_lib_path"]) - env.set("GTL_HSA_VSMSG_CUTOFF_SIZE", str(self.spec.extra_attributes["gtl_cutoff_size"])) - env.set("FI_CXI_ATS", str(self.spec.extra_attributes["fi_cxi_ats"])) else: env.set("MPICH_GPU_SUPPORT_ENABLED", "0") gtl_path = self.spec.extra_attributes.get("gtl_lib_path", "") diff --git a/systems/lanl-venado/compilers/cce/00-cce-18-compilers.yaml b/systems/lanl-venado/compilers/cce/00-cce-18-compilers.yaml new file mode 100644 index 000000000..7ce4aeb35 --- /dev/null +++ b/systems/lanl-venado/compilers/cce/00-cce-18-compilers.yaml @@ -0,0 +1,25 @@ +compilers: +- compiler: + spec: cce@18.0.0 + paths: + cc: /opt/cray/pe/cce/18.0.0/bin/craycc + cxx: /opt/cray/pe/cce/18.0.0/bin/crayCC + f77: /opt/cray/pe/cce/18.0.0/bin/crayftn + fc: /opt/cray/pe/cce/18.0.0/bin/crayftn + 
flags: + cflags: -g -O2 --gcc-toolchain=/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0 + cxxflags: -g -O2 --gcc-toolchain=/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0 + fflags: -g -O2 -hnopattern + ldflags: -ldl + operating_system: sles15 + target: aarch64 + modules: [] + environment: + prepend_path: + LD_LIBRARY_PATH: "/opt/cray/pe/cce/18.0.0/cce/aarch64/lib:/opt/cray/libfabric/1.20.1/lib64:/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib:/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib64:/opt/cray/pe/gcc-libs" + extra_rpaths: + - /opt/cray/pe/gcc-libs + - /opt/cray/pe/cce/18.0.0/cce/aarch64/lib + - /opt/cray/libfabric/1.20.1/lib64 + - /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib + - /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib64 diff --git a/systems/lanl-venado/compilers/gcc/00-gcc-12-compilers.yaml b/systems/lanl-venado/compilers/gcc/00-gcc-12-compilers.yaml new file mode 100644 index 000000000..f28815e8f --- /dev/null +++ b/systems/lanl-venado/compilers/gcc/00-gcc-12-compilers.yaml @@ -0,0 +1,14 @@ +compilers: +- compiler: + spec: gcc@12.3.0 + paths: + cc: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/gcc + cxx: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/g++ + f77: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/gfortran + fc: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/gfortran + flags: {} + operating_system: sles15 + target: aarch64 + modules: [] + environment: {} + extra_rpaths: [] diff --git a/systems/lanl-venado/externals/base/00-packages.yaml b/systems/lanl-venado/externals/base/00-packages.yaml new file mode 100644 index 000000000..63682360b --- /dev/null +++ b/systems/lanl-venado/externals/base/00-packages.yaml @@ -0,0 +1,43 @@ +# Copyright 2023 Lawrence Livermore National 
Security, LLC and other +# Benchpark Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: Apache-2.0 + +packages: + tar: + externals: + - spec: tar@1.34 + prefix: /usr + buildable: false + cmake: + externals: + - spec: cmake@3.29.6 + prefix: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/packages/cmake/cmake-3.29.6 + buildable: false + gmake: + externals: + - spec: gmake@4.2.1 + prefix: /usr + buildable: false + automake: + externals: + - spec: automake@1.15.1 + prefix: /usr + buildable: false + autoconf: + externals: + - spec: autoconf@2.69 + prefix: /usr + buildable: false + fftw: + externals: + - spec: fftw@3.3.10.8 + prefix: /opt/cray/pe/fftw/3.3.10.8/arm_grace + buildable: false + python: + externals: + - spec: python@3.10.9 + prefix: /usr/projects/hpcsoft/common/aarch64/anaconda/2023.03-python-3.10 + buildable: false + mpi: + buildable: false diff --git a/systems/lanl-venado/externals/libsci/00-gcc-packages.yaml b/systems/lanl-venado/externals/libsci/00-gcc-packages.yaml new file mode 100644 index 000000000..f58d5a59a --- /dev/null +++ b/systems/lanl-venado/externals/libsci/00-gcc-packages.yaml @@ -0,0 +1,5 @@ +packages: + cray-libsci: + externals: + - spec: cray-libsci@24.07.0%gcc + prefix: /opt/cray/pe/libsci/24.07.0/gnu/12.3/aarch64 diff --git a/systems/lanl-venado/externals/libsci/01-cce-packages.yaml b/systems/lanl-venado/externals/libsci/01-cce-packages.yaml new file mode 100644 index 000000000..8c1608b6b --- /dev/null +++ b/systems/lanl-venado/externals/libsci/01-cce-packages.yaml @@ -0,0 +1,5 @@ +packages: + cray-libsci: + externals: + - spec: cray-libsci@24.07.0%cce + prefix: /opt/cray/pe/libsci/24.07.0/cray/17.0/aarch64 diff --git a/systems/lanl-venado/system.py b/systems/lanl-venado/system.py new file mode 100644 index 000000000..b95ab0f55 --- /dev/null +++ b/systems/lanl-venado/system.py @@ -0,0 +1,215 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project 
class LanlVenado(System):
    """Benchpark system definition for the LANL Venado machine.

    The variants select the cluster partition, CUDA version, compiler,
    GTL-enabled MPI, and the BLAS/LAPACK providers. The methods turn those
    selections into the package and compiler configuration files and the
    variables that experiments consume.
    """

    variant(
        "cluster",
        default="grace-hopper",
        values=("grace-hopper", "grace-grace"),
        description="Which cluster to run on",
    )

    # The default has to be one of ``values``. It is interpolated verbatim
    # into the ``cuda@<version>`` external specs and the install prefixes
    # generated by ``cuda_config``, so "12-5" would produce paths and specs
    # that do not match either supported version.
    variant(
        "cuda",
        default="12.5",
        values=("11.8", "12.5"),
        description="CUDA version",
    )

    variant(
        "compiler",
        default="cce",
        values=("gcc", "cce"),
        description="Which compiler to use",
    )

    variant(
        "gtl",
        default=False,
        values=(True, False),
        description="Use GTL-enabled MPI",
    )

    variant(
        "lapack",
        default="cusolver",
        values=("cusolver", "cray-libsci"),
        description="Which lapack to use",
    )

    variant(
        "blas",
        default="cublas",
        values=("cublas", "cray-libsci"),
        description="Which blas to use",
    )

    def initialize(self):
        """Set the scheduler and copy the per-cluster resources onto ``self``.

        Every key/value pair stored in ``id_to_resources`` for the selected
        ``cluster`` variant becomes an instance attribute.
        """
        super().initialize()

        self.scheduler = "slurm"
        attrs = id_to_resources.get(self.spec.variants["cluster"][0])
        for k, v in attrs.items():
            setattr(self, k, v)

    def generate_description(self, output_dir):
        """Write ``software.yaml`` into ``output_dir`` after the base description.

        Args:
            output_dir: Directory that receives the generated description files.
        """
        super().generate_description(output_dir)

        sw_description = pathlib.Path(output_dir) / "software.yaml"

        with open(sw_description, "w") as f:
            f.write(self.sw_description())

    def system_specific_variables(self):
        """Return the variables specific to this system as a dict.

        ``default_cuda_version`` follows the ``cuda`` variant; the other
        values are fixed strings.
        """
        return {
            "cuda_arch": "90",
            "default_cuda_version": self.spec.variants["cuda"][0],
            "extra_batch_opts": '"-A llnl_ai_g -pgpu"',
        }

    def external_pkg_configs(self):
        """Return the list of package configuration files for this system.

        The list holds the static base externals, a generated CUDA config, a
        generated MPI config, and the libsci externals matching the selected
        compiler. The two generated files are written to the paths handed
        out by ``next_adhoc_cfg``.
        """
        externals = LanlVenado.resource_location / "externals"

        selections = [externals / "base" / "00-packages.yaml"]

        cuda_cfg_path = self.next_adhoc_cfg()
        with open(cuda_cfg_path, "w") as f:
            f.write(self.cuda_config(self.spec.variants["cuda"][0]))
        selections.append(cuda_cfg_path)

        mpi_cfg_path = self.next_adhoc_cfg()
        with open(mpi_cfg_path, "w") as f:
            f.write(self.mpi_config())
        selections.append(mpi_cfg_path)

        if self.spec.satisfies("compiler=cce"):
            selections.append(externals / "libsci" / "01-cce-packages.yaml")
        elif self.spec.satisfies("compiler=gcc"):
            selections.append(externals / "libsci" / "00-gcc-packages.yaml")

        return selections

    def compiler_configs(self):
        """Return the compiler configuration files for this system.

        The gcc definition is always included; the cce definition is added
        in front of it when ``compiler=cce``.
        """
        compilers = LanlVenado.resource_location / "compilers"

        selections = []
        # TODO: Construct/extract/customize compiler information from the working set
        if self.spec.satisfies("compiler=cce"):
            selections.append(compilers / "cce" / "00-cce-18-compilers.yaml")
        selections.append(compilers / "gcc" / "00-gcc-12-compilers.yaml")

        return selections

    def mpi_config(self):
        """Return the YAML text describing the external cray-mpich package.

        ``+gtl`` is emitted only when both ``compiler=cce`` and ``+gtl`` are
        selected; every other combination yields ``~gtl``.

        Raises:
            ValueError: If the ``compiler`` variant is neither ``cce`` nor
                ``gcc``.
        """
        mpi_version = "8.1.30"
        gtl = (
            "+gtl"
            if self.spec.satisfies("compiler=cce") and self.spec.satisfies("+gtl")
            else "~gtl"
        )

        # TODO: Construct/extract this information from the working set
        if self.spec.satisfies("compiler=cce"):
            compiler = "cce@18.0.0"
            mpi_compiler_suffix = "crayclang/17.0"
        elif self.spec.satisfies("compiler=gcc"):
            compiler = "gcc@12.3.0"
            mpi_compiler_suffix = "gnu/12.3"
        else:
            # Fail with a clear message instead of an unbound-local error in
            # the template below.
            raise ValueError(
                f"Unexpected value for compiler: {self.spec.variants['compiler'][0]}"
            )

        return f"""\
packages:
  cray-mpich:
    externals:
    - spec: cray-mpich@{mpi_version}%{compiler} {gtl} +wrappers
      prefix: /opt/cray/pe/mpich/{mpi_version}/ofi/{mpi_compiler_suffix}
      extra_attributes:
        gtl_lib_path: /opt/cray/pe/mpich/{mpi_version}/gtl/lib
        gtl_libs: ["libmpi_gtl_cuda"]
        ldflags: "-L/opt/cray/pe/mpich/{mpi_version}/ofi/{mpi_compiler_suffix}/lib -lmpi -L/opt/cray/pe/mpich/{mpi_version}/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/{mpi_version}/gtl/lib -lmpi_gtl_cuda"
"""

    def cuda_config(self, cuda_version):
        """Return the YAML text for the CUDA externals and BLAS/LAPACK choice.

        Args:
            cuda_version: Version string substituted into every external
                spec and install prefix in the template.
        """
        template = """\
packages:
  blas:
    require:
    - {blas}
  lapack:
    require:
    - {lapack}
  curand:
    externals:
    - spec: curand@{x}
      prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
    buildable: false
  cusparse:
    externals:
    - spec: cusparse@{x}
      prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
    buildable: false
  cuda:
    externals:
    - spec: cuda@{x}
      prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
    buildable: false
  cub:
    externals:
    - spec: cub@{x}
      prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
    buildable: false
  cublas:
    externals:
    - spec: cublas@{x}
      prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/math_libs/{x}
    buildable: false
  cusolver:
    externals:
    - spec: cusolver@{x}
      prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/math_libs/{x}
    buildable: false
"""
        return template.format(
            x=cuda_version,
            blas=self.spec.variants["blas"][0],
            lapack=self.spec.variants["lapack"][0],
        )

    def sw_description(self):
        """This is somewhat vestigial: for the Tioga config that is committed
        to the repo, multiple instances of mpi/compilers are stored and
        these variables were used to choose consistent dependencies.
        The configs generated by this class should only ever have one
        instance of MPI etc., so there is no need for that. The experiments
        will fail if these variables are not defined though, so for now
        they are still generated (but with more-generic values).
        """
        return f"""\
software:
  packages:
    default-compiler:
      pkg_spec: {self.spec.variants["compiler"][0]}
    default-mpi:
      pkg_spec: cray-mpich
    default-lapack:
      pkg_spec: {self.spec.variants["lapack"][0]}
    default-blas:
      pkg_spec: {self.spec.variants["blas"][0]}
"""