diff --git a/experiments/branson/experiment.py b/experiments/branson/experiment.py
new file mode 100644
index 000000000..7a8ea3f7d
--- /dev/null
+++ b/experiments/branson/experiment.py
@@ -0,0 +1,125 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from benchpark.error import BenchparkError
+from benchpark.directives import variant
+from benchpark.experiment import Experiment
+from benchpark.openmp import OpenMPExperiment
+from benchpark.cuda import CudaExperiment
+from benchpark.rocm import ROCmExperiment
+from benchpark.scaling import StrongScaling
+from benchpark.scaling import WeakScaling
+from benchpark.expr.builtin.caliper import Caliper
+
+
+class Branson(
+    Experiment,
+    OpenMPExperiment,
+    CudaExperiment,
+    ROCmExperiment,
+    StrongScaling,
+    WeakScaling,
+    Caliper,
+):
+    """Benchpark experiment for the Branson workload.
+
+    Exactly one of ``+single_node``, ``+strong`` or ``+weak`` must be enabled.
+    """
+
+    variant(
+        "workload",
+        default="branson",
+        description="workload name",
+    )
+
+    variant(
+        "version",
+        default="develop",
+        description="app version",
+    )
+
+    variant(
+        "n_groups",
+        default="30",
+        values=int,
+        description="Number of groups",
+    )
+
+    def compute_applications_section(self):
+        """Register the experiment variables for the selected scaling mode.
+
+        Raises:
+            BenchparkError: unless exactly one scaling mode is enabled.
+        """
+        # TODO: Replace with conflicts clause
+        enabled = [m for m in ("strong", "weak", "single_node") if self.spec.satisfies(f"+{m}")]
+        if not enabled:
+            raise BenchparkError(
+                f"No scaling mode selected for application package {self.name}; "
+                "enable one of +strong, +weak or +single_node"
+            )
+        if len(enabled) > 1:
+            raise BenchparkError(
+                f"Only one type of scaling per experiment is allowed for application package {self.name}"
+            )
+        (mode,) = enabled
+
+        # Unscaled starting point: node count and particle count.
+        num_nodes = {"n_nodes": 1}
+        num_particles = {"num_particles": 850000000}
+
+        if mode == "single_node":
+            pairs = [*num_nodes.items(), *num_particles.items()]
+        else:
+            factor = int(self.spec.variants["scaling-factor"][0])
+            iterations = int(self.spec.variants["scaling-iterations"][0])
+            nodes_arg = {tuple(num_nodes.keys()): list(num_nodes.values())}
+            if mode == "strong":
+                # Only n_nodes is scaled; num_particles keeps its base value.
+                scaled = self.generate_strong_scaling_params(nodes_arg, factor, iterations)
+                pairs = [*scaled.items(), *num_particles.items()]
+            else:
+                # n_nodes and num_particles are both handed to the weak-scaling generator.
+                particles_arg = {
+                    tuple(num_particles.keys()): list(num_particles.values())
+                }
+                scaled = self.generate_weak_scaling_params(
+                    nodes_arg, particles_arg, factor, iterations
+                )
+                pairs = list(scaled.items())
+
+        for name, value in pairs:
+            self.add_experiment_variable(name, value, True)
+
+        use_gpu = self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm")
+        self.add_experiment_variable("use_gpu", "TRUE" if use_gpu else "FALSE")
+
+        self.add_experiment_variable("n_ranks", "{n_nodes}*{sys_cores_per_node}", True)
+
+    def compute_spack_section(self):
+        """Add the Spack specs (MPI provider and branson itself) for this experiment."""
+        # get package version
+        app_version = self.spec.variants["version"][0]
+
+        # get system config options
+        # TODO: Get compiler/mpi/package handles directly from system.py
+        system_specs = {"compiler": "default-compiler", "mpi": "default-mpi"}
+        # NOTE(review): the GPU entries below are recorded but not read in this method.
+        if self.spec.satisfies("+cuda"):
+            system_specs["cuda_version"] = "{default_cuda_version}"
+            system_specs["cuda_arch"] = "{cuda_arch}"
+        if self.spec.satisfies("+rocm"):
+            system_specs["rocm_arch"] = "{rocm_arch}"
+
+        # set package spack specs
+        self.add_spack_spec(system_specs["mpi"])
+
+        self.add_spack_spec(
+            self.name,
+            [
+                f"branson@{app_version} n_groups={self.spec.variants['n_groups'][0]} ",
+                system_specs["compiler"],
+            ],
+        )
diff --git a/legacy/experiments/branson/mpi-only/ramble.yaml b/legacy/experiments/branson/mpi-only/ramble.yaml
new file mode 100644
index 000000000..c4cb4e929
--- /dev/null
+++ b/legacy/experiments/branson/mpi-only/ramble.yaml
@@ -0,0 +1,51 @@
+ramble:
+ applications:
+ branson:
+ workloads:
+ branson:
+ experiments:
+ branson_branson_weak_scaling_caliper_time_mpi_{n_nodes}_{num_particles}_{n_ranks}:
+ exclude: {}
+ matrix: []
+ variables:
+ n_nodes:
+ - 1
+ - 2
+ - 4
+ - 8
+ n_ranks: '{n_nodes}*{sys_cores_per_node}'
+ num_particles:
+ - 850000000
+ - 1700000000
+ - 3400000000
+ - 6800000000
+ variants:
+ package_manager: spack
+ zips: {}
+ config:
+ deprecated: true
+ spack_flags:
+ concretize: -U -f
+ install: --add --keep-stage
+ include:
+ - ./configs
+ modifiers:
+ - name: allocation
+ - mode: mpi
+ name: caliper
+ - mode: time
+ name: caliper
+ software:
+ environments:
+ branson:
+ packages:
+ - caliper
+ - default-mpi
+ - branson
+ packages:
+ branson:
+ compiler: default-compiler
+ pkg_spec: branson@develop+caliper
+ caliper:
+ compiler: default-compiler
+ pkg_spec: caliper@master+adiak+mpi~libunwind~libdw~papi
diff --git a/legacy/experiments/branson/openmp/ramble.yaml b/legacy/experiments/branson/openmp/ramble.yaml
deleted file mode 100644
index e90dc280a..000000000
--- a/legacy/experiments/branson/openmp/ramble.yaml
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright 2023 Lawrence Livermore National Security, LLC and other
-# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
-#
-# SPDX-License-Identifier: Apache-2.0
-ramble:
- include:
- - ./configs/software.yaml
- - ./configs/variables.yaml
- - ./configs/modifier.yaml
- config:
- deprecated: true
- spack_flags:
- install: '--add --keep-stage'
- concretize: '-U -f'
-#Branson does not currently work on lassen
- modifiers:
- - name: allocation
- applications:
- branson:
- workloads:
- branson:
- variables:
- n_ranks: '{n_nodes}*{sys_cores_per_node}'
- experiments:
- branson_strong_{n_nodes}:
- variants:
- package_manager: spack
- variables:
- #looks like branson should use ~25% of memory per node
- num_particles: '850000000'
- n_nodes: ['1','2','4','8']
-
- branson_weak_{n_nodes}:
- variants:
- package_manager: spack
- variables:
- num_particles: '850000000*{n_nodes}'
- n_nodes: ['1','2','4','8']
- software:
- packages:
- branson:
- pkg_spec: branson@develop
- compiler: default-compiler
- environments:
- branson:
- packages:
- - default-mpi
- - branson
- - '{modifier_package_name}'
diff --git a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml
index 1c050e729..2528cb68e 100644
--- a/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml
+++ b/legacy/systems/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/auxiliary_software_files/packages.yaml
@@ -302,6 +302,7 @@ packages:
gtl_cutoff_size: 4096
fi_cxi_ats: 0
gtl_lib_path: /opt/cray/pe/mpich/8.1.26/gtl/lib
+ gtl_libs: ["libmpi_gtl_hsa"]
ldflags: "-L/opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0/lib -lmpi -L/opt/cray/pe/mpich/8.1.26/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/8.1.26/gtl/lib -lmpi_gtl_hsa"
- spec: cray-mpich@8.1.26%cce@16.0.0 ~gtl +wrappers
prefix: /opt/cray/pe/mpich/8.1.26/ofi/crayclang/16.0
diff --git a/repo/branson/application.py b/repo/branson/application.py
index 2103c1ca5..51a5e246a 100644
--- a/repo/branson/application.py
+++ b/repo/branson/application.py
@@ -19,14 +19,15 @@ class Branson(ExecutableApplication):
executable('setup_experiment',
template=[
'cp {branson}/inputs/* {experiment_run_dir}/.',
- 'sed -i "s|250000000|{num_particles}|g" {experiment_run_dir}/{input_file}'
+ # Anchor each substitution to its XML element: a bare [0-9]* or .* pattern would rewrite every line of the input.
+ # NOTE(review): element names (photons, use_gpu_transporter) are assumed from Branson's sample inputs -- confirm.
+ 'sed -i "s|<photons>[0-9]*</photons>|<photons>{num_particles}</photons>|g" {experiment_run_dir}/{input_file}',
+ 'sed -i "s|<use_gpu_transporter>.*</use_gpu_transporter>|<use_gpu_transporter>{use_gpu}</use_gpu_transporter>|g" {experiment_run_dir}/{input_file}'
])
executable('p', '{branson}/bin/BRANSON {experiment_run_dir}/{input_file}', use_mpi=True)
workload('branson', executables=['setup_experiment','p'])
- workload_variable('input_file', default='3D_hohlraum_multi_node.xml',
+ workload_variable('input_file', default='3D_hohlraum_single_node.xml',
description='input file name',
workloads=['branson'])
diff --git a/repo/branson/branson_cmake.patch b/repo/branson/branson_cmake.patch
new file mode 100644
index 000000000..68af8bf73
--- /dev/null
+++ b/repo/branson/branson_cmake.patch
@@ -0,0 +1,485 @@
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -9,65 +9,160 @@
+ # Enbable debug mode by passing -DCMAKE_BUILD_TYPE=Debug to CMake, default is
+ # Release
+
+-cmake_minimum_required (VERSION 3.11)
++cmake_minimum_required(VERSION 3.21)
+
+-option(USE_GPU "user-set flag to compile in GPU code" FALSE)
++project (BRANSON
++ VERSION 0.8
++ DESCRIPTION "Branson can be used to study different algorithms for parallel Monte Carlo transport. Currently it contains particle passing and mesh passing methods for domain decomposition."
++)
++
++## Fail if someone tries to config an in-source build.
++if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
++ message(FATAL_ERROR "In-source builds are not supported. Please remove "
++ "CMakeCache.txt from the 'src' dir and configure an "
++ "out-of-source build in another directory.")
++endif()
++
++# Set the build type to Release by default if not set
++if(NOT CMAKE_BUILD_TYPE)
++ set(CMAKE_BUILD_TYPE "Release")
++endif()
++
++set(CMAKE_CXX_STANDARD 17)
++set(CMAKE_CXX_STANDARD_REQUIRED ON)
++set(CMAKE_CXX_EXTENSIONS OFF)
++set(CMAKE_BRANSON_CXXFLAGS ${CMAKE_CXX_FLAGS})
++
++option(ENABLE_CUDA "Use CUDA" FALSE)
++option(ENABLE_HIP "Use HIP" FALSE)
++option(ENABLE_CALIPER "Enable Caliper" FALSE)
++option(ENABLE_OPENMP "Enable OpenMP" FALSE)
++
++set(CMAKE_VERBOSE_MAKEFILE ON)
++
++if(ENABLE_CUDA)
++ enable_language(CXX)
+
+-if(DEFINED ENV{CUDADIR}
+- OR DEFINED ENV{CUDACXX}
+- OR DEFINED ENV{CUDA_HOME})
+- if(USE_GPU)
+- set(GPU_DBS_STRING "CUDA" CACHE STRING "If CUDA is available, this variable is 'CUDA'")
+- enable_language(CUDA)
++ find_package(CUDAToolkit REQUIRED)
++ list(APPEND branson_deps
++ CUDA::cudart)
++
++ include(CheckLanguage)
++ check_language(CUDA)
++
++ set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
++
++ if(NOT CMAKE_CUDA_COMPILER)
++   message(FATAL_ERROR "Unable to find the nvcc compiler. Please use " # strings are joined with no separator
++     "CMAKE_CUDA_COMPILER to provide the nvcc compiler.")
+ endif()
+-elseif(
+- "$ENV{LOADEDMODULES}" MATCHES "rocmcc"
+- OR DEFINED ENV{HIPCXX}
+- OR DEFINED CMAKE_HIP_COMPILER
+- OR DEFINED ENV{ROCM_PATH})
+- if(USE_GPU)
+- set(GPU_DBS_STRING "HIP" CACHE STRING "If HIP is available, this variable is 'HIP'")
+- #if(CMAKE_HIP_COMPILER MATCHES "CC")
+- # set(CMAKE_HIP_FLAGS "-x hip")
+- #endif()
+- enable_language(HIP)
++
++ enable_language(CUDA)
++ set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
++ set(CMAKE_CUDA_STANDARD_REQUIRED ON)
++ set(CMAKE_CUDA_ARCHITECTURES "${CUDA_ARCH}")
++ set_source_files_properties("main.cc" PROPERTIES LANGUAGE CUDA)
++ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -v")
++ set(CMAKE_CUDA_FLAGS "-g --expt-relaxed-constexpr ${CMAKE_CUDA_FLAGS}")
++ set(CMAKE_CUDA_FLAGS "-expt-extended-lambda ${CMAKE_CUDA_FLAGS}")
++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DUSE_GPU)
++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DHAS_GPU)
++
++ set(GPU_DBS_STRING "CUDA" CACHE STRING "If CUDA is available, this variable is 'CUDA'")
++
++ message("Making GPU(CUDA) BRANSON")
++
++elseif(ENABLE_HIP)
++ if(NOT DEFINED HIP_PATH)
++ if(NOT DEFINED ENV{HIP_PATH})
++ set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
++ else()
++ set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
++ endif()
+ endif()
+-endif()
+
+-message("GPU_DBS_STRING: ${GPU_DBS_STRING}")
+-project (BRANSON
+- VERSION 0.8
+- DESCRIPTION "Branson can be used to study different algorithms for parallel Monte Carlo transport. Currently it contains particle passing and mesh passing methods for domain decomposition."
+-# HOMEPAGE URL "https://github.com/lanl/branson" # needs cmake 3.12+
+- LANGUAGES CXX C ${GPU_DBS_STRING})
+-
+-get_property(_LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES)
+-
+-message("Languages: ${_LANGUAGES_}")
+-if((_LANGUAGES_ MATCHES CUDA OR _LANGUAGES_ MATCHES HIP) AND USE_GPU)
+- message("CUDA/HIP module found (CUDA/HIP environment variables set) and USE_GPU is on, making GPU BRANSON")
+- add_compile_definitions(HAS_GPU)
+-elseif(USE_GPU)
+- message(FATAL_ERROR "CUDA/HIP module NOT found (CUDA/HIP environment variables set) but USE_GPU is on, reconfigure with USE_GPU off or fix modules")
+-elseif(NOT USE_GPU)
+- message("GPU mode not requested, making CPU only BRANSON")
++ if(NOT DEFINED ROCM_PATH)
++ if(DEFINED ENV{ROCM_PATH})
++ set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed")
++ elseif(DEFINED ENV{HIP_PATH})
++ set(ROCM_PATH "$ENV{HIP_PATH}/.." CACHE PATH "Path to which ROCM has been installed")
++ else()
++ set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed")
++ endif()
++ endif()
++
++ if(NOT DEFINED HCC_PATH)
++ if(DEFINED ENV{HCC_PATH})
++ set(HCC_PATH $ENV{HCC_PATH} CACHE PATH "Path to which HCC has been installed")
++ else()
++ set(HCC_PATH "${ROCM_PATH}/hcc" CACHE PATH "Path to which HCC has been installed")
++ endif()
++ set(HCC_HOME "${HCC_PATH}")
++ endif()
++
++ if(NOT DEFINED HIP_CLANG_PATH)
++ if(NOT DEFINED ENV{HIP_CLANG_PATH})
++ set(HIP_CLANG_PATH "${ROCM_PATH}/llvm/bin" CACHE PATH "Path to which HIP compatible clang binaries have been installed")
++ else()
++ set(HIP_CLANG_PATH $ENV{HIP_CLANG_PATH} CACHE PATH "Path to which HIP compatible clang binaries have been installed")
++ endif()
++ endif()
++
++ set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
++ list(APPEND CMAKE_PREFIX_PATH
++ "${HIP_PATH}/lib/cmake"
++ "${HIP_PATH}/../lib/cmake" # hopefully catches all extra HIP dependencies
++ )
++
++ find_package(HIP QUIET)
++ if(HIP_FOUND)
++ message(STATUS "Found HIP: " ${HIP_VERSION})
++ else()
++ message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.")
++ endif()
++ find_package(hip REQUIRED)
++
++ # For ROCm >=3.5, wipe hip-clang specific interface options which are propagated
++ set_target_properties(hip::device PROPERTIES INTERFACE_COMPILE_OPTIONS "-fPIC")
++ set_target_properties(hip::device PROPERTIES INTERFACE_LINK_LIBRARIES "hip::host")
++
++ list(APPEND branson_deps
++ "-fgpu-rdc"
++ "--hip-link"
++ "--offload-arch=${HIP_ARCH}"
++ "--rocm-path=${ROCM_PATH}"
++ hip::host
++ hip::device)
++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DUSE_GPU)
++ list(APPEND CMAKE_BRANSON_CXXFLAGS -DHAS_GPU)
++ list(APPEND CMAKE_BRANSON_CXXFLAGS -x hip)
++ list(APPEND CMAKE_BRANSON_CXXFLAGS "--offload-arch=${HIP_ARCH}")
++ list(APPEND CMAKE_BRANSON_CXXFLAGS "-fgpu-rdc")
++ list(APPEND CMAKE_BRANSON_CXXFLAGS "-Wno-unused-result")
++
++ #set_source_files_properties("main.cc" PROPERTIES LANGUAGE HIP)
++
++ set(GPU_DBS_STRING "HIP" CACHE STRING "If HIP is available, this variable is 'HIP'")
++
++ message("Making GPU(HIP) BRANSON")
++
+ else()
+- message("CUDA/HIP module not found and GPU mode not requested, making CPU only BRANSON")
++ message("Making CPU BRANSON")
++
+ endif()
+
++message("GPU_DBS_STRING: ${GPU_DBS_STRING}")
+
+ # Build system support files are located here.
+-set( CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/config )
++set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/config ${CMAKE_MODULE_PATH})
+
+-message( "
+-This is ${CMAKE_PROJECT_NAME} v. ${PROJECT_VERSION}.
+-")
++message("This is ${CMAKE_PROJECT_NAME} v. ${PROJECT_VERSION}.")
+
+ #------------------------------------------------------------------------------#
+ # User configurable options
+
+ set (BRANSON_N_GROUPS ${N_GROUPS})
+-if (NOT BRANSON_N_GROUPS)
++if(NOT BRANSON_N_GROUPS)
+ message(STATUS
+ "HARDCODED NUMBER OF GROUPS NOT SET, DEFAULTING TO ONE GROUP (GRAY)
+ use '-DN_GROUPS=' when running cmake to select multigroup.\n")
+@@ -83,28 +178,10 @@ set(ENABLE_VERBOSE_GPU_TRANSPORT ${ENABLE_VERBOSE_GPU_TRANSPORT} CACHE STRING "F
+ #------------------------------------------------------------------------------#
+ # Record the site name
+
+-site_name( SITENAME )
+-string( REGEX REPLACE "([A-z0-9]+).*" "\\1" SITENAME ${SITENAME} )
++site_name(SITENAME)
++string(REGEX REPLACE "([A-z0-9]+).*" "\\1" SITENAME ${SITENAME})
+ include (lanl-setup)
+-set( SITENAME ${SITENAME} CACHE "STRING" "Name of the current machine" FORCE)
+-
+-#------------------------------------------------------------------------------#
+-# Setup compiler options
+-set( CXX_STANDARD_REQUIRED ON )
+-set( CMAKE_CXX_STANDARD 17 )
+-
+-# Do not enable extensions (e.g.: --std=gnu++11)
+-set( CMAKE_CXX_EXTENSIONS OFF )
+-set( CMAKE_C_EXTENSIONS OFF )
+-
+-#------------------------------------------------------------------------------#
+-# Build type and custom compiler flags
+-
+-if ( "${CMAKE_BUILD_TYPE}notset" STREQUAL "notset" AND
+- NOT DEFINED CMAKE_CONFIGURATION_TYPES )
+- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Release|Debug|RelWithDebInfo"
+- FORCE)
+-endif ()
++set(SITENAME ${SITENAME} CACHE "STRING" "Name of the current machine" FORCE)
+
+ # GCC options for address or undefined sanitizing
+ #set(GCC_SANITIZE_COMPILE_FLAGS "-fsanitize=address")
+@@ -113,65 +190,68 @@ endif ()
+ #set(GCC_SANITIZE_LINK_FLAGS "-fsanitize=undefined")
+
+ # add compiler flags
+-if( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
+- set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -ffp-contract=off")
+- set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0")
+- #set( CMAKE_CXX_FLAGS_RELEASE "-march=native -O3 -funroll-loops -fno-var-tracking-assignments")
+- set( CMAKE_CXX_FLAGS_RELEASE "-g -O3 -funroll-loops -fno-var-tracking-assignments")
++if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
++ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -ffp-contract=off")
++ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0")
++ #set(CMAKE_CXX_FLAGS_RELEASE "-march=native -O3 -funroll-loops -fno-var-tracking-assignments")
++ set(CMAKE_CXX_FLAGS_RELEASE "-g -O3 -funroll-loops -fno-var-tracking-assignments")
+ # optionally add sanitize flags
+- #set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${GCC_SANITIZE_COMPILE_FLAGS}")
+- #set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_SANITIZE_LINK_FLAGS}")
+-elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" )
+- set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0")
++ #set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${GCC_SANITIZE_COMPILE_FLAGS}")
++ #set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_SANITIZE_LINK_FLAGS}")
++elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
++ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -O0")
+ # note that the "fp-model=precise" flag significantly limite vectorization and slows down
+ # branson but it makes it the hardware counter data match Jayenne more closely (Jayenne uses
+ # this flag)
+- set( CMAKE_CXX_FLAGS_RELEASE "-g -O3 -fp-speculation=fast -xhost -qno-opt-dynamic-align")
+-elseif (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
++ set(CMAKE_CXX_FLAGS_RELEASE "-g -O3 -fp-speculation=fast -xhost -qno-opt-dynamic-align")
++elseif(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -Wall -O0 -xhost -qopenmp")
+ set(CMAKE_CXX_FLAGS_RELEASE "-g -O3 -xhost -qopenmp -qopt-zmm-usage=high -fp-speculation=fast -qopt-report=3 -qopt-report-file=runAVX512.optrpt")
+-elseif( CMAKE_CXX_COMPILER_ID STREQUAL "Cray" )
+- set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -DR123_USE_GNU_UINT128=0")
+- set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -DR123_USE_GNU_UINT128=0")
++elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Cray")
++ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -DR123_USE_GNU_UINT128=0")
++ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -DR123_USE_GNU_UINT128=0")
+ endif()
+
++if(ENABLE_CUDA)
++ string(JOIN " " CMAKE_BRANSON_CUDAFLAGS ${CMAKE_BRANSON_CXXFLAGS})
++ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler ${CMAKE_BRANSON_CUDAFLAGS}")
++else()
++ add_compile_options(${CMAKE_BRANSON_CXXFLAGS})
++endif()
+
+ #------------------------------------------------------------------------------#
+ # Look for Third Party Libraries (metis, etc.)
+ add_subdirectory(pugixml)
+
+-include(find_tpls)
+-setupTPLs()
+-
+ #------------------------------------------------------------------------------#
+ # Report build configuration
+
+ # Summary of TPLs
+ include(FeatureSummary)
+ message(" ")
+-feature_summary( WHAT ALL
++feature_summary(WHAT ALL
+ INCLUDE_QUIET_PACKAGES
+ FATAL_ON_MISSING_REQUIRED_PACKAGES
+- QUIET_ON_EMPTY )
++ QUIET_ON_EMPTY)
+
+ # Summary of user-selectable build options
+-message( "\nBuild Summary:\n")
+-message( STATUS "Machine name : ${SITENAME}")
++message("\nBuild Summary:\n")
++message(STATUS "Machine name : ${SITENAME}")
+ report_lanl_hpc_features()
+-if( ${BRANSON_N_GROUPS} GREATER 1 )
+- message( STATUS "Energy groups : ${BRANSON_N_GROUPS}")
++if(${BRANSON_N_GROUPS} GREATER 1)
++ message(STATUS "Energy groups : ${BRANSON_N_GROUPS}")
+ else()
+- message( STATUS "Energy groups : Gray (1-group)")
++ message(STATUS "Energy groups : Gray (1-group)")
+ endif()
+-message( STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+-message( STATUS "Compiler : ${CMAKE_CXX_COMPILER}")
+-if( CMAKE_CONFIGURATION_TYPES )
+- message( STATUS "Compiler Flags (All) : ${CMAKE_CXX_FLAGS}")
+- message( STATUS "Compiler Flags (Debug) : ${CMAKE_CXX_FLAGS_DEBUG}")
+- message( STATUS "Compiler Flags (Release): ${CMAKE_CXX_FLAGS_RELEASE}")
++message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
++message(STATUS "Compiler : ${CMAKE_CXX_COMPILER}")
++if(CMAKE_CONFIGURATION_TYPES)
++ message(STATUS "Compiler Flags (All) : ${CMAKE_CXX_FLAGS}")
++ message(STATUS "Compiler Flags (Debug) : ${CMAKE_CXX_FLAGS_DEBUG}")
++ message(STATUS "Compiler Flags (Release): ${CMAKE_CXX_FLAGS_RELEASE}")
+ else()
+- string( TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER )
+- message( STATUS "Compiler Flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
++ string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER)
++ message(STATUS "Compiler Flags : ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}")
+ endif()
+ message("\n")
+
+@@ -180,68 +260,55 @@ message("\n")
+
+ # set(DMAPP_DYNAMIC -Wl,--whole-archive,-ldmapp,--no-whole-archive)
+
++find_package(MPI REQUIRED)
++list(APPEND branson_deps
++ MPI::MPI_CXX)
+
+-#------------------------------------------------------------------------------#
+-# Targets
+-file(GLOB headers *.h)
+-add_executable(BRANSON main.cc ${headers})
+-target_include_directories( BRANSON PRIVATE
+- $ ${PROJECT_SOURCE_DIR}/pugixml/src/)
+-set( branson_deps
+- MPI::MPI_CXX
+- MPI::MPI_C
+- pugixml)
++list(APPEND branson_deps pugixml)
+
+-if(OpenMP_FOUND)
+- set(branson_deps "OpenMP::OpenMP_CXX;${branson_deps}")
++if(ENABLE_CALIPER)
++ message(STATUS "Looking for caliper...")
++ find_package(caliper REQUIRED)
++ if(caliper_FOUND)
++ message(STATUS "Looking for caliper.....found")
++ endif()
++ list(APPEND branson_deps
++ caliper)
++ find_package(adiak REQUIRED)
++ list(APPEND branson_deps
++ adiak::adiak)
++else()
++ add_custom_target(caliper)
+ endif()
+
+-if(METIS_FOUND)
+- set( branson_deps "METIS::metis;${branson_deps}")
++if(ENABLE_OPENMP)
++ set(USE_OPENMP ON)
++else()
++ set(USE_OPENMP OFF)
+ endif()
+
+-if( VIZ_LIBRARIES_FOUND )
+- set( branson_deps "Silo::silo;${HDF5_LIBRARIES};${branson_deps}" )
+-endif()
++include(find_tpls)
++setupTPLs()
+
+-if(caliper_FOUND)
+- set( branson_deps "caliper;${branson_deps}")
++if(OpenMP_FOUND)
++ list(APPEND branson_deps
++ OpenMP::OpenMP_CXX
++ )
+ endif()
+
+-#------------------------------------------------------------------------------#
+-# Generate config.h
+-
+-configure_file(config.h.in ${PROJECT_BINARY_DIR}/config.h)
+-#------------------------------------------------------------------------------#
+-
+-if("${GPU_DBS_STRING}" STREQUAL "CUDA" )
+- message("Setting CUDA compiler options")
+- #set_target_properties(BRANSON PROPERTIES CUDA_ARCHITECTURES "70") # V100
+- set_target_properties(BRANSON PROPERTIES CUDA_ARCHITECTURES "80") # A100
+- set_target_properties(BRANSON PROPERTIES CUDA_STANDARD 17)
+- string(APPEND CMAKE_CUDA_FLAGS " -g --expt-relaxed-constexpr")
+- string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda" )
+- set_source_files_properties("main.cc" PROPERTIES LANGUAGE CUDA)
+-elseif("${GPU_DBS_STRING}" STREQUAL "HIP" )
+- message("Setting HIP compiler options")
+- if(CMAKE_HIP_COMPILER MATCHES "CC")
+- set(CMAKE_HIP_FLAGS "-x hip")
+- endif()
+- set_target_properties(BRANSON PROPERTIES HIP_ARCHITECTURES "gfx942") # MI300
+- set_target_properties(BRANSON PROPERTIES HIP_STANDARD 17)
+- string(APPEND CMAKE_HIP_FLAGS " -g -mllvm=--disable-peephole")
+- #string(APPEND CMAKE_HIP_FLAGS " --expt-extended-lambda" )
+- set_source_files_properties("main.cc" PROPERTIES LANGUAGE HIP)
+-else()
+- message("GPU Options: Not a GPU build or GPU_DBS_STRING not recognized")
++if(METIS_FOUND)
++ list(APPEND branson_deps
++ METIS::metis)
+ endif()
+
+-target_link_libraries( BRANSON PRIVATE ${branson_deps} )
+-
+-#------------------------------------------------------------------------------#
+-# Testing
++if(VIZ_LIBRARIES_FOUND)
++ list(APPEND branson_deps
++ Silo::silo
++ ${HDF5_LIBRARIES}
++ )
++endif()
+
+-option( BUILD_TESTING "Should we compile the tests?" ON )
++option(BUILD_TESTING "Should we compile the tests?" ON)
+ if(BUILD_TESTING)
+ enable_testing()
+ add_subdirectory(test)
+@@ -252,11 +319,38 @@ else()
+ "Building tests disabled, set BUILD_TESTING=TRUE or don't set BUILD_TESTING to enable test builds")
+ endif()
+
++#------------------------------------------------------------------------------#
++# Generate config.h
++
++configure_file(config.h.in ${PROJECT_BINARY_DIR}/config.h)
++#------------------------------------------------------------------------------#
++
++#------------------------------------------------------------------------------#
++# Targets
++file(GLOB headers *.h)
++add_executable(BRANSON main.cc ${headers})
++target_link_libraries(BRANSON PUBLIC ${branson_deps})
++target_include_directories(BRANSON PRIVATE
++ ${CMAKE_BINARY_DIR} ${PROJECT_SOURCE_DIR}/pugixml/src/ ${HIP_INCLUDE_DIRS})
++
++if(ENABLE_CUDA)
++ set_target_properties(BRANSON
++ PROPERTIES
++ CUDA_SEPARABLE_COMPILATION ON
++ CUDA_RESOLVE_DEVICE_SYMBOLS ON)
++endif()
++
++if(ENABLE_HIP)
++ set_target_properties(BRANSON
++ PROPERTIES
++ HIP_SEPARABLE_COMPILATION ON
++ HIP_RESOLVE_DEVICE_SYMBOLS ON)
++endif()
+
+ #------------------------------------------------------------------------------#
+ # Targets for installation
+
+-install(TARGETS BRANSON DESTINATION bin)
++install(TARGETS BRANSON DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+ #------------------------------------------------------------------------------#
+ # End src/CMakeLists.txt
diff --git a/repo/branson/branson_power9.patch b/repo/branson/branson_power9.patch
new file mode 100644
index 000000000..ea3110173
--- /dev/null
+++ b/repo/branson/branson_power9.patch
@@ -0,0 +1,11 @@
+--- a/src/random123/features/gccfeatures.h
++++ b/src/random123/features/gccfeatures.h
+@@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ #endif
+
+ #ifdef __powerpc__
+-#include <ppu_intrinsics.h>
++//#include <ppu_intrinsics.h>  (disabled by branson_power9.patch)
+ #endif
+
+ #ifndef R123_STATIC_INLINE
diff --git a/repo/branson/package.py b/repo/branson/package.py
index 14e31c343..ac260e976 100644
--- a/repo/branson/package.py
+++ b/repo/branson/package.py
@@ -6,8 +6,10 @@
from spack.package import *
from spack.pkg.builtin.boost import Boost
+import os
-class Branson(CMakePackage):
+
+class Branson(CMakePackage, CudaPackage, ROCmPackage):
"""Branson's purpose is to study different algorithms for parallel Monte
Carlo transport. Currently it contains particle passing and mesh passing
methods for domain decomposition."""
@@ -30,6 +32,11 @@ class Branson(CMakePackage):
)
version("0.81", sha256="493f720904791f06b49ff48c17a681532c6a4d9fa59636522cf3f9700e77efe4")
version("0.8", sha256="85ffee110f89be00c37798700508b66b0d15de1d98c54328b6d02a9eb2cf1cb8")
+
+ variant("openmp", default=False, description="Enable OpenMP support")
+ variant("caliper", default=False, description="Enable Caliper monitoring")
+ variant("n_groups", default=30, values=int, description="Number of groups")
+
#depends_on("mpi")
depends_on("mpi@2:")
@@ -39,16 +46,70 @@ class Branson(CMakePackage):
depends_on(Boost.with_default_variants, when="@:0.81")
depends_on("metis")
depends_on("parmetis", when="@:0.81")
+ depends_on("caliper", when="+caliper")
+ depends_on("adiak", when="+caliper")
root_cmakelists_dir = "src"
+ flag_handler = build_system_flags
+
+ patch("branson_cmake.patch")
+ patch("branson_power9.patch")
+
+ def setup_build_environment(self, env):
+     """Export NVCC_APPEND_FLAGS for CUDA builds; other builds are left untouched."""
+     if "+cuda" not in self.spec:
+         return
+     # -allow-unsupported-compiler: nvcc option that skips its host-compiler version check.
+     env.set("NVCC_APPEND_FLAGS", "-allow-unsupported-compiler")
+
+ def patch(self):
+     """Write an empty ``ppu_intrinsics.h`` into ``src/random123/features`` of the staged source."""
+     # NOTE: this def rebinds the class-body name ``patch``; any patch("...") calls must stay above it.
+     stub_header = os.path.join(
+         self.stage.source_path, "src", "random123", "features", "ppu_intrinsics.h"
+     )
+     # Mode "w" creates the file, or truncates it if it already exists.
+     open(stub_header, "w").close()
+
def cmake_args(self):
+ """Build and return the list of ``-D`` definitions derived from the spec."""
spec = self.spec
args = []
- #args.append("--enable-mpi")
- args.append(f"-DCMAKE_C_COMPILER={spec['mpi'].mpicc}")
- args.append(f"-DCMAKE_CXX_COMPILER={spec['mpi'].mpicxx}")
+
+ # The MPI C/C++ wrappers are passed as MPI_<lang>_COMPILER, not as the main compilers.
+ args.append(f"-DMPI_C_COMPILER={spec['mpi'].mpicc}")
+ args.append(f"-DMPI_CXX_COMPILER={spec['mpi'].mpicxx}")
args.append(f"-DCMAKE_Fortran_COMPILER={spec['mpi'].mpifc}")
+
+ args.append(f"-DMETIS_ROOT_DIR={spec['metis'].prefix}")
+
+ if '+cuda' in spec:
+ args.append("-DENABLE_CUDA=ON")
+ args.append(f"-DCMAKE_CUDA_COMPILER={spec['cuda'].prefix}/bin/nvcc")
+ cuda_arch_vals = spec.variants["cuda_arch"].value
+ if cuda_arch_vals:
+ # Reverse-sorted, so element 0 is the largest listed value; only that one is forwarded.
+ cuda_arch_sorted = list(sorted(cuda_arch_vals, reverse=True))
+ cuda_arch = cuda_arch_sorted[0]
+ args.append(f"-DCUDA_ARCH={cuda_arch}")
+ else:
+ args.append("-DENABLE_CUDA=OFF")
+
+ if '+rocm' in spec:
+ args.append("-DENABLE_HIP=ON")
+ rocm_arch_vals = spec.variants["amdgpu_target"].value
+ # Both paths are derived from the hip prefix.
+ args.append(f"-DROCM_PATH={spec['hip'].prefix}")
+ args.append(f"-DHIP_PATH={spec['hip'].prefix}/hip")
+ if rocm_arch_vals:
+ # Same selection as for CUDA; the chosen target is passed under both ROCM_ARCH and HIP_ARCH.
+ rocm_arch_sorted = list(sorted(rocm_arch_vals, reverse=True))
+ rocm_arch = rocm_arch_sorted[0]
+ args.append(f"-DROCM_ARCH={rocm_arch}")
+ args.append(f"-DHIP_ARCH={rocm_arch}")
+ else:
+ args.append("-DENABLE_HIP=OFF")
+
+ # ENABLE_OPENMP is always defined, from the "openmp" variant.
+ args.append(self.define_from_variant("ENABLE_OPENMP", "openmp"))
+
+ # ENABLE_CALIPER and caliper_DIR are only added for +caliper.
+ if '+caliper' in spec:
+ args.append(self.define_from_variant("ENABLE_CALIPER", "caliper"))
+ args.append(f"-Dcaliper_DIR={spec['caliper'].prefix}")
+
+ # The test suite is never built by this package.
+ args.append("-DBUILD_TESTING=OFF")
+ args.append(f"-DN_GROUPS={self.spec.variants['n_groups'].value}")
+
+ args.append(f"-DMPI_CXX_LINK_FLAGS={spec['mpi'].libs.ld_flags}")
+
return args
def install(self, spec, prefix):
diff --git a/repo/caliper/for_aarch64.patch b/repo/caliper/for_aarch64.patch
new file mode 100644
index 000000000..d3fed9a09
--- /dev/null
+++ b/repo/caliper/for_aarch64.patch
@@ -0,0 +1,11 @@
+--- spack-src/src/services/callpath/Callpath.cpp.bak 2020-10-28 14:38:19.668122844 +0900
++++ spack-src/src/services/callpath/Callpath.cpp 2020-10-28 15:03:12.258061188 +0900
+@@ -63,7 +63,7 @@
+ unw_context_t unw_ctx;
+ unw_cursor_t unw_cursor;
+
+- unw_getcontext(&unw_ctx);
++ unw_getcontext(unw_ctx);
+
+ if (unw_init_local(&unw_cursor, &unw_ctx) < 0) {
+ Log(0).stream() << "callpath: unable to init libunwind cursor" << endl;
diff --git a/repo/caliper/package.py b/repo/caliper/package.py
new file mode 100644
index 000000000..1b13781bd
--- /dev/null
+++ b/repo/caliper/package.py
@@ -0,0 +1,291 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import sys
+
+from spack.package import *
+
+
+class Caliper(CachedCMakePackage, CudaPackage, ROCmPackage):
+ """Caliper is a program instrumentation and performance measurement
+ framework. It is designed as a performance analysis toolbox in a
+ library, allowing one to bake performance analysis capabilities
+ directly into applications and activate them at runtime.
+ """
+
+ homepage = "https://github.com/LLNL/Caliper"
+ git = "https://github.com/LLNL/Caliper.git"
+ url = "https://github.com/LLNL/Caliper/archive/v2.12.1.tar.gz"
+ tags = ["e4s", "radiuss"]
+
+ maintainers("daboehme", "adrienbernede")
+
+ test_requires_compiler = True
+
+ license("BSD-3-Clause")
+
+ version("master", branch="master")
+ version("2.12.1", sha256="2b5a8f98382c94dc75cc3f4517c758eaf9a3f9cea0a8dbdc7b38506060d6955c")
+ version("2.11.0", sha256="b86b733cbb73495d5f3fe06e6a9885ec77365c8aa9195e7654581180adc2217c")
+ version("2.10.0", sha256="14c4fb5edd5e67808d581523b4f8f05ace8549698c0e90d84b53171a77f58565")
+ version("2.9.1", sha256="4771d630de505eff9227e0ec498d0da33ae6f9c34df23cb201b56181b8759e9e")
+ version("2.9.0", sha256="507ea74be64a2dfd111b292c24c4f55f459257528ba51a5242313fa50978371f")
+ version(
+ "2.8.0",
+ sha256="17807b364b5ac4b05997ead41bd173e773f9a26ff573ff2fe61e0e70eab496e4",
+ deprecated=True,
+ )
+ version(
+ "2.7.0",
+ sha256="b3bf290ec2692284c6b4f54cc0c507b5700c536571d3e1a66e56626618024b2b",
+ deprecated=True,
+ )
+ version(
+ "2.6.0",
+ sha256="6efcd3e4845cc9a6169e0d934840766b12182c6d09aa3ceca4ae776e23b6360f",
+ deprecated=True,
+ )
+ version(
+ "2.5.0",
+ sha256="d553e60697d61c53de369b9ca464eb30710bda90fba9671201543b64eeac943c",
+ deprecated=True,
+ )
+ version(
+ "2.4.0", tag="v2.4.0", commit="30577b4b8beae104b2b35ed487fec52590a99b3d", deprecated=True
+ )
+ version(
+ "2.3.0", tag="v2.3.0", commit="9fd89bb0120750d1f9dfe37bd963e24e478a2a20", deprecated=True
+ )
+ version(
+ "2.2.0", tag="v2.2.0", commit="c408e9b3642c7aa80eff37b0826d819c57e7bc04", deprecated=True
+ )
+ version(
+ "2.1.1", tag="v2.1.1", commit="0593b0e01c1d8d3e50c990399cc0fee403485599", deprecated=True
+ )
+ version(
+ "2.0.1", tag="v2.0.1", commit="4d7ff46381c53a461e62edd949e2d9dea9db7b08", deprecated=True
+ )
+ version(
+ "1.9.1", tag="v1.9.1", commit="cfc1defbbee20b50dd3e3477badd09a92b1df970", deprecated=True
+ )
+ version(
+ "1.9.0", tag="v1.9.0", commit="8356e747349b285aa621c5b74e71559f0babc4a1", deprecated=True
+ )
+ version(
+ "1.8.0", tag="v1.8.0", commit="117c1ef596b617dc71407b8b67eebef094a654f8", deprecated=True
+ )
+ version(
+ "1.7.0", tag="v1.7.0", commit="898277c93d884d4e7ca1ffcf3bbea81d22364f26", deprecated=True
+ )
+
+ #depends_on("c", type="build") # generated
+ #depends_on("cxx", type="build") # generated
+ #depends_on("fortran", type="build") # generated
+
+ is_linux = sys.platform.startswith("linux")
+ variant("shared", default=True, description="Build shared libraries")
+ variant("adiak", default=True, description="Enable Adiak support")
+ variant("mpi", default=True, description="Enable MPI support")
+ # libunwind has some issues on Mac
+ variant(
+ "libunwind", default=sys.platform != "darwin", description="Enable stack unwind support"
+ )
+ variant("libdw", default=is_linux, description="Enable DWARF symbol lookup")
+ # pthread_self() signature is incompatible with PAPI_thread_init() on Mac
+ variant("papi", default=sys.platform != "darwin", description="Enable PAPI service")
+ variant("libpfm", default=False, description="Enable libpfm (perf_events) service")
+ # Gotcha is Linux-only
+ variant("gotcha", default=is_linux, description="Enable GOTCHA support")
+ variant("sampler", default=is_linux, description="Enable sampling support on Linux")
+ variant("sosflow", default=False, description="Enable SOSflow support")
+ variant("fortran", default=False, description="Enable Fortran support")
+ variant("variorum", default=False, description="Enable Variorum support")
+ variant("vtune", default=False, description="Enable Intel Vtune support")
+ variant("kokkos", default=True, when="@2.3.0:", description="Enable Kokkos profiling support")
+ variant("tests", default=False, description="Enable tests")
+ variant("tools", default=True, description="Enable tools")
+ variant("python", default=False, when="@v2.12:", description="Build Python bindings")
+
+ depends_on("adiak@0.1:0", when="@2.2:2.10 +adiak")
+ depends_on("adiak@0.4:0", when="@2.11: +adiak")
+
+ depends_on("papi@5.3:5", when="@:2.2 +papi")
+ depends_on("papi@5.3:", when="@2.3: +papi")
+
+ depends_on("libpfm4@4.8:4", when="+libpfm")
+
+ depends_on("mpi", when="+mpi")
+ depends_on("unwind@1.2:1", when="+libunwind")
+ depends_on("elfutils", when="+libdw")
+ depends_on("variorum", when="+variorum")
+ depends_on("intel-oneapi-vtune", when="+vtune")
+
+ depends_on("sosflow@spack", when="@1.0:1+sosflow")
+
+ depends_on("cmake", type="build")
+ depends_on("python", type="build")
+
+ depends_on("python@3", when="+python", type=("build", "link", "run"))
+ depends_on("py-pybind11", when="+python", type=("build", "link", "run"))
+
+ # sosflow support not yet in 2.0
+ conflicts("+sosflow", "@2:")
+ conflicts("+adiak", "@:2.1")
+ conflicts("+libdw", "@:2.4")
+ conflicts("+rocm", "@:2.7")
+ conflicts("+rocm+cuda")
+
+ patch("for_aarch64.patch", when="@:2.11 target=aarch64:")
+ patch(
+ "sampler-service-missing-libunwind-include-dir.patch",
+ when="@2.9.0:2.9.1 +libunwind +sampler",
+ )
+
+ def _get_sys_type(self, spec):
+ sys_type = spec.architecture
+ if "SYS_TYPE" in env:
+ sys_type = env["SYS_TYPE"]
+ return sys_type
+
+ def initconfig_compiler_entries(self):
+ spec = self.spec
+ entries = super().initconfig_compiler_entries()
+
+ if spec.satisfies("+rocm"):
+ entries.insert(0, cmake_cache_path("CMAKE_CXX_COMPILER", spec["hip"].hipcc))
+
+ entries.append(cmake_cache_option("WITH_FORTRAN", spec.satisfies("+fortran")))
+
+ entries.append(cmake_cache_option("BUILD_SHARED_LIBS", spec.satisfies("+shared")))
+ entries.append(cmake_cache_option("BUILD_TESTING", spec.satisfies("+tests")))
+ entries.append(cmake_cache_option("WITH_TOOLS", spec.satisfies("+tools")))
+ entries.append(cmake_cache_option("BUILD_DOCS", False))
+ entries.append(cmake_cache_path("PYTHON_EXECUTABLE", spec["python"].command.path))
+
+ return entries
+
+ def initconfig_hardware_entries(self):
+ spec = self.spec
+ entries = super().initconfig_hardware_entries()
+
+ if spec.satisfies("+cuda"):
+ entries.append(cmake_cache_option("WITH_CUPTI", True))
+ entries.append(cmake_cache_option("WITH_NVTX", True))
+ entries.append(cmake_cache_path("CUDA_TOOLKIT_ROOT_DIR", spec["cuda"].prefix))
+ entries.append(cmake_cache_path("CUPTI_PREFIX", spec["cuda"].prefix))
+ else:
+ entries.append(cmake_cache_option("WITH_CUPTI", False))
+ entries.append(cmake_cache_option("WITH_NVTX", False))
+
+ if spec.satisfies("+rocm"):
+ entries.append(cmake_cache_option("WITH_ROCTRACER", True))
+ entries.append(cmake_cache_option("WITH_ROCTX", True))
+ else:
+ entries.append(cmake_cache_option("WITH_ROCTRACER", False))
+ entries.append(cmake_cache_option("WITH_ROCTX", False))
+
+ return entries
+
+ def initconfig_mpi_entries(self):
+ spec = self.spec
+ entries = super().initconfig_mpi_entries()
+
+ entries.append(cmake_cache_option("WITH_MPI", spec.satisfies("+mpi")))
+
+ return entries
+
+ def initconfig_package_entries(self):
+ spec = self.spec
+ entries = []
+
+ # TPL locations
+ entries.append("#------------------{0}".format("-" * 60))
+ entries.append("# TPLs")
+ entries.append("#------------------{0}\n".format("-" * 60))
+
+ if spec.satisfies("+adiak"):
+ entries.append(cmake_cache_path("adiak_DIR", spec["adiak"].prefix))
+ if spec.satisfies("+papi"):
+ entries.append(cmake_cache_path("PAPI_PREFIX", spec["papi"].prefix))
+ if spec.satisfies("+libdw"):
+ entries.append(cmake_cache_path("LIBDW_PREFIX", spec["elfutils"].prefix))
+ if spec.satisfies("+libpfm"):
+ entries.append(cmake_cache_path("LIBPFM_INSTALL", spec["libpfm4"].prefix))
+ if spec.satisfies("+sosflow"):
+ entries.append(cmake_cache_path("SOS_PREFIX", spec["sosflow"].prefix))
+ if spec.satisfies("+variorum"):
+ entries.append(cmake_cache_path("VARIORUM_PREFIX", spec["variorum"].prefix))
+ if spec.satisfies("+vtune"):
+ itt_dir = join_path(spec["intel-oneapi-vtune"].prefix, "vtune", "latest")
+ entries.append(cmake_cache_path("ITT_PREFIX", itt_dir))
+ if spec.satisfies("+libunwind"):
+ entries.append(cmake_cache_path("LIBUNWIND_PREFIX", spec["unwind"].prefix))
+
+ # Build options
+ entries.append("#------------------{0}".format("-" * 60))
+ entries.append("# Build Options")
+ entries.append("#------------------{0}\n".format("-" * 60))
+
+ entries.append(cmake_cache_option("WITH_ADIAK", spec.satisfies("+adiak")))
+ entries.append(cmake_cache_option("WITH_GOTCHA", spec.satisfies("+gotcha")))
+ entries.append(cmake_cache_option("WITH_SAMPLER", spec.satisfies("+sampler")))
+ entries.append(cmake_cache_option("WITH_PAPI", spec.satisfies("+papi")))
+ entries.append(cmake_cache_option("WITH_LIBDW", spec.satisfies("+libdw")))
+ entries.append(cmake_cache_option("WITH_LIBPFM", spec.satisfies("+libpfm")))
+ entries.append(cmake_cache_option("WITH_SOSFLOW", spec.satisfies("+sosflow")))
+ entries.append(cmake_cache_option("WITH_KOKKOS", spec.satisfies("+kokkos")))
+ entries.append(cmake_cache_option("WITH_VARIORUM", spec.satisfies("+variorum")))
+ entries.append(cmake_cache_option("WITH_VTUNE", spec.satisfies("+vtune")))
+ entries.append(cmake_cache_option("WITH_PYTHON_BINDINGS", spec.satisfies("+python")))
+
+ # -DWITH_CALLPATH was renamed -DWITH_LIBUNWIND in 2.5
+ callpath_flag = "LIBUNWIND" if spec.satisfies("@2.5:") else "CALLPATH"
+ entries.append(cmake_cache_option("WITH_%s" % callpath_flag, spec.satisfies("+libunwind")))
+
+ return entries
+
+ def cmake_args(self):
+ return []
+
+ def setup_run_environment(self, env):
+ if self.spec.satisfies("+python"):
+ env.prepend_path("PYTHONPATH", self.spec.prefix.join(python_platlib))
+ env.prepend_path("PYTHONPATH", self.spec.prefix.join(python_purelib))
+
+ @run_after("install")
+ def cache_test_sources(self):
+ """Copy the example source files after the package is installed to an
+ install test subdirectory for use during `spack test run`."""
+ cache_extra_test_sources(self, [join_path("examples", "apps")])
+
+ def test_cxx_example(self):
+ """build and run cxx-example"""
+
+ exe = "cxx-example"
+ source_file = "{0}.cpp".format(exe)
+
+ source_path = find_required_file(
+ self.test_suite.current_test_cache_dir, source_file, expected=1, recursive=True
+ )
+
+ lib_dir = self.prefix.lib if os.path.exists(self.prefix.lib) else self.prefix.lib64
+
+ cxx = which(os.environ["CXX"])
+ test_dir = os.path.dirname(source_path)
+ with working_dir(test_dir):
+ cxx(
+ "-L{0}".format(lib_dir),
+ "-I{0}".format(self.prefix.include),
+ source_path,
+ "-o",
+ exe,
+ "-std=c++11",
+ "-lcaliper",
+ "-lstdc++",
+ )
+
+ cxx_example = which(exe)
+ cxx_example()
diff --git a/repo/caliper/sampler-service-missing-libunwind-include-dir.patch b/repo/caliper/sampler-service-missing-libunwind-include-dir.patch
new file mode 100644
index 000000000..4acd660f6
--- /dev/null
+++ b/repo/caliper/sampler-service-missing-libunwind-include-dir.patch
@@ -0,0 +1,14 @@
+diff -ruN spack-src/src/services/sampler/CMakeLists.txt spack-src-patched/src/services/sampler/CMakeLists.txt
+--- spack-src/src/services/sampler/CMakeLists.txt 2022-11-30 13:52:42.000000000 -0500
++++ spack-src-patched/src/services/sampler/CMakeLists.txt 2023-05-04 20:43:47.240310306 -0400
+@@ -17,6 +17,10 @@
+
+ include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
+
++if (CALIPER_HAVE_LIBUNWIND)
++ include_directories(${LIBUNWIND_INCLUDE_DIRS})
++endif()
++
+ add_library(caliper-sampler OBJECT ${CALIPER_SAMPLER_SOURCES})
+
+ add_service_objlib("caliper-sampler")
diff --git a/repo/cray-mpich/package.py b/repo/cray-mpich/package.py
index b0f2e1561..36d80d64b 100644
--- a/repo/cray-mpich/package.py
+++ b/repo/cray-mpich/package.py
@@ -17,8 +17,8 @@ def libs(self):
if self.spec.satisfies("+gtl"):
gtl_lib_prefix = self.spec.extra_attributes["gtl_lib_path"]
- libraries = ["libmpi_gtl_hsa"]
- libs += find_libraries(libraries, root=gtl_lib_prefix, recursive=True)
+ gtl_libs = self.spec.extra_attributes["gtl_libs"]
+ libs += find_libraries(gtl_libs, root=gtl_lib_prefix, recursive=True)
return libs
@@ -29,8 +29,6 @@ def setup_run_environment(self, env):
if self.spec.satisfies("+gtl"):
env.set("MPICH_GPU_SUPPORT_ENABLED", "1")
env.prepend_path("LD_LIBRARY_PATH", self.spec.extra_attributes["gtl_lib_path"])
- env.set("GTL_HSA_VSMSG_CUTOFF_SIZE", str(self.spec.extra_attributes["gtl_cutoff_size"]))
- env.set("FI_CXI_ATS", str(self.spec.extra_attributes["fi_cxi_ats"]))
else:
env.set("MPICH_GPU_SUPPORT_ENABLED", "0")
gtl_path = self.spec.extra_attributes.get("gtl_lib_path", "")
diff --git a/systems/lanl-venado/compilers/cce/00-cce-18-compilers.yaml b/systems/lanl-venado/compilers/cce/00-cce-18-compilers.yaml
new file mode 100644
index 000000000..7ce4aeb35
--- /dev/null
+++ b/systems/lanl-venado/compilers/cce/00-cce-18-compilers.yaml
@@ -0,0 +1,25 @@
+compilers:
+- compiler:
+ spec: cce@18.0.0
+ paths:
+ cc: /opt/cray/pe/cce/18.0.0/bin/craycc
+ cxx: /opt/cray/pe/cce/18.0.0/bin/crayCC
+ f77: /opt/cray/pe/cce/18.0.0/bin/crayftn
+ fc: /opt/cray/pe/cce/18.0.0/bin/crayftn
+ flags:
+ cflags: -g -O2 --gcc-toolchain=/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0
+ cxxflags: -g -O2 --gcc-toolchain=/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0
+ fflags: -g -O2 -hnopattern
+ ldflags: -ldl
+ operating_system: sles15
+ target: aarch64
+ modules: []
+ environment:
+ prepend_path:
+ LD_LIBRARY_PATH: "/opt/cray/pe/cce/18.0.0/cce/aarch64/lib:/opt/cray/libfabric/1.20.1/lib64:/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib:/usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib64:/opt/cray/pe/gcc-libs"
+ extra_rpaths:
+ - /opt/cray/pe/gcc-libs
+ - /opt/cray/pe/cce/18.0.0/cce/aarch64/lib
+ - /opt/cray/libfabric/1.20.1/lib64
+ - /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib
+ - /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/lib64
diff --git a/systems/lanl-venado/compilers/gcc/00-gcc-12-compilers.yaml b/systems/lanl-venado/compilers/gcc/00-gcc-12-compilers.yaml
new file mode 100644
index 000000000..f28815e8f
--- /dev/null
+++ b/systems/lanl-venado/compilers/gcc/00-gcc-12-compilers.yaml
@@ -0,0 +1,14 @@
+compilers:
+- compiler:
+ spec: gcc@12.3.0
+ paths:
+ cc: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/gcc
+ cxx: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/g++
+ f77: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/gfortran
+ fc: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/compilers/gcc/12.3.0/bin/gfortran
+ flags: {}
+ operating_system: sles15
+ target: aarch64
+ modules: []
+ environment: {}
+ extra_rpaths: []
diff --git a/systems/lanl-venado/externals/base/00-packages.yaml b/systems/lanl-venado/externals/base/00-packages.yaml
new file mode 100644
index 000000000..63682360b
--- /dev/null
+++ b/systems/lanl-venado/externals/base/00-packages.yaml
@@ -0,0 +1,43 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+packages:
+ tar:
+ externals:
+ - spec: tar@1.34
+ prefix: /usr
+ buildable: false
+ cmake:
+ externals:
+ - spec: cmake@3.29.6
+ prefix: /usr/projects/hpcsoft/tce/24-07/cos3-aarch64-cc90/packages/cmake/cmake-3.29.6
+ buildable: false
+ gmake:
+ externals:
+ - spec: gmake@4.2.1
+ prefix: /usr
+ buildable: false
+ automake:
+ externals:
+ - spec: automake@1.15.1
+ prefix: /usr
+ buildable: false
+ autoconf:
+ externals:
+ - spec: autoconf@2.69
+ prefix: /usr
+ buildable: false
+ fftw:
+ externals:
+ - spec: fftw@3.3.10.8
+ prefix: /opt/cray/pe/fftw/3.3.10.8/arm_grace
+ buildable: false
+ python:
+ externals:
+ - spec: python@3.10.9
+ prefix: /usr/projects/hpcsoft/common/aarch64/anaconda/2023.03-python-3.10
+ buildable: false
+ mpi:
+ buildable: false
diff --git a/systems/lanl-venado/externals/libsci/00-gcc-packages.yaml b/systems/lanl-venado/externals/libsci/00-gcc-packages.yaml
new file mode 100644
index 000000000..f58d5a59a
--- /dev/null
+++ b/systems/lanl-venado/externals/libsci/00-gcc-packages.yaml
@@ -0,0 +1,5 @@
+packages:
+ cray-libsci:
+ externals:
+ - spec: cray-libsci@24.07.0%gcc
+ prefix: /opt/cray/pe/libsci/24.07.0/gnu/12.3/aarch64
diff --git a/systems/lanl-venado/externals/libsci/01-cce-packages.yaml b/systems/lanl-venado/externals/libsci/01-cce-packages.yaml
new file mode 100644
index 000000000..8c1608b6b
--- /dev/null
+++ b/systems/lanl-venado/externals/libsci/01-cce-packages.yaml
@@ -0,0 +1,5 @@
+packages:
+ cray-libsci:
+ externals:
+ - spec: cray-libsci@24.07.0%cce
+ prefix: /opt/cray/pe/libsci/24.07.0/cray/17.0/aarch64
diff --git a/systems/lanl-venado/system.py b/systems/lanl-venado/system.py
new file mode 100644
index 000000000..b95ab0f55
--- /dev/null
+++ b/systems/lanl-venado/system.py
@@ -0,0 +1,215 @@
+# Copyright 2023 Lawrence Livermore National Security, LLC and other
+# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import pathlib
+
+from benchpark.directives import variant
+from benchpark.system import System
+
+id_to_resources = {
+ "grace-hopper": {
+ "sys_cores_per_node": 144,
+ "sys_gpus_per_node": 4,
+ },
+ "grace-grace": {
+ "sys_cores_per_node": 144,
+ },
+}
+
+
+class LanlVenado(System):
+ variant(
+ "cluster",
+ default="grace-hopper",
+ values=("grace-hopper", "grace-grace"),
+ description="Which cluster to run on",
+ )
+
+ variant(
+ "cuda",
+ default="12-5",
+ values=("11.8", "12.5"),
+ description="CUDA version",
+ )
+
+ variant(
+ "compiler",
+ default="cce",
+ values=("gcc", "cce"),
+ description="Which compiler to use",
+ )
+
+ variant(
+ "gtl",
+ default=False,
+ values=(True, False),
+ description="Use GTL-enabled MPI",
+ )
+
+ variant(
+ "lapack",
+ default="cusolver",
+ values=("cusolver", "cray-libsci"),
+ description="Which lapack to use",
+ )
+
+ variant(
+ "blas",
+ default="cublas",
+ values=("cublas", "cray-libsci"),
+ description="Which blas to use",
+ )
+
+ def initialize(self):
+ super().initialize()
+
+ self.scheduler = "slurm"
+ attrs = id_to_resources.get(self.spec.variants["cluster"][0])
+ for k, v in attrs.items():
+ setattr(self, k, v)
+
+ def generate_description(self, output_dir):
+ super().generate_description(output_dir)
+
+ sw_description = pathlib.Path(output_dir) / "software.yaml"
+
+ with open(sw_description, "w") as f:
+ f.write(self.sw_description())
+
+ def system_specific_variables(self):
+ return {
+ "cuda_arch": "90",
+ "default_cuda_version": self.spec.variants["cuda"][0],
+ "extra_batch_opts": '"-A llnl_ai_g -pgpu"',
+ }
+
+ def external_pkg_configs(self):
+ externals = LanlVenado.resource_location / "externals"
+
+ selections = [externals / "base" / "00-packages.yaml"]
+
+ cuda_cfg_path = self.next_adhoc_cfg()
+ with open(cuda_cfg_path, "w") as f:
+ f.write(self.cuda_config(self.spec.variants["cuda"][0]))
+ selections.append(cuda_cfg_path)
+
+ mpi_cfg_path = self.next_adhoc_cfg()
+ with open(mpi_cfg_path, "w") as f:
+ f.write(self.mpi_config())
+ selections.append(mpi_cfg_path)
+
+ if self.spec.satisfies("compiler=cce"):
+ selections.append(externals / "libsci" / "01-cce-packages.yaml")
+ elif self.spec.satisfies("compiler=gcc"):
+ selections.append(externals / "libsci" / "00-gcc-packages.yaml")
+
+ return selections
+
+ def compiler_configs(self):
+ compilers = LanlVenado.resource_location / "compilers"
+
+ selections = []
+ # TODO: Construct/extract/customize compiler information from the working set
+ if self.spec.satisfies("compiler=cce"):
+ selections.append(compilers / "cce" / "00-cce-18-compilers.yaml")
+ selections.append(compilers / "gcc" / "00-gcc-12-compilers.yaml")
+
+ return selections
+
+ def mpi_config(self):
+ mpi_version = "8.1.30"
+ gtl = (
+ "+gtl"
+ if self.spec.satisfies("compiler=cce") and self.spec.satisfies("+gtl")
+ else "~gtl"
+ )
+
+ # TODO: Construct/extract this information from the working set
+ if self.spec.satisfies("compiler=cce"):
+ compiler = "cce@18.0.0"
+ mpi_compiler_suffix = "crayclang/17.0"
+ elif self.spec.satisfies("compiler=gcc"):
+ compiler = "gcc@12.3.0"
+ mpi_compiler_suffix = "gnu/12.3"
+
+ return f"""\
+packages:
+ cray-mpich:
+ externals:
+ - spec: cray-mpich@{mpi_version}%{compiler} {gtl} +wrappers
+ prefix: /opt/cray/pe/mpich/{mpi_version}/ofi/{mpi_compiler_suffix}
+ extra_attributes:
+ gtl_lib_path: /opt/cray/pe/mpich/{mpi_version}/gtl/lib
+ gtl_libs: ["libmpi_gtl_cuda"]
+ ldflags: "-L/opt/cray/pe/mpich/{mpi_version}/ofi/{mpi_compiler_suffix}/lib -lmpi -L/opt/cray/pe/mpich/{mpi_version}/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/{mpi_version}/gtl/lib -lmpi_gtl_cuda"
+"""
+
+ def cuda_config(self, cuda_version):
+ template = """\
+packages:
+ blas:
+ require:
+ - {blas}
+ lapack:
+ require:
+ - {lapack}
+ curand:
+ externals:
+ - spec: curand@{x}
+ prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
+ buildable: false
+ cusparse:
+ externals:
+ - spec: cusparse@{x}
+ prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
+ buildable: false
+ cuda:
+ externals:
+ - spec: cuda@{x}
+ prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
+ buildable: false
+ cub:
+ externals:
+ - spec: cub@{x}
+ prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/cuda/{x}
+ buildable: false
+ cublas:
+ externals:
+ - spec: cublas@{x}
+ prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/math_libs/{x}
+ buildable: false
+ cusolver:
+ externals:
+ - spec: cusolver@{x}
+ prefix: /opt/nvidia/hpc_sdk/Linux_aarch64/24.7/math_libs/{x}
+ buildable: false
+"""
+ return template.format(
+ x=cuda_version,
+ blas=self.spec.variants["blas"][0],
+ lapack=self.spec.variants["lapack"][0],
+ )
+
+ def sw_description(self):
+ """This is somewhat vestigial: for the Tioga config that is committed
+ to the repo, multiple instances of mpi/compilers are stored,
+ and these variables were used to choose consistent dependencies.
+ The configs generated by this class should only ever have one
+ instance of MPI etc., so there is no need for that. The experiments
+ will fail if these variables are not defined though, so for now
+ they are still generated (but with more-generic values).
+ """
+ return f"""\
+software:
+ packages:
+ default-compiler:
+ pkg_spec: {self.spec.variants["compiler"][0]}
+ default-mpi:
+ pkg_spec: cray-mpich
+ default-lapack:
+ pkg_spec: {self.spec.variants["lapack"][0]}
+ default-blas:
+ pkg_spec: {self.spec.variants["blas"][0]}
+"""
diff --git a/systems/llnl-elcapitan/system.py b/systems/llnl-elcapitan/system.py
index dbb1142c0..b8b2c9a7a 100644
--- a/systems/llnl-elcapitan/system.py
+++ b/systems/llnl-elcapitan/system.py
@@ -105,30 +105,8 @@ def external_pkg_configs(self):
elif self.spec.satisfies("compiler=gcc"):
selections.append(externals / "libsci" / "00-gcc-packages.yaml")
- cmp_preference_path = self.next_adhoc_cfg()
- with open(cmp_preference_path, "w") as f:
- f.write(self.compiler_weighting_cfg())
- selections.append(cmp_preference_path)
-
return selections
- def compiler_weighting_cfg(self):
- compiler = self.spec.variants["compiler"][0]
-
- if compiler == "cce":
- return """\
-packages:
- all:
- require:
- - one_of: ["%cce", "@:"]
-"""
- elif compiler == "gcc":
- return """\
-packages: {}
-"""
- else:
- raise ValueError(f"Unexpected value for compiler: {compiler}")
-
def compiler_configs(self):
compilers = LlnlElcapitan.resource_location / "compilers"
@@ -163,6 +141,7 @@ def mpi_config(self, cce_version):
gtl_cutoff_size: 4096
fi_cxi_ats: 0
gtl_lib_path: /opt/cray/pe/mpich/{mpi_version}/gtl/lib
+ gtl_libs: ["libmpi_gtl_hsa"]
ldflags: "-L/opt/cray/pe/mpich/{mpi_version}/ofi/crayclang/{short_cce_version}/lib -lmpi -L/opt/cray/pe/mpich/{mpi_version}/gtl/lib -Wl,-rpath=/opt/cray/pe/mpich/{mpi_version}/gtl/lib -lmpi_gtl_hsa"
"""