From bc864771dfafda93cf4036c7f079de1828611d11 Mon Sep 17 00:00:00 2001 From: Alex Wells Date: Mon, 20 Nov 2023 13:54:22 -0800 Subject: [PATCH] build: refactor icc/icx specific pragma macros (#1756) Added OSL_INTEL_CLASSIC_PRAGMA(...) which only emits for Intel(r) Classic C++ Compiler, previously OSL_INTEL_PRAGMA did that. As many Intel compiler specific pragmas are now supported by icx and icc, redefined OSL_INTEL_PRAGMA to emit for the classic and llvm based Intel compilers (icc and icx). Updated some uses of OSL_INTEL_PRAGMA to OSL_INTEL_CLASSIC_PRAGMA as appropriate. Fixed long icx 2023.1.0, 2023.2.0, 2023.2.1 build times for certain files by removing the "-mllvm -inline-threshold=100000" flag for all but 1 file avoiding the overhead of optimization passes being applied to inlined code that wasn't actually part of the SIMD loop that we truly needed inlined. icx now supports "#pragma forceinline recursive" which allows us to target just the SIMD loops to be inlined (as we did for icc). Signed-off-by: Alex M. 
Wells --- src/include/OSL/Imathx/Imathx.h | 2 +- src/include/OSL/platform.h | 18 ++++++++++++---- src/liboslexec/CMakeLists.txt | 33 +++++++++++++++++++++++------ src/liboslexec/batched_analysis.cpp | 2 +- 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/include/OSL/Imathx/Imathx.h b/src/include/OSL/Imathx/Imathx.h index 8deb61d34..4753134e5 100644 --- a/src/include/OSL/Imathx/Imathx.h +++ b/src/include/OSL/Imathx/Imathx.h @@ -265,7 +265,7 @@ nonAffineInverse(const Matrix44 &source); Matrix44 OSL_HOSTDEVICE nonAffineInverse(const Matrix44 &source) { - OSL_INTEL_PRAGMA(float_control(strict,on,push)) + OSL_INTEL_CLASSIC_PRAGMA(float_control(strict,on,push)) OSL_CLANG_PRAGMA(clang fp contract(off)) using ScalarT = typename Matrix44::BaseType; diff --git a/src/include/OSL/platform.h b/src/include/OSL/platform.h index 6ee6658c8..cdaf9bad9 100644 --- a/src/include/OSL/platform.h +++ b/src/include/OSL/platform.h @@ -206,9 +206,14 @@ # define OSL_CLANG_PRAGMA(UnQuotedPragma) # endif # if defined(__INTEL_COMPILER) -# define OSL_INTEL_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma) +# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma) # else -# define OSL_INTEL_PRAGMA(UnQuotedPragma) +# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma) +# endif +# if defined(__INTEL_LLVM_COMPILER) +# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma) +# else +# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma) # endif # define OSL_MSVS_PRAGMA(UnQuotedPragma) #elif defined(_MSC_VER) @@ -218,7 +223,8 @@ # define OSL_PRAGMA_VISIBILITY_POP /* N/A on MSVS */ # define OSL_GCC_PRAGMA(UnQuotedPragma) # define OSL_CLANG_PRAGMA(UnQuotedPragma) -# define OSL_INTEL_PRAGMA(UnQuotedPragma) +# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma) +# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma) # define OSL_MSVS_PRAGMA(UnQuotedPragma) OSL_PRAGMA(UnQuotedPragma) #else # define OSL_PRAGMA_WARNING_PUSH @@ -227,10 +233,14 @@ # define OSL_PRAGMA_VISIBILITY_POP 
# define OSL_GCC_PRAGMA(UnQuotedPragma) # define OSL_CLANG_PRAGMA(UnQuotedPragma) -# define OSL_INTEL_PRAGMA(UnQuotedPragma) +# define OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma) +# define OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma) # define OSL_MSVS_PRAGMA(UnQuotedPragma) #endif +// A pragma that applies to both icc and icx +#define OSL_INTEL_PRAGMA(UnQuotedPragma) OSL_INTEL_CLASSIC_PRAGMA(UnQuotedPragma) OSL_INTEL_LLVM_PRAGMA(UnQuotedPragma) + #ifdef __clang__ #define OSL_CLANG_ATTRIBUTE(value) __attribute__((value)) #else diff --git a/src/liboslexec/CMakeLists.txt b/src/liboslexec/CMakeLists.txt index 29f6ca729..d3dc3b4b0 100644 --- a/src/liboslexec/CMakeLists.txt +++ b/src/liboslexec/CMakeLists.txt @@ -103,7 +103,11 @@ set ( liboslexec_require_INF_NaN wide/wide_shadingsys wide/wide_optest_float ) - + +set ( liboslexec_exceeds_INTELCLANG_inlining_limits + wide/wide_opspline + ) + set (local_lib oslexec) set (lib_src shadingsys.cpp closure.cpp @@ -390,11 +394,15 @@ foreach(batched_target ${BATCHED_TARGET_LIST}) list (APPEND TARGET_CXX_OPTS "-ffp-contract=off") endif () - # large SIMD function loops will exceed llvm's -inline-threshold default of 225. - # remark: loop not vectorized: call instruction cannot be vectorized [-Rpass-analysis] - # choose to increase that limit via compiler flags vs. - # workaround with __attribute__((flatten)) - list (APPEND TARGET_CXX_OPTS "-mllvm" "-inline-threshold=100000") + if (NOT CMAKE_COMPILER_IS_INTELCLANG) + # large SIMD function loops will exceed llvm's -inline-threshold default of 225. + # remark: loop not vectorized: call instruction cannot be vectorized [-Rpass-analysis] + # choose to increase that limit via compiler flags vs. + # workaround with __attribute__((flatten)) + list (APPEND TARGET_CXX_OPTS "-mllvm" "-inline-threshold=100000") + # NOTE: INTELCLANG we use icx/icc specific "#pragma force inline recursive" + # to only inline the SIMD loops contents vs. everything. 
+ endif () # For loops with small loop bodies, clang was unrolling the loop before # #pragma omp simd @@ -489,7 +497,18 @@ foreach(batched_target ${BATCHED_TARGET_LIST}) if (${target_src} IN_LIST liboslexec_require_INF_NaN) REQUIRE_INF_NAN ( ${TARGET_SRC} ) endif() - + + if (CMAKE_COMPILER_IS_INTELCLANG) + if (${target_src} IN_LIST liboslexec_exceeds_INTELCLANG_inlining_limits) + # NOTE: Some files still require increasing + # inlining threshold to successfully vectorize. + # Retest with future compiler versions + set_property(SOURCE ${TARGET_SRC} APPEND PROPERTY COMPILE_OPTIONS + "-mllvm" "-inline-threshold=100000") + endif() + endif() + + endforeach(target_src) add_library ( ${batched_target_lib} MODULE ${TARGET_LIB_SOURCES} ) diff --git a/src/liboslexec/batched_analysis.cpp b/src/liboslexec/batched_analysis.cpp index 9660edb8b..6fa39f272 100644 --- a/src/liboslexec/batched_analysis.cpp +++ b/src/liboslexec/batched_analysis.cpp @@ -448,7 +448,7 @@ could_op_return_be_logically_boolean(ustring opname) // comparison operators even if the are not "currently" // being referenced. May need to disable some warnings // error #177: function "..." was declared but never referenced -OSL_INTEL_PRAGMA(warning(disable : 177)) +OSL_INTEL_CLASSIC_PRAGMA(warning(disable : 177)) // The Position returned by top_pos changes and symbols are pushed and popped.