From 25fd8a9794d51e6440d46a2970c88a9212611c14 Mon Sep 17 00:00:00 2001 From: Fabian Boemer Date: Fri, 23 Jul 2021 09:41:19 -0700 Subject: [PATCH] Fboemer/windows avx512 (#23) * Fix Windows compilation --- README.md | 36 ++++++++++++++++++++++++++++++++---- benchmark/CMakeLists.txt | 4 +++- hexl/CMakeLists.txt | 2 +- hexl/ntt/fwd-ntt-avx512.cpp | 2 +- hexl/ntt/inv-ntt-avx512.cpp | 12 ++++++------ test/CMakeLists.txt | 6 +++++- 6 files changed, 48 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 6d4177fb..fb2db212 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ Intel:registered: HEXL is an open-source library which provides efficient implem - [Dependencies](#dependencies) - [Compile-time options](#compile-time-options) - [Compiling Intel HEXL](#compiling-intel-hexl) + - [Linux and Mac](#linux-and-mac) + - [Windows](#windows) - [Testing Intel HEXL](#testing-intel-hexl) - [Benchmarking Intel HEXL](#benchmarking-intel-hexl) - [Using Intel HEXL](#using-intel-hexl) @@ -18,7 +20,7 @@ Intel:registered: HEXL is an open-source library which provides efficient implem - [Documentation](#documentation) - [Contributing](#contributing) - [Repository layout](#repository-layout) - - [Citing Intel HEXL](#citing-intel-hexl) +- [Citing Intel HEXL](#citing-intel-hexl) - [Version 1.2](#version-12) - [Version 1.1](#version-11) - [Version 1.0](#version-10) @@ -74,9 +76,11 @@ For convenience, they are listed below: | HEXL_TREAT_WARNING_AS_ERROR | ON / OFF (default OFF) | Set to ON to treat all warnings as error | ### Compiling Intel HEXL -The instructions to build Intel HEXL are common between Linux, MacOS, and Windows. +To compile Intel HEXL from source code, first clone the repository and change directories to where the source has been cloned. +#### Linux and Mac +The instructions to build Intel HEXL are common to Linux and MacOS. -To compile Intel HEXL from source code, first clone the repository into your current directory. 
Then, to configure the build, call +Then, to configure the build, call ```bash cmake -S . -B build ``` @@ -99,6 +103,30 @@ To use a non-standard installation directory, configure the build with ```bash cmake -S . -B build -DCMAKE_INSTALL_PREFIX=/path/to/install ``` +before proceeding with the build and installation directions above. + +#### Windows +To compile Intel HEXL on Windows using Visual Studio in Release mode, configure the build via +```bash +cmake -S . -B build -G "Visual Studio 16 2019" -DCMAKE_BUILD_TYPE=Release +``` +adding the desired compile-time options with a `-D` flag (see [Compile-time options](#compile-time-options)). + +To specify the desired build configuration, pass either `--config Debug` or `--config Release` to the build step and install steps. For instance, to build Intel HEXL in Release mode, call +```bash +cmake --build build --config Release +``` +This will build the Intel HEXL library in the `build/hexl/lib/` or `build/hexl/Release/lib` directory. + +To install Intel HEXL to the installation directory, run +```bash +cmake --build build --target install --config Release +``` +To use a non-standard installation directory, configure the build with +```bash +cmake -S . -B build -G "Visual Studio 16 2019" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/path/to/install +``` +before proceeding with the build and installation directions above. ## Testing Intel HEXL To run a set of unit tests via Googletest, configure and build Intel HEXL with `-DHEXL_TESTING=ON` (see [Compile-time options](#compile-time-options)). @@ -119,7 +147,7 @@ The benchmark executable itself is located at `build/benchmark/bench_hexl` The `example` folder has an example of using Intel HEXL in a third-party project. ## Debugging -For optimal performance, Intel HEXL does not perform input validation. In many cases the time required for the validation would be longer than the execution of the function itself. 
To debug Intel HEXL, configure and build Intel HEXL with `-DCMAKE_BUILD_TYPE=Debug` (see [Compile-time options](#compile-time-options)). This will generate a debug version of the library, e.g. `libhexl.a`, that can be used to debug the execution. +For optimal performance, Intel HEXL does not perform input validation. In many cases the time required for the validation would be longer than the execution of the function itself. To debug Intel HEXL, configure and build Intel HEXL with `-DCMAKE_BUILD_TYPE=Debug` (see [Compile-time options](#compile-time-options)). This will generate a debug version of the library, e.g. `libhexl_debug.a`, that can be used to debug the execution. In Debug mode, Intel HEXL will also link against [Address Sanitizer](https://github.com/google/sanitizers/wiki/AddressSanitizer). **Note**, enabling `CMAKE_BUILD_TYPE=Debug` will result in a significant runtime overhead. ## Threading diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 57c958d7..3fa02e9e 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -27,5 +27,7 @@ endif() if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") target_compile_options(bench_hexl PRIVATE -Wall -Wextra -march=native -O3) elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - target_compile_options(bench_hexl PRIVATE /Wall /W4 /Zc:preprocessor) + target_compile_options(bench_hexl PRIVATE /Wall /W4 /Zc:preprocessor + /wd4127 # warning C4127: conditional expression is constant; C++11 doesn't support constexpr + ) endif() diff --git a/hexl/CMakeLists.txt b/hexl/CMakeLists.txt index 61ee7df7..124cb52b 100644 --- a/hexl/CMakeLists.txt +++ b/hexl/CMakeLists.txt @@ -78,7 +78,7 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # so we disable it here target_compile_options(hexl PRIVATE /Wall /W4 /Zc:preprocessor /Ob0 /wd4127 # warning C4127: conditional expression is constant; C++11 doesn't support constexpr - ) + ) target_compile_definitions(hexl PRIVATE -D_CRT_SECURE_NO_WARNINGS) endif() diff 
--git a/hexl/ntt/fwd-ntt-avx512.cpp b/hexl/ntt/fwd-ntt-avx512.cpp index 0087c470..6035514c 100644 --- a/hexl/ntt/fwd-ntt-avx512.cpp +++ b/hexl/ntt/fwd-ntt-avx512.cpp @@ -372,7 +372,7 @@ void ForwardTransformToBitReverseAVX512( } else { // Perform depth-first NTT via recursive call size_t t = (n >> 1); - size_t W_idx = (1 << recursion_depth) + recursion_half; + size_t W_idx = (1ULL << recursion_depth) + recursion_half; const uint64_t* W_op = &root_of_unity_powers[W_idx]; const uint64_t* W_precon = &precon_root_of_unity_powers[W_idx]; diff --git a/hexl/ntt/inv-ntt-avx512.cpp b/hexl/ntt/inv-ntt-avx512.cpp index cf2faea1..6f62ac76 100644 --- a/hexl/ntt/inv-ntt-avx512.cpp +++ b/hexl/ntt/inv-ntt-avx512.cpp @@ -23,8 +23,8 @@ template void InverseTransformFromBitReverseAVX512( uint64_t* operand, uint64_t degree, uint64_t modulus, const uint64_t* inv_root_of_unity_powers, const uint64_t* precon_inv_root_of_unity_powers, uint64_t input_mod_factor, - uint64_t output_mod_factor, uint64_t recursion_depth = 0, - uint64_t recursion_half = 0); + uint64_t output_mod_factor, uint64_t recursion_depth, + uint64_t recursion_half); #endif #ifdef HEXL_HAS_AVX512DQ @@ -32,15 +32,15 @@ template void InverseTransformFromBitReverseAVX512<32>( uint64_t* operand, uint64_t degree, uint64_t modulus, const uint64_t* inv_root_of_unity_powers, const uint64_t* precon_inv_root_of_unity_powers, uint64_t input_mod_factor, - uint64_t output_mod_factor, uint64_t recursion_depth = 0, - uint64_t recursion_half = 0); + uint64_t output_mod_factor, uint64_t recursion_depth, + uint64_t recursion_half); template void InverseTransformFromBitReverseAVX512( uint64_t* operand, uint64_t degree, uint64_t modulus, const uint64_t* inv_root_of_unity_powers, const uint64_t* precon_inv_root_of_unity_powers, uint64_t input_mod_factor, - uint64_t output_mod_factor, uint64_t recursion_depth = 0, - uint64_t recursion_half = 0); + uint64_t output_mod_factor, uint64_t recursion_depth, + uint64_t recursion_half); #endif #ifdef 
HEXL_HAS_AVX512DQ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0c0dd65d..00ee8e90 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -38,7 +38,11 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") target_link_libraries(unit-test PRIVATE gflags) endif() # Disable inline, due to incorect optimization in ExtractValues, causing failing tests in Windows AVX512 in Release mode with HEXL_DEBUG=OFF - target_compile_options(unit-test PRIVATE /Wall /W4 /Zc:preprocessor /Ob0) + target_compile_options(unit-test PRIVATE /Wall /W4 /Zc:preprocessor /Ob0 + /wd4127 # warning C4127: conditional expression is constant; C++11 doesn't support constexpr + /wd4389 # warning C4389: signed/unsigned mismatch from gtest + ) + target_compile_definitions(unit-test PRIVATE -D_CRT_SECURE_NO_WARNINGS) endif() target_include_directories(unit-test PRIVATE