-
Notifications
You must be signed in to change notification settings - Fork 205
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
69 changed files
with
775 additions
and
1,592 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ jobs: | |
strategy: | ||
fail-fast: false | ||
matrix: | ||
compiler: | ||
compiler: | ||
- { name: g++-7, cc: gcc-7, cxx: g++-7, distro: ubuntu-20.04 } | ||
- { name: g++-8, cc: gcc-8, cxx: g++-8, distro: ubuntu-20.04 } | ||
- { name: g++-9, cc: gcc-9, cxx: g++-9, distro: ubuntu-latest } | ||
|
@@ -33,7 +33,7 @@ jobs: | |
# - { name: clang-15, cc: clang-15, cxx: clang++-15, distro: ubuntu-latest } | ||
|
||
runs-on: ${{ matrix.compiler.distro }} | ||
|
||
steps: | ||
- uses: actions/[email protected] | ||
with: | ||
|
@@ -44,7 +44,7 @@ jobs: | |
env: | ||
CC: ${{ matrix.compiler.cc }} | ||
CXX: ${{ matrix.compiler.cxx }} | ||
run: mkdir build && cd build && cmake -DCMAKE_CXX_FLAGS="-Werror" .. | ||
run: mkdir build && cd build && cmake -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON .. | ||
- name: Build | ||
run: | | ||
echo "Build with $(nproc) thread(s)" | ||
|
@@ -107,14 +107,14 @@ jobs: | |
- arch: riscv64 | ||
distro: ubuntu22.04 | ||
compiler: { name: g++-12, cc: gcc-12, cxx: g++-12 } | ||
|
||
steps: | ||
- uses: actions/[email protected] | ||
with: | ||
submodules: 'recursive' | ||
- uses: uraimo/[email protected] | ||
name: Build in non-x86 container | ||
continue-on-error: ${{ contains(fromJson('["ppc64le", "s390x"]'), matrix.arch) || contains(fromJson('["clang-14"]'), matrix.compiler.name) }} | ||
continue-on-error: ${{ contains(fromJson('["ppc64le", "s390x"]'), matrix.arch) }} | ||
id: build | ||
with: | ||
arch: ${{ matrix.arch }} | ||
|
@@ -150,7 +150,7 @@ jobs: | |
run: | | ||
cd /volk | ||
cd build | ||
cmake -DCMAKE_CXX_FLAGS="-Werror" .. | ||
cmake -DCMAKE_CXX_FLAGS="-Werror" -DBUILD_EXECUTABLE=ON .. | ||
echo "Build with $(nproc) thread(s)" | ||
make -j$(nproc) | ||
./cpu_features/list_cpu_features | ||
|
@@ -173,7 +173,7 @@ jobs: | |
- name: dependencies | ||
run: sudo apt install python3-mako liborc-dev | ||
- name: configure | ||
run: mkdir build && cd build && cmake -DENABLE_STATIC_LIBS=True .. | ||
run: mkdir build && cd build && cmake -DENABLE_STATIC_LIBS=True -DBUILD_EXECUTABLE=ON .. | ||
- name: build | ||
run: cmake --build build -j$(nproc) | ||
- name: Print info | ||
|
@@ -206,7 +206,7 @@ jobs: | |
|
||
# build-windows-msys2: | ||
# name: Build on windows-latest using MinGW and MSYS2 | ||
|
||
# runs-on: windows-latest | ||
# steps: | ||
# - uses: msys2/setup-msys2@v2 | ||
|
@@ -231,7 +231,7 @@ jobs: | |
# - name: Build | ||
# shell: msys2 {0} | ||
# run: cd build && make -j$(nproc) | ||
# - name: Test | ||
# - name: Test | ||
# shell: msys2 {0} | ||
# run: | | ||
# cd build | ||
|
@@ -248,7 +248,7 @@ jobs: | |
- name: dependencies | ||
run: pip3 install mako | ||
- name: configure | ||
run: mkdir build && cd build && cmake .. | ||
run: mkdir build && cd build && cmake -DBUILD_EXECUTABLE=ON .. | ||
- name: build | ||
run: cmake --build build --config Debug -j3 | ||
- name: Print info | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# | ||
# Copyright 2011-2020 Free Software Foundation, Inc. | ||
# Copyright 2023 Magnus Lundmark <[email protected]> | ||
# | ||
# This file is part of VOLK | ||
# | ||
|
@@ -144,6 +145,7 @@ if (VOLK_CPU_FEATURES) | |
FORCE) | ||
set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}") | ||
set(BUILD_SHARED_LIBS OFF) | ||
set(ENABLE_INSTALL OFF) | ||
add_subdirectory(cpu_features) | ||
set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}") | ||
endif() | ||
|
@@ -248,6 +250,7 @@ install(FILES | |
${CMAKE_SOURCE_DIR}/include/volk/saturation_arithmetic.h | ||
${CMAKE_SOURCE_DIR}/include/volk/volk_avx_intrinsics.h | ||
${CMAKE_SOURCE_DIR}/include/volk/volk_avx2_intrinsics.h | ||
${CMAKE_SOURCE_DIR}/include/volk/volk_avx2_fma_intrinsics.h | ||
${CMAKE_SOURCE_DIR}/include/volk/volk_sse_intrinsics.h | ||
${CMAKE_SOURCE_DIR}/include/volk/volk_sse3_intrinsics.h | ||
${CMAKE_SOURCE_DIR}/include/volk/volk_neon_intrinsics.h | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule cpu_features
updated
30 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* -*- c++ -*- */ | ||
/* | ||
* Copyright 2023 Magnus Lundmark <[email protected]> | ||
* | ||
* This file is part of VOLK | ||
* | ||
* SPDX-License-Identifier: LGPL-3.0-or-later | ||
*/ | ||
|
||
/* | ||
* This file is intended to hold helper functions composed of AVX2 FMA intrinsics. | ||
* They should be used in VOLK kernels to avoid copy-paste. | ||
*/ | ||
|
||
#ifndef INCLUDE_VOLK_VOLK_AVX2_FMA_INTRINSICS_H_ | ||
#define INCLUDE_VOLK_VOLK_AVX2_FMA_INTRINSICS_H_ | ||
#include <immintrin.h> | ||
|
||
/*
 * Approximate arctan(x) for eight packed floats with an odd polynomial
 * of degree 13, intended for inputs on the interval [-1, 1].
 *
 * Stated maximum relative error: ~6.5e-7.
 *
 * The polynomial
 *     x * (a1 + a3*x^2 + a5*x^4 + ... + a13*x^12)
 * is evaluated in x^2 via Horner's method, one fused multiply-add per
 * coefficient, followed by a final multiplication by x.
 *
 * x:       packed single-precision inputs
 * returns: packed single-precision approximations of arctan(x)
 */
static inline __m256 _m256_arctan_poly_avx2_fma(const __m256 x)
{
    /* Polynomial coefficients, written as exact hexadecimal float literals. */
    const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f);
    const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f);
    const __m256 a5 = _mm256_set1_ps(+0x1.972be6p-3f);
    const __m256 a7 = _mm256_set1_ps(-0x1.1436ap-3f);
    const __m256 a9 = _mm256_set1_ps(+0x1.5785aap-4f);
    const __m256 a11 = _mm256_set1_ps(-0x1.2f3004p-5f);
    const __m256 a13 = _mm256_set1_ps(+0x1.01a37cp-7f);

    const __m256 x_times_x = _mm256_mul_ps(x, x);

    /* Horner scheme in x^2, starting from the highest-order coefficient. */
    __m256 arctan = a13;
    arctan = _mm256_fmadd_ps(x_times_x, arctan, a11);
    arctan = _mm256_fmadd_ps(x_times_x, arctan, a9);
    arctan = _mm256_fmadd_ps(x_times_x, arctan, a7);
    arctan = _mm256_fmadd_ps(x_times_x, arctan, a5);
    arctan = _mm256_fmadd_ps(x_times_x, arctan, a3);
    arctan = _mm256_fmadd_ps(x_times_x, arctan, a1);

    /* The remaining factor of x makes the polynomial odd. */
    arctan = _mm256_mul_ps(x, arctan);

    return arctan;
}
|
||
#endif /* INCLUDE_VOLK_VOLK_AVX2_FMA_INTRINSICS_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
/* -*- c++ -*- */ | ||
/* | ||
* Copyright 2015 Free Software Foundation, Inc. | ||
* Copyright 2023 Magnus Lundmark <[email protected]> | ||
* | ||
* This file is part of VOLK | ||
* | ||
|
@@ -16,6 +17,43 @@ | |
#define INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_ | ||
#include <immintrin.h> | ||
|
||
/*
 * Approximate arctan(x) for eight packed floats with an odd polynomial
 * of degree 13, intended for inputs on the interval [-1, 1].
 *
 * Stated maximum relative error: ~6.5e-7.
 *
 * The polynomial
 *     x * (a1 + a3*x^2 + a5*x^4 + ... + a13*x^12)
 * is evaluated in x^2 via Horner's method. Each step is a separate
 * multiply followed by an add (no fused multiply-add is used here),
 * and the result is finally multiplied by x.
 *
 * x:       packed single-precision inputs
 * returns: packed single-precision approximations of arctan(x)
 */
static inline __m256 _m256_arctan_poly_avx(const __m256 x)
{
    /* Polynomial coefficients, written as exact hexadecimal float literals. */
    const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f);
    const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f);
    const __m256 a5 = _mm256_set1_ps(+0x1.972be6p-3f);
    const __m256 a7 = _mm256_set1_ps(-0x1.1436ap-3f);
    const __m256 a9 = _mm256_set1_ps(+0x1.5785aap-4f);
    const __m256 a11 = _mm256_set1_ps(-0x1.2f3004p-5f);
    const __m256 a13 = _mm256_set1_ps(+0x1.01a37cp-7f);

    const __m256 x_times_x = _mm256_mul_ps(x, x);

    /* Horner scheme in x^2, starting from the highest-order coefficient. */
    __m256 arctan = a13;
    arctan = _mm256_mul_ps(x_times_x, arctan);
    arctan = _mm256_add_ps(arctan, a11);
    arctan = _mm256_mul_ps(x_times_x, arctan);
    arctan = _mm256_add_ps(arctan, a9);
    arctan = _mm256_mul_ps(x_times_x, arctan);
    arctan = _mm256_add_ps(arctan, a7);
    arctan = _mm256_mul_ps(x_times_x, arctan);
    arctan = _mm256_add_ps(arctan, a5);
    arctan = _mm256_mul_ps(x_times_x, arctan);
    arctan = _mm256_add_ps(arctan, a3);
    arctan = _mm256_mul_ps(x_times_x, arctan);
    arctan = _mm256_add_ps(arctan, a1);

    /* The remaining factor of x makes the polynomial odd. */
    arctan = _mm256_mul_ps(x, arctan);

    return arctan;
}
|
||
static inline __m256 _mm256_complexmul_ps(__m256 x, __m256 y) | ||
{ | ||
__m256 yl, yh, tmp1, tmp2; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
/* -*- c++ -*- */ | ||
/* | ||
* Copyright 2010, 2011, 2015-2017, 2019, 2020 Free Software Foundation, Inc. | ||
* Copyright 2023 Magnus Lundmark <[email protected]> | ||
* | ||
* This file is part of VOLK | ||
* | ||
|
@@ -166,6 +167,50 @@ static inline float log2f_non_ieee(float f) | |
// Constant used to do log10 calculations as faster log2 | ||
//////////////////////////////////////////////////////////////////////// | ||
// precalculated 10.0 / log2f_non_ieee(10.0) to allow for constexpr | ||
#define volk_log2to10factor 3.01029995663981209120 | ||
#define volk_log2to10factor (0x1.815182p1) // 3.01029995663981209120 | ||
|
||
//////////////////////////////////////////////////////////////////////// | ||
// arctan(x) | ||
//////////////////////////////////////////////////////////////////////// | ||
static inline float volk_arctan_poly(const float x) | ||
{ | ||
/* | ||
* arctan(x) polynomial expansion on the interval [-1, 1] | ||
* Maximum relative error < 6.6e-7 | ||
*/ | ||
const float a1 = +0x1.ffffeap-1f; | ||
const float a3 = -0x1.55437p-2f; | ||
const float a5 = +0x1.972be6p-3f; | ||
const float a7 = -0x1.1436ap-3f; | ||
const float a9 = +0x1.5785aap-4f; | ||
const float a11 = -0x1.2f3004p-5f; | ||
const float a13 = +0x1.01a37cp-7f; | ||
|
||
const float x_times_x = x * x; | ||
float arctan = a13; | ||
arctan = fmaf(x_times_x, arctan, a11); | ||
arctan = fmaf(x_times_x, arctan, a9); | ||
arctan = fmaf(x_times_x, arctan, a7); | ||
arctan = fmaf(x_times_x, arctan, a5); | ||
arctan = fmaf(x_times_x, arctan, a3); | ||
arctan = fmaf(x_times_x, arctan, a1); | ||
arctan *= x; | ||
|
||
return arctan; | ||
} | ||
|
||
/*
 * Approximate arctan(x) for arbitrary x.
 *
 * For |x| < 1 the polynomial approximation volk_arctan_poly() is applied
 * directly. Otherwise the argument is folded back onto [-1, 1] using
 *     arctan(x) + arctan(1 / x) == sign(x) * pi / 2
 * so the polynomial is evaluated at 1 / x instead.
 *
 * x:       input value
 * returns: approximation of arctan(x)
 */
static inline float volk_arctan(const float x)
{
    const float pi_over_2 = 0x1.921fb6p0f;

    /* fabsf keeps the comparison in single precision (no promotion to double). */
    if (fabsf(x) < 1.f) {
        return volk_arctan_poly(x);
    }

    /* copysignf supplies sign(x) * pi / 2. */
    return copysignf(pi_over_2, x) - volk_arctan_poly(1.f / x);
}
|
||
#endif /*INCLUDED_LIBVOLK_COMMON_H*/ |
Oops, something went wrong.