From fa419fd4d03f4984f4169fd13646baa259962803 Mon Sep 17 00:00:00 2001 From: Paul Date: Fri, 1 Nov 2024 11:16:48 -0400 Subject: [PATCH] Move to new SIMD methodology (#61) Move to a new SIMD methodology which allows arm64ec to use neon instructions while keeping our code mostly intact. --- .github/workflows/build-pr.yml | 39 +- CMakeLists.txt | 37 +- cmake/CPM.cmake | 1281 ++++++++++++++++- .../filter_plot_tool/filter_plot_tool.cpp | 14 + .../FilterPlotComponent.cpp | 14 + .../FilterPlotComponent.h | 14 + .../filters_example_plugin/FiltersPlugin.cpp | 61 +- .../filters_example_plugin/FiltersPlugin.h | 14 + .../FiltersPluginEditor.cpp | 14 + .../FiltersPluginEditor.h | 14 + include-extras/sst/filters/FilterPlotter.h | 96 +- include/sst/filters/CutoffWarp.h | 77 +- include/sst/filters/CytomicSVF.h | 147 +- include/sst/filters/DiodeLadder.h | 102 +- .../sst/filters/FilterCoefficientMaker_Impl.h | 8 +- include/sst/filters/HalfRateFilter.h | 311 ++-- include/sst/filters/K35Filter.h | 50 +- include/sst/filters/OBXDFilter.h | 231 +-- include/sst/filters/QuadFilterUnit.h | 8 +- include/sst/filters/QuadFilterUnit_Impl.h | 790 +++++----- include/sst/filters/ResonanceWarp.h | 18 +- include/sst/filters/TriPoleFilter.h | 111 +- include/sst/filters/VintageLadders.h | 75 +- include/sst/utilities/globals.h | 23 +- include/sst/utilities/shared.h | 18 +- scripts/fix_file_comments.pl | 4 +- tests/BasicFiltersTest.cpp | 14 + tests/BiquadTest.cpp | 14 + tests/CMakeLists.txt | 1 + tests/CutoffWarpTest.cpp | 14 + tests/CytomicSVFTests.cpp | 17 +- tests/DiodeLadderTest.cpp | 14 + tests/HalfRateTest.cpp | 15 +- tests/K35FilterTest.cpp | 14 + tests/OBXDFilterTest.cpp | 14 + tests/ResonanceWarpTest.cpp | 14 + tests/TestUtils.h | 21 +- tests/TriPoleFilterTest.cpp | 14 + tests/VintageLaddersTest.cpp | 14 + tests/tests.cpp | 14 + 40 files changed, 2691 insertions(+), 1074 deletions(-) diff --git a/.github/workflows/build-pr.yml b/.github/workflows/build-pr.yml index 17b87fb..a76d0ea 100644 --- 
a/.github/workflows/build-pr.yml +++ b/.github/workflows/build-pr.yml @@ -7,21 +7,47 @@ on: jobs: build_tests: - name: Test ${{ matrix.os }} + name: Test ${{ matrix.name }} runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: - os: [ ubuntu-latest, macos-latest, windows-latest ] include: - os: ubuntu-latest name: linux + runTest: true testExe: build/tests/sst-filters-tests + - os: macos-latest - name: mac + name: mac-x86 + runTest: true testExe: build/tests/sst-filters-tests + cmakeArgs: -DCMAKE_OSX_ARCHITECTURES=x86_64 + + - os: macos-latest + name: mac-arm + cmakeArgs: -DCMAKE_OSX_ARCHITECTURES=arm64 + + - os: macos-latest + name: mac-arm-nonative + cmakeArgs: -DCMAKE_OSX_ARCHITECTURES=arm64 -DSST_BASIC_BLOCKS_SIMD_OMIT_NATIVE_ALIASES=TRUE + + - os: windows-latest + name: win-x86 + runTest: true + testExe: build/tests/Release/sst-filters-tests.exe + + - os: windows-latest + name: win-arm64 + cmakeArgs: -G"Visual Studio 17 2022" -A arm64 -DCMAKE_SYSTEM_VERSION=10 + + - os: windows-latest + name: win-arm64ec + cmakeArgs: -G"Visual Studio 17 2022" -A arm64ec -DCMAKE_SYSTEM_VERSION=10 + - os: windows-latest - name: win - testExe: build/tests/Release/sst-filters-tests.exe + name: win-arm64-non-native + cmakeArgs: -G"Visual Studio 17 2022" -A arm64 -DCMAKE_SYSTEM_VERSION=10 -DSST_BASIC_BLOCKS_SIMD_OMIT_NATIVE_ALIASES=TRUE steps: @@ -32,10 +58,11 @@ jobs: - name: Build Smoke test run: | - cmake -S . -B ./build -DCMAKE_BUILD_TYPE=Release -DSST_FILTERS_BUILD_TESTS=TRUE -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" + cmake -S . 
-B ./build -DCMAKE_BUILD_TYPE=Release -DSST_FILTERS_BUILD_TESTS=TRUE ${{ matrix.cmakeArgs }} cmake --build ./build --config Release - name: Run Smoke Test + if: ${{ matrix.runTest }} run: | ls ${{ matrix.testExe }} ${{ matrix.testExe }} diff --git a/CMakeLists.txt b/CMakeLists.txt index fedffd3..24fb7a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,12 +6,12 @@ set(CMAKE_CXX_STANDARD 17) add_library(${PROJECT_NAME} INTERFACE) target_include_directories(${PROJECT_NAME} INTERFACE include) -if(MSVC) +if (MSVC) target_compile_definitions(${PROJECT_NAME} - INTERFACE + INTERFACE _USE_MATH_DEFINES=1 # So that we can have M_PI on MSVC ) -endif() +endif () add_library(${PROJECT_NAME}-extras INTERFACE) target_include_directories(${PROJECT_NAME}-extras INTERFACE include-extras) @@ -30,42 +30,43 @@ option(SST_FILTERS_BUILD_EXAMPLES "Add targets for building and running sst-filt if (SST_FILTERS_BUILD_TESTS OR SST_FILTERS_BUILD_EXAMPLES) message(STATUS "Importing SIMDE with CPM") - if (NOT TARGET sst-basic-blocks) - CPMAddPackage(NAME sst-basic-blocks - GITHUB_REPOSITORY surge-synthesizer/sst-basic-blocks - GIT_TAG main - ) - endif() - if (NOT TARGET simde) CPMAddPackage(NAME simde GITHUB_REPOSITORY simd-everywhere/simde VERSION 0.7.2 - ) + ) add_library(simde INTERFACE) target_include_directories(simde INTERFACE ${simde_SOURCE_DIR}) - endif() + endif () + + if (NOT TARGET sst-basic-blocks) + CPMAddPackage(NAME sst-basic-blocks + GITHUB_REPOSITORY surge-synthesizer/sst-basic-blocks + GIT_TAG main + ) + endif () endif () if (SST_GET_BASIC_BLOCKS) CPMAddPackage(NAME sst-basic-blocks GITHUB_REPOSITORY surge-synthesizer/sst-basic-blocks GIT_TAG main - ) -endif() + ) +endif () if (NOT TARGET sst-basic-blocks) message(FATAL_ERROR "sst-basic-blocks is not available in this context. 
Set SST_GET_BASIC_BLOCKS=1 or add it") -else() +else () target_link_libraries(${PROJECT_NAME} INTERFACE sst-basic-blocks) -endif() +endif () if (SST_FILTERS_BUILD_TESTS) + message(STATUS "Adding test targets") add_subdirectory(tests) endif () -if(SST_FILTERS_BUILD_EXAMPLES) +if (SST_FILTERS_BUILD_EXAMPLES) add_subdirectory(examples) -endif() +endif () diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake index eaa397b..8269a8b 100644 --- a/cmake/CPM.cmake +++ b/cmake/CPM.cmake @@ -1,32 +1,1269 @@ -set(CPM_DOWNLOAD_VERSION 0.36.0) +# CPM.cmake - CMake's missing package manager +# =========================================== +# See https://github.com/cpm-cmake/CPM.cmake for usage and update instructions. +# +# MIT License +# ----------- +#[[ + Copyright (c) 2019-2023 Lars Melchior and contributors -if(CPM_SOURCE_CACHE) - set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") -elseif(DEFINED ENV{CPM_SOURCE_CACHE}) - set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +]] + +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +# Initialize logging prefix +if(NOT CPM_INDENT) + set(CPM_INDENT + "CPM:" + CACHE INTERNAL "" + ) +endif() + +if(NOT COMMAND cpm_message) + function(cpm_message) + message(${ARGV}) + endfunction() +endif() + +set(CURRENT_CPM_VERSION 0.40.2) + +get_filename_component(CPM_CURRENT_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +if(CPM_DIRECTORY) + if(NOT CPM_DIRECTORY STREQUAL CPM_CURRENT_DIRECTORY) + if(CPM_VERSION VERSION_LESS CURRENT_CPM_VERSION) + message( + AUTHOR_WARNING + "${CPM_INDENT} \ +A dependency is using a more recent CPM version (${CURRENT_CPM_VERSION}) than the current project (${CPM_VERSION}). \ +It is recommended to upgrade CPM to the most recent version. \ +See https://github.com/cpm-cmake/CPM.cmake for more information." + ) + endif() + if(${CMAKE_VERSION} VERSION_LESS "3.17.0") + include(FetchContent) + endif() + return() + endif() + + get_property( + CPM_INITIALIZED GLOBAL "" + PROPERTY CPM_INITIALIZED + SET + ) + if(CPM_INITIALIZED) + return() + endif() +endif() + +if(CURRENT_CPM_VERSION MATCHES "development-version") + message( + WARNING "${CPM_INDENT} Your project is using an unstable development version of CPM.cmake. \ +Please update to a recent release if possible. \ +See https://github.com/cpm-cmake/CPM.cmake for details." 
+ ) +endif() + +set_property(GLOBAL PROPERTY CPM_INITIALIZED true) + +macro(cpm_set_policies) + # the policy allows us to change options without caching + cmake_policy(SET CMP0077 NEW) + set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) + + # the policy allows us to change set(CACHE) without caching + if(POLICY CMP0126) + cmake_policy(SET CMP0126 NEW) + set(CMAKE_POLICY_DEFAULT_CMP0126 NEW) + endif() + + # The policy uses the download time for timestamp, instead of the timestamp in the archive. This + # allows for proper rebuilds when a projects url changes + if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) + set(CMAKE_POLICY_DEFAULT_CMP0135 NEW) + endif() + + # treat relative git repository paths as being relative to the parent project's remote + if(POLICY CMP0150) + cmake_policy(SET CMP0150 NEW) + set(CMAKE_POLICY_DEFAULT_CMP0150 NEW) + endif() +endmacro() +cpm_set_policies() + +option(CPM_USE_LOCAL_PACKAGES "Always try to use `find_package` to get dependencies" + $ENV{CPM_USE_LOCAL_PACKAGES} +) +option(CPM_LOCAL_PACKAGES_ONLY "Only use `find_package` to get dependencies" + $ENV{CPM_LOCAL_PACKAGES_ONLY} +) +option(CPM_DOWNLOAD_ALL "Always download dependencies from source" $ENV{CPM_DOWNLOAD_ALL}) +option(CPM_DONT_UPDATE_MODULE_PATH "Don't update the module path to allow using find_package" + $ENV{CPM_DONT_UPDATE_MODULE_PATH} +) +option(CPM_DONT_CREATE_PACKAGE_LOCK "Don't create a package lock file in the binary path" + $ENV{CPM_DONT_CREATE_PACKAGE_LOCK} +) +option(CPM_INCLUDE_ALL_IN_PACKAGE_LOCK + "Add all packages added through CPM.cmake to the package lock" + $ENV{CPM_INCLUDE_ALL_IN_PACKAGE_LOCK} +) +option(CPM_USE_NAMED_CACHE_DIRECTORIES + "Use additional directory of package name in cache on the most nested level." 
+ $ENV{CPM_USE_NAMED_CACHE_DIRECTORIES} +) + +set(CPM_VERSION + ${CURRENT_CPM_VERSION} + CACHE INTERNAL "" +) +set(CPM_DIRECTORY + ${CPM_CURRENT_DIRECTORY} + CACHE INTERNAL "" +) +set(CPM_FILE + ${CMAKE_CURRENT_LIST_FILE} + CACHE INTERNAL "" +) +set(CPM_PACKAGES + "" + CACHE INTERNAL "" +) +set(CPM_DRY_RUN + OFF + CACHE INTERNAL "Don't download or configure dependencies (for testing)" +) + +if(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_SOURCE_CACHE_DEFAULT $ENV{CPM_SOURCE_CACHE}) else() - set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") + set(CPM_SOURCE_CACHE_DEFAULT OFF) endif() -# Expand relative path. This is important if the provided path contains a tilde (~) -get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) +set(CPM_SOURCE_CACHE + ${CPM_SOURCE_CACHE_DEFAULT} + CACHE PATH "Directory to download CPM dependencies" +) + +if(NOT CPM_DONT_UPDATE_MODULE_PATH) + set(CPM_MODULE_PATH + "${CMAKE_BINARY_DIR}/CPM_modules" + CACHE INTERNAL "" + ) + # remove old modules + file(REMOVE_RECURSE ${CPM_MODULE_PATH}) + file(MAKE_DIRECTORY ${CPM_MODULE_PATH}) + # locally added CPM modules should override global packages + set(CMAKE_MODULE_PATH "${CPM_MODULE_PATH};${CMAKE_MODULE_PATH}") +endif() -function(download_cpm) - message(STATUS "Downloading CPM.cmake to ${CPM_DOWNLOAD_LOCATION}") - file(DOWNLOAD - https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake - ${CPM_DOWNLOAD_LOCATION} +if(NOT CPM_DONT_CREATE_PACKAGE_LOCK) + set(CPM_PACKAGE_LOCK_FILE + "${CMAKE_BINARY_DIR}/cpm-package-lock.cmake" + CACHE INTERNAL "" ) + file(WRITE ${CPM_PACKAGE_LOCK_FILE} + "# CPM Package Lock\n# This file should be committed to version control\n\n" + ) +endif() + +include(FetchContent) + +# Try to infer package name from git repository uri (path or url) +function(cpm_package_name_from_git_uri URI RESULT) + if("${URI}" MATCHES "([^/:]+)/?.git/?$") + set(${RESULT} + ${CMAKE_MATCH_1} + 
PARENT_SCOPE + ) + else() + unset(${RESULT} PARENT_SCOPE) + endif() endfunction() -if(NOT (EXISTS ${CPM_DOWNLOAD_LOCATION})) - download_cpm() -else() - # resume download if it previously failed - file(READ ${CPM_DOWNLOAD_LOCATION} check) - if("${check}" STREQUAL "") - download_cpm() +# Try to infer package name and version from a url +function(cpm_package_name_and_ver_from_url url outName outVer) + if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)") + # We matched an archive + set(filename "${CMAKE_MATCH_1}") + + if(filename MATCHES "([a-zA-Z0-9_\\.-]+)[_-]v?(([0-9]+\\.)*[0-9]+[a-zA-Z0-9]*)") + # We matched - (ie foo-1.2.3) + set(${outName} + "${CMAKE_MATCH_1}" + PARENT_SCOPE + ) + set(${outVer} + "${CMAKE_MATCH_2}" + PARENT_SCOPE + ) + elseif(filename MATCHES "(([0-9]+\\.)+[0-9]+[a-zA-Z0-9]*)") + # We couldn't find a name, but we found a version + # + # In many cases (which we don't handle here) the url would look something like + # `irrelevant/ACTUAL_PACKAGE_NAME/irrelevant/1.2.3.zip`. In such a case we can't possibly + # distinguish the package name from the irrelevant bits. Moreover if we try to match the + # package name from the filename, we'd get bogus at best. + unset(${outName} PARENT_SCOPE) + set(${outVer} + "${CMAKE_MATCH_1}" + PARENT_SCOPE + ) + else() + # Boldly assume that the file name is the package name. + # + # Yes, something like `irrelevant/ACTUAL_NAME/irrelevant/download.zip` will ruin our day, but + # such cases should be quite rare. No popular service does this... we think. 
+ set(${outName} + "${filename}" + PARENT_SCOPE + ) + unset(${outVer} PARENT_SCOPE) + endif() + else() + # No ideas yet what to do with non-archives + unset(${outName} PARENT_SCOPE) + unset(${outVer} PARENT_SCOPE) + endif() +endfunction() + +function(cpm_find_package NAME VERSION) + string(REPLACE " " ";" EXTRA_ARGS "${ARGN}") + find_package(${NAME} ${VERSION} ${EXTRA_ARGS} QUIET) + if(${CPM_ARGS_NAME}_FOUND) + if(DEFINED ${CPM_ARGS_NAME}_VERSION) + set(VERSION ${${CPM_ARGS_NAME}_VERSION}) + endif() + cpm_message(STATUS "${CPM_INDENT} Using local package ${CPM_ARGS_NAME}@${VERSION}") + CPMRegisterPackage(${CPM_ARGS_NAME} "${VERSION}") + set(CPM_PACKAGE_FOUND + YES + PARENT_SCOPE + ) + else() + set(CPM_PACKAGE_FOUND + NO + PARENT_SCOPE + ) + endif() +endfunction() + +# Create a custom FindXXX.cmake module for a CPM package This prevents `find_package(NAME)` from +# finding the system library +function(cpm_create_module_file Name) + if(NOT CPM_DONT_UPDATE_MODULE_PATH) + # erase any previous modules + file(WRITE ${CPM_MODULE_PATH}/Find${Name}.cmake + "include(\"${CPM_FILE}\")\n${ARGN}\nset(${Name}_FOUND TRUE)" + ) + endif() +endfunction() + +# Find a package locally or fallback to CPMAddPackage +function(CPMFindPackage) + set(oneValueArgs NAME VERSION GIT_TAG FIND_PACKAGE_ARGUMENTS) + + cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "" ${ARGN}) + + if(NOT DEFINED CPM_ARGS_VERSION) + if(DEFINED CPM_ARGS_GIT_TAG) + cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION) + endif() + endif() + + set(downloadPackage ${CPM_DOWNLOAD_ALL}) + if(DEFINED CPM_DOWNLOAD_${CPM_ARGS_NAME}) + set(downloadPackage ${CPM_DOWNLOAD_${CPM_ARGS_NAME}}) + elseif(DEFINED ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}}) + set(downloadPackage $ENV{CPM_DOWNLOAD_${CPM_ARGS_NAME}}) + endif() + if(downloadPackage) + CPMAddPackage(${ARGN}) + cpm_export_variables(${CPM_ARGS_NAME}) + return() + endif() + + cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS}) 
+ + if(NOT CPM_PACKAGE_FOUND) + CPMAddPackage(${ARGN}) + cpm_export_variables(${CPM_ARGS_NAME}) + endif() + +endfunction() + +# checks if a package has been added before +function(cpm_check_if_package_already_added CPM_ARGS_NAME CPM_ARGS_VERSION) + if("${CPM_ARGS_NAME}" IN_LIST CPM_PACKAGES) + CPMGetPackageVersion(${CPM_ARGS_NAME} CPM_PACKAGE_VERSION) + if("${CPM_PACKAGE_VERSION}" VERSION_LESS "${CPM_ARGS_VERSION}") + message( + WARNING + "${CPM_INDENT} Requires a newer version of ${CPM_ARGS_NAME} (${CPM_ARGS_VERSION}) than currently included (${CPM_PACKAGE_VERSION})." + ) + endif() + cpm_get_fetch_properties(${CPM_ARGS_NAME}) + set(${CPM_ARGS_NAME}_ADDED NO) + set(CPM_PACKAGE_ALREADY_ADDED + YES + PARENT_SCOPE + ) + cpm_export_variables(${CPM_ARGS_NAME}) + else() + set(CPM_PACKAGE_ALREADY_ADDED + NO + PARENT_SCOPE + ) + endif() +endfunction() + +# Parse the argument of CPMAddPackage in case a single one was provided and convert it to a list of +# arguments which can then be parsed idiomatically. For example gh:foo/bar@1.2.3 will be converted +# to: GITHUB_REPOSITORY;foo/bar;VERSION;1.2.3 +function(cpm_parse_add_package_single_arg arg outArgs) + # Look for a scheme + if("${arg}" MATCHES "^([a-zA-Z]+):(.+)$") + string(TOLOWER "${CMAKE_MATCH_1}" scheme) + set(uri "${CMAKE_MATCH_2}") + + # Check for CPM-specific schemes + if(scheme STREQUAL "gh") + set(out "GITHUB_REPOSITORY;${uri}") + set(packageType "git") + elseif(scheme STREQUAL "gl") + set(out "GITLAB_REPOSITORY;${uri}") + set(packageType "git") + elseif(scheme STREQUAL "bb") + set(out "BITBUCKET_REPOSITORY;${uri}") + set(packageType "git") + # A CPM-specific scheme was not found. 
Looks like this is a generic URL so try to determine + # type + elseif(arg MATCHES ".git/?(@|#|$)") + set(out "GIT_REPOSITORY;${arg}") + set(packageType "git") + else() + # Fall back to a URL + set(out "URL;${arg}") + set(packageType "archive") + + # We could also check for SVN since FetchContent supports it, but SVN is so rare these days. + # We just won't bother with the additional complexity it will induce in this function. SVN is + # done by multi-arg + endif() + else() + if(arg MATCHES ".git/?(@|#|$)") + set(out "GIT_REPOSITORY;${arg}") + set(packageType "git") + else() + # Give up + message(FATAL_ERROR "${CPM_INDENT} Can't determine package type of '${arg}'") + endif() + endif() + + # For all packages we interpret @... as version. Only replace the last occurrence. Thus URIs + # containing '@' can be used + string(REGEX REPLACE "@([^@]+)$" ";VERSION;\\1" out "${out}") + + # Parse the rest according to package type + if(packageType STREQUAL "git") + # For git repos we interpret #... as a tag or branch or commit hash + string(REGEX REPLACE "#([^#]+)$" ";GIT_TAG;\\1" out "${out}") + elseif(packageType STREQUAL "archive") + # For archives we interpret #... as a URL hash. + string(REGEX REPLACE "#([^#]+)$" ";URL_HASH;\\1" out "${out}") + # We don't try to parse the version if it's not provided explicitly. cpm_get_version_from_url + # should do this at a later point + else() + # We should never get here. This is an assertion and hitting it means there's a problem with the + # code above. A packageType was set, but not handled by this if-else. 
+ message(FATAL_ERROR "${CPM_INDENT} Unsupported package type '${packageType}' of '${arg}'") + endif() + + set(${outArgs} + ${out} + PARENT_SCOPE + ) +endfunction() + +# Check that the working directory for a git repo is clean +function(cpm_check_git_working_dir_is_clean repoPath gitTag isClean) + + find_package(Git REQUIRED) + + if(NOT GIT_EXECUTABLE) + # No git executable, assume directory is clean + set(${isClean} + TRUE + PARENT_SCOPE + ) + return() + endif() + + # check for uncommitted changes + execute_process( + COMMAND ${GIT_EXECUTABLE} status --porcelain + RESULT_VARIABLE resultGitStatus + OUTPUT_VARIABLE repoStatus + OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET + WORKING_DIRECTORY ${repoPath} + ) + if(resultGitStatus) + # not supposed to happen, assume clean anyway + message(WARNING "${CPM_INDENT} Calling git status on folder ${repoPath} failed") + set(${isClean} + TRUE + PARENT_SCOPE + ) + return() + endif() + + if(NOT "${repoStatus}" STREQUAL "") + set(${isClean} + FALSE + PARENT_SCOPE + ) + return() + endif() + + # check for committed changes + execute_process( + COMMAND ${GIT_EXECUTABLE} diff -s --exit-code ${gitTag} + RESULT_VARIABLE resultGitDiff + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_QUIET + WORKING_DIRECTORY ${repoPath} + ) + + if(${resultGitDiff} EQUAL 0) + set(${isClean} + TRUE + PARENT_SCOPE + ) + else() + set(${isClean} + FALSE + PARENT_SCOPE + ) + endif() + +endfunction() + +# Add PATCH_COMMAND to CPM_ARGS_UNPARSED_ARGUMENTS. This method consumes a list of files in ARGN +# then generates a `PATCH_COMMAND` appropriate for `ExternalProject_Add()`. This command is appended +# to the parent scope's `CPM_ARGS_UNPARSED_ARGUMENTS`. +function(cpm_add_patches) + # Return if no patch files are supplied. + if(NOT ARGN) + return() + endif() + + # Find the patch program. + find_program(PATCH_EXECUTABLE patch) + if(WIN32 AND NOT PATCH_EXECUTABLE) + # The Windows git executable is distributed with patch.exe. 
Find the path to the executable, if + # it exists, then search `../usr/bin` and `../../usr/bin` for patch.exe. + find_package(Git QUIET) + if(GIT_EXECUTABLE) + get_filename_component(extra_search_path ${GIT_EXECUTABLE} DIRECTORY) + get_filename_component(extra_search_path_1up ${extra_search_path} DIRECTORY) + get_filename_component(extra_search_path_2up ${extra_search_path_1up} DIRECTORY) + find_program( + PATCH_EXECUTABLE patch HINTS "${extra_search_path_1up}/usr/bin" + "${extra_search_path_2up}/usr/bin" + ) + endif() + endif() + if(NOT PATCH_EXECUTABLE) + message(FATAL_ERROR "Couldn't find `patch` executable to use with PATCHES keyword.") + endif() + + # Create a temporary + set(temp_list ${CPM_ARGS_UNPARSED_ARGUMENTS}) + + # Ensure each file exists (or error out) and add it to the list. + set(first_item True) + foreach(PATCH_FILE ${ARGN}) + # Make sure the patch file exists, if we can't find it, try again in the current directory. + if(NOT EXISTS "${PATCH_FILE}") + if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}") + message(FATAL_ERROR "Couldn't find patch file: '${PATCH_FILE}'") + endif() + set(PATCH_FILE "${CMAKE_CURRENT_LIST_DIR}/${PATCH_FILE}") + endif() + + # Convert to absolute path for use with patch file command. + get_filename_component(PATCH_FILE "${PATCH_FILE}" ABSOLUTE) + + # The first patch entry must be preceded by "PATCH_COMMAND" while the following items are + # preceded by "&&". + if(first_item) + set(first_item False) + list(APPEND temp_list "PATCH_COMMAND") + else() + list(APPEND temp_list "&&") + endif() + # Add the patch command to the list + list(APPEND temp_list "${PATCH_EXECUTABLE}" "-p1" "<" "${PATCH_FILE}") + endforeach() + + # Move temp out into parent scope. + set(CPM_ARGS_UNPARSED_ARGUMENTS + ${temp_list} + PARENT_SCOPE + ) + +endfunction() + +# method to overwrite internal FetchContent properties, to allow using CPM.cmake to overload +# FetchContent calls. 
As these are internal cmake properties, this method should be used carefully +# and may need modification in future CMake versions. Source: +# https://github.com/Kitware/CMake/blob/dc3d0b5a0a7d26d43d6cfeb511e224533b5d188f/Modules/FetchContent.cmake#L1152 +function(cpm_override_fetchcontent contentName) + cmake_parse_arguments(PARSE_ARGV 1 arg "" "SOURCE_DIR;BINARY_DIR" "") + if(NOT "${arg_UNPARSED_ARGUMENTS}" STREQUAL "") + message(FATAL_ERROR "${CPM_INDENT} Unsupported arguments: ${arg_UNPARSED_ARGUMENTS}") + endif() + + string(TOLOWER ${contentName} contentNameLower) + set(prefix "_FetchContent_${contentNameLower}") + + set(propertyName "${prefix}_sourceDir") + define_property( + GLOBAL + PROPERTY ${propertyName} + BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()" + FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}" + ) + set_property(GLOBAL PROPERTY ${propertyName} "${arg_SOURCE_DIR}") + + set(propertyName "${prefix}_binaryDir") + define_property( + GLOBAL + PROPERTY ${propertyName} + BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()" + FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}" + ) + set_property(GLOBAL PROPERTY ${propertyName} "${arg_BINARY_DIR}") + + set(propertyName "${prefix}_populated") + define_property( + GLOBAL + PROPERTY ${propertyName} + BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()" + FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}" + ) + set_property(GLOBAL PROPERTY ${propertyName} TRUE) +endfunction() + +# Download and add a package from source +function(CPMAddPackage) + cpm_set_policies() + + list(LENGTH ARGN argnLength) + if(argnLength EQUAL 1) + cpm_parse_add_package_single_arg("${ARGN}" ARGN) + + # The shorthand syntax implies EXCLUDE_FROM_ALL and SYSTEM + set(ARGN "${ARGN};EXCLUDE_FROM_ALL;YES;SYSTEM;YES;") + endif() + + set(oneValueArgs + NAME + FORCE + VERSION + GIT_TAG + DOWNLOAD_ONLY + GITHUB_REPOSITORY + 
GITLAB_REPOSITORY + BITBUCKET_REPOSITORY + GIT_REPOSITORY + SOURCE_DIR + FIND_PACKAGE_ARGUMENTS + NO_CACHE + SYSTEM + GIT_SHALLOW + EXCLUDE_FROM_ALL + SOURCE_SUBDIR + CUSTOM_CACHE_KEY + ) + + set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND PATCHES) + + cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") + + # Set default values for arguments + + if(NOT DEFINED CPM_ARGS_VERSION) + if(DEFINED CPM_ARGS_GIT_TAG) + cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION) + endif() + endif() + + if(CPM_ARGS_DOWNLOAD_ONLY) + set(DOWNLOAD_ONLY ${CPM_ARGS_DOWNLOAD_ONLY}) + else() + set(DOWNLOAD_ONLY NO) + endif() + + if(DEFINED CPM_ARGS_GITHUB_REPOSITORY) + set(CPM_ARGS_GIT_REPOSITORY "https://github.com/${CPM_ARGS_GITHUB_REPOSITORY}.git") + elseif(DEFINED CPM_ARGS_GITLAB_REPOSITORY) + set(CPM_ARGS_GIT_REPOSITORY "https://gitlab.com/${CPM_ARGS_GITLAB_REPOSITORY}.git") + elseif(DEFINED CPM_ARGS_BITBUCKET_REPOSITORY) + set(CPM_ARGS_GIT_REPOSITORY "https://bitbucket.org/${CPM_ARGS_BITBUCKET_REPOSITORY}.git") endif() -endif() -include(${CPM_DOWNLOAD_LOCATION}) + if(DEFINED CPM_ARGS_GIT_REPOSITORY) + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_REPOSITORY ${CPM_ARGS_GIT_REPOSITORY}) + if(NOT DEFINED CPM_ARGS_GIT_TAG) + set(CPM_ARGS_GIT_TAG v${CPM_ARGS_VERSION}) + endif() + + # If a name wasn't provided, try to infer it from the git repo + if(NOT DEFINED CPM_ARGS_NAME) + cpm_package_name_from_git_uri(${CPM_ARGS_GIT_REPOSITORY} CPM_ARGS_NAME) + endif() + endif() + + set(CPM_SKIP_FETCH FALSE) + + if(DEFINED CPM_ARGS_GIT_TAG) + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_TAG ${CPM_ARGS_GIT_TAG}) + # If GIT_SHALLOW is explicitly specified, honor the value. 
+ if(DEFINED CPM_ARGS_GIT_SHALLOW) + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW ${CPM_ARGS_GIT_SHALLOW}) + endif() + endif() + + if(DEFINED CPM_ARGS_URL) + # If a name or version aren't provided, try to infer them from the URL + list(GET CPM_ARGS_URL 0 firstUrl) + cpm_package_name_and_ver_from_url(${firstUrl} nameFromUrl verFromUrl) + # If we fail to obtain name and version from the first URL, we could try other URLs if any. + # However multiple URLs are expected to be quite rare, so for now we won't bother. + + # If the caller provided their own name and version, they trump the inferred ones. + if(NOT DEFINED CPM_ARGS_NAME) + set(CPM_ARGS_NAME ${nameFromUrl}) + endif() + if(NOT DEFINED CPM_ARGS_VERSION) + set(CPM_ARGS_VERSION ${verFromUrl}) + endif() + + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS URL "${CPM_ARGS_URL}") + endif() + + # Check for required arguments + + if(NOT DEFINED CPM_ARGS_NAME) + message( + FATAL_ERROR + "${CPM_INDENT} 'NAME' was not provided and couldn't be automatically inferred for package added with arguments: '${ARGN}'" + ) + endif() + + # Check if package has been added before + cpm_check_if_package_already_added(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}") + if(CPM_PACKAGE_ALREADY_ADDED) + cpm_export_variables(${CPM_ARGS_NAME}) + return() + endif() + + # Check for manual overrides + if(NOT CPM_ARGS_FORCE AND NOT "${CPM_${CPM_ARGS_NAME}_SOURCE}" STREQUAL "") + set(PACKAGE_SOURCE ${CPM_${CPM_ARGS_NAME}_SOURCE}) + set(CPM_${CPM_ARGS_NAME}_SOURCE "") + CPMAddPackage( + NAME "${CPM_ARGS_NAME}" + SOURCE_DIR "${PACKAGE_SOURCE}" + EXCLUDE_FROM_ALL "${CPM_ARGS_EXCLUDE_FROM_ALL}" + SYSTEM "${CPM_ARGS_SYSTEM}" + PATCHES "${CPM_ARGS_PATCHES}" + OPTIONS "${CPM_ARGS_OPTIONS}" + SOURCE_SUBDIR "${CPM_ARGS_SOURCE_SUBDIR}" + DOWNLOAD_ONLY "${DOWNLOAD_ONLY}" + FORCE True + ) + cpm_export_variables(${CPM_ARGS_NAME}) + return() + endif() + + # Check for available declaration + if(NOT CPM_ARGS_FORCE AND NOT "${CPM_DECLARATION_${CPM_ARGS_NAME}}" STREQUAL "") + 
set(declaration ${CPM_DECLARATION_${CPM_ARGS_NAME}}) + set(CPM_DECLARATION_${CPM_ARGS_NAME} "") + CPMAddPackage(${declaration}) + cpm_export_variables(${CPM_ARGS_NAME}) + # checking again to ensure version and option compatibility + cpm_check_if_package_already_added(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}") + return() + endif() + + if(NOT CPM_ARGS_FORCE) + if(CPM_USE_LOCAL_PACKAGES OR CPM_LOCAL_PACKAGES_ONLY) + cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS}) + + if(CPM_PACKAGE_FOUND) + cpm_export_variables(${CPM_ARGS_NAME}) + return() + endif() + + if(CPM_LOCAL_PACKAGES_ONLY) + message( + SEND_ERROR + "${CPM_INDENT} ${CPM_ARGS_NAME} not found via find_package(${CPM_ARGS_NAME} ${CPM_ARGS_VERSION})" + ) + endif() + endif() + endif() + + CPMRegisterPackage("${CPM_ARGS_NAME}" "${CPM_ARGS_VERSION}") + + if(DEFINED CPM_ARGS_GIT_TAG) + set(PACKAGE_INFO "${CPM_ARGS_GIT_TAG}") + elseif(DEFINED CPM_ARGS_SOURCE_DIR) + set(PACKAGE_INFO "${CPM_ARGS_SOURCE_DIR}") + else() + set(PACKAGE_INFO "${CPM_ARGS_VERSION}") + endif() + + if(DEFINED FETCHCONTENT_BASE_DIR) + # respect user's FETCHCONTENT_BASE_DIR if set + set(CPM_FETCHCONTENT_BASE_DIR ${FETCHCONTENT_BASE_DIR}) + else() + set(CPM_FETCHCONTENT_BASE_DIR ${CMAKE_BINARY_DIR}/_deps) + endif() + + cpm_add_patches(${CPM_ARGS_PATCHES}) + + if(DEFINED CPM_ARGS_DOWNLOAD_COMMAND) + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS DOWNLOAD_COMMAND ${CPM_ARGS_DOWNLOAD_COMMAND}) + elseif(DEFINED CPM_ARGS_SOURCE_DIR) + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${CPM_ARGS_SOURCE_DIR}) + if(NOT IS_ABSOLUTE ${CPM_ARGS_SOURCE_DIR}) + # Expand `CPM_ARGS_SOURCE_DIR` relative path. This is important because EXISTS doesn't work + # for relative paths. 
+ get_filename_component( + source_directory ${CPM_ARGS_SOURCE_DIR} REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR} + ) + else() + set(source_directory ${CPM_ARGS_SOURCE_DIR}) + endif() + if(NOT EXISTS ${source_directory}) + string(TOLOWER ${CPM_ARGS_NAME} lower_case_name) + # remove timestamps so CMake will re-download the dependency + file(REMOVE_RECURSE "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild") + endif() + elseif(CPM_SOURCE_CACHE AND NOT CPM_ARGS_NO_CACHE) + string(TOLOWER ${CPM_ARGS_NAME} lower_case_name) + set(origin_parameters ${CPM_ARGS_UNPARSED_ARGUMENTS}) + list(SORT origin_parameters) + if(CPM_ARGS_CUSTOM_CACHE_KEY) + # Application set a custom unique directory name + set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY}) + elseif(CPM_USE_NAMED_CACHE_DIRECTORIES) + string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG") + set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME}) + else() + string(SHA1 origin_hash "${origin_parameters}") + set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}) + endif() + # Expand `download_directory` relative path. This is important because EXISTS doesn't work for + # relative paths. 
+ get_filename_component(download_directory ${download_directory} ABSOLUTE) + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${download_directory}) + + if(CPM_SOURCE_CACHE) + file(LOCK ${download_directory}/../cmake.lock) + endif() + + if(EXISTS ${download_directory}) + if(CPM_SOURCE_CACHE) + file(LOCK ${download_directory}/../cmake.lock RELEASE) + endif() + + cpm_store_fetch_properties( + ${CPM_ARGS_NAME} "${download_directory}" + "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build" + ) + cpm_get_fetch_properties("${CPM_ARGS_NAME}") + + if(DEFINED CPM_ARGS_GIT_TAG AND NOT (PATCH_COMMAND IN_LIST CPM_ARGS_UNPARSED_ARGUMENTS)) + # warn if cache has been changed since checkout + cpm_check_git_working_dir_is_clean(${download_directory} ${CPM_ARGS_GIT_TAG} IS_CLEAN) + if(NOT ${IS_CLEAN}) + message( + WARNING "${CPM_INDENT} Cache for ${CPM_ARGS_NAME} (${download_directory}) is dirty" + ) + endif() + endif() + + cpm_add_subdirectory( + "${CPM_ARGS_NAME}" + "${DOWNLOAD_ONLY}" + "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}" + "${${CPM_ARGS_NAME}_BINARY_DIR}" + "${CPM_ARGS_EXCLUDE_FROM_ALL}" + "${CPM_ARGS_SYSTEM}" + "${CPM_ARGS_OPTIONS}" + ) + set(PACKAGE_INFO "${PACKAGE_INFO} at ${download_directory}") + + # As the source dir is already cached/populated, we override the call to FetchContent. + set(CPM_SKIP_FETCH TRUE) + cpm_override_fetchcontent( + "${lower_case_name}" SOURCE_DIR "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}" + BINARY_DIR "${${CPM_ARGS_NAME}_BINARY_DIR}" + ) + + else() + # Enable shallow clone when GIT_TAG is not a commit hash. Our guess may not be accurate, but + # it should guarantee no commit hash get mis-detected. 
+ if(NOT DEFINED CPM_ARGS_GIT_SHALLOW) + cpm_is_git_tag_commit_hash("${CPM_ARGS_GIT_TAG}" IS_HASH) + if(NOT ${IS_HASH}) + list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW TRUE) + endif() + endif() + + # remove timestamps so CMake will re-download the dependency + file(REMOVE_RECURSE ${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild) + set(PACKAGE_INFO "${PACKAGE_INFO} to ${download_directory}") + endif() + endif() + + cpm_create_module_file(${CPM_ARGS_NAME} "CPMAddPackage(\"${ARGN}\")") + + if(CPM_PACKAGE_LOCK_ENABLED) + if((CPM_ARGS_VERSION AND NOT CPM_ARGS_SOURCE_DIR) OR CPM_INCLUDE_ALL_IN_PACKAGE_LOCK) + cpm_add_to_package_lock(${CPM_ARGS_NAME} "${ARGN}") + elseif(CPM_ARGS_SOURCE_DIR) + cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "local directory") + else() + cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "${ARGN}") + endif() + endif() + + cpm_message( + STATUS "${CPM_INDENT} Adding package ${CPM_ARGS_NAME}@${CPM_ARGS_VERSION} (${PACKAGE_INFO})" + ) + + if(NOT CPM_SKIP_FETCH) + # CMake 3.28 added EXCLUDE, SYSTEM (3.25), and SOURCE_SUBDIR (3.18) to FetchContent_Declare. + # Calling FetchContent_MakeAvailable will then internally forward these options to + # add_subdirectory. Up until these changes, we had to call FetchContent_Populate and + # add_subdirectory separately, which is no longer necessary and has been deprecated as of 3.30. 
+ set(fetchContentDeclareExtraArgs "") + if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.28.0") + if(${CPM_ARGS_EXCLUDE_FROM_ALL}) + list(APPEND fetchContentDeclareExtraArgs EXCLUDE_FROM_ALL) + endif() + if(${CPM_ARGS_SYSTEM}) + list(APPEND fetchContentDeclareExtraArgs SYSTEM) + endif() + if(DEFINED CPM_ARGS_SOURCE_SUBDIR) + list(APPEND fetchContentDeclareExtraArgs SOURCE_SUBDIR ${CPM_ARGS_SOURCE_SUBDIR}) + endif() + # For CMake version <3.28 OPTIONS are parsed in cpm_add_subdirectory + if(CPM_ARGS_OPTIONS AND NOT DOWNLOAD_ONLY) + foreach(OPTION ${CPM_ARGS_OPTIONS}) + cpm_parse_option("${OPTION}") + set(${OPTION_KEY} "${OPTION_VALUE}") + endforeach() + endif() + endif() + cpm_declare_fetch( + "${CPM_ARGS_NAME}" ${fetchContentDeclareExtraArgs} "${CPM_ARGS_UNPARSED_ARGUMENTS}" + ) + + cpm_fetch_package("${CPM_ARGS_NAME}" ${DOWNLOAD_ONLY} populated ${CPM_ARGS_UNPARSED_ARGUMENTS}) + if(CPM_SOURCE_CACHE AND download_directory) + file(LOCK ${download_directory}/../cmake.lock RELEASE) + endif() + if(${populated} AND ${CMAKE_VERSION} VERSION_LESS "3.28.0") + cpm_add_subdirectory( + "${CPM_ARGS_NAME}" + "${DOWNLOAD_ONLY}" + "${${CPM_ARGS_NAME}_SOURCE_DIR}/${CPM_ARGS_SOURCE_SUBDIR}" + "${${CPM_ARGS_NAME}_BINARY_DIR}" + "${CPM_ARGS_EXCLUDE_FROM_ALL}" + "${CPM_ARGS_SYSTEM}" + "${CPM_ARGS_OPTIONS}" + ) + endif() + cpm_get_fetch_properties("${CPM_ARGS_NAME}") + endif() + + set(${CPM_ARGS_NAME}_ADDED YES) + cpm_export_variables("${CPM_ARGS_NAME}") +endfunction() + +# Fetch a previously declared package +macro(CPMGetPackage Name) + if(DEFINED "CPM_DECLARATION_${Name}") + CPMAddPackage(NAME ${Name}) + else() + message(SEND_ERROR "${CPM_INDENT} Cannot retrieve package ${Name}: no declaration available") + endif() +endmacro() + +# export variables available to the caller to the parent scope expects ${CPM_ARGS_NAME} to be set +macro(cpm_export_variables name) + set(${name}_SOURCE_DIR + "${${name}_SOURCE_DIR}" + PARENT_SCOPE + ) + set(${name}_BINARY_DIR + "${${name}_BINARY_DIR}" + 
PARENT_SCOPE
+  )
+  set(${name}_ADDED
+      "${${name}_ADDED}"
+      PARENT_SCOPE
+  )
+  # also tell the parent scope which package name was exported last
+  set(CPM_LAST_PACKAGE_NAME
+      "${name}"
+      PARENT_SCOPE
+  )
+endmacro()
+
+# declares a package, so that any call to CPMAddPackage for the package name will use these
+# arguments instead. Previous declarations will not be overridden.
+#
+# Name: package name; the declaration is kept in the variable `CPM_DECLARATION_<Name>`
+# ARGN: the arguments to remember, stored as given
+#
+# This is a macro, so the variable is set in the scope of the caller.
+macro(CPMDeclarePackage Name)
+  if(NOT DEFINED "CPM_DECLARATION_${Name}")
+    set("CPM_DECLARATION_${Name}" "${ARGN}")
+  endif()
+endmacro()
+
+# appends a `CPMDeclarePackage(<Name> ...)` entry for the package to `${CPM_PACKAGE_LOCK_FILE}`.
+# Nothing is written when CPM_DONT_CREATE_PACKAGE_LOCK is set.
+#
+# Name: package name
+# ARGN: package arguments; formatted by cpm_prettify_package_arguments before being written
+function(cpm_add_to_package_lock Name)
+  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
+    cpm_prettify_package_arguments(PRETTY_ARGN false ${ARGN})
+    file(APPEND ${CPM_PACKAGE_LOCK_FILE} "# ${Name}\nCPMDeclarePackage(${Name}\n${PRETTY_ARGN})\n")
+  endif()
+endfunction()
+
+# appends a commented-out `CPMDeclarePackage(<Name> ...)` entry, headed "(unversioned)", to
+# `${CPM_PACKAGE_LOCK_FILE}`. Nothing is written when CPM_DONT_CREATE_PACKAGE_LOCK is set.
+#
+# Name: package name
+# ARGN: package arguments; formatted by cpm_prettify_package_arguments in its "in comment" mode
+function(cpm_add_comment_to_package_lock Name)
+  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
+    cpm_prettify_package_arguments(PRETTY_ARGN true ${ARGN})
+    file(APPEND ${CPM_PACKAGE_LOCK_FILE}
+         "# ${Name} (unversioned)\n# CPMDeclarePackage(${Name}\n${PRETTY_ARGN}#)\n"
+    )
+  endif()
+endfunction()
+
+# includes the package lock file if it exists and creates a target `cpm-update-package-lock` to
+# update it
+#
+# file: path of the lock file; it is made absolute before use
+#
+# The target copies `${CPM_PACKAGE_LOCK_FILE}` over that absolute path. CPM_PACKAGE_LOCK_ENABLED is
+# set to true in the caller's scope (this is a macro). The whole body is skipped when
+# CPM_DONT_CREATE_PACKAGE_LOCK is set.
+macro(CPMUsePackageLock file)
+  if(NOT CPM_DONT_CREATE_PACKAGE_LOCK)
+    get_filename_component(CPM_ABSOLUTE_PACKAGE_LOCK_PATH ${file} ABSOLUTE)
+    if(EXISTS ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
+      include(${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
+    endif()
+    # the target is only created once, even if the macro is invoked repeatedly
+    if(NOT TARGET cpm-update-package-lock)
+      add_custom_target(
+        cpm-update-package-lock COMMAND ${CMAKE_COMMAND} -E copy ${CPM_PACKAGE_LOCK_FILE}
+                                        ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH}
+      )
+    endif()
+    set(CPM_PACKAGE_LOCK_ENABLED true)
+  endif()
+endmacro()
+
+# registers a package that has been added to CPM
+#
+# PACKAGE: package name, appended to the CPM_PACKAGES list
+# VERSION: version stored in `CPM_PACKAGE_<PACKAGE>_VERSION`
+#
+# Both the list and the version are written as INTERNAL cache entries.
+function(CPMRegisterPackage PACKAGE VERSION)
+  list(APPEND CPM_PACKAGES ${PACKAGE})
+  set(CPM_PACKAGES
+      ${CPM_PACKAGES}
+      CACHE INTERNAL ""
+  )
+  set("CPM_PACKAGE_${PACKAGE}_VERSION"
+      ${VERSION}
+      CACHE INTERNAL ""
+  )
+endfunction()
+
+# retrieve the current version of the package to ${OUTPUT}
+function(CPMGetPackageVersion PACKAGE OUTPUT)
+  # the value of CPM_PACKAGE_<PACKAGE>_VERSION is handed back through the caller's variable
+  # named by OUTPUT
+  set(${OUTPUT} "${CPM_PACKAGE_${PACKAGE}_VERSION}" PARENT_SCOPE)
+endfunction()
+
+# forwards a package declaration (PACKAGE plus all further arguments) to FetchContent_Declare.
+# In a dry run (CPM_DRY_RUN) nothing is declared and only a status message is printed.
+function(cpm_declare_fetch PACKAGE)
+  if(${CPM_DRY_RUN})
+    cpm_message(STATUS "${CPM_INDENT} Package not declared (dry run)")
+  else()
+    FetchContent_Declare(${PACKAGE} ${ARGN})
+  endif()
+endfunction()
+
+# copies the values of CPM_PACKAGE_<PACKAGE>_SOURCE_DIR and CPM_PACKAGE_<PACKAGE>_BINARY_DIR into
+# <PACKAGE>_SOURCE_DIR and <PACKAGE>_BINARY_DIR in the caller's scope. Does nothing in a dry run.
+function(cpm_get_fetch_properties PACKAGE)
+  if(${CPM_DRY_RUN})
+    return()
+  endif()
+
+  foreach(dir_kind SOURCE_DIR BINARY_DIR)
+    set(${PACKAGE}_${dir_kind} "${CPM_PACKAGE_${PACKAGE}_${dir_kind}}" PARENT_SCOPE)
+  endforeach()
+endfunction()
+
+# remembers the source and binary directory of a package as the INTERNAL cache entries
+# CPM_PACKAGE_<PACKAGE>_SOURCE_DIR and CPM_PACKAGE_<PACKAGE>_BINARY_DIR. Does nothing in a dry run.
+function(cpm_store_fetch_properties PACKAGE source_dir binary_dir)
+  if(${CPM_DRY_RUN})
+    return()
+  endif()
+
+  set(CPM_PACKAGE_${PACKAGE}_SOURCE_DIR "${source_dir}" CACHE INTERNAL "")
+  set(CPM_PACKAGE_${PACKAGE}_BINARY_DIR "${binary_dir}" CACHE INTERNAL "")
+endfunction()
+
+# adds a package through add_subdirectory when that is possible: the package must not be
+# DOWNLOAD_ONLY and SOURCE_DIR must contain a CMakeLists.txt.
+#
+# EXCLUDE adds EXCLUDE_FROM_ALL, SYSTEM adds SYSTEM (CMake 3.25 or newer only) and every entry of
+# OPTIONS is parsed with cpm_parse_option and set as a variable before the subdirectory is added.
+function(cpm_add_subdirectory PACKAGE DOWNLOAD_ONLY SOURCE_DIR BINARY_DIR EXCLUDE SYSTEM OPTIONS)
+  if(DOWNLOAD_ONLY OR NOT EXISTS ${SOURCE_DIR}/CMakeLists.txt)
+    return()
+  endif()
+
+  set(addSubdirectoryExtraArgs "")
+  if(EXCLUDE)
+    list(APPEND addSubdirectoryExtraArgs EXCLUDE_FROM_ALL)
+  endif()
+  if("${SYSTEM}" AND "${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.25")
+    # https://cmake.org/cmake/help/latest/prop_dir/SYSTEM.html#prop_dir:SYSTEM
+    list(APPEND addSubdirectoryExtraArgs SYSTEM)
+  endif()
+
+  if(OPTIONS)
+    foreach(OPTION ${OPTIONS})
+      cpm_parse_option("${OPTION}")
+      set(${OPTION_KEY} "${OPTION_VALUE}")
+    endforeach()
+  endif()
+
+  # the message indent carries the package name while the subdirectory is processed and is put
+  # back afterwards
+  set(CPM_OLD_INDENT "${CPM_INDENT}")
+  set(CPM_INDENT "${CPM_INDENT} ${PACKAGE}:")
+  add_subdirectory(${SOURCE_DIR} ${BINARY_DIR} ${addSubdirectoryExtraArgs})
+  set(CPM_INDENT "${CPM_OLD_INDENT}")
+endfunction()
+
+#
downloads a previously declared package via FetchContent and exports the variables
+# `${PACKAGE}_SOURCE_DIR` and `${PACKAGE}_BINARY_DIR` to the parent scope
+#
+# PACKAGE:       name the package was declared under
+# DOWNLOAD_ONLY: on CMake >= 3.28, populate the sources without adding them to the build
+# populated:     name of a variable in the caller's scope; set to TRUE when this call performed
+#                the population and to FALSE otherwise (including dry runs)
+# ARGN:          forwarded to FetchContent_Populate in the DOWNLOAD_ONLY case
+function(cpm_fetch_package PACKAGE DOWNLOAD_ONLY populated)
+  set(${populated}
+      FALSE
+      PARENT_SCOPE
+  )
+  if(${CPM_DRY_RUN})
+    cpm_message(STATUS "${CPM_INDENT} Package ${PACKAGE} not fetched (dry run)")
+    return()
+  endif()
+
+  FetchContent_GetProperties(${PACKAGE})
+
+  string(TOLOWER "${PACKAGE}" lower_case_name)
+
+  if(NOT ${lower_case_name}_POPULATED)
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.28.0")
+      if(DOWNLOAD_ONLY)
+        # MakeAvailable will call add_subdirectory internally which is not what we want when
+        # DOWNLOAD_ONLY is set. Populate will only download the dependency without adding it to the
+        # build
+        FetchContent_Populate(
+          ${PACKAGE}
+          SOURCE_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-src"
+          BINARY_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-build"
+          SUBBUILD_DIR "${CPM_FETCHCONTENT_BASE_DIR}/${lower_case_name}-subbuild"
+          ${ARGN}
+        )
+      else()
+        FetchContent_MakeAvailable(${PACKAGE})
+      endif()
+    else()
+      FetchContent_Populate(${PACKAGE})
+    endif()
+    set(${populated}
+        TRUE
+        PARENT_SCOPE
+    )
+  endif()
+
+  # keyed by this function's own PACKAGE argument rather than by a variable that only exists in
+  # the scope of the caller
+  cpm_store_fetch_properties(
+    ${PACKAGE} ${${lower_case_name}_SOURCE_DIR} ${${lower_case_name}_BINARY_DIR}
+  )
+
+  set(${PACKAGE}_SOURCE_DIR
+      ${${lower_case_name}_SOURCE_DIR}
+      PARENT_SCOPE
+  )
+  set(${PACKAGE}_BINARY_DIR
+      ${${lower_case_name}_BINARY_DIR}
+      PARENT_SCOPE
+  )
+endfunction()
+
+# splits a package option
+#
+# OPTION: a string of the form "KEY VALUE" or just "KEY"
+#
+# Sets OPTION_KEY and OPTION_VALUE in the caller's scope. The key is everything up to the first
+# space; the value is everything after that space, or "ON" when there is no value.
+function(cpm_parse_option OPTION)
+  string(REGEX MATCH "^[^ ]+" OPTION_KEY "${OPTION}")
+  string(LENGTH "${OPTION}" OPTION_LENGTH)
+  string(LENGTH "${OPTION_KEY}" OPTION_KEY_LENGTH)
+  # both operands are integers, so compare them numerically
+  if(OPTION_KEY_LENGTH EQUAL OPTION_LENGTH)
+    # no value for key provided, assume user wants to set option to "ON"
+    set(OPTION_VALUE "ON")
+  else()
+    # skip the separating space as well
+    math(EXPR OPTION_KEY_LENGTH "${OPTION_KEY_LENGTH}+1")
+    string(SUBSTRING "${OPTION}" "${OPTION_KEY_LENGTH}" "-1" OPTION_VALUE)
+  endif()
+  set(OPTION_KEY
+      "${OPTION_KEY}"
+      PARENT_SCOPE
+  )
+  set(OPTION_VALUE
+      "${OPTION_VALUE}"
+      PARENT_SCOPE
+  )
+endfunction()
+
+# guesses the package version from a git tag
+#
+# GIT_TAG: tag, branch name or commit hash
+# RESULT:  name of the variable in the caller's scope that receives the guess: 0 for a
+#          40-character tag (taken to be a hash), otherwise the run of digits and dots that
+#          follows an optional leading "v"
+function(cpm_get_version_from_git_tag GIT_TAG RESULT)
+  # GIT_TAG is quoted so that an empty tag is a zero-length string instead of a missing argument
+  string(LENGTH "${GIT_TAG}" length)
+  if(length EQUAL 40)
+    # GIT_TAG is probably a git hash
+    set(${RESULT}
+        0
+        PARENT_SCOPE
+    )
+  else()
+    string(REGEX MATCH "v?([0123456789.]*).*" _ "${GIT_TAG}")
+    set(${RESULT}
+        ${CMAKE_MATCH_1}
+        PARENT_SCOPE
+    )
+  endif()
+endfunction()
+
+# guesses if the git tag is a commit hash or an actual tag or a branch name.
+#
+# GIT_TAG: tag, branch name or commit hash
+# RESULT:  name of the variable in the caller's scope; set to 1 when GIT_TAG is 7 to 40
+#          hexadecimal characters long and to 0 otherwise
+function(cpm_is_git_tag_commit_hash GIT_TAG RESULT)
+  string(LENGTH "${GIT_TAG}" length)
+  # full hash has 40 characters, and short hash has at least 7 characters.
+  if(length LESS 7 OR length GREATER 40)
+    set(${RESULT}
+        0
+        PARENT_SCOPE
+    )
+  else()
+    # quoted: an unquoted value that happens to be the name of a variable would be replaced by
+    # that variable's content before matching
+    if("${GIT_TAG}" MATCHES "^[a-fA-F0-9]+$")
+      set(${RESULT}
+          1
+          PARENT_SCOPE
+      )
+    else()
+      set(${RESULT}
+          0
+          PARENT_SCOPE
+      )
+    endif()
+  endif()
+endfunction()
+
+# formats package arguments as text for the package lock file
+#
+# OUT_VAR:       name of the variable in the caller's scope that receives the text
+# IS_IN_COMMENT: when true every emitted line is prefixed with "#"
+# ARGN:          the package arguments; known single-value keywords are written one per line,
+#                multi-value keywords as a heading followed by one quoted entry per line, and
+#                anything unrecognised on one final line
+function(cpm_prettify_package_arguments OUT_VAR IS_IN_COMMENT)
+  set(oneValueArgs
+      NAME
+      FORCE
+      VERSION
+      GIT_TAG
+      DOWNLOAD_ONLY
+      GITHUB_REPOSITORY
+      GITLAB_REPOSITORY
+      BITBUCKET_REPOSITORY
+      GIT_REPOSITORY
+      SOURCE_DIR
+      FIND_PACKAGE_ARGUMENTS
+      NO_CACHE
+      SYSTEM
+      GIT_SHALLOW
+      EXCLUDE_FROM_ALL
+      SOURCE_SUBDIR
+  )
+  set(multiValueArgs URL OPTIONS DOWNLOAD_COMMAND)
+  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  # start from an empty accumulator; a function scope inherits the variables of its callers
+  set(PRETTY_OUT_VAR "")
+
+  foreach(oneArgName ${oneValueArgs})
+    if(DEFINED CPM_ARGS_${oneArgName})
+      if(${IS_IN_COMMENT})
+        string(APPEND PRETTY_OUT_VAR "#")
+      endif()
+      # quoted: the keyword names (NAME, VERSION, SOURCE_DIR, ...) are common variable names and
+      # must be compared as plain strings
+      if("${oneArgName}" STREQUAL "SOURCE_DIR")
+        # write the path relative to the literal text ${CMAKE_SOURCE_DIR}
+        string(REPLACE ${CMAKE_SOURCE_DIR} "\${CMAKE_SOURCE_DIR}" CPM_ARGS_${oneArgName}
+                       ${CPM_ARGS_${oneArgName}}
+        )
+      endif()
+      string(APPEND PRETTY_OUT_VAR " ${oneArgName} ${CPM_ARGS_${oneArgName}}\n")
+    endif()
+  endforeach()
+  foreach(multiArgName ${multiValueArgs})
+    if(DEFINED CPM_ARGS_${multiArgName})
+      if(${IS_IN_COMMENT})
+        string(APPEND PRETTY_OUT_VAR "#")
+      endif()
+      string(APPEND PRETTY_OUT_VAR " ${multiArgName}\n")
+      foreach(singleOption ${CPM_ARGS_${multiArgName}})
+        if(${IS_IN_COMMENT})
+          string(APPEND PRETTY_OUT_VAR "#")
+        endif()
+        string(APPEND PRETTY_OUT_VAR " \"${singleOption}\"\n")
+      endforeach()
+    endif()
+  endforeach()
+
+  if(NOT "${CPM_ARGS_UNPARSED_ARGUMENTS}" STREQUAL "")
+    if(${IS_IN_COMMENT})
+      string(APPEND PRETTY_OUT_VAR "#")
+    endif()
+    string(APPEND PRETTY_OUT_VAR " ")
+    foreach(CPM_ARGS_UNPARSED_ARGUMENT ${CPM_ARGS_UNPARSED_ARGUMENTS})
+      string(APPEND PRETTY_OUT_VAR " ${CPM_ARGS_UNPARSED_ARGUMENT}")
+    endforeach()
+    string(APPEND PRETTY_OUT_VAR "\n")
+  endif()
+
+  set(${OUT_VAR}
+      ${PRETTY_OUT_VAR}
+      PARENT_SCOPE
+  )
+
+endfunction()
diff --git a/examples/filter_plot_tool/filter_plot_tool.cpp b/examples/filter_plot_tool/filter_plot_tool.cpp
index 53e6444..f284853 100644
--- a/examples/filter_plot_tool/filter_plot_tool.cpp
+++ b/examples/filter_plot_tool/filter_plot_tool.cpp
@@ -1,3 +1,17 @@
+/*
+ * sst-filters - A header-only collection of SIMD filter
+ * implementations by the Surge Synth Team
+ *
+ * Copyright 2019-2024, various authors, as described in the GitHub
+ * transaction log.
+ *
+ * sst-filters is released under the Gnu General Public Licens
+ * version 3 or later. Some of the filters in this package
+ * originated in the version of Surge open sourced in 2018.
+ * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include #include #include diff --git a/examples/filters_example_plugin/FilterPlotComponent.cpp b/examples/filters_example_plugin/FilterPlotComponent.cpp index 5d4426f..72fd024 100644 --- a/examples/filters_example_plugin/FilterPlotComponent.cpp +++ b/examples/filters_example_plugin/FilterPlotComponent.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the GNU General Public License + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "FilterPlotComponent.h" #include "FiltersPlugin.h" diff --git a/examples/filters_example_plugin/FilterPlotComponent.h b/examples/filters_example_plugin/FilterPlotComponent.h index b34d5ce..2ba35ae 100644 --- a/examples/filters_example_plugin/FilterPlotComponent.h +++ b/examples/filters_example_plugin/FilterPlotComponent.h @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the GNU General Public License + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018.
+ * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #ifndef SSTFILTERS_EXAMPLES_FILTERS_EXAMPLE_PLUGIN_FILTERPLOTCOMPONENT_H #define SSTFILTERS_EXAMPLES_FILTERS_EXAMPLE_PLUGIN_FILTERPLOTCOMPONENT_H diff --git a/examples/filters_example_plugin/FiltersPlugin.cpp b/examples/filters_example_plugin/FiltersPlugin.cpp index c6d3ece..acfe024 100644 --- a/examples/filters_example_plugin/FiltersPlugin.cpp +++ b/examples/filters_example_plugin/FiltersPlugin.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "FiltersPlugin.h" #include "FiltersPluginEditor.h" @@ -13,10 +27,10 @@ FiltersPlugin::FiltersPlugin() vts(*this, nullptr, juce::Identifier("Parameters"), createParameters()) { using namespace ParamTags; - freqHzParam = vts.getRawParameterValue (freqTag); - resParam = vts.getRawParameterValue (resTag); - filterTypeParam = vts.getRawParameterValue (filterTypeTag); - filterSubTypeParam = vts.getRawParameterValue (filterSubTypeTag); + freqHzParam = vts.getRawParameterValue(freqTag); + resParam = vts.getRawParameterValue(resTag); + filterTypeParam = vts.getRawParameterValue(filterTypeTag); + filterSubTypeParam = vts.getRawParameterValue(filterSubTypeTag); } juce::AudioProcessorValueTreeState::ParameterLayout FiltersPlugin::createParameters() @@ -49,11 +63,13 @@ juce::AudioProcessorValueTreeState::ParameterLayout FiltersPlugin::createParamet resTag, "Resonance", juce::NormalisableRange{0.0f, 1.0f}, 0.5f)); juce::StringArray filterTypeChoices; - for (const auto& 
filter_type_name : sst::filters::filter_type_names) + for (const auto &filter_type_name : sst::filters::filter_type_names) filterTypeChoices.add(filter_type_name); - params.push_back(std::make_unique (filterTypeTag, "Filter Type", filterTypeChoices, 0)); - params.push_back(std::make_unique(filterSubTypeTag, "Filter Sub-Type", 0, sst::filters::FilterSubType::st_tripole_HHH3, 0)); + params.push_back(std::make_unique(filterTypeTag, "Filter Type", + filterTypeChoices, 0)); + params.push_back(std::make_unique( + filterSubTypeTag, "Filter Sub-Type", 0, sst::filters::FilterSubType::st_tripole_HHH3, 0)); return {params.begin(), params.end()}; } @@ -80,8 +96,8 @@ void FiltersPlugin::prepareToPlay(double sampleRate, int samplesPerBlock) for (auto &filt : filterUnits) filt.reset(); - lastFilterType = ParamConversions::getFilterType (filterTypeParam); - lastFilterSubType = ParamConversions::getFilterSubType (filterSubTypeParam); + lastFilterType = ParamConversions::getFilterType(filterTypeParam); + lastFilterSubType = ParamConversions::getFilterSubType(filterSubTypeParam); } void FiltersPlugin::processBlock(juce::AudioBuffer &buffer, juce::MidiBuffer &) @@ -102,51 +118,48 @@ void FiltersPlugin::processBlock(juce::AudioBuffer &buffer, juce::MidiBuf } auto filterUnitPtr = sst::filters::GetQFPtrFilterUnit(filterType, filterSubType); - coeffMaker.MakeCoeffs(ParamConversions::freq_hz_to_note_num (*freqHzParam), *resParam, filterType, filterSubType, nullptr, false); + coeffMaker.MakeCoeffs(ParamConversions::freq_hz_to_note_num(*freqHzParam), *resParam, + filterType, filterSubType, nullptr, false); if (filterUnitPtr == nullptr) return; // no filter to process! 
for (int ch = 0; ch < numChannels; ++ch) { - auto* x = buffer.getWritePointer (ch); + auto *x = buffer.getWritePointer(ch); - auto& filter = filterUnits[ch]; - coeffMaker.updateState (filter.filterState); + auto &filter = filterUnits[ch]; + coeffMaker.updateState(filter.filterState); for (int n = 0; n < numSamples; ++n) { auto yVec = filterUnitPtr(&filter.filterState, _mm_set_ps1(x[n])); float yArr alignas(16)[4]; - _mm_store_ps (yArr, yVec); + _mm_store_ps(yArr, yVec); x[n] = yArr[0]; } - } coeffMaker.updateCoefficients(filterUnits[0].filterState); } -juce::AudioProcessorEditor *FiltersPlugin::createEditor() -{ - return new FiltersPluginEditor (*this); -} +juce::AudioProcessorEditor *FiltersPlugin::createEditor() { return new FiltersPluginEditor(*this); } void FiltersPlugin::getStateInformation(juce::MemoryBlock &data) { auto state = vts.copyState(); - std::unique_ptr xml (state.createXml()); - copyXmlToBinary (*xml, data); + std::unique_ptr xml(state.createXml()); + copyXmlToBinary(*xml, data); } void FiltersPlugin::setStateInformation(const void *data, int sizeInBytes) { - std::unique_ptr xmlState (getXmlFromBinary (data, sizeInBytes)); + std::unique_ptr xmlState(getXmlFromBinary(data, sizeInBytes)); if (xmlState != nullptr) - if (xmlState->hasTagName (vts.state.getType())) - vts.replaceState (juce::ValueTree::fromXml (*xmlState)); + if (xmlState->hasTagName(vts.state.getType())) + vts.replaceState(juce::ValueTree::fromXml(*xmlState)); } // This creates new instances of the plugin diff --git a/examples/filters_example_plugin/FiltersPlugin.h b/examples/filters_example_plugin/FiltersPlugin.h index 5e9543b..079cadf 100644 --- a/examples/filters_example_plugin/FiltersPlugin.h +++ b/examples/filters_example_plugin/FiltersPlugin.h @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. 
+ * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #ifndef SSTFILTERS_EXAMPLES_FILTERS_EXAMPLE_PLUGIN_FILTERSPLUGIN_H #define SSTFILTERS_EXAMPLES_FILTERS_EXAMPLE_PLUGIN_FILTERSPLUGIN_H diff --git a/examples/filters_example_plugin/FiltersPluginEditor.cpp b/examples/filters_example_plugin/FiltersPluginEditor.cpp index a7798ba..0117e11 100644 --- a/examples/filters_example_plugin/FiltersPluginEditor.cpp +++ b/examples/filters_example_plugin/FiltersPluginEditor.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "FiltersPluginEditor.h" class FiltersPluginEditor::SubTypeComboBoxParameterAttachment : private juce::ComboBox::Listener diff --git a/examples/filters_example_plugin/FiltersPluginEditor.h b/examples/filters_example_plugin/FiltersPluginEditor.h index 3564be9..6ab8f0c 100644 --- a/examples/filters_example_plugin/FiltersPluginEditor.h +++ b/examples/filters_example_plugin/FiltersPluginEditor.h @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. 
Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #ifndef SSTFILTERS_EXAMPLES_FILTERS_EXAMPLE_PLUGIN_FILTERSPLUGINEDITOR_H #define SSTFILTERS_EXAMPLES_FILTERS_EXAMPLE_PLUGIN_FILTERSPLUGINEDITOR_H diff --git a/include-extras/sst/filters/FilterPlotter.h b/include-extras/sst/filters/FilterPlotter.h index f65ba79..c1f6a5a 100644 --- a/include-extras/sst/filters/FilterPlotter.h +++ b/include-extras/sst/filters/FilterPlotter.h @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #ifndef INCLUDE_EXTRAS_SST_FILTERS_FILTERPLOTTER_H #define INCLUDE_EXTRAS_SST_FILTERS_FILTERPLOTTER_H @@ -28,7 +42,7 @@ class FilterPlotter std::pair, std::vector> plotFilterMagnitudeResponse(sst::filters::FilterType filterType, sst::filters::FilterSubType filterSubType, float pitch, float res, - const FilterPlotParameters& params = {}) + const FilterPlotParameters ¶ms = {}) { // set up input sweep std::vector sweepBuffer(fftSize, 0.0f); @@ -36,9 +50,9 @@ class FilterPlotter // set up filter float delayBuffer[4][sst::filters::utilities::MAX_FB_COMB + - sst::filters::utilities::SincTable::FIRipol_N]; + sst::filters::utilities::SincTable::FIRipol_N]; auto filterState = sst::filters::QuadFilterUnitState{}; - for (auto i=0; i<4; ++i) + for (auto i = 0; i < 4; ++i) { filterState.DB[i] = &(delayBuffer[i][0]); } @@ -52,37 +66,45 @@ class FilterPlotter // process filter std::vector filterBuffer(fftSize, 
0.0f); if (filterUnitPtr != nullptr) - runFilter (filterState, filterUnitPtr, sweepBuffer.data(), filterBuffer.data(), fftSize); + runFilter(filterState, filterUnitPtr, sweepBuffer.data(), filterBuffer.data(), fftSize); else - std::copy (sweepBuffer.begin(), sweepBuffer.end(), filterBuffer.begin()); + std::copy(sweepBuffer.begin(), sweepBuffer.end(), filterBuffer.begin()); - auto magResponseDB = computeFrequencyResponse(sweepBuffer.data(), filterBuffer.data(), fftSize); - auto magResponseDBSmoothed = freqSmooth(magResponseDB.data(), (int) magResponseDB.size(), params.freqSmoothOctaves); - auto freqAxis = fftFreqs((int) magResponseDB.size(), 1.0f / params.sampleRate); + auto magResponseDB = + computeFrequencyResponse(sweepBuffer.data(), filterBuffer.data(), fftSize); + auto magResponseDBSmoothed = + freqSmooth(magResponseDB.data(), (int)magResponseDB.size(), params.freqSmoothOctaves); + auto freqAxis = fftFreqs((int)magResponseDB.size(), 1.0f / params.sampleRate); - return { std::move (freqAxis), std::move (magResponseDBSmoothed) }; + return {std::move(freqAxis), std::move(magResponseDBSmoothed)}; } private: - static void generateLogSweep(float *buffer, int nSamples, const FilterPlotParameters& params) + static void generateLogSweep(float *buffer, int nSamples, const FilterPlotParameters ¶ms) { const auto beta = (float)nSamples / std::log(params.endFreqHz / params.startFreqHz); for (int i = 0; i < nSamples; i++) { - float phase = 2.0f * (float)M_PI * beta * params.startFreqHz * - (std::pow(params.endFreqHz / params.startFreqHz, (float)i / (float)nSamples) - 1.0f); + float phase = + 2.0f * (float)M_PI * beta * params.startFreqHz * + (std::pow(params.endFreqHz / params.startFreqHz, (float)i / (float)nSamples) - + 1.0f); - buffer[i] = params.inputAmplitude * std::sin((phase + (float)M_PI / 180.0f) / params.sampleRate); + buffer[i] = params.inputAmplitude * + std::sin((phase + (float)M_PI / 180.0f) / params.sampleRate); } } - static void runFilter 
(sst::filters::QuadFilterUnitState &filterState, sst::filters::FilterUnitQFPtr &filterUnitPtr, const float* inBuffer, float* outBuffer, int numSamples) + static void runFilter(sst::filters::QuadFilterUnitState &filterState, + sst::filters::FilterUnitQFPtr &filterUnitPtr, const float *inBuffer, + float *outBuffer, int numSamples) { // reset filter state - std::fill (filterState.R, &filterState.R[sst::filters::n_filter_registers], _mm_setzero_ps()); + std::fill(filterState.R, &filterState.R[sst::filters::n_filter_registers], + _mm_setzero_ps()); - for (int i=0; i<4; ++i) + for (int i = 0; i < 4; ++i) { filterState.WP[i] = 0; filterState.active[i] = 0; @@ -94,24 +116,25 @@ class FilterPlotter auto yVec = filterUnitPtr(&filterState, _mm_set_ps1(inBuffer[i])); float yArr alignas(16)[4]; - _mm_store_ps (yArr, yVec); + _mm_store_ps(yArr, yVec); outBuffer[i] = yArr[0]; } }; - std::vector computeFrequencyResponse(float* sweepBuffer, float* filterBuffer, int numSamples) + std::vector computeFrequencyResponse(float *sweepBuffer, float *filterBuffer, + int numSamples) { const auto fftDataSize = numSamples * 2; - std::vector sweepFFT (fftDataSize, 0.0f); - std::copy (sweepBuffer, sweepBuffer + numSamples, sweepFFT.begin()); - fft.performFrequencyOnlyForwardTransform (sweepFFT.data(), true); + std::vector sweepFFT(fftDataSize, 0.0f); + std::copy(sweepBuffer, sweepBuffer + numSamples, sweepFFT.begin()); + fft.performFrequencyOnlyForwardTransform(sweepFFT.data(), true); - std::vector filtFFT (fftDataSize, 0.0f); - std::copy (filterBuffer, filterBuffer + numSamples, filtFFT.begin()); - fft.performFrequencyOnlyForwardTransform (filtFFT.data(), true); + std::vector filtFFT(fftDataSize, 0.0f); + std::copy(filterBuffer, filterBuffer + numSamples, filtFFT.begin()); + fft.performFrequencyOnlyForwardTransform(filtFFT.data(), true); const auto fftOutSize = numSamples / 2 + 1; - std::vector magnitudeResponseDB (fftOutSize, 0.0f); + std::vector magnitudeResponseDB(fftOutSize, 0.0f); for (int 
i = 0; i < fftOutSize; ++i) magnitudeResponseDB[i] = juce::Decibels::gainToDecibels(filtFFT[i] / sweepFFT[i]); @@ -120,26 +143,29 @@ class FilterPlotter static std::vector fftFreqs(int N, float T) { - auto val = 0.5f / ((float) N * T); + auto val = 0.5f / ((float)N * T); - std::vector results (N, 0.0f); - std::iota (results.begin(), results.end(), 0.0f); - std::transform(results.begin(), results.end(), results.begin(), [val] (auto x) { return x * val; }); + std::vector results(N, 0.0f); + std::iota(results.begin(), results.end(), 0.0f); + std::transform(results.begin(), results.end(), results.begin(), + [val](auto x) { return x * val; }); return results; } - static std::vector freqSmooth (const float* data, int numSamples, float smFactor = 1.0f / 24.0f) + static std::vector freqSmooth(const float *data, int numSamples, + float smFactor = 1.0f / 24.0f) { - const auto s = smFactor > 1.0f ? smFactor : std::sqrt (std::pow (2.0f, smFactor)); + const auto s = smFactor > 1.0f ? smFactor : std::sqrt(std::pow(2.0f, smFactor)); - std::vector smoothedVec (numSamples, 0.0f); + std::vector smoothedVec(numSamples, 0.0f); for (int i = 0; i < numSamples; ++i) { - auto i1 = std::max (int ((float) i / s), 0); - auto i2 = std::min (int ((float) i * s) + 1, numSamples - 1); + auto i1 = std::max(int((float)i / s), 0); + auto i2 = std::min(int((float)i * s) + 1, numSamples - 1); - smoothedVec[i] = i2 > i1 ? std::accumulate(data + i1, data + i2, 0.0f) / float (i2 - i1) : 0.0f; + smoothedVec[i] = + i2 > i1 ? 
std::accumulate(data + i1, data + i2, 0.0f) / float(i2 - i1) : 0.0f; } return smoothedVec; diff --git a/include/sst/filters/CutoffWarp.h b/include/sst/filters/CutoffWarp.h index 885365d..1553540 100644 --- a/include/sst/filters/CutoffWarp.h +++ b/include/sst/filters/CutoffWarp.h @@ -37,10 +37,10 @@ static float clampedFrequency(float pitch, float sampleRate, TuningProvider *pro return freq; } -#define F(a) _mm_set_ps1(a) -#define M(a, b) _mm_mul_ps(a, b) -#define A(a, b) _mm_add_ps(a, b) -#define S(a, b) _mm_sub_ps(a, b) +#define F(a) SIMD_MM(set_ps1)(a) +#define M(a, b) SIMD_MM(mul_ps)(a, b) +#define A(a, b) SIMD_MM(add_ps)(a, b) +#define S(a, b) SIMD_MM(sub_ps)(a, b) enum Saturator { @@ -52,48 +52,53 @@ enum Saturator // this is a duplicate of the code in QuadFilterWaveshapers.cpp except without the multiplication by // 'drive' and without the unused QuadFilterWaveshaperState pointer. -static inline __m128 ojd_waveshaper_ps(const __m128 x) noexcept +static inline SIMD_M128 ojd_waveshaper_ps(const SIMD_M128 x) noexcept { - const auto pm17 = _mm_set1_ps(-1.7f); - const auto p11 = _mm_set1_ps(1.1f); - const auto pm03 = _mm_set1_ps(-0.3f); - const auto p09 = _mm_set1_ps(0.9f); - - const auto denLow = _mm_set1_ps(1.f / (4 * (1 - 0.3f))); - const auto denHigh = _mm_set1_ps(1.f / (4 * (1 - 0.9f))); - - auto maskNeg = _mm_cmple_ps(x, pm17); // in <= -1.7f - auto maskPos = _mm_cmpge_ps(x, p11); // in > 1.1f - auto maskLow = _mm_andnot_ps(maskNeg, _mm_cmplt_ps(x, pm03)); // in > -1.7 && in < =0.3 - auto maskHigh = _mm_andnot_ps(maskPos, _mm_cmpgt_ps(x, p09)); // in > 0.9 && in < 1.1 - auto maskMid = _mm_and_ps(_mm_cmpge_ps(x, pm03), _mm_cmple_ps(x, p09)); // the middle - - const auto vNeg = _mm_set1_ps(-1.0); - const auto vPos = _mm_set1_ps(1.0); + const auto pm17 = SIMD_MM(set1_ps)(-1.7f); + const auto p11 = SIMD_MM(set1_ps)(1.1f); + const auto pm03 = SIMD_MM(set1_ps)(-0.3f); + const auto p09 = SIMD_MM(set1_ps)(0.9f); + + const auto denLow = SIMD_MM(set1_ps)(1.f / (4 * (1 
- 0.3f))); + const auto denHigh = SIMD_MM(set1_ps)(1.f / (4 * (1 - 0.9f))); + + auto maskNeg = SIMD_MM(cmple_ps)(x, pm17); // in <= -1.7f + auto maskPos = SIMD_MM(cmpge_ps)(x, p11); // in > 1.1f + auto maskLow = + SIMD_MM(andnot_ps)(maskNeg, SIMD_MM(cmplt_ps)(x, pm03)); // in > -1.7 && in < =0.3 + auto maskHigh = SIMD_MM(andnot_ps)(maskPos, SIMD_MM(cmpgt_ps)(x, p09)); // in > 0.9 && in < 1.1 + auto maskMid = + SIMD_MM(and_ps)(SIMD_MM(cmpge_ps)(x, pm03), SIMD_MM(cmple_ps)(x, p09)); // the middle + + const auto vNeg = SIMD_MM(set1_ps)(-1.0); + const auto vPos = SIMD_MM(set1_ps)(1.0); auto vMid = x; - auto xlow = _mm_sub_ps(x, pm03); - auto vLow = _mm_add_ps(xlow, _mm_mul_ps(denLow, _mm_mul_ps(xlow, xlow))); - vLow = _mm_add_ps(vLow, pm03); + auto xlow = SIMD_MM(sub_ps)(x, pm03); + auto vLow = SIMD_MM(add_ps)(xlow, SIMD_MM(mul_ps)(denLow, SIMD_MM(mul_ps)(xlow, xlow))); + vLow = SIMD_MM(add_ps)(vLow, pm03); - auto xhi = _mm_sub_ps(x, p09); - auto vHi = _mm_sub_ps(xhi, _mm_mul_ps(denHigh, _mm_mul_ps(xhi, xhi))); - vHi = _mm_add_ps(vHi, p09); + auto xhi = SIMD_MM(sub_ps)(x, p09); + auto vHi = SIMD_MM(sub_ps)(xhi, SIMD_MM(mul_ps)(denHigh, SIMD_MM(mul_ps)(xhi, xhi))); + vHi = SIMD_MM(add_ps)(vHi, p09); - return _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_and_ps(maskNeg, vNeg), _mm_and_ps(maskLow, vLow)), - _mm_add_ps(_mm_and_ps(maskHigh, vHi), _mm_and_ps(maskPos, vPos))), - _mm_and_ps(maskMid, vMid)); + return SIMD_MM(add_ps)( + SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(and_ps)(maskNeg, vNeg), SIMD_MM(and_ps)(maskLow, vLow)), + SIMD_MM(add_ps)(SIMD_MM(and_ps)(maskHigh, vHi), SIMD_MM(and_ps)(maskPos, vPos))), + SIMD_MM(and_ps)(maskMid, vMid)); } -static inline __m128 doNLFilter(const __m128 input, const __m128 a1, const __m128 a2, - const __m128 b0, const __m128 b1, const __m128 b2, - const __m128 makeup, const int sat, __m128 &z1, __m128 &z2) noexcept +static inline SIMD_M128 doNLFilter(const SIMD_M128 input, const SIMD_M128 a1, const SIMD_M128 a2, + const SIMD_M128 b0, const 
SIMD_M128 b1, const SIMD_M128 b2, + const SIMD_M128 makeup, const int sat, SIMD_M128 &z1, + SIMD_M128 &z2) noexcept { // out = z1 + b0 * input - const __m128 out = A(z1, M(b0, input)); + const auto out = A(z1, M(b0, input)); // nonlinear feedback = saturator(out) - __m128 nf; + SIMD_M128 nf; switch (sat) { case SAT_SOFT: @@ -228,7 +233,7 @@ void makeCoefficients(FilterCoefficientMaker *cm, float freq, fl } template -inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) +inline SIMD_M128 process(QuadFilterUnitState *__restrict f, SIMD_M128 input) { // lower 2 bits of subtype is the stage count const int stages = subtype & 3; diff --git a/include/sst/filters/CytomicSVF.h b/include/sst/filters/CytomicSVF.h index d4c917d..f7cd829 100644 --- a/include/sst/filters/CytomicSVF.h +++ b/include/sst/filters/CytomicSVF.h @@ -59,13 +59,13 @@ namespace sst::filters { struct CytomicSVF { - __m128 ic1eq{_mm_setzero_ps()}, ic2eq{_mm_setzero_ps()}; - __m128 g, k, gk, a1, a2, a3, m0, m1, m2; + SIMD_M128 ic1eq{SIMD_MM(setzero_ps)()}, ic2eq{SIMD_MM(setzero_ps)()}; + SIMD_M128 g, k, gk, a1, a2, a3, m0, m1, m2; - __m128 oneSSE{_mm_set1_ps(1.0)}; - __m128 negoneSSE{_mm_set1_ps(-1.0)}; - __m128 twoSSE{_mm_set1_ps(2.0)}; - __m128 negtwoSSE{_mm_set1_ps(-2.0)}; + SIMD_M128 oneSSE{SIMD_MM(set1_ps)(1.0)}; + SIMD_M128 negoneSSE{SIMD_MM(set1_ps)(-1.0)}; + SIMD_M128 twoSSE{SIMD_MM(set1_ps)(2.0)}; + SIMD_M128 negtwoSSE{SIMD_MM(set1_ps)(-2.0)}; enum Mode { LP, @@ -94,13 +94,13 @@ struct CytomicSVF res = std::clamp(res, 0.f, 0.98f); bellShelfAmp = std::max(bellShelfAmp, 0.001f); - g = _mm_set1_ps(sst::basic_blocks::dsp::fasttan(M_PI * conorm)); - k = _mm_set1_ps(2.0 - 2 * res); + g = SIMD_MM(set1_ps)(sst::basic_blocks::dsp::fasttan(M_PI * conorm)); + k = SIMD_MM(set1_ps)(2.0 - 2 * res); if (mode == BELL) { - k = _mm_div_ps(k, _mm_set1_ps(bellShelfAmp)); + k = SIMD_MM(div_ps)(k, SIMD_MM(set1_ps)(bellShelfAmp)); } - setCoeffPostGK(mode, _mm_set1_ps(bellShelfAmp)); + 
setCoeffPostGK(mode, SIMD_MM(set1_ps)(bellShelfAmp)); } void setCoeff(Mode mode, float freqL, float freqR, float resL, float resR, float srInv, @@ -108,87 +108,88 @@ struct CytomicSVF { auto coL = M_PI * std::clamp(freqL * srInv, 0.f, 0.499f); // stable until nyquist auto coR = M_PI * std::clamp(freqR * srInv, 0.f, 0.499f); // stable until nyquist - g = sst::basic_blocks::dsp::fasttanhSSE(_mm_set_ps(0, 0, coR, coL)); - auto res = _mm_set_ps(0, 0, std::clamp(resR, 0.f, 0.98f), std::clamp(resL, 0.f, 0.98f)); + g = sst::basic_blocks::dsp::fasttanhSSE(SIMD_MM(set_ps)(0, 0, coR, coL)); + auto res = + SIMD_MM(set_ps)(0, 0, std::clamp(resR, 0.f, 0.98f), std::clamp(resL, 0.f, 0.98f)); auto bellShelfAmp = - _mm_set_ps(0, 0, std::max(bellShelfAmpL, 0.001f), std::max(bellShelfAmpR, 0.001f)); + SIMD_MM(set_ps)(0, 0, std::max(bellShelfAmpL, 0.001f), std::max(bellShelfAmpR, 0.001f)); - k = _mm_sub_ps(twoSSE, _mm_mul_ps(twoSSE, res)); + k = SIMD_MM(sub_ps)(twoSSE, SIMD_MM(mul_ps)(twoSSE, res)); if (mode == BELL) { - k = _mm_div_ps(k, bellShelfAmp); + k = SIMD_MM(div_ps)(k, bellShelfAmp); } setCoeffPostGK(mode, bellShelfAmp); } - void setCoeffPostGK(Mode mode, __m128 bellShelfSSE) + void setCoeffPostGK(Mode mode, SIMD_M128 bellShelfSSE) { - gk = _mm_add_ps(g, k); - a1 = _mm_div_ps(oneSSE, _mm_add_ps(oneSSE, _mm_mul_ps(g, gk))); - a2 = _mm_mul_ps(g, a1); - a3 = _mm_mul_ps(g, a2); + gk = SIMD_MM(add_ps)(g, k); + a1 = SIMD_MM(div_ps)(oneSSE, SIMD_MM(add_ps)(oneSSE, SIMD_MM(mul_ps)(g, gk))); + a2 = SIMD_MM(mul_ps)(g, a1); + a3 = SIMD_MM(mul_ps)(g, a2); switch (mode) { case LP: - m0 = _mm_setzero_ps(); - m1 = _mm_setzero_ps(); + m0 = SIMD_MM(setzero_ps)(); + m1 = SIMD_MM(setzero_ps)(); m2 = oneSSE; break; case BP: - m0 = _mm_setzero_ps(); + m0 = SIMD_MM(setzero_ps)(); m1 = oneSSE; - m2 = _mm_setzero_ps(); + m2 = SIMD_MM(setzero_ps)(); break; case HP: m0 = oneSSE; - m1 = _mm_sub_ps(_mm_setzero_ps(), k); + m1 = SIMD_MM(sub_ps)(SIMD_MM(setzero_ps)(), k); m2 = negoneSSE; break; case NOTCH: 
m0 = oneSSE; - m1 = _mm_sub_ps(_mm_setzero_ps(), k); - m2 = _mm_setzero_ps(); + m1 = SIMD_MM(sub_ps)(SIMD_MM(setzero_ps)(), k); + m2 = SIMD_MM(setzero_ps)(); break; case PEAK: m0 = oneSSE; - m1 = _mm_sub_ps(_mm_setzero_ps(), k); + m1 = SIMD_MM(sub_ps)(SIMD_MM(setzero_ps)(), k); m2 = negtwoSSE; break; case ALL: m0 = oneSSE; - m1 = _mm_mul_ps(negtwoSSE, k); - m2 = _mm_setzero_ps(); + m1 = SIMD_MM(mul_ps)(negtwoSSE, k); + m2 = SIMD_MM(setzero_ps)(); break; case BELL: { auto A = bellShelfSSE; m0 = oneSSE; - m1 = _mm_mul_ps(k, _mm_sub_ps(_mm_mul_ps(A, A), oneSSE)); - m2 = _mm_setzero_ps(); + m1 = SIMD_MM(mul_ps)(k, SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(A, A), oneSSE)); + m2 = SIMD_MM(setzero_ps)(); } break; case LOW_SHELF: { auto A = bellShelfSSE; m0 = oneSSE; - m1 = _mm_mul_ps(k, _mm_sub_ps(A, oneSSE)); - m2 = _mm_sub_ps(_mm_mul_ps(A, A), oneSSE); + m1 = SIMD_MM(mul_ps)(k, SIMD_MM(sub_ps)(A, oneSSE)); + m2 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(A, A), oneSSE); } break; case HIGH_SHELF: { auto A = bellShelfSSE; - m0 = _mm_mul_ps(A, A); - m1 = _mm_mul_ps(_mm_mul_ps(k, _mm_sub_ps(oneSSE, A)), A); - m2 = _mm_sub_ps(oneSSE, _mm_mul_ps(A, A)); + m0 = SIMD_MM(mul_ps)(A, A); + m1 = SIMD_MM(mul_ps)(SIMD_MM(mul_ps)(k, SIMD_MM(sub_ps)(oneSSE, A)), A); + m2 = SIMD_MM(sub_ps)(oneSSE, SIMD_MM(mul_ps)(A, A)); } break; default: - m0 = _mm_setzero_ps(); - m1 = _mm_setzero_ps(); - m2 = _mm_setzero_ps(); + m0 = SIMD_MM(setzero_ps)(); + m1 = SIMD_MM(setzero_ps)(); + m2 = SIMD_MM(setzero_ps)(); break; } } @@ -208,41 +209,43 @@ struct CytomicSVF static void step(CytomicSVF &that, float &L, float &R) { - auto vin = _mm_set_ps(0, 0, R, L); + auto vin = SIMD_MM(set_ps)(0, 0, R, L); auto res = stepSSE(that, vin); float r4 alignas(16)[4]; - _mm_store_ps(r4, res); + SIMD_MM(store_ps)(r4, res); L = r4[0]; R = r4[1]; } - static __m128 stepSSE(CytomicSVF &that, __m128 vin) + static SIMD_M128 stepSSE(CytomicSVF &that, SIMD_M128 vin) { // v3 = v0 - ic2eq - auto v3 = _mm_sub_ps(vin, that.ic2eq); + auto v3 = 
SIMD_MM(sub_ps)(vin, that.ic2eq); // v1 = a1 * ic1eq + a2 * v3 - auto v1 = _mm_add_ps(_mm_mul_ps(that.a1, that.ic1eq), _mm_mul_ps(that.a2, v3)); + auto v1 = + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(that.a1, that.ic1eq), SIMD_MM(mul_ps)(that.a2, v3)); // v2 = ic2eq + a2 * ic1eq + a3 * v3 - auto v2 = _mm_add_ps(that.ic2eq, - _mm_add_ps(_mm_mul_ps(that.a2, that.ic1eq), _mm_mul_ps(that.a3, v3))); + auto v2 = SIMD_MM(add_ps)(that.ic2eq, SIMD_MM(add_ps)(SIMD_MM(mul_ps)(that.a2, that.ic1eq), + SIMD_MM(mul_ps)(that.a3, v3))); // ic1eq = 2 * v1 - ic1eq - that.ic1eq = _mm_sub_ps(_mm_mul_ps(that.twoSSE, v1), that.ic1eq); + that.ic1eq = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(that.twoSSE, v1), that.ic1eq); // ic2eq = 2 * v2 - ic2eq - that.ic2eq = _mm_sub_ps(_mm_mul_ps(that.twoSSE, v2), that.ic2eq); + that.ic2eq = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(that.twoSSE, v2), that.ic2eq); - return _mm_add_ps(_mm_mul_ps(that.m0, vin), - _mm_add_ps(_mm_mul_ps(that.m1, v1), _mm_mul_ps(that.m2, v2))); + return SIMD_MM(add_ps)( + SIMD_MM(mul_ps)(that.m0, vin), + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(that.m1, v1), SIMD_MM(mul_ps)(that.m2, v2))); } /* * Process across a block with smoothing */ - __m128 a1_prior, a2_prior, a3_prior; - __m128 da1, da2, da3; + SIMD_M128 a1_prior, a2_prior, a3_prior; + SIMD_M128 da1, da2, da3; bool firstBlock{true}; template @@ -267,16 +270,16 @@ struct CytomicSVF // then for each one calculate the change across the block static constexpr float obsf = 1.f / blockSize; - auto obs = _mm_set1_ps(obsf); + auto obs = SIMD_MM(set1_ps)(obsf); // and set the changeup, and reset a1 to the prior value so we move in the block - da1 = _mm_mul_ps(_mm_sub_ps(a1, a1_prior), obs); + da1 = SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(a1, a1_prior), obs); a1 = a1_prior; - da2 = _mm_mul_ps(_mm_sub_ps(a2, a2_prior), obs); + da2 = SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(a2, a2_prior), obs); a2 = a2_prior; - da3 = _mm_mul_ps(_mm_sub_ps(a3, a3_prior), obs); + da3 = SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(a3, a3_prior), obs); a3 = a3_prior; 
} @@ -304,41 +307,41 @@ struct CytomicSVF // then for each one calculate the change across the block static constexpr float obsf = 1.f / blockSize; - auto obs = _mm_set1_ps(obsf); + auto obs = SIMD_MM(set1_ps)(obsf); // and set the changeup, and reset a1 to the prior value so we move in the block - da1 = _mm_mul_ps(_mm_sub_ps(a1, a1_prior), obs); + da1 = SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(a1, a1_prior), obs); a1 = a1_prior; - da2 = _mm_mul_ps(_mm_sub_ps(a2, a2_prior), obs); + da2 = SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(a2, a2_prior), obs); a2 = a2_prior; - da3 = _mm_mul_ps(_mm_sub_ps(a3, a3_prior), obs); + da3 = SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(a3, a3_prior), obs); a3 = a3_prior; } template void retainCoeffForBlock() { - da1 = _mm_setzero_ps(); - da2 = _mm_setzero_ps(); - da3 = _mm_setzero_ps(); + da1 = SIMD_MM(setzero_ps)(); + da2 = SIMD_MM(setzero_ps)(); + da3 = SIMD_MM(setzero_ps)(); } void processBlockStep(float &L, float &R) { step(*this, L, R); - a1 = _mm_add_ps(a1, da1); - a2 = _mm_add_ps(a2, da2); - a3 = _mm_add_ps(a3, da3); + a1 = SIMD_MM(add_ps)(a1, da1); + a2 = SIMD_MM(add_ps)(a2, da2); + a3 = SIMD_MM(add_ps)(a3, da3); } void processBlockStep(float &L) { float tmp{0.f}; step(*this, L, tmp); - a1 = _mm_add_ps(a1, da1); - a2 = _mm_add_ps(a2, da2); - a3 = _mm_add_ps(a3, da3); + a1 = SIMD_MM(add_ps)(a1, da1); + a2 = SIMD_MM(add_ps)(a2, da2); + a3 = SIMD_MM(add_ps)(a3, da3); } template @@ -363,8 +366,8 @@ struct CytomicSVF void init() { - ic1eq = _mm_setzero_ps(); - ic2eq = _mm_setzero_ps(); + ic1eq = SIMD_MM(setzero_ps)(); + ic2eq = SIMD_MM(setzero_ps)(); } }; } // namespace sst::filters diff --git a/include/sst/filters/DiodeLadder.h b/include/sst/filters/DiodeLadder.h index 0f3ee24..cc39216 100644 --- a/include/sst/filters/DiodeLadder.h +++ b/include/sst/filters/DiodeLadder.h @@ -34,31 +34,31 @@ static float clampedFrequency(float pitch, float sampleRate, TuningProvider *pro return freq; } -#define F(a) _mm_set_ps1(a) -#define M(a, b) _mm_mul_ps(a, b) -#define D(a, 
b) _mm_div_ps(a, b) -#define A(a, b) _mm_add_ps(a, b) -#define S(a, b) _mm_sub_ps(a, b) +#define F(a) SIMD_MM(set_ps1)(a) +#define M(a, b) SIMD_MM(mul_ps)(a, b) +#define D(a, b) SIMD_MM(div_ps)(a, b) +#define A(a, b) SIMD_MM(add_ps)(a, b) +#define S(a, b) SIMD_MM(sub_ps)(a, b) // reciprocal -#define reci(a) _mm_rcp_ps(a) +#define reci(a) SIMD_MM(rcp_ps)(a) -static inline __m128 getFO(const __m128 beta, const __m128 delta, const __m128 feedback, - const __m128 z) noexcept +static inline SIMD_M128 getFO(const SIMD_M128 beta, const SIMD_M128 delta, const SIMD_M128 feedback, + const SIMD_M128 z) noexcept { // (feedback * delta + z) * beta return M(A(M(feedback, delta), z), beta); } // @TODO: it looks like the `beta` and `delta` arguments are not being used? -static inline __m128 doLpf(const __m128 input, const __m128 alpha, const __m128 beta, - const __m128 gamma, const __m128 delta, const __m128 epsilon, - const __m128 ma0, const __m128 feedback, const __m128 feedback_output, - __m128 &z) noexcept +static inline SIMD_M128 doLpf(const SIMD_M128 input, const SIMD_M128 alpha, const SIMD_M128 beta, + const SIMD_M128 gamma, const SIMD_M128 delta, const SIMD_M128 epsilon, + const SIMD_M128 ma0, const SIMD_M128 feedback, + const SIMD_M128 feedback_output, SIMD_M128 &z) noexcept { // input * gamma + feedback + epsilon * feedback_output - const __m128 i = A(A(M(input, gamma), feedback), M(epsilon, feedback_output)); - const __m128 v = M(S(M(ma0, i), z), alpha); - const __m128 result = A(v, z); + const auto i = A(A(M(input, gamma), feedback), M(epsilon, feedback_output)); + const auto v = M(S(M(ma0, i), z), alpha); + const auto result = A(v, z); z = A(v, result); return result; } @@ -122,7 +122,7 @@ void makeCoefficients(FilterCoefficientMaker *cm, float freq, fl } template -inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) +inline SIMD_M128 process(QuadFilterUnitState *__restrict f, SIMD_M128 input) { for (int i = 0; i < n_cm_coeffs; ++i) { @@ -131,72 
+131,70 @@ inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) // hopefully the optimiser will take care of the duplicatey bits - const __m128 zero = F(0.0f); - const __m128 one = F(1.0f); - const __m128 half = F(0.5f); + const auto zero = F(0.0f); + const auto one = F(1.0f); + const auto half = F(0.5f); - const __m128 sg3 = f->C[dlf_G4]; - const __m128 sg2 = M(sg3, f->C[dlf_G3]); - const __m128 sg1 = M(sg2, f->C[dlf_G2]); + const auto sg3 = f->C[dlf_G4]; + const auto sg2 = M(sg3, f->C[dlf_G3]); + const auto sg1 = M(sg2, f->C[dlf_G2]); // sg4 is 1.0, just inline it - const __m128 g = f->C[dlf_g]; + const auto g = f->C[dlf_g]; // g plus one, common so do it only once - const __m128 gp1 = A(g, one); + const auto gp1 = A(g, one); // half of g - const __m128 hg = M(f->C[dlf_g], half); + const auto hg = M(f->C[dlf_g], half); // 1.0 / (gp1 - g * G2) - const __m128 beta1 = reci(S(gp1, M(g, f->C[dlf_G2]))); + const auto beta1 = reci(S(gp1, M(g, f->C[dlf_G2]))); // 1.0 / (gp1 - g * 0.5 * G3 - const __m128 beta2 = reci(S(gp1, M(hg, f->C[dlf_G3]))); + const auto beta2 = reci(S(gp1, M(hg, f->C[dlf_G3]))); // 1.0 / (gp1 - g * 0.5 * G4 - const __m128 beta3 = reci(S(gp1, M(hg, f->C[dlf_G4]))); + const auto beta3 = reci(S(gp1, M(hg, f->C[dlf_G4]))); // 1.0 / gp1 - const __m128 beta4 = reci(gp1); + const auto beta4 = reci(gp1); // nothing to compute for deltas, inline them // G1 * G2 + 1.0 - const __m128 gamma1 = A(M(f->C[dlf_G1], f->C[dlf_G2]), one); + const auto gamma1 = A(M(f->C[dlf_G1], f->C[dlf_G2]), one); // G2 * G3 + 1.0 - const __m128 gamma2 = A(M(f->C[dlf_G2], f->C[dlf_G3]), one); + const auto gamma2 = A(M(f->C[dlf_G2], f->C[dlf_G3]), one); // G3 * G4 + 1.0 - const __m128 gamma3 = A(M(f->C[dlf_G3], f->C[dlf_G4]), one); + const auto gamma3 = A(M(f->C[dlf_G3], f->C[dlf_G4]), one); // gamma4 is always 1.0, just inline it // nothing to compute for epsilons or ma0, inline them // feedback4 is always zero, inline it - const __m128 feedback3 = getFO(beta4, 
zero, zero, f->R[dlf_z4]); - const __m128 feedback2 = getFO(beta3, hg, f->R[dlf_feedback3], f->R[dlf_z3]); - const __m128 feedback1 = getFO(beta2, hg, f->R[dlf_feedback2], f->R[dlf_z2]); + const auto feedback3 = getFO(beta4, zero, zero, f->R[dlf_z4]); + const auto feedback2 = getFO(beta3, hg, f->R[dlf_feedback3], f->R[dlf_z3]); + const auto feedback1 = getFO(beta2, hg, f->R[dlf_feedback2], f->R[dlf_z2]); - const __m128 sigma = A(A(A(M(sg1, getFO(beta1, g, feedback1, f->R[dlf_z1])), - M(sg2, getFO(beta2, hg, feedback2, f->R[dlf_z2]))), - M(sg3, getFO(beta3, hg, feedback3, f->R[dlf_z3]))), - M(one, getFO(beta4, zero, zero, f->R[dlf_z4]))); + const auto sigma = A(A(A(M(sg1, getFO(beta1, g, feedback1, f->R[dlf_z1])), + M(sg2, getFO(beta2, hg, feedback2, f->R[dlf_z2]))), + M(sg3, getFO(beta3, hg, feedback3, f->R[dlf_z3]))), + M(one, getFO(beta4, zero, zero, f->R[dlf_z4]))); f->R[dlf_feedback3] = feedback3; f->R[dlf_feedback2] = feedback2; f->R[dlf_feedback1] = feedback1; // gain compensation - const __m128 comp = M(A(M(F(0.3f), f->C[dlf_km]), one), input); + const auto comp = M(A(M(F(0.3f), f->C[dlf_km]), one), input); // (comp - km * sigma) / (km * gamma + 1.0) - const __m128 u = D(S(comp, M(f->C[dlf_km], sigma)), A(M(f->C[dlf_km], f->C[dlf_gamma]), one)); - - const __m128 result1 = doLpf(u, f->C[dlf_alpha], beta1, gamma1, g, f->C[dlf_G2], one, feedback1, - getFO(beta1, g, feedback1, f->R[dlf_z1]), f->R[dlf_z1]); - const __m128 result2 = - doLpf(result1, f->C[dlf_alpha], beta2, gamma2, hg, f->C[dlf_G3], half, feedback2, - getFO(beta2, hg, feedback2, f->R[dlf_z2]), f->R[dlf_z2]); - const __m128 result3 = - doLpf(result2, f->C[dlf_alpha], beta3, gamma3, hg, f->C[dlf_G4], half, feedback3, - getFO(beta3, hg, feedback3, f->R[dlf_z3]), f->R[dlf_z3]); - const __m128 result4 = doLpf(result3, f->C[dlf_alpha], beta4, one, zero, zero, half, zero, - getFO(beta4, zero, zero, f->R[dlf_z4]), f->R[dlf_z4]); + const auto u = D(S(comp, M(f->C[dlf_km], sigma)), A(M(f->C[dlf_km], 
f->C[dlf_gamma]), one)); + + const auto result1 = doLpf(u, f->C[dlf_alpha], beta1, gamma1, g, f->C[dlf_G2], one, feedback1, + getFO(beta1, g, feedback1, f->R[dlf_z1]), f->R[dlf_z1]); + const auto result2 = doLpf(result1, f->C[dlf_alpha], beta2, gamma2, hg, f->C[dlf_G3], half, + feedback2, getFO(beta2, hg, feedback2, f->R[dlf_z2]), f->R[dlf_z2]); + const auto result3 = doLpf(result2, f->C[dlf_alpha], beta3, gamma3, hg, f->C[dlf_G4], half, + feedback3, getFO(beta3, hg, feedback3, f->R[dlf_z3]), f->R[dlf_z3]); + const auto result4 = doLpf(result3, f->C[dlf_alpha], beta4, one, zero, zero, half, zero, + getFO(beta4, zero, zero, f->R[dlf_z4]), f->R[dlf_z4]); // Just like in QuadFilterUnit.cpp/LPMOOGquad, it's fine for the whole quad to return the same // subtype because integer parameters like f->WP are not modulatable and QuadFilterUnit is only diff --git a/include/sst/filters/FilterCoefficientMaker_Impl.h b/include/sst/filters/FilterCoefficientMaker_Impl.h index 11dd4a8..0fa7e06 100644 --- a/include/sst/filters/FilterCoefficientMaker_Impl.h +++ b/include/sst/filters/FilterCoefficientMaker_Impl.h @@ -45,9 +45,9 @@ void FilterCoefficientMaker::setSampleRateAndBlockSize(float new namespace detail { -inline void set1f(__m128 &m, int i, float f) { *((float *)&m + i) = f; } +inline void set1f(SIMD_M128 &m, int i, float f) { *((float *)&m + i) = f; } -inline float get1f(__m128 m, int i) { return *((float *)&m + i); } +inline float get1f(SIMD_M128 m, int i) { return *((float *)&m + i); } } // namespace detail template @@ -58,8 +58,8 @@ void FilterCoefficientMaker::updateState(StateType &state, int c { for (int i = 0; i < n_cm_coeffs; ++i) { - state.C[i] = _mm_set1_ps(C[i]); - state.dC[i] = _mm_set1_ps(dC[i]); + state.C[i] = SIMD_MM(set1_ps)(C[i]); + state.dC[i] = SIMD_MM(set1_ps)(dC[i]); } } else diff --git a/include/sst/filters/HalfRateFilter.h b/include/sst/filters/HalfRateFilter.h index 8a2fda5..2518659 100644 --- a/include/sst/filters/HalfRateFilter.h +++ 
b/include/sst/filters/HalfRateFilter.h @@ -16,6 +16,7 @@ #define INCLUDE_SST_FILTERS_HALFRATEFILTER_H #include +#include #include "sst/utilities/globals.h" namespace sst::filters::HalfRate @@ -28,16 +29,16 @@ class alignas(16) HalfRateFilter { private: // Remember leave these first so they stay aligned - __m128 va[halfrate_max_M]; - __m128 vx0[halfrate_max_M]; - __m128 vx1[halfrate_max_M]; - __m128 vx2[halfrate_max_M]; - __m128 vy0[halfrate_max_M]; - __m128 vy1[halfrate_max_M]; - __m128 vy2[halfrate_max_M]; - __m128 oldout; + SIMD_M128 va[halfrate_max_M]; + SIMD_M128 vx0[halfrate_max_M]; + SIMD_M128 vx1[halfrate_max_M]; + SIMD_M128 vx2[halfrate_max_M]; + SIMD_M128 vy0[halfrate_max_M]; + SIMD_M128 vy1[halfrate_max_M]; + SIMD_M128 vy2[halfrate_max_M]; + SIMD_M128 oldout; - const __m128 half = _mm_set_ps1(0.5f); + const SIMD_M128 half = SIMD_MM(set_ps1)(0.5f); public: /** @@ -58,30 +59,30 @@ class alignas(16) HalfRateFilter void process_block(float *floatL, float *floatR, int nsamples) { - __m128 *__restrict L = (__m128 *)floatL; - __m128 *__restrict R = (__m128 *)floatR; - __m128 o[hr_BLOCK_SIZE]; + SIMD_M128 *__restrict L = (SIMD_M128 *)floatL; + SIMD_M128 *__restrict R = (SIMD_M128 *)floatR; + SIMD_M128 o[hr_BLOCK_SIZE]; auto N = nsamples; // fill the buffer with interleaved stereo samples for (int k = 0; k < N; k += 4) { //[o3,o2,o1,o0] = [L0,L0,R0,R0] - o[k] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(0, 0, 0, 0)); - o[k + 1] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(1, 1, 1, 1)); - o[k + 2] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(2, 2, 2, 2)); - o[k + 3] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(3, 3, 3, 3)); + o[k] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], SIMD_MM_SHUFFLE(0, 0, 0, 0)); + o[k + 1] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + o[k + 2] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], SIMD_MM_SHUFFLE(2, 2, 2, 2)); + o[k + 3] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], 
SIMD_MM_SHUFFLE(3, 3, 3, 3)); } // process filters for (auto j = 0U; j < M; j++) { - __m128 tx0 = vx0[j]; - __m128 tx1 = vx1[j]; - __m128 tx2 = vx2[j]; - __m128 ty0 = vy0[j]; - __m128 ty1 = vy1[j]; - __m128 ty2 = vy2[j]; - __m128 ta = va[j]; + auto tx0 = vx0[j]; + auto tx1 = vx1[j]; + auto tx2 = vx2[j]; + auto ty0 = vy0[j]; + auto ty1 = vy1[j]; + auto ty2 = vy2[j]; + auto ta = va[j]; for (int k = 0; k < N; k += 2) { @@ -93,7 +94,7 @@ class alignas(16) HalfRateFilter ty2 = ty1; ty1 = ty0; // allpass filter 1 - ty0 = _mm_add_ps(tx2, _mm_mul_ps(_mm_sub_ps(tx0, ty2), ta)); + ty0 = SIMD_MM(add_ps)(tx2, SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(tx0, ty2), ta)); o[k] = ty0; // shuffle inputs @@ -104,7 +105,7 @@ class alignas(16) HalfRateFilter ty2 = ty1; ty1 = ty0; // allpass filter 1 - ty0 = _mm_add_ps(tx2, _mm_mul_ps(_mm_sub_ps(tx0, ty2), ta)); + ty0 = SIMD_MM(add_ps)(tx2, SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(tx0, ty2), ta)); o[k + 1] = ty0; } vx0[j] = tx0; @@ -135,40 +136,40 @@ class alignas(16) HalfRateFilter float *fL = (float *)L; float *fR = (float *)R; - __m128 faR = _mm_setzero_ps(); - __m128 fbR = _mm_setzero_ps(); + auto faR = SIMD_MM(setzero_ps)(); + auto fbR = SIMD_MM(setzero_ps)(); for (int k = 0; k < N; k++) { // const double output=(filter_a.process(input)+oldout)*0.5; // oldout=filter_b.process(input); - __m128 vL = _mm_add_ss(o[k], oldout); - vL = _mm_mul_ss(vL, half); - _mm_store_ss(&fL[k], vL); + auto vL = SIMD_MM(add_ss)(o[k], oldout); + vL = SIMD_MM(mul_ss)(vL, half); + SIMD_MM(store_ss)(&fL[k], vL); - faR = _mm_movehl_ps(faR, o[k]); - fbR = _mm_movehl_ps(fbR, oldout); + faR = SIMD_MM(movehl_ps)(faR, o[k]); + fbR = SIMD_MM(movehl_ps)(fbR, oldout); - __m128 vR = _mm_add_ss(faR, fbR); - vR = _mm_mul_ss(vR, half); - _mm_store_ss(&fR[k], vR); + auto vR = SIMD_MM(add_ss)(faR, fbR); + vR = SIMD_MM(mul_ss)(vR, half); + SIMD_MM(store_ss)(&fR[k], vR); - oldout = _mm_shuffle_ps(o[k], o[k], _MM_SHUFFLE(3, 3, 1, 1)); + oldout = SIMD_MM(shuffle_ps)(o[k], o[k], 
SIMD_MM_SHUFFLE(3, 3, 1, 1)); } } void process_block_D2(float *floatL, float *floatR, int nsamples, float *outL = 0, float *outR = 0) // process in-place. the new block will be half the size { - __m128 *L = (__m128 *)floatL; - __m128 *R = (__m128 *)floatR; - __m128 o[hr_BLOCK_SIZE]; + auto *L = (SIMD_M128 *)floatL; + auto *R = (SIMD_M128 *)floatR; + SIMD_M128 o[hr_BLOCK_SIZE]; /* * fill the buffer with interleaved stereo samples by rotating the * input simd-in-time a bit * - * _mm_shuffle_ps(a,b,_MM_SHUFFLE(i,j,k,l)) returns a[i], a[j], b[k], b[l] + * SIMD_MM(shuffle_ps)(a,b,SIMD_MM_SHUFFLE(i,j,k,l)) returns a[l], a[k], b[j], b[i] * * So this loop makes o look like the rotation of L and R. That is * @@ -178,10 +179,10 @@ class alignas(16) HalfRateFilter */ for (int k = 0; k < nsamples; k += 4) { - o[k] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(0, 0, 0, 0)); - o[k + 1] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(1, 1, 1, 1)); - o[k + 2] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(2, 2, 2, 2)); - o[k + 3] = _mm_shuffle_ps(L[k >> 2], R[k >> 2], _MM_SHUFFLE(3, 3, 3, 3)); + o[k] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], SIMD_MM_SHUFFLE(0, 0, 0, 0)); + o[k + 1] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + o[k + 2] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], SIMD_MM_SHUFFLE(2, 2, 2, 2)); + o[k + 3] = SIMD_MM(shuffle_ps)(L[k >> 2], R[k >> 2], SIMD_MM_SHUFFLE(3, 3, 3, 3)); } /* @@ -210,13 +211,13 @@ class alignas(16) HalfRateFilter */ for (auto j = 0U; j < M; j++) { - __m128 tx0 = vx0[j]; - __m128 tx1 = vx1[j]; - __m128 tx2 = vx2[j]; - __m128 ty0 = vy0[j]; - __m128 ty1 = vy1[j]; - __m128 ty2 = vy2[j]; - __m128 ta = va[j]; + auto tx0 = vx0[j]; + auto tx1 = vx1[j]; + auto tx2 = vx2[j]; + auto ty0 = vy0[j]; + auto ty1 = vy1[j]; + auto ty2 = vy2[j]; + auto ta = va[j]; // Why is this loop hand-unrolled? 
for (int k = 0; k < nsamples; k += 2) @@ -229,7 +230,7 @@ class alignas(16) HalfRateFilter ty2 = ty1; ty1 = ty0; // allpass filter 1 - ty0 = _mm_add_ps(tx2, _mm_mul_ps(_mm_sub_ps(tx0, ty2), ta)); + ty0 = SIMD_MM(add_ps)(tx2, SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(tx0, ty2), ta)); o[k] = ty0; // shuffle inputs @@ -240,7 +241,7 @@ class alignas(16) HalfRateFilter ty2 = ty1; ty1 = ty0; // allpass filter 1 - ty0 = _mm_add_ps(tx2, _mm_mul_ps(_mm_sub_ps(tx0, ty2), ta)); + ty0 = SIMD_MM(add_ps)(tx2, SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(tx0, ty2), ta)); o[k + 1] = ty0; } vx0[j] = tx0; @@ -251,20 +252,20 @@ class alignas(16) HalfRateFilter vy2[j] = ty2; } - __m128 aR = _mm_setzero_ps(); - __m128 bR = _mm_setzero_ps(); - __m128 cR = _mm_setzero_ps(); - __m128 dR = _mm_setzero_ps(); + auto aR = SIMD_MM(setzero_ps)(); + auto bR = SIMD_MM(setzero_ps)(); + auto cR = SIMD_MM(setzero_ps)(); + auto dR = SIMD_MM(setzero_ps)(); if (outL) - L = (__m128 *)outL; + L = (SIMD_M128 *)outL; if (outR) - R = (__m128 *)outR; + R = (SIMD_M128 *)outR; /* * OK so now we have all the filtered signals we want to reconstruct the output. * This is basically the sample selection stage. To read this code you need - * to remember that _mm_movehl_ps(a,b) results in b[3], b[4], a[3], a[4] as the + * to remember that SIMD_MM(movehl_ps)(a,b) results in b[3], b[4], a[3], a[4] as the * simd output. 
* * The code had this comment @@ -274,29 +275,29 @@ class alignas(16) HalfRateFilter * * atop this code * - * __m128 tL0 = _mm_shuffle_ps(o[k], o[k], _MM_SHUFFLE(1, 1, 1, 1)); - * __m128 tR0 = _mm_shuffle_ps(o[k], o[k], _MM_SHUFFLE(3, 3, 3, 3)); - * __m128 aL = _mm_add_ss(tL0, o[k + 1]); - * aR = _mm_movehl_ps(aR, o[k + 1]); - * aR = _mm_add_ss(aR, tR0); + * auto tL0 = SIMD_MM(shuffle_ps)(o[k], o[k], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + * auto tR0 = SIMD_MM(shuffle_ps)(o[k], o[k], SIMD_MM_SHUFFLE(3, 3, 3, 3)); + * auto aL = SIMD_MM(add_ss)(tL0, o[k + 1]); + * aR = SIMD_MM(movehl_ps)(aR, o[k + 1]); + * aR = SIMD_MM(add_ss)(aR, tR0); * * So can we make that tie out? Remembering o now has the for B_L A_L, B_R, A_R * - * So tL0 = _mm_shuffle_ps(o[k], o[k], 11111) + * So tL0 = SIMD_MM(shuffle_ps)(o[k], o[k], 11111) * or tL0 = o[k][1] in every slot or tl0 is A_L across the board at sample k. * Similarly tR0 = A_R across the board at sample K. * - * Now recall _mm_add_ss(a,b) gives you (a[0]+b[0], a[1], a[2], a[3]) so now we do + * Now recall SIMD_MM(add_ss)(a,b) gives you (a[0]+b[0], a[1], a[2], a[3]) so now we do * - * __m128 aL = _mm_add_ss(tL0, o[k + 1]); + * auto aL = SIMD_MM(add_ss)(tL0, o[k + 1]); * aL = (A_L[k] + B_L[k+1], A_L[k], A_L[k], A_L[k]); * * Now * - * aR = _mm_movehl_ps(aR, o[k + 1]); + * aR = SIMD_MM(movehl_ps)(aR, o[k + 1]); * aR = B_R[k+1], A_R[k+1], aR[3], aR[4] * - * aR = _mm_add_ss(aR, tR0) or + * aR = SIMD_MM(add_ss)(aR, tR0) or * aR = (A_R[k] + B_R[k+1], A_R[k+1], aR[3], aR[4]) * * (At this point I'm suspecting that the rest of the SIMD registeres in A wont matter) @@ -306,14 +307,14 @@ class alignas(16) HalfRateFilter * * So once those stages are assembled we do this * - * aL = _mm_movelh_ps(aL, bL); - * cL = _mm_movelh_ps(cL, dL); - * L[k >> 3] = _mm_shuffle_ps(aL, cL, _MM_SHUFFLE(2, 0, 2, 0)); + * aL = SIMD_MM(movelh_ps)(aL, bL); + * cL = SIMD_MM(movelh_ps)(cL, dL); + * L[k >> 3] = SIMD_MM(shuffle_ps)(aL, cL, SIMD_MM_SHUFFLE(2, 0, 2, 0)); * * And 
similarly for R. So what's that doing. So first of all _mm_novelh_ps [note lh * not hl] has signatlre * - * _mm_movelh_ps(a,b) = a[0],a[1],b[0],b[1] + * SIMD_MM(movelh_ps)(a,b) = a[0],a[1],b[0],b[1] * * so this sets * @@ -342,78 +343,78 @@ class alignas(16) HalfRateFilter /* const double output=(filter_a.process(input)+oldout)*0.5; oldout=filter_b.process(input);*/ - __m128 tL0 = _mm_shuffle_ps(o[k], o[k], _MM_SHUFFLE(1, 1, 1, 1)); - __m128 tR0 = _mm_shuffle_ps(o[k], o[k], _MM_SHUFFLE(3, 3, 3, 3)); - __m128 aL = _mm_add_ss(tL0, o[k + 1]); - aR = _mm_movehl_ps(aR, o[k + 1]); - aR = _mm_add_ss(aR, tR0); - - tL0 = _mm_shuffle_ps(o[k + 2], o[k + 2], _MM_SHUFFLE(1, 1, 1, 1)); - tR0 = _mm_shuffle_ps(o[k + 2], o[k + 2], _MM_SHUFFLE(3, 3, 3, 3)); - __m128 bL = _mm_add_ss(tL0, o[k + 3]); - bR = _mm_movehl_ps(aR, o[k + 3]); - bR = _mm_add_ss(bR, tR0); - - tL0 = _mm_shuffle_ps(o[k + 4], o[k + 4], _MM_SHUFFLE(1, 1, 1, 1)); - tR0 = _mm_shuffle_ps(o[k + 4], o[k + 4], _MM_SHUFFLE(3, 3, 3, 3)); - __m128 cL = _mm_add_ss(tL0, o[k + 5]); - cR = _mm_movehl_ps(cR, o[k + 5]); - cR = _mm_add_ss(cR, tR0); - - tL0 = _mm_shuffle_ps(o[k + 6], o[k + 6], _MM_SHUFFLE(1, 1, 1, 1)); - tR0 = _mm_shuffle_ps(o[k + 6], o[k + 6], _MM_SHUFFLE(3, 3, 3, 3)); - __m128 dL = _mm_add_ss(tL0, o[k + 7]); - dR = _mm_movehl_ps(dR, o[k + 7]); - dR = _mm_add_ss(dR, tR0); - - aL = _mm_movelh_ps(aL, bL); - cL = _mm_movelh_ps(cL, dL); - aR = _mm_movelh_ps(aR, bR); - cR = _mm_movelh_ps(cR, dR); - - L[k >> 3] = _mm_shuffle_ps(aL, cL, _MM_SHUFFLE(2, 0, 2, 0)); - R[k >> 3] = _mm_shuffle_ps(aR, cR, _MM_SHUFFLE(2, 0, 2, 0)); + auto tL0 = SIMD_MM(shuffle_ps)(o[k], o[k], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + auto tR0 = SIMD_MM(shuffle_ps)(o[k], o[k], SIMD_MM_SHUFFLE(3, 3, 3, 3)); + auto aL = SIMD_MM(add_ss)(tL0, o[k + 1]); + aR = SIMD_MM(movehl_ps)(aR, o[k + 1]); + aR = SIMD_MM(add_ss)(aR, tR0); + + tL0 = SIMD_MM(shuffle_ps)(o[k + 2], o[k + 2], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + tR0 = SIMD_MM(shuffle_ps)(o[k + 2], o[k + 2], 
SIMD_MM_SHUFFLE(3, 3, 3, 3)); + auto bL = SIMD_MM(add_ss)(tL0, o[k + 3]); + bR = SIMD_MM(movehl_ps)(aR, o[k + 3]); + bR = SIMD_MM(add_ss)(bR, tR0); + + tL0 = SIMD_MM(shuffle_ps)(o[k + 4], o[k + 4], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + tR0 = SIMD_MM(shuffle_ps)(o[k + 4], o[k + 4], SIMD_MM_SHUFFLE(3, 3, 3, 3)); + auto cL = SIMD_MM(add_ss)(tL0, o[k + 5]); + cR = SIMD_MM(movehl_ps)(cR, o[k + 5]); + cR = SIMD_MM(add_ss)(cR, tR0); + + tL0 = SIMD_MM(shuffle_ps)(o[k + 6], o[k + 6], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + tR0 = SIMD_MM(shuffle_ps)(o[k + 6], o[k + 6], SIMD_MM_SHUFFLE(3, 3, 3, 3)); + auto dL = SIMD_MM(add_ss)(tL0, o[k + 7]); + dR = SIMD_MM(movehl_ps)(dR, o[k + 7]); + dR = SIMD_MM(add_ss)(dR, tR0); + + aL = SIMD_MM(movelh_ps)(aL, bL); + cL = SIMD_MM(movelh_ps)(cL, dL); + aR = SIMD_MM(movelh_ps)(aR, bR); + cR = SIMD_MM(movelh_ps)(cR, dR); + + L[k >> 3] = SIMD_MM(shuffle_ps)(aL, cL, SIMD_MM_SHUFFLE(2, 0, 2, 0)); + R[k >> 3] = SIMD_MM(shuffle_ps)(aR, cR, SIMD_MM_SHUFFLE(2, 0, 2, 0)); // optional: *=0.5; - const __m128 half = _mm_set_ps1(0.5f); - L[k >> 3] = _mm_mul_ps(L[k >> 3], half); - R[k >> 3] = _mm_mul_ps(R[k >> 3], half); + const auto half = SIMD_MM(set_ps1)(0.5f); + L[k >> 3] = SIMD_MM(mul_ps)(L[k >> 3], half); + R[k >> 3] = SIMD_MM(mul_ps)(R[k >> 3], half); } } void process_block_U2(float *floatL_in, float *floatR_in, float *floatL, float *floatR, int nsamples) { - __m128 *L = (__m128 *)floatL; - __m128 *R = (__m128 *)floatR; - __m128 *L_in = (__m128 *)floatL_in; - __m128 *R_in = (__m128 *)floatR_in; + auto *L = (SIMD_M128 *)floatL; + auto *R = (SIMD_M128 *)floatR; + auto *L_in = (SIMD_M128 *)floatL_in; + auto *R_in = (SIMD_M128 *)floatR_in; - __m128 o[hr_BLOCK_SIZE]; + SIMD_M128 o[hr_BLOCK_SIZE]; // fill the buffer with interleaved stereo samples for (int k = 0; k < nsamples; k += 8) { //[o3,o2,o1,o0] = [L0,L0,R0,R0] - o[k] = _mm_shuffle_ps(L_in[k >> 3], R_in[k >> 3], _MM_SHUFFLE(0, 0, 0, 0)); - o[k + 1] = _mm_setzero_ps(); - o[k + 2] = _mm_shuffle_ps(L_in[k >> 3], 
R_in[k >> 3], _MM_SHUFFLE(1, 1, 1, 1)); - o[k + 3] = _mm_setzero_ps(); - o[k + 4] = _mm_shuffle_ps(L_in[k >> 3], R_in[k >> 3], _MM_SHUFFLE(2, 2, 2, 2)); - o[k + 5] = _mm_setzero_ps(); - o[k + 6] = _mm_shuffle_ps(L_in[k >> 3], R_in[k >> 3], _MM_SHUFFLE(3, 3, 3, 3)); - o[k + 7] = _mm_setzero_ps(); + o[k] = SIMD_MM(shuffle_ps)(L_in[k >> 3], R_in[k >> 3], SIMD_MM_SHUFFLE(0, 0, 0, 0)); + o[k + 1] = SIMD_MM(setzero_ps)(); + o[k + 2] = SIMD_MM(shuffle_ps)(L_in[k >> 3], R_in[k >> 3], SIMD_MM_SHUFFLE(1, 1, 1, 1)); + o[k + 3] = SIMD_MM(setzero_ps)(); + o[k + 4] = SIMD_MM(shuffle_ps)(L_in[k >> 3], R_in[k >> 3], SIMD_MM_SHUFFLE(2, 2, 2, 2)); + o[k + 5] = SIMD_MM(setzero_ps)(); + o[k + 6] = SIMD_MM(shuffle_ps)(L_in[k >> 3], R_in[k >> 3], SIMD_MM_SHUFFLE(3, 3, 3, 3)); + o[k + 7] = SIMD_MM(setzero_ps)(); } // process filters for (auto j = 0U; j < M; j++) { - __m128 tx0 = vx0[j]; - __m128 tx1 = vx1[j]; - __m128 tx2 = vx2[j]; - __m128 ty0 = vy0[j]; - __m128 ty1 = vy1[j]; - __m128 ty2 = vy2[j]; - __m128 ta = va[j]; + auto tx0 = vx0[j]; + auto tx1 = vx1[j]; + auto tx2 = vx2[j]; + auto ty0 = vy0[j]; + auto ty1 = vy1[j]; + auto ty2 = vy2[j]; + auto ta = va[j]; for (int k = 0; k < nsamples; k += 2) { @@ -425,7 +426,7 @@ class alignas(16) HalfRateFilter ty2 = ty1; ty1 = ty0; // allpass filter 1 - ty0 = _mm_add_ps(tx2, _mm_mul_ps(_mm_sub_ps(tx0, ty2), ta)); + ty0 = SIMD_MM(add_ps)(tx2, SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(tx0, ty2), ta)); o[k] = ty0; // shuffle inputs @@ -436,7 +437,7 @@ class alignas(16) HalfRateFilter ty2 = ty1; ty1 = ty0; // allpass filter 1 - ty0 = _mm_add_ps(tx2, _mm_mul_ps(_mm_sub_ps(tx0, ty2), ta)); + ty0 = SIMD_MM(add_ps)(tx2, SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(tx0, ty2), ta)); o[k + 1] = ty0; } vx0[j] = tx0; @@ -447,33 +448,33 @@ class alignas(16) HalfRateFilter vy2[j] = ty2; } - /*__m128 aR = _mm_setzero_ps(); - __m128 bR = _mm_setzero_ps(); - __m128 cR = _mm_setzero_ps(); - __m128 dR = _mm_setzero_ps();*/ + /*auto aR = SIMD_MM(setzero_ps)(); + auto bR = 
SIMD_MM(setzero_ps)(); + auto cR = SIMD_MM(setzero_ps)(); + auto dR = SIMD_MM(setzero_ps)();*/ float *fL = (float *)L; float *fR = (float *)R; - __m128 faR = _mm_setzero_ps(); - __m128 fbR = _mm_setzero_ps(); + auto faR = SIMD_MM(setzero_ps)(); + auto fbR = SIMD_MM(setzero_ps)(); for (int k = 0; k < nsamples; k++) { // const double output=(filter_a.process(input)+oldout)*0.5; // oldout=filter_b.process(input); - __m128 vL = _mm_add_ss(o[k], oldout); - vL = _mm_mul_ss(vL, half); - _mm_store_ss(&fL[k], vL); + auto vL = SIMD_MM(add_ss)(o[k], oldout); + vL = SIMD_MM(mul_ss)(vL, half); + SIMD_MM(store_ss)(&fL[k], vL); - faR = _mm_movehl_ps(faR, o[k]); - fbR = _mm_movehl_ps(fbR, oldout); + faR = SIMD_MM(movehl_ps)(faR, o[k]); + fbR = SIMD_MM(movehl_ps)(fbR, oldout); - __m128 vR = _mm_add_ss(faR, fbR); - vR = _mm_mul_ss(vR, half); - _mm_store_ss(&fR[k], vR); + auto vR = SIMD_MM(add_ss)(faR, fbR); + vR = SIMD_MM(mul_ss)(vR, half); + SIMD_MM(store_ss)(&fR[k], vR); - oldout = _mm_shuffle_ps(o[k], o[k], _MM_SHUFFLE(3, 3, 1, 1)); + oldout = SIMD_MM(shuffle_ps)(o[k], o[k], SIMD_MM_SHUFFLE(3, 3, 1, 1)); } // If you want to avoid downsampling, do this @@ -496,7 +497,7 @@ class alignas(16) HalfRateFilter { for (auto i = 0U; i < M; i++) { - va[i] = _mm_setzero_ps(); + va[i] = SIMD_MM(setzero_ps)(); } int order = M << 1; @@ -631,22 +632,22 @@ class alignas(16) HalfRateFilter { for (auto i = 0U; i < M; i++) { - // va[i] = _mm_set_ps(cA[i],cB[i],cA[i],cB[i]); - va[i] = _mm_set_ps(cB[i], cA[i], cB[i], cA[i]); + // va[i] = SIMD_MM(set_ps)(cA[i],cB[i],cA[i],cB[i]); + va[i] = SIMD_MM(set_ps)(cB[i], cA[i], cB[i], cA[i]); } } void reset() { for (auto i = 0U; i < M; i++) { - vx0[i] = _mm_setzero_ps(); - vx1[i] = _mm_setzero_ps(); - vx2[i] = _mm_setzero_ps(); - vy0[i] = _mm_setzero_ps(); - vy1[i] = _mm_setzero_ps(); - vy2[i] = _mm_setzero_ps(); + vx0[i] = SIMD_MM(setzero_ps)(); + vx1[i] = SIMD_MM(setzero_ps)(); + vx2[i] = SIMD_MM(setzero_ps)(); + vy0[i] = SIMD_MM(setzero_ps)(); + vy1[i] = 
SIMD_MM(setzero_ps)(); + vy2[i] = SIMD_MM(setzero_ps)(); } - oldout = _mm_setzero_ps(); + oldout = SIMD_MM(setzero_ps)(); } private: diff --git a/include/sst/filters/K35Filter.h b/include/sst/filters/K35Filter.h index d5213aa..96dc03e 100644 --- a/include/sst/filters/K35Filter.h +++ b/include/sst/filters/K35Filter.h @@ -34,11 +34,11 @@ static float clampedFrequency(float pitch, float sampleRate, TuningProvider *pro return std::clamp(freq, 5.f, (sampleRate * 0.3f)); } -#define F(a) _mm_set_ps1(a) -#define M(a, b) _mm_mul_ps(a, b) -#define D(a, b) _mm_div_ps(a, b) -#define A(a, b) _mm_add_ps(a, b) -#define S(a, b) _mm_sub_ps(a, b) +#define F(a) SIMD_MM(set_ps1)(a) +#define M(a, b) SIMD_MM(mul_ps)(a, b) +#define D(a, b) SIMD_MM(div_ps)(a, b) +#define A(a, b) SIMD_MM(add_ps)(a, b) +#define S(a, b) SIMD_MM(sub_ps)(a, b) // note that things that were NOPs in the Odin code have been removed. // m_gamma remains 1.0 so xn * m_gamma == xn; that's a NOP @@ -47,14 +47,14 @@ static float clampedFrequency(float pitch, float sampleRate, TuningProvider *pro // m_a_0 remains 1 so that's also a NOP // so we only need to compute: // (xn - z) * alpha + za -static inline __m128 doLpf(const __m128 &G, const __m128 &input, __m128 &z) noexcept +static inline SIMD_M128 doLpf(const SIMD_M128 &G, const SIMD_M128 &input, SIMD_M128 &z) noexcept { - const __m128 v = M(S(input, z), G); - const __m128 result = A(v, z); + const auto v = M(S(input, z), G); + const auto result = A(v, z); z = A(v, result); return result; } -static inline __m128 doHpf(const __m128 &G, const __m128 &input, __m128 &z) noexcept +static inline SIMD_M128 doHpf(const SIMD_M128 &G, const SIMD_M128 &input, SIMD_M128 &z) noexcept { return S(input, doLpf(G, input, z)); } @@ -125,47 +125,47 @@ inline void processCoeffs(QuadFilterUnitState *__restrict f) f->C[i] = A(f->C[i], f->dC[i]); } -inline __m128 process_lp(QuadFilterUnitState *__restrict f, __m128 input) +inline SIMD_M128 process_lp(QuadFilterUnitState *__restrict f, 
SIMD_M128 input) { processCoeffs(f); - const __m128 y1 = doLpf(f->C[k35_G], input, f->R[k35_lz]); + const auto y1 = doLpf(f->C[k35_G], input, f->R[k35_lz]); // (lpf beta * lpf2 feedback) + (hpf beta * hpf1 feedback) - const __m128 s35 = A(M(f->C[k35_lb], f->R[k35_2z]), M(f->C[k35_hb], f->R[k35_hz])); + const auto s35 = A(M(f->C[k35_lb], f->R[k35_2z]), M(f->C[k35_hb], f->R[k35_hz])); // alpha * (y1 + s35) - const __m128 u_clean = M(f->C[k35_alpha], A(y1, s35)); - const __m128 u_driven = basic_blocks::dsp::fasttanhSSEclamped(M(u_clean, f->C[k35_saturation])); - const __m128 u = + const auto u_clean = M(f->C[k35_alpha], A(y1, s35)); + const auto u_driven = basic_blocks::dsp::fasttanhSSEclamped(M(u_clean, f->C[k35_saturation])); + const auto u = A(M(u_clean, f->C[k35_saturation_blend_inv]), M(u_driven, f->C[k35_saturation_blend])); // mk * lpf2(u) - const __m128 y = M(f->C[k35_k], doLpf(f->C[k35_G], u, f->R[k35_2z])); + const auto y = M(f->C[k35_k], doLpf(f->C[k35_G], u, f->R[k35_2z])); doHpf(f->C[k35_G], y, f->R[k35_hz]); - const __m128 result = D(y, f->C[k35_k]); + const auto result = D(y, f->C[k35_k]); return result; } -inline __m128 process_hp(QuadFilterUnitState *__restrict f, __m128 input) +inline SIMD_M128 process_hp(QuadFilterUnitState *__restrict f, SIMD_M128 input) { processCoeffs(f); - const __m128 y1 = doHpf(f->C[k35_G], input, f->R[k35_hz]); + const auto y1 = doHpf(f->C[k35_G], input, f->R[k35_hz]); // (lpf beta * lpf2 feedback) + (hpf beta * hpf1 feedback) - const __m128 s35 = A(M(f->C[k35_hb], f->R[k35_2z]), M(f->C[k35_lb], f->R[k35_lz])); + const auto s35 = A(M(f->C[k35_hb], f->R[k35_2z]), M(f->C[k35_lb], f->R[k35_lz])); // alpha * (y1 + s35) - const __m128 u = M(f->C[k35_alpha], A(y1, s35)); + const auto u = M(f->C[k35_alpha], A(y1, s35)); // mk * lpf2(u) - const __m128 y_clean = M(f->C[k35_k], u); - const __m128 y_driven = basic_blocks::dsp::fasttanhSSEclamped(M(y_clean, f->C[k35_saturation])); - const __m128 y = + const auto y_clean = M(f->C[k35_k], 
u); + const auto y_driven = basic_blocks::dsp::fasttanhSSEclamped(M(y_clean, f->C[k35_saturation])); + const auto y = A(M(y_clean, f->C[k35_saturation_blend_inv]), M(y_driven, f->C[k35_saturation_blend])); doLpf(f->C[k35_G], doHpf(f->C[k35_G], y, f->R[k35_2z]), f->R[k35_lz]); - const __m128 result = D(y, f->C[k35_k]); + const auto result = D(y, f->C[k35_k]); return result; } diff --git a/include/sst/filters/OBXDFilter.h b/include/sst/filters/OBXDFilter.h index 5c72973..2fdfcdb 100644 --- a/include/sst/filters/OBXDFilter.h +++ b/include/sst/filters/OBXDFilter.h @@ -63,20 +63,20 @@ enum Params static constexpr int ssew = 4; -const __m128 zero = _mm_set1_ps(0.0f); -const __m128 nine_two_zero = _mm_set1_ps(0.00920833f); -const __m128 zero_zero_five = _mm_set1_ps(0.05f); -const __m128 eight_seven_six = _mm_set1_ps(0.0876f); -const __m128 one_zero_three = _mm_set1_ps(0.0103592f); -const __m128 one_eight_five = _mm_set1_ps(0.185f); -const __m128 zero_four_five = _mm_set1_ps(0.45f); -const __m128 zero_five = _mm_set1_ps(0.5f); -const __m128 one = _mm_set1_ps(1.0f); -const __m128 one_three_five = _mm_set1_ps(1.035f); -const __m128 two = _mm_set1_ps(2.0f); -const __m128 three = _mm_set1_ps(3.0f); -const __m128 gainAdjustment2Pole = _mm_set1_ps(0.74f); -const __m128 gainAdjustment4Pole = _mm_set1_ps(0.6f); +const auto zero = SIMD_MM(set1_ps)(0.0f); +const auto nine_two_zero = SIMD_MM(set1_ps)(0.00920833f); +const auto zero_zero_five = SIMD_MM(set1_ps)(0.05f); +const auto eight_seven_six = SIMD_MM(set1_ps)(0.0876f); +const auto one_zero_three = SIMD_MM(set1_ps)(0.0103592f); +const auto one_eight_five = SIMD_MM(set1_ps)(0.185f); +const auto zero_four_five = SIMD_MM(set1_ps)(0.45f); +const auto zero_five = SIMD_MM(set1_ps)(0.5f); +const auto one = SIMD_MM(set1_ps)(1.0f); +const auto one_three_five = SIMD_MM(set1_ps)(1.035f); +const auto two = SIMD_MM(set1_ps)(2.0f); +const auto three = SIMD_MM(set1_ps)(3.0f); +const auto gainAdjustment2Pole = SIMD_MM(set1_ps)(0.74f); +const auto 
gainAdjustment4Pole = SIMD_MM(set1_ps)(0.6f); template inline void makeCoefficients(FilterCoefficientMaker *cm, Poles p, float freq, @@ -133,141 +133,152 @@ inline void makeCoefficients(FilterCoefficientMaker *cm, Poles p cm->FromDirect(lC); } -inline __m128 diodePairResistanceApprox(__m128 x) +inline SIMD_M128 diodePairResistanceApprox(SIMD_M128 x) { // return (((((0.0103592f * x) + 0.00920833f) * x + 0.185f) * x + 0.05f) * x + 1.0f); - return _mm_add_ps( - _mm_mul_ps( - _mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(one_zero_three, x), - nine_two_zero), - x), - one_eight_five), - x), - zero_zero_five), + return SIMD_MM(add_ps)( + SIMD_MM(mul_ps)( + SIMD_MM(add_ps)( + SIMD_MM(mul_ps)( + SIMD_MM(add_ps)( + SIMD_MM(mul_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(one_zero_three, x), nine_two_zero), x), + one_eight_five), + x), + zero_zero_five), x), one); // Taylor approximation of a slightly mismatched diode pair } // resolve 0-delay feedback -inline __m128 NewtonRaphson12dB(__m128 sample, QuadFilterUnitState *__restrict f) +inline SIMD_M128 NewtonRaphson12dB(SIMD_M128 sample, QuadFilterUnitState *__restrict f) { // calculating feedback non-linear transconducance and compensated for R (-1) // boosting non-linearity - __m128 tCfb; - __m128 selfOscEnabledMask = _mm_cmpeq_ps(f->C[self_osc_push], one); - __m128 selfOscOffVal = - _mm_sub_ps(diodePairResistanceApprox(_mm_mul_ps(f->R[s1], eight_seven_six)), one); - __m128 selfOscOnVal = _mm_sub_ps( - diodePairResistanceApprox(_mm_mul_ps(f->R[s1], eight_seven_six)), one_three_five); - tCfb = _mm_add_ps(_mm_and_ps(selfOscEnabledMask, selfOscOnVal), - _mm_andnot_ps(selfOscEnabledMask, selfOscOffVal)); + SIMD_M128 tCfb; + auto selfOscEnabledMask = SIMD_MM(cmpeq_ps)(f->C[self_osc_push], one); + auto selfOscOffVal = + SIMD_MM(sub_ps)(diodePairResistanceApprox(SIMD_MM(mul_ps)(f->R[s1], eight_seven_six)), one); + auto selfOscOnVal = SIMD_MM(sub_ps)( + diodePairResistanceApprox(SIMD_MM(mul_ps)(f->R[s1], 
eight_seven_six)), one_three_five); + tCfb = SIMD_MM(add_ps)(SIMD_MM(and_ps)(selfOscEnabledMask, selfOscOnVal), + SIMD_MM(andnot_ps)(selfOscEnabledMask, selfOscOffVal)); // resolve linear feedback // float y = ((sample - 2*(s1*(R+tCfb)) - g*s1 - s2)/(1+ g*(2*(R+tCfb)+ g))); - __m128 y = _mm_div_ps( - _mm_sub_ps( - _mm_sub_ps(_mm_sub_ps(sample, _mm_mul_ps(two, _mm_mul_ps(f->R[s1], - _mm_add_ps(f->C[R12], tCfb)))), - _mm_mul_ps(f->C[g12], f->R[s1])), + auto y = SIMD_MM(div_ps)( + SIMD_MM(sub_ps)( + SIMD_MM(sub_ps)( + SIMD_MM(sub_ps)( + sample, SIMD_MM(mul_ps)( + two, SIMD_MM(mul_ps)(f->R[s1], SIMD_MM(add_ps)(f->C[R12], tCfb)))), + SIMD_MM(mul_ps)(f->C[g12], f->R[s1])), f->R[s2]), - _mm_add_ps( - one, _mm_mul_ps(f->C[g12], - _mm_add_ps(_mm_mul_ps(two, _mm_add_ps(f->C[R12], tCfb)), f->C[g12])))); + SIMD_MM(add_ps)( + one, + SIMD_MM(mul_ps)(f->C[g12], + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(two, SIMD_MM(add_ps)(f->C[R12], tCfb)), + f->C[g12])))); return y; } -inline __m128 process_2_pole(QuadFilterUnitState *__restrict f, __m128 sample) +inline SIMD_M128 process_2_pole(QuadFilterUnitState *__restrict f, SIMD_M128 sample) { for (int i = 0; i < n_obxd12_coeff; i++) { - f->C[i] = _mm_add_ps(f->C[i], f->dC[i]); + f->C[i] = SIMD_MM(add_ps)(f->C[i], f->dC[i]); } // float v = ((sample- R * s1*2 - g2*s1 - s2)/(1+ R*g1*2 + g1*g2)); - __m128 v = NewtonRaphson12dB(sample, f); + auto v = NewtonRaphson12dB(sample, f); // float y1 = v * g + s1; - __m128 y1 = _mm_add_ps(_mm_mul_ps(v, f->C[g12]), f->R[s1]); + auto y1 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(v, f->C[g12]), f->R[s1]); // s1 = v * g + y1; - f->R[s1] = _mm_add_ps(_mm_mul_ps(v, f->C[g12]), y1); + f->R[s1] = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(v, f->C[g12]), y1); // float y2 = y1 * g + s2; - __m128 y2 = _mm_add_ps(_mm_mul_ps(y1, f->C[g12]), f->R[s2]); + auto y2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(y1, f->C[g12]), f->R[s2]); // s2 = y1 * g + y2; - f->R[s2] = _mm_add_ps(_mm_mul_ps(y1, f->C[g12]), y2); - - __m128 mc; - __m128 mask_bp = 
_mm_cmpeq_ps(f->C[bandpass], zero); - __m128 bp_false = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(one, f->C[multimode]), y2), - _mm_mul_ps(f->C[multimode], v)); - __m128 mask = _mm_cmplt_ps(f->C[multimode], zero_five); - __m128 val1 = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(zero_five, f->C[multimode]), y2), - _mm_mul_ps(f->C[multimode], y1)); - __m128 val2 = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(one, f->C[multimode]), y1), - _mm_mul_ps(_mm_sub_ps(f->C[multimode], zero_five), v)); - __m128 bp_true = _mm_add_ps(_mm_and_ps(mask, val1), _mm_andnot_ps(mask, val2)); - mc = _mm_add_ps(_mm_and_ps(mask_bp, bp_false), _mm_andnot_ps(mask_bp, bp_true)); - return _mm_mul_ps(mc, gainAdjustment2Pole); + f->R[s2] = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(y1, f->C[g12]), y2); + + SIMD_M128 mc; + auto mask_bp = SIMD_MM(cmpeq_ps)(f->C[bandpass], zero); + auto bp_false = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(one, f->C[multimode]), y2), + SIMD_MM(mul_ps)(f->C[multimode], v)); + auto mask = SIMD_MM(cmplt_ps)(f->C[multimode], zero_five); + auto val1 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(zero_five, f->C[multimode]), y2), + SIMD_MM(mul_ps)(f->C[multimode], y1)); + auto val2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(one, f->C[multimode]), y1), + SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(f->C[multimode], zero_five), v)); + auto bp_true = SIMD_MM(add_ps)(SIMD_MM(and_ps)(mask, val1), SIMD_MM(andnot_ps)(mask, val2)); + mc = SIMD_MM(add_ps)(SIMD_MM(and_ps)(mask_bp, bp_false), SIMD_MM(andnot_ps)(mask_bp, bp_true)); + return SIMD_MM(mul_ps)(mc, gainAdjustment2Pole); } -inline __m128 NewtonRaphsonR24dB(__m128 sample, __m128 lpc, QuadFilterUnitState *__restrict f) +inline SIMD_M128 NewtonRaphsonR24dB(SIMD_M128 sample, SIMD_M128 lpc, + QuadFilterUnitState *__restrict f) { // float ml = 1 / (1+g24); - __m128 ml = _mm_div_ps(one, _mm_add_ps(one, f->C[g24])); + auto ml = SIMD_MM(div_ps)(one, SIMD_MM(add_ps)(one, f->C[g24])); // float S = (lpc * (lpc * (lpc * f->R[s1] + f->R[s2]) + f->R[s3]) + f->R[s4]) * ml; - 
__m128 S = _mm_mul_ps( - _mm_add_ps(_mm_mul_ps(lpc, _mm_add_ps(_mm_mul_ps(lpc, _mm_add_ps(_mm_mul_ps(lpc, f->R[s1]), - f->R[s2])), - f->R[s3])), - f->R[s4]), + auto S = SIMD_MM(mul_ps)( + SIMD_MM(add_ps)( + SIMD_MM(mul_ps)( + lpc, + SIMD_MM(add_ps)( + SIMD_MM(mul_ps)(lpc, SIMD_MM(add_ps)(SIMD_MM(mul_ps)(lpc, f->R[s1]), f->R[s2])), + f->R[s3])), + f->R[s4]), ml); // float G = lpc * lpc * lpc * lpc; - __m128 G = _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(lpc, lpc), lpc), lpc); + auto G = SIMD_MM(mul_ps)(SIMD_MM(mul_ps)(SIMD_MM(mul_ps)(lpc, lpc), lpc), lpc); // float y = (sample - f->C[R24] * S) / (1 + f->C[R24] * G); - __m128 y = _mm_div_ps(_mm_sub_ps(sample, _mm_mul_ps(f->C[R24], S)), - _mm_add_ps(one, _mm_mul_ps(f->C[R24], G))); + auto y = SIMD_MM(div_ps)(SIMD_MM(sub_ps)(sample, SIMD_MM(mul_ps)(f->C[R24], S)), + SIMD_MM(add_ps)(one, SIMD_MM(mul_ps)(f->C[R24], G))); return y; } -inline static __m128 tptpc(__m128 &state, __m128 inp, __m128 cutoff) +inline static SIMD_M128 tptpc(SIMD_M128 &state, SIMD_M128 inp, SIMD_M128 cutoff) { - __m128 v = _mm_div_ps(_mm_mul_ps(_mm_sub_ps(inp, state), cutoff), _mm_add_ps(one, cutoff)); - __m128 res = _mm_add_ps(v, state); - state = _mm_add_ps(res, v); + auto v = SIMD_MM(div_ps)(SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(inp, state), cutoff), + SIMD_MM(add_ps)(one, cutoff)); + auto res = SIMD_MM(add_ps)(v, state); + state = SIMD_MM(add_ps)(res, v); return res; } -inline __m128 process_4_pole(QuadFilterUnitState *__restrict f, __m128 sample) +inline SIMD_M128 process_4_pole(QuadFilterUnitState *__restrict f, SIMD_M128 sample) { for (int i = 0; i < n_obxd24_coeff; i++) { - f->C[i] = _mm_add_ps(f->C[i], f->dC[i]); + f->C[i] = SIMD_MM(add_ps)(f->C[i], f->dC[i]); } // float lpc = f->C[g] / (1 + f->C[g]); - __m128 lpc = _mm_div_ps(f->C[g24], _mm_add_ps(one, f->C[g24])); + auto lpc = SIMD_MM(div_ps)(f->C[g24], SIMD_MM(add_ps)(one, f->C[g24])); // float y0 = NewtonRaphsonR24dB(sample,f->C[g],lpc); - __m128 y0 = NewtonRaphsonR24dB(sample, lpc, f); + auto y0 = 
NewtonRaphsonR24dB(sample, lpc, f); // first lowpass in cascade // double v = (y0 - f->R[s1]) * lpc; - __m128 v = _mm_mul_ps(_mm_sub_ps(y0, f->R[s1]), lpc); + auto v = SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(y0, f->R[s1]), lpc); // double res = v + f->R[s1]; - __m128 res = _mm_add_ps(v, f->R[s1]); + auto res = SIMD_MM(add_ps)(v, f->R[s1]); // f->R[s1] = res + v; - f->R[s1] = _mm_add_ps(res, v); + f->R[s1] = SIMD_MM(add_ps)(res, v); // damping // f->R[s1] =atan(s1*rcor24)*rcor24inv; - __m128 s1_rcor24 = _mm_mul_ps(f->R[s1], f->C[rcor24]); + auto s1_rcor24 = SIMD_MM(mul_ps)(f->R[s1], f->C[rcor24]); // this array must be aligned to a 16-byte boundary for SSE store/load float s1_rcor24_arr alignas(16)[ssew]; - _mm_store_ps(s1_rcor24_arr, s1_rcor24); + SIMD_MM(store_ps)(s1_rcor24_arr, s1_rcor24); for (int i = 0; i < ssew; i++) { @@ -277,34 +288,36 @@ inline __m128 process_4_pole(QuadFilterUnitState *__restrict f, __m128 sample) s1_rcor24_arr[i] = 0.f; } - s1_rcor24 = _mm_load_ps(s1_rcor24_arr); - f->R[s1] = _mm_mul_ps(s1_rcor24, f->C[rcor24inv]); - - __m128 y1 = res; - __m128 y2 = tptpc(f->R[s2], y1, f->C[g24]); - __m128 y3 = tptpc(f->R[s3], y2, f->C[g24]); - __m128 y4 = tptpc(f->R[s4], y3, f->C[g24]); - - __m128 mc; - - __m128 zero_val = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(one, f->C[pole_mix_scaled]), y4), - _mm_add_ps(f->C[pole_mix_scaled], y3)); - __m128 zero_mask = _mm_cmpeq_ps(f->C[pole_mix_inv_int], zero); - __m128 one_mask = _mm_cmpeq_ps(f->C[pole_mix_inv_int], one); - __m128 one_val = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(one, f->C[pole_mix_scaled]), y3), - _mm_mul_ps(f->C[pole_mix_scaled], y2)); - __m128 two_mask = _mm_cmpeq_ps(f->C[pole_mix_inv_int], two); - __m128 two_val = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(one, f->C[pole_mix_scaled]), y2), - _mm_mul_ps(f->C[pole_mix_scaled], y1)); - __m128 three_mask = _mm_cmpeq_ps(f->C[pole_mix_inv_int], three); - __m128 three_val = y1; - mc = _mm_add_ps(_mm_and_ps(zero_mask, zero_val), _mm_and_ps(one_mask, one_val)); - mc = _mm_add_ps(mc, 
- _mm_add_ps(_mm_and_ps(two_mask, two_val), _mm_and_ps(three_mask, three_val))); + s1_rcor24 = SIMD_MM(load_ps)(s1_rcor24_arr); + f->R[s1] = SIMD_MM(mul_ps)(s1_rcor24, f->C[rcor24inv]); + + auto y1 = res; + auto y2 = tptpc(f->R[s2], y1, f->C[g24]); + auto y3 = tptpc(f->R[s3], y2, f->C[g24]); + auto y4 = tptpc(f->R[s4], y3, f->C[g24]); + + SIMD_M128 mc; + + auto zero_val = + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(one, f->C[pole_mix_scaled]), y4), + SIMD_MM(add_ps)(f->C[pole_mix_scaled], y3)); + auto zero_mask = SIMD_MM(cmpeq_ps)(f->C[pole_mix_inv_int], zero); + auto one_mask = SIMD_MM(cmpeq_ps)(f->C[pole_mix_inv_int], one); + auto one_val = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(one, f->C[pole_mix_scaled]), y3), + SIMD_MM(mul_ps)(f->C[pole_mix_scaled], y2)); + auto two_mask = SIMD_MM(cmpeq_ps)(f->C[pole_mix_inv_int], two); + auto two_val = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(SIMD_MM(sub_ps)(one, f->C[pole_mix_scaled]), y2), + SIMD_MM(mul_ps)(f->C[pole_mix_scaled], y1)); + auto three_mask = SIMD_MM(cmpeq_ps)(f->C[pole_mix_inv_int], three); + auto three_val = y1; + mc = SIMD_MM(add_ps)(SIMD_MM(and_ps)(zero_mask, zero_val), SIMD_MM(and_ps)(one_mask, one_val)); + mc = SIMD_MM(add_ps)(mc, SIMD_MM(add_ps)(SIMD_MM(and_ps)(two_mask, two_val), + SIMD_MM(and_ps)(three_mask, three_val))); // half volume compensation - auto out = _mm_mul_ps(mc, _mm_add_ps(one, _mm_mul_ps(f->C[R24], zero_four_five))); - return _mm_mul_ps(out, gainAdjustment4Pole); + auto out = + SIMD_MM(mul_ps)(mc, SIMD_MM(add_ps)(one, SIMD_MM(mul_ps)(f->C[R24], zero_four_five))); + return SIMD_MM(mul_ps)(out, gainAdjustment4Pole); } } // namespace sst::filters::OBXDFilter diff --git a/include/sst/filters/QuadFilterUnit.h b/include/sst/filters/QuadFilterUnit.h index 379d998..c4895ec 100644 --- a/include/sst/filters/QuadFilterUnit.h +++ b/include/sst/filters/QuadFilterUnit.h @@ -29,13 +29,13 @@ constexpr int n_filter_registers = 16; struct alignas(16) QuadFilterUnitState { /** Filter coefficients */ 
- __m128 C[n_cm_coeffs]; + SIMD_M128 C[n_cm_coeffs]; /** Filter coefficients "delta" */ - __m128 dC[n_cm_coeffs]; + SIMD_M128 dC[n_cm_coeffs]; /** Filter state */ - __m128 R[n_filter_registers]; + SIMD_M128 R[n_filter_registers]; /** Array of pointers to the filter's delay buffers */ float *DB[4]; @@ -54,7 +54,7 @@ struct alignas(16) QuadFilterUnitState }; /** Typedef alias for a filter unit processing method. */ -typedef __m128 (*FilterUnitQFPtr)(QuadFilterUnitState *__restrict, __m128 in); +typedef SIMD_M128 (*FilterUnitQFPtr)(QuadFilterUnitState *__restrict, SIMD_M128 in); /** * Returns a filter unit pointer and optionally applies gain scaling. The gain diff --git a/include/sst/filters/QuadFilterUnit_Impl.h b/include/sst/filters/QuadFilterUnit_Impl.h index f03294c..6f033ea 100644 --- a/include/sst/filters/QuadFilterUnit_Impl.h +++ b/include/sst/filters/QuadFilterUnit_Impl.h @@ -30,212 +30,223 @@ namespace sst::filters { -inline __m128 SVFLP12Aquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 SVFLP12Aquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // F1 - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // Q1 + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // F1 + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // Q1 - __m128 L = _mm_add_ps(f->R[1], _mm_mul_ps(f->C[0], f->R[0])); - __m128 H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[0])); - __m128 B = _mm_add_ps(f->R[0], _mm_mul_ps(f->C[0], H)); + auto L = SIMD_MM(add_ps)(f->R[1], SIMD_MM(mul_ps)(f->C[0], f->R[0])); + auto H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[0])); + auto B = SIMD_MM(add_ps)(f->R[0], SIMD_MM(mul_ps)(f->C[0], H)); - __m128 L2 = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - __m128 H2 = _mm_sub_ps(_mm_sub_ps(in, L2), _mm_mul_ps(f->C[1], B)); - __m128 B2 = _mm_add_ps(B, _mm_mul_ps(f->C[0], H2)); + auto L2 = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + auto H2 = 
SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L2), SIMD_MM(mul_ps)(f->C[1], B)); + auto B2 = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H2)); - f->R[0] = _mm_mul_ps(B2, f->R[2]); - f->R[1] = _mm_mul_ps(L2, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(B2, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(L2, f->R[2]); - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[2], _mm_mul_ps(B, B)))); + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[2], SIMD_MM(mul_ps)(B, B)))); - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Gain - return _mm_mul_ps(L2, f->C[3]); + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Gain + return SIMD_MM(mul_ps)(L2, f->C[3]); } -inline __m128 SVFLP24Aquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 SVFLP24Aquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // F1 - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // Q1 + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // F1 + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // Q1 - __m128 L = _mm_add_ps(f->R[1], _mm_mul_ps(f->C[0], f->R[0])); - __m128 H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[0])); - __m128 B = _mm_add_ps(f->R[0], _mm_mul_ps(f->C[0], H)); + auto L = SIMD_MM(add_ps)(f->R[1], SIMD_MM(mul_ps)(f->C[0], f->R[0])); + auto H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[0])); + auto B = SIMD_MM(add_ps)(f->R[0], SIMD_MM(mul_ps)(f->C[0], H)); - L = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], B)); - B = _mm_add_ps(B, _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], B)); + B = 
SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H)); - f->R[0] = _mm_mul_ps(B, f->R[2]); - f->R[1] = _mm_mul_ps(L, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(B, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(L, f->R[2]); in = L; - L = _mm_add_ps(f->R[4], _mm_mul_ps(f->C[0], f->R[3])); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[3])); - B = _mm_add_ps(f->R[3], _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(f->R[4], SIMD_MM(mul_ps)(f->C[0], f->R[3])); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[3])); + B = SIMD_MM(add_ps)(f->R[3], SIMD_MM(mul_ps)(f->C[0], H)); - L = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], B)); - B = _mm_add_ps(B, _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], B)); + B = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H)); - f->R[3] = _mm_mul_ps(B, f->R[2]); - f->R[4] = _mm_mul_ps(L, f->R[2]); + f->R[3] = SIMD_MM(mul_ps)(B, f->R[2]); + f->R[4] = SIMD_MM(mul_ps)(L, f->R[2]); - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[2], _mm_mul_ps(B, B)))); + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[2], SIMD_MM(mul_ps)(B, B)))); - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Gain - return _mm_mul_ps(L, f->C[3]); + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Gain + return SIMD_MM(mul_ps)(L, f->C[3]); } -inline __m128 SVFHP24Aquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 SVFHP24Aquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // F1 - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // Q1 + f->C[0] = 
SIMD_MM(add_ps)(f->C[0], f->dC[0]); // F1 + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // Q1 - __m128 L = _mm_add_ps(f->R[1], _mm_mul_ps(f->C[0], f->R[0])); - __m128 H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[0])); - __m128 B = _mm_add_ps(f->R[0], _mm_mul_ps(f->C[0], H)); + auto L = SIMD_MM(add_ps)(f->R[1], SIMD_MM(mul_ps)(f->C[0], f->R[0])); + auto H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[0])); + auto B = SIMD_MM(add_ps)(f->R[0], SIMD_MM(mul_ps)(f->C[0], H)); - L = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], B)); - B = _mm_add_ps(B, _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], B)); + B = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H)); - f->R[0] = _mm_mul_ps(B, f->R[2]); - f->R[1] = _mm_mul_ps(L, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(B, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(L, f->R[2]); in = H; - L = _mm_add_ps(f->R[4], _mm_mul_ps(f->C[0], f->R[3])); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[3])); - B = _mm_add_ps(f->R[3], _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(f->R[4], SIMD_MM(mul_ps)(f->C[0], f->R[3])); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[3])); + B = SIMD_MM(add_ps)(f->R[3], SIMD_MM(mul_ps)(f->C[0], H)); - L = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], B)); - B = _mm_add_ps(B, _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], B)); + B = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H)); - f->R[3] = _mm_mul_ps(B, f->R[2]); - f->R[4] = _mm_mul_ps(L, f->R[2]); + f->R[3] = SIMD_MM(mul_ps)(B, f->R[2]); + f->R[4] = SIMD_MM(mul_ps)(L, f->R[2]); - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = 
_mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[2], _mm_mul_ps(B, B)))); + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[2], SIMD_MM(mul_ps)(B, B)))); - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Gain - return _mm_mul_ps(H, f->C[3]); + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Gain + return SIMD_MM(mul_ps)(H, f->C[3]); } -inline __m128 SVFBP24Aquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 SVFBP24Aquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // F1 - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // Q1 + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // F1 + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // Q1 - __m128 L = _mm_add_ps(f->R[1], _mm_mul_ps(f->C[0], f->R[0])); - __m128 H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[0])); - __m128 B = _mm_add_ps(f->R[0], _mm_mul_ps(f->C[0], H)); + auto L = SIMD_MM(add_ps)(f->R[1], SIMD_MM(mul_ps)(f->C[0], f->R[0])); + auto H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[0])); + auto B = SIMD_MM(add_ps)(f->R[0], SIMD_MM(mul_ps)(f->C[0], H)); - L = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], B)); - B = _mm_add_ps(B, _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], B)); + B = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H)); - f->R[0] = _mm_mul_ps(B, f->R[2]); - f->R[1] = _mm_mul_ps(L, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(B, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(L, f->R[2]); in = B; - L = _mm_add_ps(f->R[4], _mm_mul_ps(f->C[0], f->R[3])); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[3])); - B = _mm_add_ps(f->R[3], _mm_mul_ps(f->C[0], H)); + L = 
SIMD_MM(add_ps)(f->R[4], SIMD_MM(mul_ps)(f->C[0], f->R[3])); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[3])); + B = SIMD_MM(add_ps)(f->R[3], SIMD_MM(mul_ps)(f->C[0], H)); - L = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], B)); - B = _mm_add_ps(B, _mm_mul_ps(f->C[0], H)); + L = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], B)); + B = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H)); - f->R[3] = _mm_mul_ps(B, f->R[2]); - f->R[4] = _mm_mul_ps(L, f->R[2]); + f->R[3] = SIMD_MM(mul_ps)(B, f->R[2]); + f->R[4] = SIMD_MM(mul_ps)(L, f->R[2]); - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[2], _mm_mul_ps(B, B)))); + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[2], SIMD_MM(mul_ps)(B, B)))); - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Gain - return _mm_mul_ps(B, f->C[3]); + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Gain + return SIMD_MM(mul_ps)(B, f->C[3]); } -inline __m128 SVFHP12Aquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 SVFHP12Aquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // F1 - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // Q1 + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // F1 + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // Q1 - __m128 L = _mm_add_ps(f->R[1], _mm_mul_ps(f->C[0], f->R[0])); - __m128 H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[0])); - __m128 B = _mm_add_ps(f->R[0], _mm_mul_ps(f->C[0], H)); + auto L = SIMD_MM(add_ps)(f->R[1], SIMD_MM(mul_ps)(f->C[0], f->R[0])); + auto H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, 
L), SIMD_MM(mul_ps)(f->C[1], f->R[0])); + auto B = SIMD_MM(add_ps)(f->R[0], SIMD_MM(mul_ps)(f->C[0], H)); - __m128 L2 = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - __m128 H2 = _mm_sub_ps(_mm_sub_ps(in, L2), _mm_mul_ps(f->C[1], B)); - __m128 B2 = _mm_add_ps(B, _mm_mul_ps(f->C[0], H2)); + auto L2 = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + auto H2 = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L2), SIMD_MM(mul_ps)(f->C[1], B)); + auto B2 = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H2)); - f->R[0] = _mm_mul_ps(B2, f->R[2]); - f->R[1] = _mm_mul_ps(L2, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(B2, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(L2, f->R[2]); - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[2], _mm_mul_ps(B, B)))); + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[2], SIMD_MM(mul_ps)(B, B)))); - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Gain - return _mm_mul_ps(H2, f->C[3]); + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Gain + return SIMD_MM(mul_ps)(H2, f->C[3]); } -inline __m128 SVFBP12Aquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 SVFBP12Aquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // F1 - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // Q1 + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // F1 + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // Q1 - __m128 L = _mm_add_ps(f->R[1], _mm_mul_ps(f->C[0], f->R[0])); - __m128 H = _mm_sub_ps(_mm_sub_ps(in, L), _mm_mul_ps(f->C[1], f->R[0])); - __m128 B = _mm_add_ps(f->R[0], _mm_mul_ps(f->C[0], H)); + auto L = SIMD_MM(add_ps)(f->R[1], SIMD_MM(mul_ps)(f->C[0], f->R[0])); + auto H = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L), SIMD_MM(mul_ps)(f->C[1], f->R[0])); + auto B 
= SIMD_MM(add_ps)(f->R[0], SIMD_MM(mul_ps)(f->C[0], H)); - __m128 L2 = _mm_add_ps(L, _mm_mul_ps(f->C[0], B)); - __m128 H2 = _mm_sub_ps(_mm_sub_ps(in, L2), _mm_mul_ps(f->C[1], B)); - __m128 B2 = _mm_add_ps(B, _mm_mul_ps(f->C[0], H2)); + auto L2 = SIMD_MM(add_ps)(L, SIMD_MM(mul_ps)(f->C[0], B)); + auto H2 = SIMD_MM(sub_ps)(SIMD_MM(sub_ps)(in, L2), SIMD_MM(mul_ps)(f->C[1], B)); + auto B2 = SIMD_MM(add_ps)(B, SIMD_MM(mul_ps)(f->C[0], H2)); - f->R[0] = _mm_mul_ps(B2, f->R[2]); - f->R[1] = _mm_mul_ps(L2, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(B2, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(L2, f->R[2]); - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[2], _mm_mul_ps(B, B)))); + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[2], SIMD_MM(mul_ps)(B, B)))); - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Gain - return _mm_mul_ps(B2, f->C[3]); + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Gain + return SIMD_MM(mul_ps)(B2, f->C[3]); } -inline __m128 IIR12Aquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 IIR12Aquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // K2 - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Q2 - __m128 f2 = _mm_sub_ps(_mm_mul_ps(f->C[3], in), _mm_mul_ps(f->C[1], f->R[1])); // Q2*in - K2*R1 - __m128 g2 = _mm_add_ps(_mm_mul_ps(f->C[1], in), _mm_mul_ps(f->C[3], f->R[1])); // K2*in + Q2*R1 - - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // K1 - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // Q1 - __m128 f1 = _mm_sub_ps(_mm_mul_ps(f->C[2], f2), _mm_mul_ps(f->C[0], f->R[0])); // Q1*f2 - K1*R0 - __m128 g1 = _mm_add_ps(_mm_mul_ps(f->C[0], f2), _mm_mul_ps(f->C[2], f->R[0])); // K1*f2 + Q1*R0 - - f->C[4] = 
_mm_add_ps(f->C[4], f->dC[4]); // V1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // V2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // V3 - __m128 y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[6], g2), _mm_mul_ps(f->C[5], g1)), - _mm_mul_ps(f->C[4], f1)); + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // K2 + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Q2 + auto f2 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[3], in), + SIMD_MM(mul_ps)(f->C[1], f->R[1])); // Q2*in - K2*R1 + auto g2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[1], in), + SIMD_MM(mul_ps)(f->C[3], f->R[1])); // K2*in + Q2*R1 + + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // K1 + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); // Q1 + auto f1 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[2], f2), + SIMD_MM(mul_ps)(f->C[0], f->R[0])); // Q1*f2 - K1*R0 + auto g1 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[0], f2), + SIMD_MM(mul_ps)(f->C[2], f->R[0])); // K1*f2 + Q1*R0 + + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // V1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // V2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // V3 + auto y = + SIMD_MM(add_ps)(SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[6], g2), SIMD_MM(mul_ps)(f->C[5], g1)), + SIMD_MM(mul_ps)(f->C[4], f1)); f->R[0] = f1; f->R[1] = g1; @@ -243,122 +254,136 @@ inline __m128 IIR12Aquad(QuadFilterUnitState *__restrict f, __m128 in) return y; } -inline __m128 IIR12Bquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 IIR12Bquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - __m128 f2 = _mm_sub_ps(_mm_mul_ps(f->C[3], in), _mm_mul_ps(f->C[1], f->R[1])); // Q2*in - K2*R1 - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // K2 - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Q2 - __m128 g2 = _mm_add_ps(_mm_mul_ps(f->C[1], in), _mm_mul_ps(f->C[3], f->R[1])); // K2*in + Q2*R1 - - __m128 f1 = _mm_sub_ps(_mm_mul_ps(f->C[2], f2), _mm_mul_ps(f->C[0], f->R[0])); // Q1*f2 - K1*R0 - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // K1 - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // 
Q1 - __m128 g1 = _mm_add_ps(_mm_mul_ps(f->C[0], f2), _mm_mul_ps(f->C[2], f->R[0])); // K1*f2 + Q1*R0 - - f->C[4] = _mm_add_ps(f->C[4], f->dC[4]); // V1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // V2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // V3 - __m128 y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[6], g2), _mm_mul_ps(f->C[5], g1)), - _mm_mul_ps(f->C[4], f1)); - - f->R[0] = _mm_mul_ps(f1, f->R[2]); - f->R[1] = _mm_mul_ps(g1, f->R[2]); - - f->C[7] = _mm_add_ps(f->C[7], f->dC[7]); // Clipgain - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[7], _mm_mul_ps(y, y)))); + auto f2 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[3], in), + SIMD_MM(mul_ps)(f->C[1], f->R[1])); // Q2*in - K2*R1 + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // K2 + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Q2 + auto g2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[1], in), + SIMD_MM(mul_ps)(f->C[3], f->R[1])); // K2*in + Q2*R1 + + auto f1 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[2], f2), + SIMD_MM(mul_ps)(f->C[0], f->R[0])); // Q1*f2 - K1*R0 + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // K1 + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); // Q1 + auto g1 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[0], f2), + SIMD_MM(mul_ps)(f->C[2], f->R[0])); // K1*f2 + Q1*R0 + + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // V1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // V2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // V3 + auto y = + SIMD_MM(add_ps)(SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[6], g2), SIMD_MM(mul_ps)(f->C[5], g1)), + SIMD_MM(mul_ps)(f->C[4], f1)); + + f->R[0] = SIMD_MM(mul_ps)(f1, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(g1, f->R[2]); + + f->C[7] = SIMD_MM(add_ps)(f->C[7], f->dC[7]); // Clipgain + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[7], SIMD_MM(mul_ps)(y, y)))); return y; } -inline 
__m128 IIR12WDFquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 IIR12WDFquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // E1 * sc - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // E2 * sc - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // -E1 / sc - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // -E2 / sc - f->C[4] = _mm_add_ps(f->C[4], f->dC[4]); // C1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // C2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // D - - __m128 y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[4], f->R[0]), _mm_mul_ps(f->C[6], in)), - _mm_mul_ps(f->C[5], f->R[1])); - __m128 t = - _mm_add_ps(in, _mm_add_ps(_mm_mul_ps(f->C[2], f->R[0]), _mm_mul_ps(f->C[3], f->R[1]))); - - __m128 s1 = _mm_add_ps(_mm_mul_ps(t, f->C[0]), f->R[0]); - __m128 s2 = _mm_sub_ps(_mm_setzero_ps(), _mm_add_ps(_mm_mul_ps(t, f->C[1]), f->R[1])); + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // E1 * sc + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // E2 * sc + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); // -E1 / sc + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // -E2 / sc + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // C1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // C2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // D + + auto y = SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[4], f->R[0]), SIMD_MM(mul_ps)(f->C[6], in)), + SIMD_MM(mul_ps)(f->C[5], f->R[1])); + auto t = SIMD_MM(add_ps)( + in, SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[2], f->R[0]), SIMD_MM(mul_ps)(f->C[3], f->R[1]))); + + auto s1 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(t, f->C[0]), f->R[0]); + auto s2 = SIMD_MM(sub_ps)(SIMD_MM(setzero_ps)(), + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(t, f->C[1]), f->R[1])); // f->R[0] = s1; // f->R[1] = s2; - f->R[0] = _mm_mul_ps(s1, f->R[2]); - f->R[1] = _mm_mul_ps(s2, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(s1, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(s2, f->R[2]); - f->C[7] = _mm_add_ps(f->C[7], f->dC[7]); // Clipgain - const 
__m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[7], _mm_mul_ps(y, y)))); + f->C[7] = SIMD_MM(add_ps)(f->C[7], f->dC[7]); // Clipgain + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[7], SIMD_MM(mul_ps)(y, y)))); return y; } -inline __m128 IIR12CFCquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 IIR12CFCquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { // State-space with clipgain (2nd order, limit within register) - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // ar - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // ai - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // b1 - f->C[4] = _mm_add_ps(f->C[4], f->dC[4]); // c1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // c2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // d + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // ar + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // ai + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); // b1 + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // c1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // c2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // d // y(i) = c1.*s(1) + c2.*s(2) + d.*x(i); // s1 = ar.*s(1) - ai.*s(2) + x(i); // s2 = ai.*s(1) + ar.*s(2); - __m128 y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[4], f->R[0]), _mm_mul_ps(f->C[6], in)), - _mm_mul_ps(f->C[5], f->R[1])); - __m128 s1 = _mm_add_ps(_mm_mul_ps(in, f->C[2]), - _mm_sub_ps(_mm_mul_ps(f->C[0], f->R[0]), _mm_mul_ps(f->C[1], f->R[1]))); - __m128 s2 = _mm_add_ps(_mm_mul_ps(f->C[1], f->R[0]), _mm_mul_ps(f->C[0], f->R[1])); + auto y = SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[4], f->R[0]), SIMD_MM(mul_ps)(f->C[6], in)), + SIMD_MM(mul_ps)(f->C[5], f->R[1])); + auto s1 = SIMD_MM(add_ps)( + SIMD_MM(mul_ps)(in, f->C[2]), + SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[0], f->R[0]), SIMD_MM(mul_ps)(f->C[1], f->R[1]))); + 
auto s2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[1], f->R[0]), SIMD_MM(mul_ps)(f->C[0], f->R[1])); - f->R[0] = _mm_mul_ps(s1, f->R[2]); - f->R[1] = _mm_mul_ps(s2, f->R[2]); + f->R[0] = SIMD_MM(mul_ps)(s1, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(s2, f->R[2]); - f->C[7] = _mm_add_ps(f->C[7], f->dC[7]); // Clipgain - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[7], _mm_mul_ps(y, y)))); + f->C[7] = SIMD_MM(add_ps)(f->C[7], f->dC[7]); // Clipgain + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = + SIMD_MM(max_ps)(m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[7], SIMD_MM(mul_ps)(y, y)))); return y; } -inline __m128 IIR12CFLquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 IIR12CFLquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { // State-space with softer limiter - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // (ar) - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // (ai) - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // b1 - f->C[4] = _mm_add_ps(f->C[4], f->dC[4]); // c1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // c2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // d + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // (ar) + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // (ai) + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); // b1 + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // c1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // c2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // d // y(i) = c1.*s(1) + c2.*s(2) + d.*x(i); // s1 = ar.*s(1) - ai.*s(2) + x(i); // s2 = ai.*s(1) + ar.*s(2); - __m128 y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[4], f->R[0]), _mm_mul_ps(f->C[6], in)), - _mm_mul_ps(f->C[5], f->R[1])); - __m128 ar = _mm_mul_ps(f->C[0], f->R[2]); - __m128 ai = _mm_mul_ps(f->C[1], f->R[2]); - __m128 s1 = _mm_add_ps(_mm_mul_ps(in, f->C[2]), - _mm_sub_ps(_mm_mul_ps(ar, f->R[0]), _mm_mul_ps(ai, f->R[1]))); - __m128 s2 = 
_mm_add_ps(_mm_mul_ps(ai, f->R[0]), _mm_mul_ps(ar, f->R[1])); + auto y = SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[4], f->R[0]), SIMD_MM(mul_ps)(f->C[6], in)), + SIMD_MM(mul_ps)(f->C[5], f->R[1])); + auto ar = SIMD_MM(mul_ps)(f->C[0], f->R[2]); + auto ai = SIMD_MM(mul_ps)(f->C[1], f->R[2]); + auto s1 = SIMD_MM(add_ps)( + SIMD_MM(mul_ps)(in, f->C[2]), + SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(ar, f->R[0]), SIMD_MM(mul_ps)(ai, f->R[1]))); + auto s2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(ai, f->R[0]), SIMD_MM(mul_ps)(ar, f->R[1])); f->R[0] = s1; f->R[1] = s2; @@ -367,84 +392,93 @@ inline __m128 IIR12CFLquad(QuadFilterUnitState *__restrict f, __m128 in) mr = mr.*0.99 + m.*0.01;*/ // Limiter - const __m128 m001 = _mm_set1_ps(0.001f); - const __m128 m099 = _mm_set1_ps(0.999f); - const __m128 m1 = _mm_set1_ps(1.0f); - const __m128 m2 = _mm_set1_ps(2.0f); + const auto m001 = SIMD_MM(set1_ps)(0.001f); + const auto m099 = SIMD_MM(set1_ps)(0.999f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + const auto m2 = SIMD_MM(set1_ps)(2.0f); - __m128 m = _mm_rsqrt_ps( - _mm_max_ps(m1, _mm_mul_ps(m2, _mm_and_ps(y, basic_blocks::mechanics::m128_mask_absval)))); - f->R[2] = _mm_add_ps(_mm_mul_ps(f->R[2], m099), _mm_mul_ps(m, m001)); + auto m = SIMD_MM(rsqrt_ps)(SIMD_MM(max_ps)( + m1, SIMD_MM(mul_ps)(m2, SIMD_MM(and_ps)(y, basic_blocks::mechanics::m128_mask_absval)))); + f->R[2] = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->R[2], m099), SIMD_MM(mul_ps)(m, m001)); return y; } -inline __m128 IIR24CFCquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 IIR24CFCquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { // State-space with clipgain (2nd order, limit within register) - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // ar - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // ai - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // b1 - - f->C[4] = _mm_add_ps(f->C[4], f->dC[4]); // c1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // c2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // d - - __m128 y = 
_mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[4], f->R[0]), _mm_mul_ps(f->C[6], in)), - _mm_mul_ps(f->C[5], f->R[1])); - __m128 s1 = _mm_add_ps(_mm_mul_ps(in, f->C[2]), - _mm_sub_ps(_mm_mul_ps(f->C[0], f->R[0]), _mm_mul_ps(f->C[1], f->R[1]))); - __m128 s2 = _mm_add_ps(_mm_mul_ps(f->C[1], f->R[0]), _mm_mul_ps(f->C[0], f->R[1])); - - f->R[0] = _mm_mul_ps(s1, f->R[2]); - f->R[1] = _mm_mul_ps(s2, f->R[2]); - - __m128 y2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[4], f->R[3]), _mm_mul_ps(f->C[6], y)), - _mm_mul_ps(f->C[5], f->R[4])); - __m128 s3 = _mm_add_ps(_mm_mul_ps(y, f->C[2]), - _mm_sub_ps(_mm_mul_ps(f->C[0], f->R[3]), _mm_mul_ps(f->C[1], f->R[4]))); - __m128 s4 = _mm_add_ps(_mm_mul_ps(f->C[1], f->R[3]), _mm_mul_ps(f->C[0], f->R[4])); - - f->R[3] = _mm_mul_ps(s3, f->R[2]); - f->R[4] = _mm_mul_ps(s4, f->R[2]); - - f->C[7] = _mm_add_ps(f->C[7], f->dC[7]); // Clipgain - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[2] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[7], _mm_mul_ps(y2, y2)))); + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // ar + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // ai + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); // b1 + + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // c1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // c2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // d + + auto y = SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[4], f->R[0]), SIMD_MM(mul_ps)(f->C[6], in)), + SIMD_MM(mul_ps)(f->C[5], f->R[1])); + auto s1 = SIMD_MM(add_ps)( + SIMD_MM(mul_ps)(in, f->C[2]), + SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[0], f->R[0]), SIMD_MM(mul_ps)(f->C[1], f->R[1]))); + auto s2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[1], f->R[0]), SIMD_MM(mul_ps)(f->C[0], f->R[1])); + + f->R[0] = SIMD_MM(mul_ps)(s1, f->R[2]); + f->R[1] = SIMD_MM(mul_ps)(s2, f->R[2]); + + auto y2 = SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[4], f->R[3]), SIMD_MM(mul_ps)(f->C[6], y)), + SIMD_MM(mul_ps)(f->C[5], f->R[4])); + 
auto s3 = SIMD_MM(add_ps)( + SIMD_MM(mul_ps)(y, f->C[2]), + SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[0], f->R[3]), SIMD_MM(mul_ps)(f->C[1], f->R[4]))); + auto s4 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[1], f->R[3]), SIMD_MM(mul_ps)(f->C[0], f->R[4])); + + f->R[3] = SIMD_MM(mul_ps)(s3, f->R[2]); + f->R[4] = SIMD_MM(mul_ps)(s4, f->R[2]); + + f->C[7] = SIMD_MM(add_ps)(f->C[7], f->dC[7]); // Clipgain + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[2] = SIMD_MM(max_ps)( + m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[7], SIMD_MM(mul_ps)(y2, y2)))); return y2; } -inline __m128 IIR24CFLquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 IIR24CFLquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { // State-space with softer limiter - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // (ar) - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // (ai) - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // b1 - f->C[4] = _mm_add_ps(f->C[4], f->dC[4]); // c1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // c2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // d + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // (ar) + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // (ai) + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); // b1 + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // c1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // c2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // d - __m128 ar = _mm_mul_ps(f->C[0], f->R[2]); - __m128 ai = _mm_mul_ps(f->C[1], f->R[2]); + auto ar = SIMD_MM(mul_ps)(f->C[0], f->R[2]); + auto ai = SIMD_MM(mul_ps)(f->C[1], f->R[2]); - __m128 y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[4], f->R[0]), _mm_mul_ps(f->C[6], in)), - _mm_mul_ps(f->C[5], f->R[1])); - __m128 s1 = _mm_add_ps(_mm_mul_ps(in, f->C[2]), - _mm_sub_ps(_mm_mul_ps(ar, f->R[0]), _mm_mul_ps(ai, f->R[1]))); - __m128 s2 = _mm_add_ps(_mm_mul_ps(ai, f->R[0]), _mm_mul_ps(ar, f->R[1])); + auto y = SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[4], f->R[0]), 
SIMD_MM(mul_ps)(f->C[6], in)), + SIMD_MM(mul_ps)(f->C[5], f->R[1])); + auto s1 = SIMD_MM(add_ps)( + SIMD_MM(mul_ps)(in, f->C[2]), + SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(ar, f->R[0]), SIMD_MM(mul_ps)(ai, f->R[1]))); + auto s2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(ai, f->R[0]), SIMD_MM(mul_ps)(ar, f->R[1])); f->R[0] = s1; f->R[1] = s2; - __m128 y2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[4], f->R[3]), _mm_mul_ps(f->C[6], y)), - _mm_mul_ps(f->C[5], f->R[4])); - __m128 s3 = _mm_add_ps(_mm_mul_ps(y, f->C[2]), - _mm_sub_ps(_mm_mul_ps(ar, f->R[3]), _mm_mul_ps(ai, f->R[4]))); - __m128 s4 = _mm_add_ps(_mm_mul_ps(ai, f->R[3]), _mm_mul_ps(ar, f->R[4])); + auto y2 = SIMD_MM(add_ps)( + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[4], f->R[3]), SIMD_MM(mul_ps)(f->C[6], y)), + SIMD_MM(mul_ps)(f->C[5], f->R[4])); + auto s3 = + SIMD_MM(add_ps)(SIMD_MM(mul_ps)(y, f->C[2]), SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(ar, f->R[3]), + SIMD_MM(mul_ps)(ai, f->R[4]))); + auto s4 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(ai, f->R[3]), SIMD_MM(mul_ps)(ar, f->R[4])); f->R[3] = s3; f->R[4] = s4; @@ -453,114 +487,126 @@ inline __m128 IIR24CFLquad(QuadFilterUnitState *__restrict f, __m128 in) mr = mr.*0.99 + m.*0.01;*/ // Limiter - const __m128 m001 = _mm_set1_ps(0.001f); - const __m128 m099 = _mm_set1_ps(0.999f); - const __m128 m1 = _mm_set1_ps(1.0f); - const __m128 m2 = _mm_set1_ps(2.0f); + const auto m001 = SIMD_MM(set1_ps)(0.001f); + const auto m099 = SIMD_MM(set1_ps)(0.999f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + const auto m2 = SIMD_MM(set1_ps)(2.0f); - __m128 m = _mm_rsqrt_ps( - _mm_max_ps(m1, _mm_mul_ps(m2, _mm_and_ps(y2, basic_blocks::mechanics::m128_mask_absval)))); - f->R[2] = _mm_add_ps(_mm_mul_ps(f->R[2], m099), _mm_mul_ps(m, m001)); + auto m = SIMD_MM(rsqrt_ps)(SIMD_MM(max_ps)( + m1, SIMD_MM(mul_ps)(m2, SIMD_MM(and_ps)(y2, basic_blocks::mechanics::m128_mask_absval)))); + f->R[2] = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->R[2], m099), SIMD_MM(mul_ps)(m, m001)); return y2; } -inline __m128 IIR24Bquad(QuadFilterUnitState 
*__restrict f, __m128 in) +inline SIMD_M128 IIR24Bquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); // K2 - f->C[3] = _mm_add_ps(f->C[3], f->dC[3]); // Q2 - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); // K1 - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); // Q1 - f->C[4] = _mm_add_ps(f->C[4], f->dC[4]); // V1 - f->C[5] = _mm_add_ps(f->C[5], f->dC[5]); // V2 - f->C[6] = _mm_add_ps(f->C[6], f->dC[6]); // V3 - - __m128 f2 = _mm_sub_ps(_mm_mul_ps(f->C[3], in), _mm_mul_ps(f->C[1], f->R[1])); // Q2*in - K2*R1 - __m128 g2 = _mm_add_ps(_mm_mul_ps(f->C[1], in), _mm_mul_ps(f->C[3], f->R[1])); // K2*in + Q2*R1 - __m128 f1 = _mm_sub_ps(_mm_mul_ps(f->C[2], f2), _mm_mul_ps(f->C[0], f->R[0])); // Q1*f2 - K1*R0 - __m128 g1 = _mm_add_ps(_mm_mul_ps(f->C[0], f2), _mm_mul_ps(f->C[2], f->R[0])); // K1*f2 + Q1*R0 - f->R[0] = _mm_mul_ps(f1, f->R[4]); - f->R[1] = _mm_mul_ps(g1, f->R[4]); - __m128 y1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[6], g2), _mm_mul_ps(f->C[5], g1)), - _mm_mul_ps(f->C[4], f1)); - - f2 = _mm_sub_ps(_mm_mul_ps(f->C[3], y1), _mm_mul_ps(f->C[1], f->R[3])); // Q2*in - K2*R1 - g2 = _mm_add_ps(_mm_mul_ps(f->C[1], y1), _mm_mul_ps(f->C[3], f->R[3])); // K2*in + Q2*R1 - f1 = _mm_sub_ps(_mm_mul_ps(f->C[2], f2), _mm_mul_ps(f->C[0], f->R[2])); // Q1*f2 - K1*R0 - g1 = _mm_add_ps(_mm_mul_ps(f->C[0], f2), _mm_mul_ps(f->C[2], f->R[2])); // K1*f2 + Q1*R0 - f->R[2] = _mm_mul_ps(f1, f->R[4]); - f->R[3] = _mm_mul_ps(g1, f->R[4]); - __m128 y2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(f->C[6], g2), _mm_mul_ps(f->C[5], g1)), - _mm_mul_ps(f->C[4], f1)); - - f->C[7] = _mm_add_ps(f->C[7], f->dC[7]); // Clipgain - const __m128 m01 = _mm_set1_ps(0.1f); - const __m128 m1 = _mm_set1_ps(1.0f); - f->R[4] = _mm_max_ps(m01, _mm_sub_ps(m1, _mm_mul_ps(f->C[7], _mm_mul_ps(y2, y2)))); + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); // K2 + f->C[3] = SIMD_MM(add_ps)(f->C[3], f->dC[3]); // Q2 + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); // K1 + f->C[2] = 
SIMD_MM(add_ps)(f->C[2], f->dC[2]); // Q1 + f->C[4] = SIMD_MM(add_ps)(f->C[4], f->dC[4]); // V1 + f->C[5] = SIMD_MM(add_ps)(f->C[5], f->dC[5]); // V2 + f->C[6] = SIMD_MM(add_ps)(f->C[6], f->dC[6]); // V3 + + auto f2 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[3], in), + SIMD_MM(mul_ps)(f->C[1], f->R[1])); // Q2*in - K2*R1 + auto g2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[1], in), + SIMD_MM(mul_ps)(f->C[3], f->R[1])); // K2*in + Q2*R1 + auto f1 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[2], f2), + SIMD_MM(mul_ps)(f->C[0], f->R[0])); // Q1*f2 - K1*R0 + auto g1 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[0], f2), + SIMD_MM(mul_ps)(f->C[2], f->R[0])); // K1*f2 + Q1*R0 + f->R[0] = SIMD_MM(mul_ps)(f1, f->R[4]); + f->R[1] = SIMD_MM(mul_ps)(g1, f->R[4]); + auto y1 = + SIMD_MM(add_ps)(SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[6], g2), SIMD_MM(mul_ps)(f->C[5], g1)), + SIMD_MM(mul_ps)(f->C[4], f1)); + + f2 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[3], y1), + SIMD_MM(mul_ps)(f->C[1], f->R[3])); // Q2*in - K2*R1 + g2 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[1], y1), + SIMD_MM(mul_ps)(f->C[3], f->R[3])); // K2*in + Q2*R1 + f1 = SIMD_MM(sub_ps)(SIMD_MM(mul_ps)(f->C[2], f2), + SIMD_MM(mul_ps)(f->C[0], f->R[2])); // Q1*f2 - K1*R0 + g1 = SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[0], f2), + SIMD_MM(mul_ps)(f->C[2], f->R[2])); // K1*f2 + Q1*R0 + f->R[2] = SIMD_MM(mul_ps)(f1, f->R[4]); + f->R[3] = SIMD_MM(mul_ps)(g1, f->R[4]); + auto y2 = + SIMD_MM(add_ps)(SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[6], g2), SIMD_MM(mul_ps)(f->C[5], g1)), + SIMD_MM(mul_ps)(f->C[4], f1)); + + f->C[7] = SIMD_MM(add_ps)(f->C[7], f->dC[7]); // Clipgain + const auto m01 = SIMD_MM(set1_ps)(0.1f); + const auto m1 = SIMD_MM(set1_ps)(1.0f); + f->R[4] = SIMD_MM(max_ps)( + m01, SIMD_MM(sub_ps)(m1, SIMD_MM(mul_ps)(f->C[7], SIMD_MM(mul_ps)(y2, y2)))); return y2; } template -inline __m128 LPMOOGquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 LPMOOGquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = 
_mm_add_ps(f->C[0], f->dC[0]); - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); - f->C[2] = _mm_add_ps(f->C[2], f->dC[2]); - - f->R[0] = basic_blocks::dsp::softclip8_ps(_mm_add_ps( - f->R[0], - _mm_mul_ps(f->C[1], - _mm_sub_ps(_mm_sub_ps(_mm_mul_ps(in, f->C[0]), - _mm_mul_ps(f->C[2], _mm_add_ps(f->R[3], f->R[4]))), - f->R[0])))); - f->R[1] = _mm_add_ps(f->R[1], _mm_mul_ps(f->C[1], _mm_sub_ps(f->R[0], f->R[1]))); - f->R[2] = _mm_add_ps(f->R[2], _mm_mul_ps(f->C[1], _mm_sub_ps(f->R[1], f->R[2]))); + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); + f->C[2] = SIMD_MM(add_ps)(f->C[2], f->dC[2]); + + f->R[0] = basic_blocks::dsp::softclip8_ps(SIMD_MM(add_ps)( + f->R[0], SIMD_MM(mul_ps)( + f->C[1], SIMD_MM(sub_ps)( + SIMD_MM(sub_ps)( + SIMD_MM(mul_ps)(in, f->C[0]), + SIMD_MM(mul_ps)(f->C[2], SIMD_MM(add_ps)(f->R[3], f->R[4]))), + f->R[0])))); + f->R[1] = SIMD_MM(add_ps)(f->R[1], SIMD_MM(mul_ps)(f->C[1], SIMD_MM(sub_ps)(f->R[0], f->R[1]))); + f->R[2] = SIMD_MM(add_ps)(f->R[2], SIMD_MM(mul_ps)(f->C[1], SIMD_MM(sub_ps)(f->R[1], f->R[2]))); f->R[4] = f->R[3]; - f->R[3] = _mm_add_ps(f->R[3], _mm_mul_ps(f->C[1], _mm_sub_ps(f->R[2], f->R[3]))); + f->R[3] = SIMD_MM(add_ps)(f->R[3], SIMD_MM(mul_ps)(f->C[1], SIMD_MM(sub_ps)(f->R[2], f->R[3]))); return f->R[subtype]; } -inline __m128 SNHquad(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 SNHquad(QuadFilterUnitState *__restrict f, SIMD_M128 in) { - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); - f->R[0] = _mm_add_ps(f->R[0], f->C[0]); + f->R[0] = SIMD_MM(add_ps)(f->R[0], f->C[0]); - __m128 mask = _mm_cmpgt_ps(f->R[0], _mm_setzero_ps()); + auto mask = SIMD_MM(cmpgt_ps)(f->R[0], SIMD_MM(setzero_ps)()); - f->R[1] = _mm_or_ps(_mm_andnot_ps(mask, f->R[1]), - _mm_and_ps(mask, basic_blocks::dsp::softclip_ps( - _mm_sub_ps(in, _mm_mul_ps(f->C[1], 
f->R[1]))))); + f->R[1] = SIMD_MM(or_ps)(SIMD_MM(andnot_ps)(mask, f->R[1]), + SIMD_MM(and_ps)(mask, basic_blocks::dsp::softclip_ps(SIMD_MM(sub_ps)( + in, SIMD_MM(mul_ps)(f->C[1], f->R[1]))))); - const __m128 m1 = _mm_set1_ps(-1.f); - f->R[0] = _mm_add_ps(f->R[0], _mm_and_ps(m1, mask)); + const auto m1 = SIMD_MM(set1_ps)(-1.f); + f->R[0] = SIMD_MM(add_ps)(f->R[0], SIMD_MM(and_ps)(m1, mask)); return f->R[1]; } template // COMB_SIZE must be a power of 2 -__m128 COMBquad_SSE2(QuadFilterUnitState *__restrict f, __m128 in) +SIMD_M128 COMBquad_SSE2(QuadFilterUnitState *__restrict f, SIMD_M128 in) { static_assert(utilities::SincTable::FIRipol_M == 256); // changing the constant requires updating the code below - const __m128 m256 = _mm_set1_ps(256.f); - const __m128i m0xff = _mm_set1_epi32(0xff); + const auto m256 = SIMD_MM(set1_ps)(256.f); + const SIMD_M128I m0xff = SIMD_MM(set1_epi32)(0xff); - f->C[0] = _mm_add_ps(f->C[0], f->dC[0]); - f->C[1] = _mm_add_ps(f->C[1], f->dC[1]); + f->C[0] = SIMD_MM(add_ps)(f->C[0], f->dC[0]); + f->C[1] = SIMD_MM(add_ps)(f->C[1], f->dC[1]); - __m128 a = _mm_mul_ps(f->C[0], m256); - __m128i e = _mm_cvtps_epi32(a); + auto a = SIMD_MM(mul_ps)(f->C[0], m256); + SIMD_M128I e = SIMD_MM(cvtps_epi32)(a); int DTi alignas(16)[4], SEi alignas(16)[4]; - __m128i DT = _mm_srli_epi32(e, 8); - _mm_store_si128((__m128i *)DTi, DT); - __m128i SE = _mm_and_si128(e, m0xff); - SE = _mm_sub_epi32(m0xff, SE); - _mm_store_si128((__m128i *)SEi, SE); - __m128 DBRead = _mm_setzero_ps(); + SIMD_M128I DT = SIMD_MM(srli_epi32)(e, 8); + SIMD_MM(store_si128)((SIMD_M128I *)DTi, DT); + SIMD_M128I SE = SIMD_MM(and_si128)(e, m0xff); + SE = SIMD_MM(sub_epi32)(m0xff, SE); + SIMD_MM(store_si128)((SIMD_M128I *)SEi, SE); + auto DBRead = SIMD_MM(setzero_ps)(); for (int i = 0; i < 4; i++) { @@ -569,24 +615,24 @@ __m128 COMBquad_SSE2(QuadFilterUnitState *__restrict f, __m128 in) int RP = (f->WP[i] - DTi[i] - utilities::SincTable::FIRoffset) & (COMB_SIZE - 1); // SINC interpolation (12 
samples) - __m128 a = _mm_loadu_ps(&f->DB[i][RP]); + auto a = SIMD_MM(loadu_ps)(&f->DB[i][RP]); SEi[i] *= (utilities::SincTable::FIRipol_N << 1); - __m128 b = _mm_load_ps(&utilities::globalSincTable.sinctable[SEi[i]]); - __m128 o = _mm_mul_ps(a, b); + auto b = SIMD_MM(load_ps)(&utilities::globalSincTable.sinctable[SEi[i]]); + auto o = SIMD_MM(mul_ps)(a, b); - a = _mm_loadu_ps(&f->DB[i][RP + 4]); - b = _mm_load_ps(&utilities::globalSincTable.sinctable[SEi[i] + 4]); - o = _mm_add_ps(o, _mm_mul_ps(a, b)); + a = SIMD_MM(loadu_ps)(&f->DB[i][RP + 4]); + b = SIMD_MM(load_ps)(&utilities::globalSincTable.sinctable[SEi[i] + 4]); + o = SIMD_MM(add_ps)(o, SIMD_MM(mul_ps)(a, b)); - a = _mm_loadu_ps(&f->DB[i][RP + 8]); - b = _mm_load_ps(&utilities::globalSincTable.sinctable[SEi[i] + 8]); - o = _mm_add_ps(o, _mm_mul_ps(a, b)); + a = SIMD_MM(loadu_ps)(&f->DB[i][RP + 8]); + b = SIMD_MM(load_ps)(&utilities::globalSincTable.sinctable[SEi[i] + 8]); + o = SIMD_MM(add_ps)(o, SIMD_MM(mul_ps)(a, b)); - _mm_store_ss((float *)&DBRead + i, sst::basic_blocks::mechanics::sum_ps_to_ss(o)); + SIMD_MM(store_ss)((float *)&DBRead + i, sst::basic_blocks::mechanics::sum_ps_to_ss(o)); } } - __m128 d = _mm_add_ps(in, _mm_mul_ps(DBRead, f->C[1])); + auto d = SIMD_MM(add_ps)(in, SIMD_MM(mul_ps)(DBRead, f->C[1])); d = basic_blocks::dsp::softclip_ps(d); for (int i = 0; i < 4; i++) @@ -594,24 +640,24 @@ __m128 COMBquad_SSE2(QuadFilterUnitState *__restrict f, __m128 in) if (f->active[i]) { // Write to delaybuffer (with "anti-wrapping") - __m128 t = _mm_load_ss((float *)&d + i); - _mm_store_ss(&f->DB[i][f->WP[i]], t); + auto t = SIMD_MM(load_ss)((float *)&d + i); + SIMD_MM(store_ss)(&f->DB[i][f->WP[i]], t); if (f->WP[i] < utilities::SincTable::FIRipol_N) - _mm_store_ss(&f->DB[i][f->WP[i] + COMB_SIZE], t); + SIMD_MM(store_ss)(&f->DB[i][f->WP[i] + COMB_SIZE], t); // Increment write position f->WP[i] = (f->WP[i] + 1) & (COMB_SIZE - 1); } } - return _mm_add_ps(_mm_mul_ps(f->C[3], DBRead), _mm_mul_ps(f->C[2], 
in)); + return SIMD_MM(add_ps)(SIMD_MM(mul_ps)(f->C[3], DBRead), SIMD_MM(mul_ps)(f->C[2], in)); } -template -__m128 ScaleQFPtr(QuadFilterUnitState *__restrict s, __m128 in) +template +SIMD_M128 ScaleQFPtr(QuadFilterUnitState *__restrict s, SIMD_M128 in) { - const auto scale = _mm_set1_ps(scaleTimes1000 / 1000.f); + const auto scale = SIMD_MM(set1_ps)(scaleTimes1000 / 1000.f); auto res = F(s, in); - return _mm_mul_ps(res, scale); + return SIMD_MM(mul_ps)(res, scale); } template diff --git a/include/sst/filters/ResonanceWarp.h b/include/sst/filters/ResonanceWarp.h index 33ddca4..57070c8 100644 --- a/include/sst/filters/ResonanceWarp.h +++ b/include/sst/filters/ResonanceWarp.h @@ -40,10 +40,10 @@ static float clampedFrequency(float pitch, float sampleRate, TuningProvider *pro return freq; } -#define F(a) _mm_set_ps1(a) -#define M(a, b) _mm_mul_ps(a, b) -#define A(a, b) _mm_add_ps(a, b) -#define S(a, b) _mm_sub_ps(a, b) +#define F(a) SIMD_MM(set_ps1)(a) +#define M(a, b) SIMD_MM(mul_ps)(a, b) +#define A(a, b) SIMD_MM(add_ps)(a, b) +#define S(a, b) SIMD_MM(sub_ps)(a, b) enum Saturator { @@ -51,12 +51,12 @@ enum Saturator SAT_SOFT }; -static inline __m128 doNLFilter(const __m128 input, const __m128 a1, const __m128 a2, - const __m128 b0, const __m128 b1, const __m128 b2, const int sat, - __m128 &z1, __m128 &z2) noexcept +static inline SIMD_M128 doNLFilter(const SIMD_M128 input, const SIMD_M128 a1, const SIMD_M128 a2, + const SIMD_M128 b0, const SIMD_M128 b1, const SIMD_M128 b2, + const int sat, SIMD_M128 &z1, SIMD_M128 &z2) noexcept { // out = z1 + b0 * input - const __m128 out = A(z1, M(b0, input)); + const auto out = A(z1, M(b0, input)); // z1 = z2 + b1 * input - a1 * out z1 = A(z2, S(M(b1, input), M(a1, out))); @@ -157,7 +157,7 @@ void makeCoefficients(FilterCoefficientMaker *cm, float freq, fl } template -inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) +inline SIMD_M128 process(QuadFilterUnitState *__restrict f, SIMD_M128 input) { // lower 2 
bits of subtype is the stage count const int stages = subtype & 3; diff --git a/include/sst/filters/TriPoleFilter.h b/include/sst/filters/TriPoleFilter.h index 0a6d4df..bde1913 100644 --- a/include/sst/filters/TriPoleFilter.h +++ b/include/sst/filters/TriPoleFilter.h @@ -66,23 +66,23 @@ static float clampedFrequency(float pitch, float sampleRate, TuningProvider *pro return freq; } -#define F(a) _mm_set_ps1(a) -#define M(a, b) _mm_mul_ps(a, b) -#define D(a, b) _mm_div_ps(a, b) -#define A(a, b) _mm_add_ps(a, b) -#define S(a, b) _mm_sub_ps(a, b) +#define F(a) SIMD_MM(set_ps1)(a) +#define M(a, b) SIMD_MM(mul_ps)(a, b) +#define D(a, b) SIMD_MM(div_ps)(a, b) +#define A(a, b) SIMD_MM(add_ps)(a, b) +#define S(a, b) SIMD_MM(sub_ps)(a, b) #define N(a) S(F(0.0f), a) /** inverse square root sigmoid */ -static inline __m128 thr_sigmoid(__m128 x, float beta) +static inline SIMD_M128 thr_sigmoid(SIMD_M128 x, float beta) { - __m128 vtmp = _mm_mul_ps(x, x); // calculate in*in - __m128 vtmp2 = _mm_add_ps(vtmp, F(beta)); // in*in+1.f - vtmp = _mm_rsqrt_ps(vtmp2); // 1/sqrt(in*in+1.f) - return _mm_mul_ps(vtmp, x); // in*1/sqrt(in*in+1) + auto vtmp = SIMD_MM(mul_ps)(x, x); // calculate in*in + auto vtmp2 = SIMD_MM(add_ps)(vtmp, F(beta)); // in*in+1.f + vtmp = SIMD_MM(rsqrt_ps)(vtmp2); // 1/sqrt(in*in+1.f) + return SIMD_MM(mul_ps)(vtmp, x); // in*1/sqrt(in*in+1) } -static inline __m128 sech2_with_tanh(__m128 tanh_value) +static inline SIMD_M128 sech2_with_tanh(SIMD_M128 tanh_value) { const auto one = F(1.0f); return S(one, M(tanh_value, tanh_value)); @@ -90,29 +90,30 @@ static inline __m128 sech2_with_tanh(__m128 tanh_value) namespace OnePoleLPF { -static inline __m128 linOutput(__m128 x, __m128 z, __m128 b_coeff, __m128 a_coeff) +static inline SIMD_M128 linOutput(SIMD_M128 x, SIMD_M128 z, SIMD_M128 b_coeff, SIMD_M128 a_coeff) { return M(a_coeff, A(M(b_coeff, x), z)); } -static inline __m128 nonlinOutput(__m128 tanh_x, __m128 tanh_y, __m128 z, __m128 b_coeff) +static inline SIMD_M128 
nonlinOutput(SIMD_M128 tanh_x, SIMD_M128 tanh_y, SIMD_M128 z, + SIMD_M128 b_coeff) { return A(M(b_coeff, S(tanh_x, tanh_y)), z); } -static inline __m128 getDerivative(__m128 tanh_y, __m128 b_coeff) +static inline SIMD_M128 getDerivative(SIMD_M128 tanh_y, SIMD_M128 b_coeff) { const auto one = F(1.0f); return S(M(N(b_coeff), sech2_with_tanh(tanh_y)), one); } -static inline __m128 getXDerivative(__m128 tanh_x, __m128 b_coeff) +static inline SIMD_M128 getXDerivative(SIMD_M128 tanh_x, SIMD_M128 b_coeff) { return M(b_coeff, sech2_with_tanh(tanh_x)); } -static inline __m128 process(__m128 tanh_x, __m128 z, __m128 estimate, __m128 b_coeff, - __m128 a_coeff, float beta) +static inline SIMD_M128 process(SIMD_M128 tanh_x, SIMD_M128 z, SIMD_M128 estimate, + SIMD_M128 b_coeff, SIMD_M128 a_coeff, float beta) { estimate = linOutput(tanh_x, z, b_coeff, a_coeff); for (int i = 0; i < nIterStage; ++i) @@ -128,26 +129,27 @@ static inline __m128 process(__m128 tanh_x, __m128 z, __m128 estimate, __m128 b_ namespace OnePoleHPF { -static inline __m128 linOutput(__m128 x_minus_x1_plus_z, __m128 a_coeff) +static inline SIMD_M128 linOutput(SIMD_M128 x_minus_x1_plus_z, SIMD_M128 a_coeff) { return M(a_coeff, x_minus_x1_plus_z); } -static inline __m128 nonlinOutput(__m128 x_minus_x1_plus_z, __m128 tanh_y, __m128 b_coeff) +static inline SIMD_M128 nonlinOutput(SIMD_M128 x_minus_x1_plus_z, SIMD_M128 tanh_y, + SIMD_M128 b_coeff) { return A(M(N(b_coeff), tanh_y), x_minus_x1_plus_z); } -static inline __m128 getDerivative(__m128 tanh_y, __m128 b_coeff) +static inline SIMD_M128 getDerivative(SIMD_M128 tanh_y, SIMD_M128 b_coeff) { const auto neg_one = F(-1.0f); return A(M(N(b_coeff), sech2_with_tanh(tanh_y)), neg_one); } -static inline __m128 getXDerivative() { return F(2.0f); } +static inline SIMD_M128 getXDerivative() { return F(2.0f); } -static inline __m128 process(__m128 x, __m128 x1, __m128 z, __m128 estimate, __m128 b_coeff, - __m128 a_coeff, float beta) +static inline SIMD_M128 process(SIMD_M128 
x, SIMD_M128 x1, SIMD_M128 z, SIMD_M128 estimate, + SIMD_M128 b_coeff, SIMD_M128 a_coeff, float beta) { auto x_minus_x1_plus_z = A(S(x, x1), z); estimate = linOutput(x_minus_x1_plus_z, a_coeff); @@ -164,30 +166,31 @@ static inline __m128 process(__m128 x, __m128 x1, __m128 z, __m128 estimate, __m namespace OnePoleLPF_FB { -static inline __m128 linOutput(__m128 bx, __m128 z_minus_fb_plus_fb1, __m128 a_coeff) +static inline SIMD_M128 linOutput(SIMD_M128 bx, SIMD_M128 z_minus_fb_plus_fb1, SIMD_M128 a_coeff) { return M(a_coeff, A(bx, z_minus_fb_plus_fb1)); } -static inline __m128 nonlinOutput(__m128 tanh_x, __m128 tanh_y, __m128 z_minus_fb_plus_fb1, - __m128 b_coeff) +static inline SIMD_M128 nonlinOutput(SIMD_M128 tanh_x, SIMD_M128 tanh_y, + SIMD_M128 z_minus_fb_plus_fb1, SIMD_M128 b_coeff) { return A(M(b_coeff, S(tanh_x, tanh_y)), z_minus_fb_plus_fb1); } -static inline __m128 getDerivative(__m128 tanh_y, __m128 b_coeff) +static inline SIMD_M128 getDerivative(SIMD_M128 tanh_y, SIMD_M128 b_coeff) { return OnePoleLPF::getDerivative(tanh_y, b_coeff); } -static inline __m128 getXDerivative() +static inline SIMD_M128 getXDerivative() { const auto two = F(2.0f); return two; } -static inline __m128 process(__m128 tanh_x, __m128 z, __m128 fb, __m128 fb1, __m128 estimate, - __m128 b_coeff, __m128 a_coeff, __m128 bx) +static inline SIMD_M128 process(SIMD_M128 tanh_x, SIMD_M128 z, SIMD_M128 fb, SIMD_M128 fb1, + SIMD_M128 estimate, SIMD_M128 b_coeff, SIMD_M128 a_coeff, + SIMD_M128 bx) { auto z_minus_fb_plus_fb1 = A(S(z, fb), fb1); estimate = linOutput(bx, z_minus_fb_plus_fb1, a_coeff); @@ -204,31 +207,31 @@ static inline __m128 process(__m128 tanh_x, __m128 z, __m128 fb, __m128 fb1, __m namespace OnePoleHPF_FB { -static inline __m128 linOutput(__m128 x_minus_x1_plus_z, __m128 tanh_fb, __m128 a_coeff, - __m128 b_coeff) +static inline SIMD_M128 linOutput(SIMD_M128 x_minus_x1_plus_z, SIMD_M128 tanh_fb, SIMD_M128 a_coeff, + SIMD_M128 b_coeff) { return M(a_coeff, A(M(b_coeff, tanh_fb), 
x_minus_x1_plus_z)); } -static inline __m128 nonlinOutput(__m128 x_minus_x1_plus_z, __m128 tanh_y, __m128 tanh_fb, - __m128 b_coeff) +static inline SIMD_M128 nonlinOutput(SIMD_M128 x_minus_x1_plus_z, SIMD_M128 tanh_y, + SIMD_M128 tanh_fb, SIMD_M128 b_coeff) { return A(M(b_coeff, S(tanh_fb, tanh_y)), x_minus_x1_plus_z); } -static inline __m128 getDerivative(__m128 tanh_y, __m128 b_coeff) +static inline SIMD_M128 getDerivative(SIMD_M128 tanh_y, SIMD_M128 b_coeff) { const auto neg_one = F(-1.0f); return A(M(N(b_coeff), sech2_with_tanh(tanh_y)), neg_one); } -static inline __m128 getFBDerivative(__m128 tanh_fb, __m128 b_coeff) +static inline SIMD_M128 getFBDerivative(SIMD_M128 tanh_fb, SIMD_M128 b_coeff) { return M(b_coeff, sech2_with_tanh(tanh_fb)); } -static inline __m128 process(__m128 x_minus_x1_plus_z, __m128 tanh_fb, __m128 estimate, - __m128 b_coeff, __m128 a_coeff) +static inline SIMD_M128 process(SIMD_M128 x_minus_x1_plus_z, SIMD_M128 tanh_fb, SIMD_M128 estimate, + SIMD_M128 b_coeff, SIMD_M128 a_coeff) { estimate = linOutput(x_minus_x1_plus_z, tanh_fb, a_coeff, b_coeff); for (int i = 0; i < nIterStage; ++i) @@ -257,45 +260,46 @@ constexpr float one = 0.99f; constexpr float oneOverMult = one / mult; const float betaExpOverMult = beta_exp / mult; -static inline __m128 sign_ps(__m128 x) +static inline SIMD_M128 sign_ps(SIMD_M128 x) { - const __m128 zero = _mm_setzero_ps(); - const __m128 one = _mm_set1_ps(1.0f); - const __m128 neg_one = _mm_set1_ps(-1.0f); + const auto zero = SIMD_MM(setzero_ps)(); + const auto one = SIMD_MM(set1_ps)(1.0f); + const auto neg_one = SIMD_MM(set1_ps)(-1.0f); - __m128 positive = _mm_and_ps(_mm_cmpgt_ps(x, zero), one); - __m128 negative = _mm_and_ps(_mm_cmplt_ps(x, zero), neg_one); + auto positive = SIMD_MM(and_ps)(SIMD_MM(cmpgt_ps)(x, zero), one); + auto negative = SIMD_MM(and_ps)(SIMD_MM(cmplt_ps)(x, zero), neg_one); - return _mm_or_ps(positive, negative); + return SIMD_MM(or_ps)(positive, negative); } -static inline __m128 
res_func_ps(__m128 x) +static inline SIMD_M128 res_func_ps(SIMD_M128 x) { x = M(F(mult), x); auto x_abs = basic_blocks::mechanics::abs_ps(x); - auto x_less_than = _mm_cmplt_ps(x_abs, F(max_val)); + auto x_less_than = SIMD_MM(cmplt_ps)(x_abs, F(max_val)); auto y = A(N(basic_blocks::dsp::fastexpSSE( M(F(beta_exp), N(basic_blocks::mechanics::abs_ps(A(x, F(c))))))), F(bias)); y = M(sign_ps(x), M(y, F(oneOverMult))); - return _mm_or_ps(_mm_and_ps(x_less_than, M(x, F(oneOverMult))), _mm_andnot_ps(x_less_than, y)); + return SIMD_MM(or_ps)(SIMD_MM(and_ps)(x_less_than, M(x, F(oneOverMult))), + SIMD_MM(andnot_ps)(x_less_than, y)); } -static inline __m128 res_deriv_ps(__m128 x) +static inline SIMD_M128 res_deriv_ps(SIMD_M128 x) { x = M(F(mult), x); auto x_abs = basic_blocks::mechanics::abs_ps(x); - auto x_less_than = _mm_cmplt_ps(x_abs, F(max_val)); + auto x_less_than = SIMD_MM(cmplt_ps)(x_abs, F(max_val)); auto y = A(basic_blocks::dsp::fastexpSSE( M(F(beta_exp), N(basic_blocks::mechanics::abs_ps(A(x, F(c)))))), F(betaExpOverMult)); - return _mm_or_ps(_mm_and_ps(x_less_than, F(one)), _mm_andnot_ps(x_less_than, y)); + return SIMD_MM(or_ps)(SIMD_MM(and_ps)(x_less_than, F(one)), SIMD_MM(andnot_ps)(x_less_than, y)); } } // namespace ResWaveshaper @@ -354,7 +358,8 @@ void makeCoefficients(FilterCoefficientMaker *cm, float freq, fl cm->FromDirect(C); } -template inline __m128 process(QuadFilterUnitState *__restrict f, __m128 in) +template +inline SIMD_M128 process(QuadFilterUnitState *__restrict f, SIMD_M128 in) { // input gain in = M(F(in_gain), in); @@ -389,7 +394,7 @@ template inline __m128 process(QuadFilterUnitState *__re const auto k_ps = f->C[thr_k]; // define local variables - __m128 tanh_x0, tanh_x1, tanh_x2, tanh_fb, f0_deriv, f1_deriv, f2_deriv, bx, hpf_in; + SIMD_M128 tanh_x0, tanh_x1, tanh_x2, tanh_fb, f0_deriv, f1_deriv, f2_deriv, bx, hpf_in; switch (mode) { case 0: // lowpass diff --git a/include/sst/filters/VintageLadders.h b/include/sst/filters/VintageLadders.h 
index 0f92634..d7240bb 100644 --- a/include/sst/filters/VintageLadders.h +++ b/include/sst/filters/VintageLadders.h @@ -140,16 +140,16 @@ inline void makeCoefficients(FilterCoefficientMaker *cm, float f cm->FromDirect(lc); } -#define F(a) _mm_set_ps1(a) -#define M(a, b) _mm_mul_ps(a, b) -#define A(a, b) _mm_add_ps(a, b) -#define S(a, b) _mm_sub_ps(a, b) +#define F(a) SIMD_MM(set_ps1)(a) +#define M(a, b) SIMD_MM(mul_ps)(a, b) +#define A(a, b) SIMD_MM(add_ps)(a, b) +#define S(a, b) SIMD_MM(sub_ps)(a, b) -inline __m128 clip(__m128 value, __m128 _saturation, __m128 _saturationinverse) +inline SIMD_M128 clip(SIMD_M128 value, SIMD_M128 _saturation, SIMD_M128 _saturationinverse) { - const __m128 minusone = F(-1), one = F(1), onethird = F(1.f / 3.f); + const auto minusone = F(-1), one = F(1), onethird = F(1.f / 3.f); auto vtsi = M(value, _saturationinverse); - auto v2 = _mm_min_ps(one, _mm_max_ps(minusone, vtsi)); + auto v2 = SIMD_MM(min_ps)(one, SIMD_MM(max_ps)(minusone, vtsi)); auto v23 = M(v2, M(v2, v2)); auto vkern = S(v2, M(onethird, v23)); auto res = M(_saturation, vkern); @@ -157,9 +157,9 @@ inline __m128 clip(__m128 value, __m128 _saturation, __m128 _saturationinverse) return res; } -inline void calculateDerivatives(__m128 input, __m128 *dstate, __m128 *state, __m128 cutoff, - __m128 resonance, __m128 _saturation, __m128 _saturationInv, - __m128 gComp) +inline void calculateDerivatives(SIMD_M128 input, SIMD_M128 *dstate, SIMD_M128 *state, + SIMD_M128 cutoff, SIMD_M128 resonance, SIMD_M128 _saturation, + SIMD_M128 _saturationInv, SIMD_M128 gComp) { auto satstate0 = clip(state[0], _saturation, _saturationInv); auto satstate1 = clip(state[1], _saturation, _saturationInv); @@ -182,20 +182,20 @@ inline void calculateDerivatives(__m128 input, __m128 *dstate, __m128 *state, __ dstate[3] = M(cutoff, S(satstate2, clip(state[3], _saturation, _saturationInv))); } -inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) +inline SIMD_M128 
process(QuadFilterUnitState *__restrict f, SIMD_M128 input) { int i; - __m128 deriv1[4], deriv2[4], deriv3[4], deriv4[4], tempState[4]; + SIMD_M128 deriv1[4], deriv2[4], deriv3[4], deriv4[4], tempState[4]; - __m128 *state = &(f->R[0]); + auto *state = &(f->R[0]); auto stepSize = F(f->sampleRateInv * extraOversampleInv), halfStepSize = F(0.5f * f->sampleRateInv * extraOversampleInv); - const __m128 oneoversix = F(1.f / 6.f), two = F(2.f), dFac = F(extraOversampleInv), - sat = F(saturation), satInv = F(saturationInverse); + const auto oneoversix = F(1.f / 6.f), two = F(2.f), dFac = F(extraOversampleInv), + sat = F(saturation), satInv = F(saturationInverse); - __m128 outputOS[extraOversample]; + SIMD_M128 outputOS[extraOversample]; for (int osi = 0; osi < extraOversample; ++osi) { @@ -204,9 +204,9 @@ inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) f->C[j] = A(f->C[j], M(dFac, f->dC[j])); } - __m128 cutoff = f->C[rkm_cutoff]; - __m128 resonance = f->C[rkm_reso]; - __m128 gComp = f->C[rkm_gComp]; + auto cutoff = f->C[rkm_cutoff]; + auto resonance = f->C[rkm_reso]; + auto gComp = f->C[rkm_gComp]; calculateDerivatives(input, deriv1, state, cutoff, resonance, sat, satInv, gComp); for (i = 0; i < 4; i++) @@ -240,7 +240,7 @@ inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) outputOS[osi] = state[3]; // Zero stuffing - input = _mm_setzero_ps(); + input = SIMD_MM(setzero_ps)(); } /* @@ -254,10 +254,10 @@ inline __m128 process(QuadFilterUnitState *__restrict f, __m128 input) ** Anyway: (2 * sin(pi * x) * sin((pi * x) / 2)) / (pi^2 * x^2), for points -1.5, -1, 0.5, and 0 ** */ - auto ov = _mm_setzero_ps(); - __m128 windowFactors[4]; + auto ov = SIMD_MM(setzero_ps)(); + SIMD_M128 windowFactors[4]; windowFactors[0] = F(-0.0636844f); - windowFactors[1] = _mm_setzero_ps(); + windowFactors[1] = SIMD_MM(setzero_ps)(); windowFactors[2] = F(0.57315917f); windowFactors[3] = F(1); @@ -347,20 +347,19 @@ inline void 
makeCoefficients(FilterCoefficientMaker *cm, float f cm->FromDirect(lC); } -inline __m128 process(QuadFilterUnitState *__restrict f, __m128 in) +inline SIMD_M128 process(QuadFilterUnitState *__restrict f, SIMD_M128 in) { -#define F(a) _mm_set_ps1(a) -#define M(a, b) _mm_mul_ps(a, b) -#define A(a, b) _mm_add_ps(a, b) -#define S(a, b) _mm_sub_ps(a, b) +#define F(a) SIMD_MM(set_ps1)(a) +#define M(a, b) SIMD_MM(mul_ps)(a, b) +#define A(a, b) SIMD_MM(add_ps)(a, b) +#define S(a, b) SIMD_MM(sub_ps)(a, b) - const __m128 dFac = F(0.5f), half = F(0.5f), one = F(1.0f), four = F(4.0f), m18730 = F(1.8730f), - m04955 = F(0.4995f), mneg06490 = F(-0.6490f), m09988 = F(0.9988f), - mneg39364 = F(-3.9364f), m18409 = F(1.8409f), m09968 = F(0.9968f), - thermal = F(1.f / 70.f), oneoverthermal = F(70.0f), - neg2pi = F(-2.0f * (float)M_PI); + const auto dFac = F(0.5f), half = F(0.5f), one = F(1.0f), four = F(4.0f), m18730 = F(1.8730f), + m04955 = F(0.4995f), mneg06490 = F(-0.6490f), m09988 = F(0.9988f), + mneg39364 = F(-3.9364f), m18409 = F(1.8409f), m09968 = F(0.9968f), + thermal = F(1.f / 70.f), oneoverthermal = F(70.0f), neg2pi = F(-2.0f * (float)M_PI); - __m128 outputOS[2]; + SIMD_M128 outputOS[2]; for (int j = 0; j < 2; ++j) { @@ -383,12 +382,12 @@ inline __m128 process(QuadFilterUnitState *__restrict f, __m128 in) for (int k = 0; k < n_hcoeffs; ++k) { - f->C[k] = _mm_add_ps(f->C[k], _mm_mul_ps(dFac, f->dC[k])); + f->C[k] = SIMD_MM(add_ps)(f->C[k], SIMD_MM(mul_ps)(dFac, f->dC[k])); } // float input = in - resQuad * ( delay[5] - gComp * in ) // Model as an impulse stream - auto input = - _mm_sub_ps(in, _mm_mul_ps(resquad, S(f->R[h_delay + 5], M(f->C[h_gComp], in)))); + auto input = SIMD_MM(sub_ps)( + in, SIMD_MM(mul_ps)(resquad, S(f->R[h_delay + 5], M(f->C[h_gComp], in)))); // delay[0] = stage[0] = delay[0] + tune * (tanh(input * thermal) - stageTanh[0]); f->R[h_stage + 0] = @@ -416,7 +415,7 @@ inline __m128 process(QuadFilterUnitState *__restrict f, __m128 in) // 0.5 sample delay for 
phase compensation // delay[5] = (stage[3] + delay[4]) * 0.5; - f->R[h_delay + 5] = M(_mm_set_ps1(0.5), A(f->R[h_stage + 3], f->R[h_delay + 4])); + f->R[h_delay + 5] = M(SIMD_MM(set_ps1)(0.5), A(f->R[h_stage + 3], f->R[h_delay + 4])); // delay[4] = stage[3]; f->R[h_delay + 4] = f->R[h_stage + 3]; diff --git a/include/sst/utilities/globals.h b/include/sst/utilities/globals.h index b9a360a..53dbcc3 100644 --- a/include/sst/utilities/globals.h +++ b/include/sst/utilities/globals.h @@ -21,28 +21,7 @@ #include #include // needed for memset/memcpy on GCC -#if MAC - -#if defined(__x86_64__) -#else -#define ARM_NEON 1 -#endif - -#endif - -#if LINUX -#if defined(__aarch64__) || defined(__arm__) -#define ARM_NEON 1 -#endif -#endif - -#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -#include -#else -#define SIMDE_ENABLE_NATIVE_ALIASES -#include "simde/x86/sse2.h" -#endif +#include "sst/basic-blocks/simd/setup.h" namespace sst::filters::utilities { diff --git a/include/sst/utilities/shared.h b/include/sst/utilities/shared.h index 22dd02b..d0e9d72 100644 --- a/include/sst/utilities/shared.h +++ b/include/sst/utilities/shared.h @@ -26,15 +26,15 @@ inline float i2f_binary_cast(int i) return *f; } -const __m128 m128_mask_signbit = _mm_set1_ps(i2f_binary_cast(0x80000000)); -const __m128 m128_mask_absval = _mm_set1_ps(i2f_binary_cast(0x7fffffff)); -const __m128 m128_zero = _mm_set1_ps(0.0f); -const __m128 m128_half = _mm_set1_ps(0.5f); -const __m128 m128_one = _mm_set1_ps(1.0f); -const __m128 m128_two = _mm_set1_ps(2.0f); -const __m128 m128_four = _mm_set1_ps(4.0f); -const __m128 m128_1234 = _mm_set_ps(1.f, 2.f, 3.f, 4.f); -const __m128 m128_0123 = _mm_set_ps(0.f, 1.f, 2.f, 3.f); +const auto m128_mask_signbit = SIMD_MM(set1_ps)(i2f_binary_cast(0x80000000)); +const auto m128_mask_absval = SIMD_MM(set1_ps)(i2f_binary_cast(0x7fffffff)); +const auto m128_zero = SIMD_MM(set1_ps)(0.0f); +const auto m128_half = 
SIMD_MM(set1_ps)(0.5f); +const auto m128_one = SIMD_MM(set1_ps)(1.0f); +const auto m128_two = SIMD_MM(set1_ps)(2.0f); +const auto m128_four = SIMD_MM(set1_ps)(4.0f); +const auto m128_1234 = SIMD_MM(set_ps)(1.f, 2.f, 3.f, 4.f); +const auto m128_0123 = SIMD_MM(set_ps)(0.f, 1.f, 2.f, 3.f); } // namespace sst::filters::utilities diff --git a/scripts/fix_file_comments.pl b/scripts/fix_file_comments.pl index baca832..d95a964 100644 --- a/scripts/fix_file_comments.pl +++ b/scripts/fix_file_comments.pl @@ -11,11 +11,11 @@ 'include' ); -ind( +find( { wanted => \&findfiles, }, - 'include0extras' + 'include-extras' ); diff --git a/tests/BasicFiltersTest.cpp b/tests/BasicFiltersTest.cpp index 1d1955c..e1f7ed4 100644 --- a/tests/BasicFiltersTest.cpp +++ b/tests/BasicFiltersTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" TEST_CASE("Basic Filters") diff --git a/tests/BiquadTest.cpp b/tests/BiquadTest.cpp index 882c205..089e234 100644 --- a/tests/BiquadTest.cpp +++ b/tests/BiquadTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. 
+ * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "sst/filters/BiquadFilter.h" #include "TestUtils.h" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c7bf43d..d758b91 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,4 @@ +message(STATUS "Add sst-filters-tests") add_executable(sst-filters-tests) target_include_directories(sst-filters-tests PRIVATE . ../libs) target_link_libraries(sst-filters-tests PRIVATE ${PROJECT_NAME} simde) diff --git a/tests/CutoffWarpTest.cpp b/tests/CutoffWarpTest.cpp index e78f43d..616d49b 100644 --- a/tests/CutoffWarpTest.cpp +++ b/tests/CutoffWarpTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" TEST_CASE("Cutoff Warp") diff --git a/tests/CytomicSVFTests.cpp b/tests/CytomicSVFTests.cpp index b26c2ad..bef926b 100644 --- a/tests/CytomicSVFTests.cpp +++ b/tests/CytomicSVFTests.cpp @@ -1,6 +1,17 @@ -// -// Created by Paul Walker on 4/7/24. -// +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. 
+ * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "catch2/catch2.hpp" #include "sst/filters/CytomicSVF.h" diff --git a/tests/DiodeLadderTest.cpp b/tests/DiodeLadderTest.cpp index f09e0f3..bbae286 100644 --- a/tests/DiodeLadderTest.cpp +++ b/tests/DiodeLadderTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" TEST_CASE("Diode Ladder") diff --git a/tests/HalfRateTest.cpp b/tests/HalfRateTest.cpp index 984f5dd..84f0274 100644 --- a/tests/HalfRateTest.cpp +++ b/tests/HalfRateTest.cpp @@ -1,5 +1,18 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "sst/filters/HalfRateFilter.h" - #include "TestUtils.h" template diff --git a/tests/K35FilterTest.cpp b/tests/K35FilterTest.cpp index d336218..8920714 100644 --- a/tests/K35FilterTest.cpp +++ b/tests/K35FilterTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. 
+ * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" TEST_CASE("K35 Filter") diff --git a/tests/OBXDFilterTest.cpp b/tests/OBXDFilterTest.cpp index 3f8b11d..8544a56 100644 --- a/tests/OBXDFilterTest.cpp +++ b/tests/OBXDFilterTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" TEST_CASE("OBXD Filter") diff --git a/tests/ResonanceWarpTest.cpp b/tests/ResonanceWarpTest.cpp index 968e9a3..7f39f01 100644 --- a/tests/ResonanceWarpTest.cpp +++ b/tests/ResonanceWarpTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. 
+ * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" diff --git a/tests/TestUtils.h b/tests/TestUtils.h index 477a73b..976fdab 100644 --- a/tests/TestUtils.h +++ b/tests/TestUtils.h @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #ifndef TESTS_TESTUTILS_H #define TESTS_TESTUTILS_H @@ -27,17 +41,18 @@ inline float runSine(sst::filters::QuadFilterUnitState &filterState, sst::filters::FilterUnitQFPtr &filterUnitPtr, float testFreq, int numSamples) { // reset filter state - std::fill(filterState.R, &filterState.R[sst::filters::n_filter_registers], _mm_setzero_ps()); + std::fill(filterState.R, &filterState.R[sst::filters::n_filter_registers], + SIMD_MM(setzero_ps)()); std::vector y(numSamples, 0.0f); for (int i = 0; i < numSamples; ++i) { auto x = (float)std::sin(2.0 * M_PI * (double)i * testFreq / sampleRate); - auto yVec = filterUnitPtr(&filterState, _mm_set_ps1(x)); + auto yVec = filterUnitPtr(&filterState, SIMD_MM(set_ps1)(x)); float yArr alignas(16)[4]; - _mm_store_ps(yArr, yVec); + SIMD_MM(store_ps)(yArr, yVec); y[i] = yArr[0]; } diff --git a/tests/TriPoleFilterTest.cpp b/tests/TriPoleFilterTest.cpp index e31be89..08967e6 100644 --- a/tests/TriPoleFilterTest.cpp +++ b/tests/TriPoleFilterTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. 
+ * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" TEST_CASE("TriPole Filter") diff --git a/tests/VintageLaddersTest.cpp b/tests/VintageLaddersTest.cpp index ee9cf54..954817b 100644 --- a/tests/VintageLaddersTest.cpp +++ b/tests/VintageLaddersTest.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #include "TestUtils.h" TEST_CASE("Vintage Ladders") diff --git a/tests/tests.cpp b/tests/tests.cpp index e70a825..98c8424 100644 --- a/tests/tests.cpp +++ b/tests/tests.cpp @@ -1,3 +1,17 @@ +/* + * sst-filters - A header-only collection of SIMD filter + * implementations by the Surge Synth Team + * + * Copyright 2019-2024, various authors, as described in the GitHub + * transaction log. + * + * sst-filters is released under the Gnu General Public Licens + * version 3 or later. Some of the filters in this package + * originated in the version of Surge open sourced in 2018. + * + * All source in sst-filters available at + * https://github.com/surge-synthesizer/sst-filters + */ #define CATCH_CONFIG_RUNNER #include "catch2/catch2.hpp"