diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 06792aa4..dbfd752f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -21,30 +21,6 @@ format_check: needs: [] script: - echo Coming soon! - -build_release_cauldron: - stage: build - script: - - 'mkdir build' - - 'cd build' - - 'cmake -A x64 .. -DBUILD_TYPE=CAULDRON' - - 'cmake --build . --config ReleaseDX12 %CMAKE_BUILD_DRIVER%' - - 'cmake --build . --config ReleaseVK %CMAKE_BUILD_DRIVER%' - artifacts: - paths: - - ./bin - -build_debug_cauldron: - stage: build - script: - - 'mkdir build' - - 'cd build' - - 'cmake -A x64 .. -DBUILD_TYPE=CAULDRON' - - 'cmake --build . --config DebugDX12 %CMAKE_BUILD_DRIVER%' - - 'cmake --build . --config DebugVK %CMAKE_BUILD_DRIVER%' - artifacts: - paths: - - ./bin build_release_sdk_dx12: stage: build @@ -57,6 +33,7 @@ build_release_sdk_dx12: artifacts: paths: - ./sdk/bin + expire_in: 2 weeks build_debug_sdk_dx12: stage: build @@ -69,6 +46,7 @@ build_debug_sdk_dx12: artifacts: paths: - ./sdk/bin + expire_in: 2 weeks build_release_sdk_vk: stage: build @@ -81,6 +59,7 @@ build_release_sdk_vk: artifacts: paths: - ./sdk/bin + expire_in: 2 weeks build_debug_sdk_vk: stage: build @@ -93,6 +72,7 @@ build_debug_sdk_vk: artifacts: paths: - ./sdk/bin + expire_in: 2 weeks build_debug_api_dll_dx12: stage: build @@ -105,6 +85,7 @@ build_debug_api_dll_dx12: artifacts: paths: - ./ffx-api/bin + expire_in: 2 weeks build_release_api_dll_dx12: stage: build @@ -130,6 +111,7 @@ build_debug_api_dll_vk: artifacts: paths: - ./ffx-api/bin + expire_in: 2 weeks build_release_api_dll_vk: stage: build @@ -157,6 +139,7 @@ build_release_samples_dx12: artifacts: paths: - ./bin + expire_in: 2 weeks build_release_samples_vk: stage: build @@ -171,6 +154,7 @@ build_release_samples_vk: artifacts: paths: - ./bin + expire_in: 2 weeks build_debug_samples_dx12: stage: build @@ -185,6 +169,7 @@ build_debug_samples_dx12: artifacts: paths: - ./bin + expire_in: 2 weeks build_debug_samples_vk: stage: build @@ -199,6 +184,7 @@ 
build_debug_samples_vk: artifacts: paths: - ./bin + expire_in: 2 weeks package_release_samples_pc: stage: package @@ -223,6 +209,6 @@ package_release_samples_pc: - ./*.bat - ./CMakeLists.txt - ./common.cmake - - ./LICENSE.txt + - ./sdk/LICENSE.txt - ./readme.md - ./sample.cmake \ No newline at end of file diff --git a/.gitlab/issue_templates/Code quality issue.md b/.gitlab/issue_templates/Code quality issue.md deleted file mode 100644 index a2562ed8..00000000 --- a/.gitlab/issue_templates/Code quality issue.md +++ /dev/null @@ -1,9 +0,0 @@ -## FidelityFX SDK Code Quality -(Which SDK sample project or component has a code quality issue) - -## Code quality issue description -(Describe in as much detail what constitutes the quality issue, including as much descriptive information as possible) - -(This issue category is primarily for code review issues in sample and component code, but primarily for issues of structure, performance, and future maintainability. Code linting issues are acceptable only while automatic code linting isn't a production feature. Please consult the coding guidelines to help you assess code quality issues) - -/label ~code-quality \ No newline at end of file diff --git a/.gitlab/issue_templates/Compatility issue.md b/.gitlab/issue_templates/Compatility issue.md deleted file mode 100644 index 698e1af8..00000000 --- a/.gitlab/issue_templates/Compatility issue.md +++ /dev/null @@ -1,29 +0,0 @@ -## FidelityFX SDK Compatibility Issue -(Which SDK sample project or component has a compatibility issue) - -## Compatibility issue description -(Describe in as much detail what constitutes the compatibility issue, including as much descriptive information as possible) - -(This issue category is primarily for compatibility issues in samples, where the sample runs correctly on one combination of CPU and GPU, but a different CPU or GPU causes issues. 
Please list working combinations wherever possible) - -## What built the sample project or component - -- [ ] code was built locally -- [ ] code was built by CI - -(If the code was built by CI, please insert a link to the CI job here) - -## System setup -- Windows version: -- Windows SDK version: -- Vulkan SDK version: -- CMake version: -- Visual Studio version: -- CPU: -- GPU: -- GPU driver version: - -## Relevant logs or screenshots -(Paste any relevant logs or screenshots to help diagnose and reproduce the compatibility issue) - -/label ~compatibility-issue \ No newline at end of file diff --git a/.gitlab/issue_templates/Execution failure.md b/.gitlab/issue_templates/Execution failure.md deleted file mode 100644 index b518b795..00000000 --- a/.gitlab/issue_templates/Execution failure.md +++ /dev/null @@ -1,27 +0,0 @@ -## FidelityFX SDK Execution Failure -(Which SDK sample project or component failed to execute) - -## Execution failure description -(Describe in as much detail what constitutes the execution failure, including the text of error dialogs and any relevant extra information) - -## What built the sample project or component - -- [ ] code was built locally -- [ ] code was built by CI - -(If the code was built by CI, please insert a link to the CI job here) - -## System setup -- Windows version: -- Windows SDK version: -- Vulkan SDK version: -- CMake version: -- Visual Studio version: -- CPU: -- GPU: -- GPU driver version: - -## Relevant logs or screenshots -(Paste any relevant logs or screenshots to help diagnose the execution failure) - -/label ~execution-failure ~qa-report \ No newline at end of file diff --git a/.gitlab/issue_templates/Functional issue.md b/.gitlab/issue_templates/Functional issue.md deleted file mode 100644 index e458ee83..00000000 --- a/.gitlab/issue_templates/Functional issue.md +++ /dev/null @@ -1,29 +0,0 @@ -## FidelityFX SDK Functional Issue -(Which SDK sample project or component has a functional issue) - -## Functional issue 
description -(Describe in as much detail what constitutes the functional issue, including as much descriptive information as possible) - -(This issue category is primarily for UI/UX failures in samples, such as UI components not triggering the correct behaviours) - -## What built the sample project or component - -- [ ] code was built locally -- [ ] code was built by CI - -(If the code was built by CI, please insert a link to the CI job here) - -## System setup -- Windows version: -- Windows SDK version: -- Vulkan SDK version: -- CMake version: -- Visual Studio version: -- CPU: -- GPU: -- GPU driver version: - -## Relevant logs or screenshots -(Paste any relevant logs or screenshots to help diagnose and reproduce the functional issue) - -/label ~functional-issue \ No newline at end of file diff --git a/.gitlab/issue_templates/Non-CI build failure.md b/.gitlab/issue_templates/Non-CI build failure.md deleted file mode 100644 index f752d2fd..00000000 --- a/.gitlab/issue_templates/Non-CI build failure.md +++ /dev/null @@ -1,21 +0,0 @@ -## FidelityFX SDK non-CI Build Failure -(Which SDK sample project or component failed to build outside of CI) - -## Build failure output -(Paste the output from the Output window in Visual Studio. 
Please use code blocks (```) to format the output, to make it easier to read) - -## Build information - -(If the code is successfully built by CI, please insert a link to a passing CI job here) - -## System setup -- Windows version: -- Windows SDK version: -- Vulkan SDK version: -- CMake version: -- Visual Studio version: - -## Relevant logs or screenshots -(Paste any relevant logs or screenshots to help diagnose the failure) - -/label ~non-ci-build-failure ~qa-report \ No newline at end of file diff --git a/.gitlab/issue_templates/Performance issue.md b/.gitlab/issue_templates/Performance issue.md deleted file mode 100644 index 799df6f5..00000000 --- a/.gitlab/issue_templates/Performance issue.md +++ /dev/null @@ -1,27 +0,0 @@ -## FidelityFX SDK Performance Issue -(Which SDK sample project or component has a performance issue) - -## Performance issue description -(Describe in as much detail what constitutes the performance issue, including as much descriptive information as possible, including information about prior performance runs or expectations where applicable) - -## What built the sample project or component - -- [ ] code was built locally -- [ ] code was built by CI - -(If the code was built by CI, please insert a link to the CI job here) - -## System setup -- Windows version: -- Windows SDK version: -- Vulkan SDK version: -- CMake version: -- Visual Studio version: -- CPU: -- GPU: -- GPU driver version: - -## Relevant logs or screenshots -(Paste any relevant logs or screenshots to help diagnose and reproduce the performance issue) - -/label ~performance-issue \ No newline at end of file diff --git a/.gitlab/issue_templates/UX issue.md b/.gitlab/issue_templates/UX issue.md deleted file mode 100644 index fe07c55c..00000000 --- a/.gitlab/issue_templates/UX issue.md +++ /dev/null @@ -1,29 +0,0 @@ -## FidelityFX SDK UX Issue -(Which SDK sample project or component has a UX issue) - -## UX issue description -(Describe in as much detail what constitutes the UX 
issue, including as much descriptive information as possible) - -(This issue category is primarily for UI/UX usabity issues in samples, such as UI being hard to use, read or understand, despite it operating correctly) - -## What built the sample project or component - -- [ ] code was built locally -- [ ] code was built by CI - -(If the code was built by CI, please insert a link to the CI job here) - -## System setup -- Windows version: -- Windows SDK version: -- Vulkan SDK version: -- CMake version: -- Visual Studio version: -- CPU: -- GPU: -- GPU driver version: - -## Relevant logs or screenshots -(Paste any relevant logs or screenshots to help diagnose and reproduce the UX issue) - -/label ~ux-issue \ No newline at end of file diff --git a/.gitlab/issue_templates/Visual issue.md b/.gitlab/issue_templates/Visual issue.md deleted file mode 100644 index b3ed99cc..00000000 --- a/.gitlab/issue_templates/Visual issue.md +++ /dev/null @@ -1,27 +0,0 @@ -## FidelityFX SDK Visual Issue -(Which SDK sample project or component has a visual issue) - -## Visual issue description -(Describe in as much detail what constitutes the visual issue, including as much descriptive information as possible) - -## What built the sample project or component - -- [ ] code was built locally -- [ ] code was built by CI - -(If the code was built by CI, please insert a link to the CI job here) - -## System setup -- Windows version: -- Windows SDK version: -- Vulkan SDK version: -- CMake version: -- Visual Studio version: -- CPU: -- GPU: -- GPU driver version: - -## Relevant logs or screenshots -(Paste any relevant logs or screenshots to help diagnose the visual issue) - -/label ~visual-issue ~qa-report \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..3712e34a --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +This file is part of the FidelityFX SDK. + +Copyright (C) 2024 Advanced Micro Devices, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/PrebuiltSignedDLL/amd_fidelityfx_dx12.dll b/PrebuiltSignedDLL/amd_fidelityfx_dx12.dll index 437c1abf..5e381643 100644 Binary files a/PrebuiltSignedDLL/amd_fidelityfx_dx12.dll and b/PrebuiltSignedDLL/amd_fidelityfx_dx12.dll differ diff --git a/PrebuiltSignedDLL/amd_fidelityfx_vk.dll b/PrebuiltSignedDLL/amd_fidelityfx_vk.dll index 78554668..f25dad87 100644 Binary files a/PrebuiltSignedDLL/amd_fidelityfx_vk.dll and b/PrebuiltSignedDLL/amd_fidelityfx_vk.dll differ diff --git a/PrebuiltSignedDLL/amd_fidelityfx_vk.lib b/PrebuiltSignedDLL/amd_fidelityfx_vk.lib index 63d283d0..6c45a503 100644 Binary files a/PrebuiltSignedDLL/amd_fidelityfx_vk.lib and b/PrebuiltSignedDLL/amd_fidelityfx_vk.lib differ diff --git a/docs/getting-started/building-samples.md b/docs/getting-started/building-samples.md index 84ce4de2..a8cf4c3d 100644 --- a/docs/getting-started/building-samples.md +++ b/docs/getting-started/building-samples.md @@ -7,7 +7,7 @@ To build the samples in the AMD FidelityFX SDK: 1. Download and install the following software developer tool minimum versions: - - [CMake 3.17](https://cmake.org/download/) + - [CMake 3.17 - 3.30](https://cmake.org/download/) - [Visual Studio 2019](https://visualstudio.microsoft.com/downloads/) - [Windows 10 SDK 10.0.18362.0](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk) - [Vulkan SDK 1.3.239](https://vulkan.lunarg.com/) @@ -15,12 +15,12 @@ To build the samples in the AMD FidelityFX SDK: 2. Generate Visual Studio solution: ```bash - > \BuildSamplesSolution.bat + > \BuildSamplesSolution[DX12/VK].bat ``` - The batch file will inquire if the solution should build the SDK as a DLL (builds as a statically linked library if no ('n') is provided) and which samples should be included. Please use '1' to build a solution with all samples included or provide the list of samples to be included (using the corresponding number of the samples with spaces in between). 
- - This will generate a `build\` directory where you will find the solution for the SDK samples (`FidelityFX SDK Samples.sln`). + The batch file will inquire if the solution should build the SDK as a DLL (builds as a statically linked library if no (`n`) is provided) and which samples should be included. + Please use `1` to build a solution with all samples included or provide the list of samples to be included (using the corresponding number of the samples with spaces in between). + This will generate a `build\` directory where you will find the solution for the SDK samples (`FidelityFX SDK [DX12/VK] Samples.sln`).

Building and running in Visual Studio

diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md index 1a230c8d..9b3dc54a 100644 --- a/docs/getting-started/index.md +++ b/docs/getting-started/index.md @@ -4,22 +4,22 @@ The FidelityFX SDK is a collection of heavily optimized, open source effects (shader and runtime code) that can be used by developers to improve their DirectX®12 or Vulkan® applications. The FidelityFX SDK includes a number of effects: -- [Combined Adaptive Compute Ambient Occlusion 1.3](../techniques/combined-adaptive-compute-ambient-occlusion.md) -- [Contrast Adaptive Sharpening 1.1](../techniques/contrast-adaptive-sharpening.md) -- [Denoiser 1.2](../techniques/denoiser.md) -- [Classifier 1.0](../techniques/classifier.md) -- [Luminance Preserving Mapper 1.3](../techniques/luminance-preserving-mapper.md) -- [Parallel Sort 1.2](../techniques/parallel-sort.md) -- [Single Pass Downsampler 2.1](../techniques/single-pass-downsampler.md) -- [Stochastic Screen-Space Reflections 1.4](../techniques/stochastic-screen-space-reflections.md) -- [Super Resolution 1.1](../techniques/super-resolution-spatial.md) -- [Super Resolution 2.2](../techniques/super-resolution-temporal.md) -- [Super Resolution 3.0](../techniques/super-resolution-interpolation.md) -- [Super Resolution 3.0 Upscaler](../techniques/super-resolution-upscaler.md) -- [Variable Shading 1.1](../techniques/variable-shading.md) -- [Blur 1.0](../techniques/blur.md) -- [Depth of Field 1.0](../techniques/depth-of-field.md) -- [Lens 1.0](../techniques/lens.md) +- [Combined Adaptive Compute Ambient Occlusion 1.4](../techniques/combined-adaptive-compute-ambient-occlusion.md) +- [Contrast Adaptive Sharpening 1.2](../techniques/contrast-adaptive-sharpening.md) +- [Denoiser 1.3](../techniques/denoiser.md) +- [Classifier 1.3](../techniques/classifier.md) +- [Luminance Preserving Mapper 1.4](../techniques/luminance-preserving-mapper.md) +- [Parallel Sort 1.3](../techniques/parallel-sort.md) +- [Single Pass Downsampler 
2.2](../techniques/single-pass-downsampler.md) +- [Stochastic Screen-Space Reflections 1.5](../techniques/stochastic-screen-space-reflections.md) +- [Super Resolution 1.2](../techniques/super-resolution-spatial.md) +- [Super Resolution 2.3.2](../techniques/super-resolution-temporal.md) +- [Super Resolution 3.1.3](../techniques/super-resolution-interpolation.md) +- [Super Resolution 3.1.3 Upscaler](../techniques/super-resolution-upscaler.md) +- [Variable Shading 1.2](../techniques/variable-shading.md) +- [Blur 1.1](../techniques/blur.md) +- [Depth of Field 1.1](../techniques/depth-of-field.md) +- [Lens 1.1](../techniques/lens.md) - [Breadcrumbs 1.0](../techniques/breadcrumbs.md)

Supported ecosystems

@@ -41,7 +41,7 @@ All samples are written in C++, and use the [FidelityFX Cauldron Framework](../. AMD FidelityFX SDK is open source, and distributed under the MIT license. -For more information on the license terms please refer to the [license](license.md). +For more information on the license terms please refer to the [license](../license.md).

Support

diff --git a/docs/index.md b/docs/index.md index 7093b7ad..4ab73abd 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -

Welcome to the AMD FidelityFX™ SDK 1.1.2

+

Welcome to the AMD FidelityFX™ SDK 1.1.3

![alt text](/docs/media/fidelityfxsdk-logo-rescaled.png) @@ -18,10 +18,10 @@ The FidelityFX SDK includes: | [Stochastic Screen-Space Reflections](/docs/techniques/stochastic-screen-space-reflections.md) 1.5 | [SSSR sample](/docs/samples/stochastic-screen-space-reflections.md) | [FidelityFX Screen Space Reflections](https://gpuopen.com/fidelityfx-sssr/) | Provides high-fidelity screen-spaced reflections in your scene, without a hefty performance price tag. | | [Super Resolution (Spatial)](/docs/techniques/super-resolution-spatial.md) 1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution](https://gpuopen.com/fidelityfx-superresolution/) | Offers a spatial single-frame solution for producing higher resolution frames from lower resolution inputs. | | [Super Resolution (Temporal)](/docs/techniques/super-resolution-temporal.md) 2.3.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 2](https://gpuopen.com/fidelityfx-superresolution-2/) | Offers both spatial single-frame and temporal multi-frame solutions for producing high resolution frames from lower resolution inputs. | -| [Super Resolution 3](/docs/techniques/super-resolution-interpolation.md) 3.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames in combination with our temporal multi-frame solution for producing high resolution frames from lower resolution inputs. | -| [Super Resolution (Upscaler)](/docs/techniques/super-resolution-upscaler.md) 3.1.2 | [Super Resolution sample](/docs.samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers temporal multi-frame solutions for producing high resolution frames from lower resolution inputs. 
| -| [Frame Interpolation](techniques/frame-interpolation.md) 1.1.1 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames from multiple real input frames, and multiple sources of motion vector data. | -| [Frame Interpolation SwapChain](/docs/techniques/frame-interpolation-swap-chain.md) 1.1.1 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | A replacement DXGI Swapchain implementation for DX12 which allows for additional frames to be presented along with real game frames, with relevant frame pacing. | +| [Super Resolution 3](/docs/techniques/super-resolution-interpolation.md) 3.1.3 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames in combination with our temporal multi-frame solution for producing high resolution frames from lower resolution inputs. | +| [Super Resolution (Upscaler)](/docs/techniques/super-resolution-upscaler.md) 3.1.3 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers temporal multi-frame solutions for producing high resolution frames from lower resolution inputs. | +| [Frame Interpolation](techniques/frame-interpolation.md) 1.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames from multiple real input frames, and multiple sources of motion vector data. 
| +| [Frame Interpolation SwapChain](/docs/techniques/frame-interpolation-swap-chain.md) 1.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | A replacement DXGI Swapchain implementation for DX12 which allows for additional frames to be presented along with real game frames, with relevant frame pacing. | | [Optical Flow](/docs/techniques/optical-flow.md) 1.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers a motion-estimation algorithm which is useful for generating block-based motion vectors from temporal image inputs. | | [Variable Shading](/docs/techniques/variable-shading.md) 1.2 | [Variable Shading sample](/docs/samples/variable-shading.md) | [FidelityFX Variable Shading](https://gpuopen.com/fidelityfx-variable-shading/) | Helps you to drive Variable Rate Shading hardware introduced in RDNA2-based and contemporary GPUs, by analyzing the luminance of pixels in a tile to determine where the shading rate can be lowered to increase performance. | | [Blur](/docs/samples/blur.md) 1.1 | [Blur sample](/docs/samples/blur.md) | [FidelityFX Blur](https://gpuopen.com/fidelityfx-blur/) | A library of highly optimized functions which perform common blurring operations such as Gaussian blur, radial blurs, and others. | @@ -36,7 +36,7 @@ The FidelityFX SDK includes:

Further information

- [What's new in AMD FidelityFX SDK](/docs/whats-new/index.md) - - [FidelityFX SDK 1.1.2](/docs/whats-new/index.md) + - [FidelityFX SDK 1.1.2](/docs/whats-new/version_1_1_2.md) - [FidelityFX SDK 1.1.1](/docs/whats-new/version_1_1_1.md) - [FidelityFX SDK 1.1](/docs/whats-new/version_1_1.md) - [FidelityFX SDK 1.0](/docs/whats-new/version_1_0.md) @@ -50,7 +50,7 @@ The FidelityFX SDK includes: - [Tools](/docs/tools/index.md) - [Shader Precompiler](/docs/tools/ffx-sc.md) - - [FidelityFX SDK Media Delivery System](/docs/media-delivery.md) + - [FidelityFX SDK Media Delivery System](/docs/tools/media-delivery.md)

Known issues

@@ -65,12 +65,13 @@ The FidelityFX SDK includes: | FidelityFX DOF | All APIs / All Configs | Some artifacts may occur on some Intel Arc GPUs. | | All FidelityFX SDK Samples | All APIs / All Configs | There is a resource leak in the UploadContext used to load glTF content. | | All FidelityFX SDK Samples | All APIs / All Configs | Windows path length restrictions may cause compile issues. It is recommended to place the SDK close to the root of a drive or use subst or a mklink to shorten the path. | +| All FidelityFX SDK Samples | All APIs / All Configs | There is a build error when using CMake 3.31 or newer |

Open source

AMD FidelityFX SDK is open source, and available under the MIT license. -For more information on the license terms please refer to [license](/sdk/license.txt). +For more information on the license terms please refer to [license](/sdk/LICENSE.txt).

Disclaimer

@@ -98,4 +99,4 @@ Microsoft is a registered trademark of Microsoft Corporation in the US and other Windows is a registered trademark of Microsoft Corporation in the US and other jurisdictions. -© 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +© 2022-2024 Advanced Micro Devices, Inc. All rights reserved. diff --git a/docs/license.md b/docs/license.md index 16f0f4d8..fe052b85 100644 --- a/docs/license.md +++ b/docs/license.md @@ -2,7 +2,7 @@ # License -Copyright © 2023 Advanced Micro Devices, Inc. +Copyright © 2024 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), to deal diff --git a/docs/samples/super-resolution.md b/docs/samples/super-resolution.md index ac31027e..cc9ba463 100644 --- a/docs/samples/super-resolution.md +++ b/docs/samples/super-resolution.md @@ -4,7 +4,7 @@ ![alt text](media/super-resolution/fsr-sample_resized.jpg "A screenshot of the FSR sample.") -This sample demonstrates the use of FidelityFX Super Resolution 3.1.2 for upscaling and frame generation. +This sample demonstrates the use of FidelityFX Super Resolution 3.1.3 for upscaling and frame generation. For details on the underlying algorithms you can refer to the per-technique documentation for [FSR3 upscaling](../techniques/super-resolution-upscaler.md) and [frame interpolation](../techniques/frame-interpolation.md). diff --git a/docs/techniques/frame-interpolation-swap-chain.md b/docs/techniques/frame-interpolation-swap-chain.md index ee72585c..58af6ebd 100644 --- a/docs/techniques/frame-interpolation-swap-chain.md +++ b/docs/techniques/frame-interpolation-swap-chain.md @@ -1,6 +1,6 @@ - + -

FidelityFX Frame Interpolation Swapchain 1.1.1

+

FidelityFX Frame Interpolation Swapchain 1.1.2

Table of contents

diff --git a/docs/techniques/frame-interpolation.md b/docs/techniques/frame-interpolation.md index 94d8283a..58f4207c 100644 --- a/docs/techniques/frame-interpolation.md +++ b/docs/techniques/frame-interpolation.md @@ -1,6 +1,6 @@ - + -

FidelityFX Frame Interpolation 1.1.1

+

FidelityFX Frame Interpolation 1.1.2

Table of contents

diff --git a/docs/techniques/lens.md b/docs/techniques/lens.md index 28a9a076..a2ba89f9 100644 --- a/docs/techniques/lens.md +++ b/docs/techniques/lens.md @@ -84,7 +84,7 @@ Call `FfxLensApplyFilmGrain` with the following parameters to write out a new ca

Chromatic aberration (CA)

-Chromatic aberration has two types that arise when the lens system of a camera either: +Chromatic aberration has two [types](https://en.wikipedia.org/wiki/Chromatic_aberration#Types) that arise when the lens system of a camera either: * magnifies each color differently (transverse), or * focuses each color differently (axial). diff --git a/docs/techniques/optical-flow.md b/docs/techniques/optical-flow.md index 63b34cb3..3e649040 100644 --- a/docs/techniques/optical-flow.md +++ b/docs/techniques/optical-flow.md @@ -20,7 +20,7 @@ The final iteration result is what is output to the consumer of the technique.

Technical Requirements

-SM 6.2 is required. The effect uses wave operations, and also uses the HLSL `msad4` intrinsic extensively. In architectures where `msad4` is not natively executed by the GPU, this may result in reduced performance. +SM 6.2 is required. The effect uses wave operations, and also uses the [HLSL `msad4`](https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-msad4) intrinsic extensively. In architectures where `msad4` is not natively executed by the GPU, this may result in reduced performance.

Create the effect

diff --git a/docs/techniques/super-resolution-interpolation.md b/docs/techniques/super-resolution-interpolation.md index 321b6218..24c64ee9 100644 --- a/docs/techniques/super-resolution-interpolation.md +++ b/docs/techniques/super-resolution-interpolation.md @@ -1,6 +1,6 @@ - + -

FidelityFX Super Resolution 3.1.2 (FSR3) - Upscaling and Frame Generation

+

FidelityFX Super Resolution 3.1.3 (FSR3) - Upscaling and Frame Generation

![Screenshot](media/super-resolution-temporal/fsr3-sample_resized.jpg "A screenshot showcasing the final output of the effect") diff --git a/docs/techniques/super-resolution-temporal.md b/docs/techniques/super-resolution-temporal.md index 685d4cba..bc43e726 100644 --- a/docs/techniques/super-resolution-temporal.md +++ b/docs/techniques/super-resolution-temporal.md @@ -387,7 +387,7 @@ int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth); FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t jitterPhase, int32_t sequenceLength); ``` -Internally, these function implement a Halton[2,3] sequence. The goal of the Halton sequence is to provide spatially separated points, which cover the available space. +Internally, these functions implement a Halton[2,3] sequence [[Halton]](#references). The goal of the Halton sequence is to provide spatially separated points, which cover the available space. ![invert](media/super-resolution-temporal/jitter-space.svg "A diagram showing how to map sub-pixel jitter offsets to projection offsets.") @@ -778,7 +778,7 @@ The first step of the [Reproject & accumulate](#reproject-accumulate) stage is t ![invert](media/super-resolution-temporal/upsample-with-lanczos.svg "A diagram showing upsampling of the current frame's input using Lanczos.") -Next we must upsample the adjusted color. To perform upsampling, the adjusted color's pixel position serves as the center of a 5x5 Lanczos resampling kernel. In the diagram above, you can see that the Lanczos functions are centered around the display resolution sample `S`. The point in each pixel - labelled `P` - denotes the render resolution jittered sample position for which we calculate the Lanczos weights. Looking above and to the right of the 5x5 pixel neighbourhood, you can see the `Lanczos(x, 2)` resampling kernel being applied to the render resolution samples in the 5x5 grid of pixels surrounding the pixel position. 
It is worth noting that while conceptually the neighbourhood is 5x5, in the implementation only a 4x4 is actually sampled, due to the zero weighted contributions of those pixels on the periphery of the neighbourhood. The implementation of the Lanczos kernel may vary by GPU product. On RDNA2-based products, we use a look-up-table (LUT) to encode the `sinc(x)` function. This helps to produce a more harmonious balance between ALU and memory in the [Reproject & accumulate](#reproject-accumulate) stage. As the upsample step has access to the 5x5 neighbourhood of pixels, it makes sense from an efficiency point of view to also calculate the YCoCg bounding box - which is used during color rectification - at this point. The diagram below shows a 2D YCo bounding box being constructed from a 3x3 neighbourhood around the current pixel, in reality the bounding box also has a third dimension for Cg. +Next we must upsample the adjusted color. To perform upsampling, the adjusted color's pixel position serves as the center of a 5x5 Lanczos resampling kernel [[Lanczos]](#references). In the diagram above, you can see that the Lanczos functions are centered around the display resolution sample `S`. The point in each pixel - labelled `P` - denotes the render resolution jittered sample position for which we calculate the Lanczos weights. Looking above and to the right of the 5x5 pixel neighbourhood, you can see the `Lanczos(x, 2)` resampling kernel being applied to the render resolution samples in the 5x5 grid of pixels surrounding the pixel position. It is worth noting that while conceptually the neighbourhood is 5x5, in the implementation only a 4x4 is actually sampled, due to the zero weighted contributions of those pixels on the periphery of the neighbourhood. The implementation of the Lanczos kernel may vary by GPU product. On RDNA2-based products, we use a look-up-table (LUT) to encode the `sinc(x)` function. 
This helps to produce a more harmonious balance between ALU and memory in the [Reproject & accumulate](#reproject-accumulate) stage. As the upsample step has access to the 5x5 neighbourhood of pixels, it makes sense from an efficiency point of view to also calculate the YCoCg bounding box - which is used during color rectification - at this point. The diagram below shows a 2D YCo bounding box being constructed from a 3x3 neighbourhood around the current pixel, in reality the bounding box also has a third dimension for Cg. ![invert](media/super-resolution-temporal/calculate-bounding-box.svg "A diagram showing how a YCoCg bounding box is computed from the current frame's adjust color samples.") @@ -880,6 +880,12 @@ Refer to changelog for more detail on versions. [**Akeley-06**] Kurt Akeley and Jonathan Su, **"Minimum Triangle Separation for Correct Z-Buffer Occlusion"**, [http://www.cs.cmu.edu/afs/cs/academic/class/15869-f11/www/readings/akeley06_triseparation.pdf](https://www.cs.cmu.edu/afs/cs/academic/class/15869-f11/www/readings/akeley06_triseparation.pdf) +[**Lanczos**] Lanczos resampling, **"Lanczos resampling"**, [https://en.wikipedia.org/wiki/Lanczos_resampling](https://en.wikipedia.org/wiki/Lanczos_resampling) + +[**Halton**] Halton sequence, **"Halton sequence"**, [https://en.wikipedia.org/wiki/Halton_sequence](https://en.wikipedia.org/wiki/Halton_sequence) + +[**YCoCg**] YCoCg Color Space, [https://en.wikipedia.org/wiki/YCoCg](https://en.wikipedia.org/wiki/YCoCg) +

See also

- [FidelityFX Super Resolution](../samples/super-resolution.md) diff --git a/docs/techniques/super-resolution-upscaler.md b/docs/techniques/super-resolution-upscaler.md index 017fd15a..29e9465e 100644 --- a/docs/techniques/super-resolution-upscaler.md +++ b/docs/techniques/super-resolution-upscaler.md @@ -1,6 +1,6 @@ - + -

FidelityFX Super Resolution 3.1.2 (FSR3) - Upscaler

+

FidelityFX Super Resolution 3.1.3 (FSR3) - Upscaler

![Screenshot](media/super-resolution-temporal/fsr3-sample_resized.jpg "A screenshot showcasing the final output of the effect") @@ -254,7 +254,7 @@ int32_t ffxFsr3UpscalerGetJitterPhaseCount(int32_t renderWidth, int32_t displayW FfxErrorCode ffxFsr3UpscalerGetJitterOffset(float* outX, float* outY, int32_t jitterPhase, int32_t sequenceLength); ``` -Internally, these function implement a Halton[2,3] sequence. The goal of the Halton sequence is to provide spatially separated points, which cover the available space. +Internally, these functions implement a Halton[2,3] sequence [[Halton](#references)]. The goal of the Halton sequence is to provide spatially separated points, which cover the available space. ![alt text](media/super-resolution-temporal/jitter-space.svg "A diagram showing how to map sub-pixel jitter offsets to projection offsets.") @@ -664,7 +664,7 @@ The first step of the [Accumulate](#accumulate) stage is to assess each pixel fo ![alt text](media/super-resolution-temporal/upsample-with-lanczos.svg "A diagram showing upsampling of the current frame's input using Lanczos.") -Next we must upsample the adjusted color. To perform upsampling, the adjusted color's pixel position serves as the center of a 5x5 Lanczos resampling kernel. In the diagram above, you can see that the Lanczos functions are centered around the display resolution sample `S`. The point in each pixel - labelled `P` - denotes the render resolution jittered sample position for which we calculate the Lanczos weights. Looking above and to the right of the 5x5 pixel neighbourhood, you can see the `Lanczos(x, 2)` resampling kernel being applied to the render resolution samples in the 5x5 grid of pixels surrounding the pixel position. It is worth noting that while conceptually the neighbourhood is 5x5, in the implementation only a 4x4 is actually sampled, due to the zero weighted contributions of those pixels on the periphery of the neighbourhood.
The implementation of the Lanczos kernel may vary by GPU product. On RDNA2-based products, we use a look-up-table (LUT) to encode the `sinc(x)` function. This helps to produce a more harmonious balance between ALU and memory in the [Accumulate](#accumulate) stage. As the upsample step has access to the 5x5 neighbourhood of pixels, it makes sense from an efficiency point of view to also calculate the YCoCg bounding box - which is used during color rectification - at this point. The diagram below shows a 2D YCo bounding box being constructed from a 3x3 neighbourhood around the current pixel, in reality the bounding box also has a third dimension for Cg. +Next we must upsample the adjusted color. To perform upsampling, the adjusted color's pixel position serves as the center of a 5x5 Lanczos resampling kernel [[Lanczos]](#references). In the diagram above, you can see that the Lanczos functions are centered around the display resolution sample `S`. The point in each pixel - labelled `P` - denotes the render resolution jittered sample position for which we calculate the Lanczos weights. Looking above and to the right of the 5x5 pixel neighbourhood, you can see the `Lanczos(x, 2)` resampling kernel being applied to the render resolution samples in the 5x5 grid of pixels surrounding the pixel position. It is worth noting that while conceptually the neighbourhood is 5x5, in the implementation only a 4x4 is actually sampled, due to the zero weighted contributions of those pixels on the periphery of the neighbourhood. The implementation of the Lanczos kernel may vary by GPU product. On RDNA2-based products, we use a look-up-table (LUT) to encode the `sinc(x)` function. This helps to produce a more harmonious balance between ALU and memory in the [Accumulate](#accumulate) stage. 
As the upsample step has access to the 5x5 neighbourhood of pixels, it makes sense from an efficiency point of view to also calculate the YCoCg bounding box - which is used during color rectification - at this point. The diagram below shows a 2D YCo bounding box being constructed from a 3x3 neighbourhood around the current pixel, in reality the bounding box also has a third dimension for Cg. ![alt text](media/super-resolution-temporal/calculate-bounding-box.svg "A diagram showing how a YCoCg bounding box is computed from the current frame's adjust color samples.") @@ -761,6 +761,12 @@ Refer to changelog for more detail on versions. [**Akeley-06**] Kurt Akeley and Jonathan Su, "Minimum Triangle Separation for Correct Z-Buffer Occlusion", [http://www.cs.cmu.edu/afs/cs/academic/class/15869-f11/www/readings/akeley06_triseparation.pdf](https://www.cs.cmu.edu/afs/cs/academic/class/15869-f11/www/readings/akeley06_triseparation.pdf) +[**Lanczos**] Lanczos resampling, "Lanczos resampling", [https://en.wikipedia.org/wiki/Lanczos_resampling](https://en.wikipedia.org/wiki/Lanczos_resampling) + +[**Halton**] Halton sequence, "Halton sequence", [https://en.wikipedia.org/wiki/Halton_sequence](https://en.wikipedia.org/wiki/Halton_sequence) + +[**YCoCg**] YCoCg Color Space, [https://en.wikipedia.org/wiki/YCoCg](https://en.wikipedia.org/wiki/YCoCg) +

See also

- [FidelityFX Super Resolution](../samples/super-resolution.md) diff --git a/docs/whats-new/index.md b/docs/whats-new/index.md index 0f27d6a8..c355107b 100644 --- a/docs/whats-new/index.md +++ b/docs/whats-new/index.md @@ -1,8 +1,8 @@ - + -

What's new in the AMD FidelityFX™ SDK 1.1.2?

+

What's new in the AMD FidelityFX™ SDK 1.1.3?

-Welcome to the AMD FidelityFX SDK. This updated version of the SDK contains various fixes and support for the Microsoft® GDK (available from Microsoft through the GDK). +Welcome to the AMD FidelityFX SDK. This updated version of the SDK contains various fixes for FSR3.

New effects and features

@@ -10,21 +10,17 @@ None.

Updated effects

-

AMD FidelityFX Frameinterpolation Swapchain 1.1.1

+

AMD FidelityFX Frameinterpolation Swapchain 1.1.2

AMD FidelityFX Super Resolution 3 (FSR 3)

-FSR 3.1.2 includes a number of fixes for issues discovered with FSR 3.1.0/3.1.1. -FSR 3.1.2 has been tested and optimized for Microsoft GDK for Xbox Series X/S. +FSR 3.1.3 includes a number of fixes for issues discovered with FSR 3.1.0/3.1.1/3.1.2. +FSR 3.1.3 has been tested and optimized for Microsoft GDK for Xbox Series X/S.

AMD FidelityFX Super Resolution (FSR) API

* Minor non-API breaking additions to enable forward looking features -

AMD FidelityFX GDK Backend

- -The FidelityFX SDK now supports compiling natively to the Microsoft GDK for desktop and Xbox Series X/S. In order to use the GDK backend, please see the FidelityFX samples shipped with the latest version of Microsoft's GDK. -

Updated documentation

None. @@ -38,5 +34,5 @@ None. None. - + diff --git a/docs/whats-new/version_1_1_2.md b/docs/whats-new/version_1_1_2.md new file mode 100644 index 00000000..4de61c64 --- /dev/null +++ b/docs/whats-new/version_1_1_2.md @@ -0,0 +1,43 @@ + + +

What's new in the AMD FidelityFX™ SDK 1.1.2?

+ +Welcome to the AMD FidelityFX SDK. This updated version of the SDK contains various fixes and support for the Microsoft® GDK (available from Microsoft through the GDK). + +

New effects and features

+ +None. + +

Updated effects

+ +

AMD FidelityFX Frameinterpolation Swapchain 1.1.1

+ +

AMD FidelityFX Super Resolution 3 (FSR 3)

+ +FSR 3.1.2 includes a number of fixes for issues discovered with FSR 3.1.0/3.1.1. +FSR 3.1.2 has been tested and optimized for Microsoft GDK for Xbox Series X/S. + +

AMD FidelityFX Super Resolution (FSR) API

+ +* Minor non-API breaking additions to enable forward looking features + +

AMD FidelityFX GDK Backend

+ +The FidelityFX SDK now supports compiling natively to the Microsoft GDK for desktop and Xbox Series X/S. In order to use the GDK backend, please see the FidelityFX samples shipped with the latest version of Microsoft's GDK. + +

Updated documentation

+ +None. + +

Deprecated effects

+ +None. + +

Deprecated components

+ +None. + + + + + diff --git a/ffx-api/include/ffx_api/dx12/ffx_api_dx12.h b/ffx-api/include/ffx_api/dx12/ffx_api_dx12.h index 271f4ac5..fd174ec8 100644 --- a/ffx-api/include/ffx_api/dx12/ffx_api_dx12.h +++ b/ffx-api/include/ffx_api/dx12/ffx_api_dx12.h @@ -103,9 +103,11 @@ struct ffxConfigureDescFrameGenerationSwapChainKeyValueDX12 void* ptr; ///< Pointer to set or pointer to value to set. }; +//enum value matches enum FfxFrameInterpolationSwapchainConfigureKey enum FfxApiConfigureFrameGenerationSwapChainKeyDX12 { - FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_WAITCALLBACK = 0 ///< Sets FfxWaitCallbackFunc + FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_WAITCALLBACK = 0, ///< Sets FfxWaitCallbackFunc + FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_FRAMEPACINGTUNING = 2, ///< Sets FfxApiSwapchainFramePacingTuning }; #define FFX_API_QUERY_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_GPU_MEMORY_USAGE_DX12 0x00030009u @@ -134,6 +136,7 @@ static inline uint32_t ffxApiGetSurfaceFormatDX12(DXGI_FORMAT format) //case DXGI_FORMAT_R32G32B32_SINT: case DXGI_FORMAT_R16G16B16A16_TYPELESS: + return FFX_API_SURFACE_FORMAT_R16G16B16A16_TYPELESS; case DXGI_FORMAT_R16G16B16A16_FLOAT: return FFX_API_SURFACE_FORMAT_R16G16B16A16_FLOAT; //case DXGI_FORMAT_R16G16B16A16_UNORM: @@ -142,6 +145,7 @@ static inline uint32_t ffxApiGetSurfaceFormatDX12(DXGI_FORMAT format) //case DXGI_FORMAT_R16G16B16A16_SINT: case DXGI_FORMAT_R32G32_TYPELESS: + return FFX_API_SURFACE_FORMAT_R32G32_TYPELESS; case DXGI_FORMAT_R32G32_FLOAT: return FFX_API_SURFACE_FORMAT_R32G32_FLOAT; //case DXGI_FORMAT_R32G32_FLOAT: @@ -163,6 +167,7 @@ static inline uint32_t ffxApiGetSurfaceFormatDX12(DXGI_FORMAT format) return FFX_API_SURFACE_FORMAT_R8_UINT; case DXGI_FORMAT_R10G10B10A2_TYPELESS: + return FFX_API_SURFACE_FORMAT_R10G10B10A2_TYPELESS; case DXGI_FORMAT_R10G10B10A2_UNORM: return FFX_API_SURFACE_FORMAT_R10G10B10A2_UNORM; //case DXGI_FORMAT_R10G10B10A2_UINT: @@ -188,6 +193,7 @@ static inline uint32_t ffxApiGetSurfaceFormatDX12(DXGI_FORMAT format) return 
FFX_API_SURFACE_FORMAT_B8G8R8A8_SRGB; case DXGI_FORMAT_R16G16_TYPELESS: + return FFX_API_SURFACE_FORMAT_R16G16_TYPELESS; case DXGI_FORMAT_R16G16_FLOAT: return FFX_API_SURFACE_FORMAT_R16G16_FLOAT; //case DXGI_FORMAT_R16G16_UNORM: @@ -200,18 +206,22 @@ static inline uint32_t ffxApiGetSurfaceFormatDX12(DXGI_FORMAT format) case DXGI_FORMAT_R32_UINT: return FFX_API_SURFACE_FORMAT_R32_UINT; case DXGI_FORMAT_R32_TYPELESS: + return FFX_API_SURFACE_FORMAT_R32_TYPELESS; case DXGI_FORMAT_D32_FLOAT: case DXGI_FORMAT_R32_FLOAT: return FFX_API_SURFACE_FORMAT_R32_FLOAT; - case DXGI_FORMAT_R8G8_TYPELESS: case DXGI_FORMAT_R8G8_UINT: return FFX_API_SURFACE_FORMAT_R8G8_UINT; - //case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_TYPELESS: + return FFX_API_SURFACE_FORMAT_R8G8_TYPELESS; + case DXGI_FORMAT_R8G8_UNORM: + return FFX_API_SURFACE_FORMAT_R8G8_UNORM; //case DXGI_FORMAT_R8G8_SNORM: //case DXGI_FORMAT_R8G8_SINT: case DXGI_FORMAT_R16_TYPELESS: + return FFX_API_SURFACE_FORMAT_R16_TYPELESS; case DXGI_FORMAT_R16_FLOAT: return FFX_API_SURFACE_FORMAT_R16_FLOAT; case DXGI_FORMAT_R16_UINT: @@ -224,6 +234,7 @@ static inline uint32_t ffxApiGetSurfaceFormatDX12(DXGI_FORMAT format) //case DXGI_FORMAT_R16_SINT: case DXGI_FORMAT_R8_TYPELESS: + return FFX_API_SURFACE_FORMAT_R8_TYPELESS; case DXGI_FORMAT_R8_UNORM: case DXGI_FORMAT_A8_UNORM: return FFX_API_SURFACE_FORMAT_R8_UNORM; diff --git a/ffx-api/include/ffx_api/ffx_api_types.h b/ffx-api/include/ffx_api/ffx_api_types.h index 85d6edbe..a31b81fe 100644 --- a/ffx-api/include/ffx_api/ffx_api_types.h +++ b/ffx-api/include/ffx_api/ffx_api_types.h @@ -24,7 +24,7 @@ #include -/// An enumeration of surface formats. +/// An enumeration of surface formats. 
Needs to match enum FfxSurfaceFormat enum FfxApiSurfaceFormat { FFX_API_SURFACE_FORMAT_UNKNOWN, ///< Unknown format @@ -57,10 +57,19 @@ enum FfxApiSurfaceFormat FFX_API_SURFACE_FORMAT_R8G8_UINT, ///< 8 bit per channel, 2 channel unsigned integer format FFX_API_SURFACE_FORMAT_R32_FLOAT, ///< 32 bit per channel, 1 channel float format FFX_API_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP, ///< 9 bit per channel, 5 bit exponent format + + FFX_API_SURFACE_FORMAT_R16G16B16A16_TYPELESS, ///< 16 bit per channel, 4 channel typeless format + FFX_API_SURFACE_FORMAT_R32G32_TYPELESS, ///< 32 bit per channel, 2 channel typeless format + FFX_API_SURFACE_FORMAT_R10G10B10A2_TYPELESS, ///< 10 bit per 3 channel, 2 bit for 1 channel typeless format + FFX_API_SURFACE_FORMAT_R16G16_TYPELESS, ///< 16 bit per channel, 2 channel typeless format + FFX_API_SURFACE_FORMAT_R16_TYPELESS, ///< 16 bit per channel, 1 channel typeless format + FFX_API_SURFACE_FORMAT_R8_TYPELESS, ///< 8 bit per channel, 1 channel typeless format + FFX_API_SURFACE_FORMAT_R8G8_TYPELESS, ///< 8 bit per channel, 2 channel typeless format + FFX_API_SURFACE_FORMAT_R32_TYPELESS, ///< 32 bit per channel, 1 channel typeless format }; /// An enumeration of resource usage. -enum FfxApiResorceUsage +enum FfxApiResourceUsage { FFX_API_RESOURCE_USAGE_READ_ONLY = 0, ///< No usage flags indicate a resource is read only. FFX_API_RESOURCE_USAGE_RENDERTARGET = (1<<0), ///< Indicates a resource will be used as render target. @@ -70,6 +79,7 @@ enum FfxApiResorceUsage FFX_API_RESOURCE_USAGE_ARRAYVIEW = (1<<4), ///< Indicates a resource that will generate array views. Works on 2D and cubemap textures FFX_API_RESOURCE_USAGE_STENCILTARGET = (1<<5), ///< Indicates a resource will be used as stencil target. }; +typedef FfxApiResourceUsage FfxApiResorceUsage; // Corrects a typo that shipped with original API /// An enumeration of resource states.
enum FfxApiResourceState @@ -176,8 +186,57 @@ struct FfxApiResource uint32_t state; }; +//struct definition matches FfxEffectMemoryUsage typedef struct FfxApiEffectMemoryUsage { uint64_t totalUsageInBytes; uint64_t aliasableUsageInBytes; } FfxApiEffectMemoryUsage; + +/* +Tuning varianceFactor and safetyMarginInMs Tips: +Calculation of frame pacing algorithm's next target timestamp: +target frametime delta = average Frametime - (variance * varianceFactor) - safetyMarginInMs + +Default Tuning uses safetyMarginInMs==0.1ms and varianceFactor==0.1. +Say Tuning set A uses safetyMarginInMs==0.75ms, and varianceFactor==0.1. +Say Tuning Set B uses safetyMarginInMs==0.01ms and varianceFactor==0.3. + +Example #1 - Actual Game Cutscene. Game's framerate after FG ON during camera pan from normal (19ms avg frametime) to complex (37ms avg frametime over 1 sec) and back to normal scene complexity (19ms avg frametime). +After the panning is done, +- Default Tuning now gets stuck at targeting ~33ms after panning to a complex scene. GPU utilization is significantly lower in this case. +- Tuning Set B and Tuning Set A are able to recover close to ~19ms because these two tunings result in a lower "target frametime delta" than Default Tuning. + +However, a larger varianceFactor or safetyMarginInMs results in higher variance, as seen in Example #2 below. + +Example #2 - FFX_API_FSR sample. Set app fps cap to 33.33ms. Use OCAT to capture 10s at default camera position. +FSR 3.1.0 FG msbetweenpresents ping-pong between 16.552 (5th-percentile) and 16.832 (95th-percentile). Variance is 0.01116. +Tuning set A FG msbetweenpresents ping-pong between 15.901 (5th-percentile) and 17.500 (95th-percentile). Variance is 0.057674. +Tuning Set B FG msbetweenpresents ping-pong between 16.589 (5th-percentile) and 16.971 (95th-percentile). Variance is 0.014452.
+ +| FG output Frames timestamp | n | n+1 | n+2 | n+3 | frame delta n+1 to n+2 | frame delta n+2 to n+3 | +| -------------------------- | - | ------ | ----- | ------ | ---------------------- | ---------------------- | +| App real frame presents | 0 | | 33.33 | | | | +| Default Tuning | 0 | 16.552 | 33.33 | 49.882 | 16.778 | 16.552 | +| Tuning Set A | 0 | 15.901 | 33.33 | 49.231 | 17.429 | 15.901 | +| Tuning Set B | 0 | 16.589 | 33.33 | 49.919 | 16.741 | 16.589 | + +Analysis of table data in words: +Ignoring the cost of FI, +"Tuning set A"'s "target frametime delta" of 15.901 results in a larger frame-to-frame delta (or in other words larger variance) vs Default tuning. +"Tuning set B"'s "target frametime delta" of 16.589 results in a slightly larger frame-to-frame delta (or in other words slightly larger variance) vs Default tuning. + +TLDR: +If your game, when using FG, is running at an unexpectedly low frame rate after a gradual transition from complex to easy scene complexity, you could try setting "Tuning Set B" to recover lost FPS at the cost of slightly higher variance. + +*/ + +//struct definition matches FfxSwapchainFramePacingTuning +typedef struct FfxApiSwapchainFramePacingTuning +{ + float safetyMarginInMs; // In milliseconds. Default is 0.1ms + float varianceFactor; // valid range [0.0,1.0]. Default is 0.1 + bool allowHybridSpin; //Allows pacing spinlock to sleep. Default is false. + uint32_t hybridSpinTime; //How long to spin if allowHybridSpin is true. Measured in timer resolution units. Not recommended to go below 2. Will result in frequent overshoots. Default is 2. + bool allowWaitForSingleObjectOnFence; //Allows WaitForSingleObject instead of spinning for fence value. Default is false.
+} FfxApiSwapchainFramePacingTuning; diff --git a/ffx-api/include/ffx_api/ffx_framegeneration.h b/ffx-api/include/ffx_api/ffx_framegeneration.h index d6054fc7..c44faed7 100644 --- a/ffx-api/include/ffx_api/ffx_framegeneration.h +++ b/ffx-api/include/ffx_api/ffx_framegeneration.h @@ -45,8 +45,10 @@ enum FfxApiDispatchFramegenerationFlags { FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_TEAR_LINES = (1 << 0), ///< A bit indicating that the debug tear lines will be drawn to the generated output. FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_RESET_INDICATORS = (1 << 1), ///< A bit indicating that the debug reset indicators will be drawn to the generated output. - FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_VIEW = (1 << 2), ///< A bit indicating that the generated output resource will contain debug views with relevant information. - FFX_FRAMEGENERATION_FLAG_NO_SWAPCHAIN_CONTEXT_NOTIFY = (1 << 3), ///< A bit indicating that the context should only run frame interpolation and not modify the swapchain. + FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_VIEW = (1 << 2), ///< A bit indicating that the generated output resource will contain debug views with relevant information. + FFX_FRAMEGENERATION_FLAG_NO_SWAPCHAIN_CONTEXT_NOTIFY = (1 << 3), ///< A bit indicating that the context should only run frame interpolation and not modify the swapchain. + FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_PACING_LINES = (1 << 4), ///< A bit indicating that the debug pacing lines will be drawn to the generated output. + }; enum FfxApiUiCompositionFlags @@ -160,7 +162,14 @@ struct ffxQueryDescFrameGenerationGetGPUMemoryUsage struct ffxConfigureDescFrameGenerationRegisterDistortionFieldResource { ffxConfigureDescHeader header; - struct FfxApiResource distortionField; ///< A resource containing distortion offset data. Needs to be 4-component (ie. RGBA). Read by FG shaders via Sample. Resource's xy components encodes [UV coordinate of pixel after lens distortion effect- UV coordinate of pixel before lens distortion]. 
+ struct FfxApiResource distortionField; ///< A resource containing distortion offset data. Needs to be 2-component (ie. RG). Read by FG shaders via Sample. Resource's xy components encodes [UV coordinate of pixel after lens distortion effect- UV coordinate of pixel before lens distortion]. +}; + +#define FFX_API_CREATE_CONTEXT_DESC_TYPE_FRAMEGENERATION_HUDLESS 0x00020009u +struct ffxCreateContextDescFrameGenerationHudless +{ + ffxCreateContextDescHeader header; + uint32_t hudlessBackBufferFormat; ///< The surface format for the hudless back buffer. One of the values from FfxApiSurfaceFormat. }; #if defined(__cplusplus) diff --git a/ffx-api/include/ffx_api/ffx_framegeneration.hpp b/ffx-api/include/ffx_api/ffx_framegeneration.hpp index 30f78021..7d5cb447 100644 --- a/ffx-api/include/ffx_api/ffx_framegeneration.hpp +++ b/ffx-api/include/ffx_api/ffx_framegeneration.hpp @@ -35,6 +35,11 @@ struct struct_type : std::integral_constant struct CreateContextDescFrameGeneration : public InitHelper {}; +template<> +struct struct_type : std::integral_constant {}; + +struct CreateContextDescFrameGenerationHudless : public InitHelper {}; + template<> struct struct_type : std::integral_constant {}; diff --git a/ffx-api/include/ffx_api/vk/ffx_api_vk.h b/ffx-api/include/ffx_api/vk/ffx_api_vk.h index dd670fad..1f56ea6d 100644 --- a/ffx-api/include/ffx_api/vk/ffx_api_vk.h +++ b/ffx-api/include/ffx_api/vk/ffx_api_vk.h @@ -99,9 +99,18 @@ struct ffxConfigureDescFrameGenerationSwapChainKeyValueVK void* ptr; ///< Pointer to set or pointer to value to set. 
}; +//enum value matches enum FfxFrameInterpolationSwapchainConfigureKey enum FfxApiConfigureFrameGenerationSwapChainKeyVK { - FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_WAITCALLBACK = 0 ///< Sets FfxWaitCallbackFunc + FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_WAITCALLBACK = 0, ///< Sets FfxWaitCallbackFunc + FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_FRAMEPACINGTUNING = 2, ///< Sets FfxApiSwapchainFramePacingTuning cast from ptr +}; + +#define FFX_API_QUERY_DESC_TYPE_FRAMEGENERATIONSWAPCHAIN_GPU_MEMORY_USAGE_VK 0x00040009u +struct ffxQueryFrameGenerationSwapChainGetGPUMemoryUsageVK +{ + ffxQueryDescHeader header; + struct FfxApiEffectMemoryUsage* gpuMemoryUsageFrameGenerationSwapchain; }; /// Function to get the number of presents. This is useful when using frame interpolation @@ -124,12 +133,21 @@ struct ffxQueryDescSwapchainReplacementFunctionsVK PFN_getLastPresentCountFFXAPI pOutGetLastPresentCountFFXAPI; ///< Additional function to get the number of times present has been called since the swapchain creation.
}; +#define FFX_API_CREATE_CONTEXT_DESC_TYPE_FGSWAPCHAIN_MODE_VK 0x40010u +struct ffxCreateContextDescFrameGenerationSwapChainModeVK +{ + ffxCreateContextDescHeader header; + bool composeOnPresentQueue; ///< flags indicating that composition will happen on the present queue +}; + static inline uint32_t ffxApiGetSurfaceFormatVK(VkFormat fmt) { switch (fmt) { case VK_FORMAT_R32G32B32A32_SFLOAT: return FFX_API_SURFACE_FORMAT_R32G32B32A32_FLOAT; + case VK_FORMAT_R32G32B32_SFLOAT: + return FFX_API_SURFACE_FORMAT_R32G32B32_FLOAT; case VK_FORMAT_R32G32B32A32_UINT: return FFX_API_SURFACE_FORMAT_R32G32B32A32_UINT; case VK_FORMAT_R16G16B16A16_SFLOAT: diff --git a/ffx-api/include/ffx_api/vk/ffx_api_vk.hpp b/ffx-api/include/ffx_api/vk/ffx_api_vk.hpp index ebffa738..6eb2cce7 100644 --- a/ffx-api/include/ffx_api/vk/ffx_api_vk.hpp +++ b/ffx-api/include/ffx_api/vk/ffx_api_vk.hpp @@ -40,6 +40,11 @@ struct struct_type : std::integr struct CreateContextDescFrameGenerationSwapChainVK : public InitHelper {}; +template<> +struct struct_type : std::integral_constant {}; + +struct CreateContextDescFrameGenerationSwapChainModeVK : public InitHelper {}; + template<> struct struct_type : std::integral_constant {}; @@ -70,4 +75,9 @@ struct struct_type : std::in struct ConfigureDescFrameGenerationSwapChainKeyValueVK : public InitHelper {}; +template<> +struct struct_type : std::integral_constant {}; + +struct QueryFrameGenerationSwapChainGetGPUMemoryUsageVK : public InitHelper {}; + } diff --git a/ffx-api/src/dx12/ffx_provider_framegenerationswapchain_dx12.cpp b/ffx-api/src/dx12/ffx_provider_framegenerationswapchain_dx12.cpp index 9dbbfad6..661fc812 100644 --- a/ffx-api/src/dx12/ffx_provider_framegenerationswapchain_dx12.cpp +++ b/ffx-api/src/dx12/ffx_provider_framegenerationswapchain_dx12.cpp @@ -41,13 +41,13 @@ struct InternalFgScContext uint64_t ffxProvider_FrameGenerationSwapChain_DX12::GetId() const { - // FG SwapChain DX12, version 1.0.0 - return 0xF65C'DD12'01'000'000ui64; + // FG 
SwapChain DX12, version 1.1.2 + return 0xF65C'DD12'01'001'002ui64; } const char* ffxProvider_FrameGenerationSwapChain_DX12::GetVersionName() const { - return "1.0"; + return "1.1.2"; } ffxReturnCode_t ffxProvider_FrameGenerationSwapChain_DX12::CreateContext(ffxContext* context, diff --git a/ffx-api/src/ffx_provider_framegeneration.cpp b/ffx-api/src/ffx_provider_framegeneration.cpp index 268420d6..b2943429 100644 --- a/ffx-api/src/ffx_provider_framegeneration.cpp +++ b/ffx-api/src/ffx_provider_framegeneration.cpp @@ -120,7 +120,14 @@ ffxReturnCode_t ffxProvider_FrameGeneration::CreateContext(ffxContext* context, fiDescription.displaySize.width = desc->displaySize.width; fiDescription.displaySize.height = desc->displaySize.height; fiDescription.backBufferFormat = ConvertEnum(desc->backBufferFormat); - + fiDescription.previousInterpolationSourceFormat = ConvertEnum(desc->backBufferFormat); + for (auto it = header; it; it = it->pNext) + { + if (auto descHudless = ffx::DynamicCast(it)) + { + fiDescription.previousInterpolationSourceFormat = ConvertEnum(descHudless->hudlessBackBufferFormat); + } + } // set up Frameinterpolation TRY2(ffxFrameInterpolationContextCreate(&internal_context->fiContext, &fiDescription)); @@ -291,6 +298,12 @@ ffxReturnCode_t ffxProvider_FrameGeneration::Configure(ffxContext* context, cons config.presentCallbackContext = internal_context; } + config.drawDebugPacingLines = false; + if (desc->flags & FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_PACING_LINES) + { + config.drawDebugPacingLines = true; + } + config.frameGenerationEnabled = desc->frameGenerationEnabled; config.HUDLessColor = Convert(desc->HUDLessColor); config.onlyPresentInterpolated = desc->onlyPresentGenerated; diff --git a/ffx-api/src/vk/ffx_provider_framegenerationswapchain_vk.cpp b/ffx-api/src/vk/ffx_provider_framegenerationswapchain_vk.cpp index 6a350be2..7cc2730e 100644 --- a/ffx-api/src/vk/ffx_provider_framegenerationswapchain_vk.cpp +++ 
b/ffx-api/src/vk/ffx_provider_framegenerationswapchain_vk.cpp @@ -91,13 +91,13 @@ bool ffxProvider_FrameGenerationSwapChain_VK::CanProvide(uint64_t type) const uint64_t ffxProvider_FrameGenerationSwapChain_VK::GetId() const { - // FG SwapChain VK, version 1.0.0 - return 0xF65D'564B'01'000'000ui64; + // FG SwapChain VK, version 1.1.2 + return 0xF65D'564B'01'001'002ui64; } const char* ffxProvider_FrameGenerationSwapChain_VK::GetVersionName() const { - return "1.0"; + return "1.1.2"; } inline VkQueueInfoFFX convertQueueInfo(VkQueueInfoFFXAPI queueInfo) @@ -125,6 +125,21 @@ ffxReturnCode_t ffxProvider_FrameGenerationSwapChain_VK::CreateContext(ffxContex internal_context->frameInterpolationInfo.imageAcquireQueue = convertQueueInfo(desc->imageAcquireQueue); internal_context->frameInterpolationInfo.pAllocator = desc->allocator; + // set the default values + internal_context->frameInterpolationInfo.compositionMode = VK_COMPOSITION_MODE_NOT_FORCED_FFX; + + // get the extensions + for (auto it = header->pNext; it != nullptr; it = it->pNext) + { + if (auto mode = ffx::DynamicCast(it)) + { + if (mode->composeOnPresentQueue) + internal_context->frameInterpolationInfo.compositionMode = VK_COMPOSITION_MODE_PRESENT_QUEUE_FFX; + else + internal_context->frameInterpolationInfo.compositionMode = VK_COMPOSITION_MODE_GAME_QUEUE_FFX; + } + } + FfxSwapchain swapChain = ffxGetSwapchainVK(*desc->swapchain); TRY2(ffxReplaceSwapchainForFrameinterpolationVK(desc->gameQueue.queue, swapChain, &desc->createInfo, &internal_context->frameInterpolationInfo)); internal_context->fiSwapChain = *desc->swapchain = ffxGetVKSwapchain(swapChain); @@ -217,6 +232,11 @@ ffxReturnCode_t ffxProvider_FrameGenerationSwapChain_VK::Query(ffxContext* conte return FFX_API_RETURN_OK; } + else if (auto desc = ffx::DynamicCast(header)) + { + TRY2(ffxFrameInterpolationSwapchainGetGpuMemoryUsageVK(ffxGetSwapchainVK(internal_context->fiSwapChain), reinterpret_cast (desc->gpuMemoryUsageFrameGenerationSwapchain))); + return 
FFX_API_RETURN_OK; + } else if (auto desc = ffx::DynamicCast(header)) { desc->pOutCreateSwapchainFFXAPI = vkCreateSwapchainFFXAPI; diff --git a/framework/cauldron/framework/config/cauldronconfig.json b/framework/cauldron/framework/config/cauldronconfig.json index c07938bd..d1666d2a 100644 --- a/framework/cauldron/framework/config/cauldronconfig.json +++ b/framework/cauldron/framework/config/cauldronconfig.json @@ -34,12 +34,22 @@ "Format": "D32_FLOAT" }, "DistortionField0": { - "Format": "RGBA16_FLOAT", + "Format": "RG16_FLOAT", "AllowUAV": true, "RenderResolution": false }, "DistortionField1": { - "Format": "RGBA16_FLOAT", + "Format": "RG16_FLOAT", + "AllowUAV": true, + "RenderResolution": false + }, + "HudlessTarget0": { + "Format": "RGBA8_UNORM", + "AllowUAV": true, + "RenderResolution": false + }, + "HudlessTarget1": { + "Format": "RGBA8_UNORM", "AllowUAV": true, "RenderResolution": false } diff --git a/framework/cauldron/framework/inc/core/backend_interface.h b/framework/cauldron/framework/inc/core/backend_interface.h index 212dccd0..eccaf9b7 100644 --- a/framework/cauldron/framework/inc/core/backend_interface.h +++ b/framework/cauldron/framework/inc/core/backend_interface.h @@ -214,6 +214,8 @@ static cauldron::ResourceFormat GetFrameworkSurfaceFormat(FfxSurfaceFormat forma return cauldron::ResourceFormat::R32_FLOAT; case FFX_SURFACE_FORMAT_R10G10B10A2_UNORM: return cauldron::ResourceFormat::RGB10A2_UNORM; + case FFX_SURFACE_FORMAT_R10G10B10A2_TYPELESS: + return cauldron::ResourceFormat::RGB10A2_TYPELESS; default: cauldron::CauldronCritical(L"FFXInterface: Framework: Unsupported format requested. 
Please implement."); return cauldron::ResourceFormat::Unknown; @@ -314,8 +316,14 @@ static cauldron::ResourceFormat GetFrameworkSurfaceFormatApi(uint32_t format) return cauldron::ResourceFormat::RGBA32_TYPELESS; case FFX_API_SURFACE_FORMAT_R32G32B32A32_FLOAT: return cauldron::ResourceFormat::RGBA32_FLOAT; + case FFX_API_SURFACE_FORMAT_R32G32B32_FLOAT: + return cauldron::ResourceFormat::RGB32_FLOAT; + case FFX_API_SURFACE_FORMAT_R16G16B16A16_TYPELESS: + return cauldron::ResourceFormat::RGBA16_TYPELESS; case FFX_API_SURFACE_FORMAT_R16G16B16A16_FLOAT: return cauldron::ResourceFormat::RGBA16_FLOAT; + case FFX_API_SURFACE_FORMAT_R32G32_TYPELESS: + return cauldron::ResourceFormat::RG32_TYPELESS; case FFX_API_SURFACE_FORMAT_R32G32_FLOAT: return cauldron::ResourceFormat::RG32_FLOAT; case FFX_API_SURFACE_FORMAT_R32_UINT: @@ -338,12 +346,16 @@ static cauldron::ResourceFormat GetFrameworkSurfaceFormatApi(uint32_t format) return cauldron::ResourceFormat::RG11B10_FLOAT; case FFX_API_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP: return cauldron::ResourceFormat::RGB9E5_SHAREDEXP; + case FFX_API_SURFACE_FORMAT_R16G16_TYPELESS: + return cauldron::ResourceFormat::RG16_TYPELESS; case FFX_API_SURFACE_FORMAT_R16G16_FLOAT: return cauldron::ResourceFormat::RG16_FLOAT; case FFX_API_SURFACE_FORMAT_R16G16_UINT: return cauldron::ResourceFormat::RG16_UINT; case FFX_API_SURFACE_FORMAT_R16G16_SINT: return cauldron::ResourceFormat::RG16_SINT; + case FFX_API_SURFACE_FORMAT_R16_TYPELESS: + return cauldron::ResourceFormat::R16_TYPELESS; case FFX_API_SURFACE_FORMAT_R16_FLOAT: return cauldron::ResourceFormat::R16_FLOAT; case FFX_API_SURFACE_FORMAT_R16_UINT: @@ -352,14 +364,22 @@ static cauldron::ResourceFormat GetFrameworkSurfaceFormatApi(uint32_t format) return cauldron::ResourceFormat::R16_UNORM; case FFX_API_SURFACE_FORMAT_R16_SNORM: return cauldron::ResourceFormat::R16_SNORM; + case FFX_API_SURFACE_FORMAT_R8_TYPELESS: + return cauldron::ResourceFormat::R8_TYPELESS; case FFX_API_SURFACE_FORMAT_R8_UNORM: 
return cauldron::ResourceFormat::R8_UNORM; case FFX_API_SURFACE_FORMAT_R8_UINT: return cauldron::ResourceFormat::R8_UINT; + case FFX_API_SURFACE_FORMAT_R8G8_TYPELESS: + return cauldron::ResourceFormat::RG8_TYPELESS; case FFX_API_SURFACE_FORMAT_R8G8_UNORM: return cauldron::ResourceFormat::RG8_UNORM; + case FFX_API_SURFACE_FORMAT_R32_TYPELESS: + return cauldron::ResourceFormat::R32_TYPELESS; case FFX_API_SURFACE_FORMAT_R32_FLOAT: return cauldron::ResourceFormat::R32_FLOAT; + case FFX_API_SURFACE_FORMAT_R10G10B10A2_TYPELESS: + return cauldron::ResourceFormat::RGB10A2_TYPELESS; case FFX_API_SURFACE_FORMAT_R10G10B10A2_UNORM: return cauldron::ResourceFormat::RGB10A2_UNORM; default: @@ -435,10 +455,14 @@ inline FfxSurfaceFormat GetFfxSurfaceFormat(cauldron::ResourceFormat format) return FFX_SURFACE_FORMAT_R32G32B32A32_UINT; case (cauldron::ResourceFormat::RGBA32_FLOAT): return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT; + case (cauldron::ResourceFormat::RGBA16_TYPELESS): + return FFX_SURFACE_FORMAT_R16G16B16A16_TYPELESS; case (cauldron::ResourceFormat::RGBA16_FLOAT): return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT; case (cauldron::ResourceFormat::RGB32_FLOAT): return FFX_SURFACE_FORMAT_R32G32B32_FLOAT; + case (cauldron::ResourceFormat::RG32_TYPELESS): + return FFX_SURFACE_FORMAT_R32G32_TYPELESS; case (cauldron::ResourceFormat::RG32_FLOAT): return FFX_SURFACE_FORMAT_R32G32_FLOAT; case (cauldron::ResourceFormat::R8_UINT): @@ -465,12 +489,18 @@ inline FfxSurfaceFormat GetFfxSurfaceFormat(cauldron::ResourceFormat format) return FFX_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP; case (cauldron::ResourceFormat::RGB10A2_UNORM): return FFX_SURFACE_FORMAT_R10G10B10A2_UNORM; + case (cauldron::ResourceFormat::RGB10A2_TYPELESS): + return FFX_SURFACE_FORMAT_R10G10B10A2_TYPELESS; + case (cauldron::ResourceFormat::RG16_TYPELESS): + return FFX_SURFACE_FORMAT_R16G16_TYPELESS; case (cauldron::ResourceFormat::RG16_FLOAT): return FFX_SURFACE_FORMAT_R16G16_FLOAT; case (cauldron::ResourceFormat::RG16_UINT): 
return FFX_SURFACE_FORMAT_R16G16_UINT; case (cauldron::ResourceFormat::RG16_SINT): return FFX_SURFACE_FORMAT_R16G16_SINT; + case (cauldron::ResourceFormat::R16_TYPELESS): + return FFX_SURFACE_FORMAT_R16_TYPELESS; case (cauldron::ResourceFormat::R16_FLOAT): return FFX_SURFACE_FORMAT_R16_FLOAT; case (cauldron::ResourceFormat::R16_UINT): @@ -479,12 +509,18 @@ inline FfxSurfaceFormat GetFfxSurfaceFormat(cauldron::ResourceFormat format) return FFX_SURFACE_FORMAT_R16_UNORM; case (cauldron::ResourceFormat::R16_SNORM): return FFX_SURFACE_FORMAT_R16_SNORM; + case (cauldron::ResourceFormat::R8_TYPELESS): + return FFX_SURFACE_FORMAT_R8_TYPELESS; case (cauldron::ResourceFormat::R8_UNORM): return FFX_SURFACE_FORMAT_R8_UNORM; + case cauldron::ResourceFormat::RG8_TYPELESS: + return FFX_SURFACE_FORMAT_R8G8_TYPELESS; case cauldron::ResourceFormat::RG8_UNORM: return FFX_SURFACE_FORMAT_R8G8_UNORM; case cauldron::ResourceFormat::RG8_UINT: return FFX_SURFACE_FORMAT_R8G8_UINT; + case cauldron::ResourceFormat::R32_TYPELESS: + return FFX_SURFACE_FORMAT_R32_TYPELESS; case cauldron::ResourceFormat::R32_FLOAT: case cauldron::ResourceFormat::D32_FLOAT: return FFX_SURFACE_FORMAT_R32_FLOAT; diff --git a/framework/cauldron/framework/inc/core/framework.h b/framework/cauldron/framework/inc/core/framework.h index b64cbd47..5893f03c 100644 --- a/framework/cauldron/framework/inc/core/framework.h +++ b/framework/cauldron/framework/inc/core/framework.h @@ -187,6 +187,8 @@ namespace cauldron // DisplayMode DisplayMode CurrentDisplayMode = DisplayMode::DISPLAYMODE_LDR; + ResourceFormat SwapChainFormat = ResourceFormat::Unknown; + // Requested minimum shader model ShaderModel MinShaderModel = ShaderModel::SM5_1; diff --git a/framework/cauldron/framework/inc/core/uimanager.h b/framework/cauldron/framework/inc/core/uimanager.h index a9e2760b..56525e9c 100644 --- a/framework/cauldron/framework/inc/core/uimanager.h +++ b/framework/cauldron/framework/inc/core/uimanager.h @@ -225,6 +225,15 @@ namespace cauldron 
, m_Callback(std::move(callback)) {} + /** + * @brief Constructor. Creates the UI button. + */ + UIButton(uint64_t id, const char* text, std::function callback, bool shown = true, bool sameLine = false) + : UIElement(id, UIElementType::Button, text, AlwaysEnable, shown, sameLine) + , m_Callback(std::move(callback)) + { + } + /** * @brief Destructor. Destroys the UI button. */ diff --git a/framework/cauldron/framework/inc/render/gpuresource.h b/framework/cauldron/framework/inc/render/gpuresource.h index d9d7f779..e0c47420 100644 --- a/framework/cauldron/framework/inc/render/gpuresource.h +++ b/framework/cauldron/framework/inc/render/gpuresource.h @@ -43,6 +43,7 @@ namespace cauldron bool IsSRGB(ResourceFormat format); bool IsDepth(ResourceFormat format); + ResourceFormat ToGamma(ResourceFormat format); ResourceFormat FromGamma(ResourceFormat format); uint32_t GetResourceFormatStride(ResourceFormat format); diff --git a/framework/cauldron/framework/inc/render/renderdefines.h b/framework/cauldron/framework/inc/render/renderdefines.h index 7cdc6d8f..cceb2997 100644 --- a/framework/cauldron/framework/inc/render/renderdefines.h +++ b/framework/cauldron/framework/inc/render/renderdefines.h @@ -212,16 +212,19 @@ namespace cauldron // 8-bit R8_SINT, ///< Single-component (R) 8-bit (signed int) type. R8_UINT, ///< Single-component (R) 8-bit (unsigned int) type. + R8_TYPELESS, ///< Single-component (R) 8-bit (typeless) type. R8_UNORM, ///< Single-component (R) 8-bit (unsigned normalized) type. // 16-bit R16_SINT, ///< Single-component (R) 16-bit (signed int) type. R16_UINT, ///< Single-component (R) 16-bit (unsigned int) type. + R16_TYPELESS, ///< Single-component (R) 16-bit (typeless) type. R16_FLOAT, ///< Single-component (R) 16-bit (floating point) type. R16_UNORM, ///< Single-component (R) 16-bit (unsigned normalized) type. R16_SNORM, ///< Single-component (R) 16-bit (signed normalized) type. RG8_SINT, ///< 2-component (RG) 16-bit (signed int) type. 
RG8_UINT, ///< 2-component (RG) 16-bit (unsigned int) type. + RG8_TYPELESS, ///< 2-component (RG) 16-bit (typeless) type. RG8_UNORM, ///< 2-component (RG) 16-bit (unsigned normalized) type. // 32-bit @@ -238,12 +241,15 @@ namespace cauldron BGRA8_SRGB, ///< 4-Component (BGRA) 32-bit (SRGB) type. RGBA8_TYPELESS, ///< 4-Component (RGBA) 32-bit (typeless) type. BGRA8_TYPELESS, ///< 4-Component (BGRA) 32-bit (typeless) type. + RGB10A2_TYPELESS, ///< 4-Component (RGBA) 32-bit (typeless) type. RGB10A2_UNORM, ///< 4-Component (RGBA) 32-bit (unsigned normalized) type. RG11B10_FLOAT, ///< 3-Component (RGB) 32-bit (floating point) type. RGB9E5_SHAREDEXP, ///< 3-Component (RGB) 32-bit (floating point) type. RG16_SINT, ///< 2-Component (RG) 32-bit (signed int) type. RG16_UINT, ///< 2-Component (RG) 32-bit (unsigned int) type. + RG16_TYPELESS, ///< 2-Component (R) 32-bit (typeless) type. RG16_FLOAT, ///< 2-Component (R) 32-bit (floating point) type. + R32_TYPELESS, ///< Single-Component (R) 32-bit (typeless) type. R32_FLOAT, ///< Single-Component (R) 32-bit (floating point) type. // 64-bit @@ -251,9 +257,11 @@ namespace cauldron RGBA16_UINT, ///< 4-Component (RGBA) 64-bit (unsigned int) type. RGBA16_UNORM, ///< 4-Component (RGBA) 64-bit (unsigned normalized) type. RGBA16_SNORM, ///< 4-Component (RGBA) 64-bit (signed normalized) type. + RGBA16_TYPELESS, ///< 4-Component (RGBA) 64-bit (typeless) type. RGBA16_FLOAT, ///< 4-Component (RGBA) 64-bit (floating point) type. RG32_SINT, ///< 2-Component (RG) 64-bit (signed int) type. RG32_UINT, ///< 2-Component (RG) 64-bit (unsigned int) type. + RG32_TYPELESS, ///< 2-Component (RG) 64-bit (typeless) type. RG32_FLOAT, ///< 2-Component (RG) 64-bit (floating point) type. 
// 96-bit diff --git a/framework/cauldron/framework/inc/shaders/tonemapping/builddistortionfield.hlsl b/framework/cauldron/framework/inc/shaders/tonemapping/builddistortionfield.hlsl index 5dfbd8c0..6cabaca4 100644 --- a/framework/cauldron/framework/inc/shaders/tonemapping/builddistortionfield.hlsl +++ b/framework/cauldron/framework/inc/shaders/tonemapping/builddistortionfield.hlsl @@ -26,7 +26,7 @@ //-------------------------------------------------------------------------------------- // Texture definitions //-------------------------------------------------------------------------------------- -RWTexture2D OutputTexture : register(u0); +RWTexture2D OutputTexture : register(u0); bool IsInsideLetterbox(int2 pixel) { @@ -46,7 +46,7 @@ void MainCS(uint3 dtID : SV_DispatchThreadID) { const uint2 pixel = dtID.xy; - float4 distortionField = float4(0.0f, 0.0f, 0.0f, 0.0f); + float2 distortionField = float2(0.0f, 0.0f); if (IsInsideLetterbox(pixel)) { float2 uv = (pixel + 0.5f) / LetterboxRectSize; diff --git a/framework/cauldron/framework/inc/shaders/tonemapping/lensdistortion.h b/framework/cauldron/framework/inc/shaders/tonemapping/lensdistortion.h index cbdd558d..acd1c02a 100644 --- a/framework/cauldron/framework/inc/shaders/tonemapping/lensdistortion.h +++ b/framework/cauldron/framework/inc/shaders/tonemapping/lensdistortion.h @@ -58,11 +58,10 @@ float2 InverseZoom(in float2 Uv) return (translatedCoord + 1.0f) / 2.0f; } -float4 GenerateDistortionField(in float2 Uv) +float2 GenerateDistortionField(in float2 Uv) { float2 xy = Zoom(BarrelDistortion(Uv)) - Uv; - float2 zw = InverseBarrelDistortion(InverseZoom(Uv)) - Uv; - return float4(xy, zw); + return xy; } float2 ApplyLensDistortion(in float2 Uv) diff --git a/framework/cauldron/framework/src/core/backend_implementations/backend_interface_vk.cpp b/framework/cauldron/framework/src/core/backend_implementations/backend_interface_vk.cpp index 433602f1..5f937a75 100644 --- 
a/framework/cauldron/framework/src/core/backend_implementations/backend_interface_vk.cpp +++ b/framework/cauldron/framework/src/core/backend_implementations/backend_interface_vk.cpp @@ -206,23 +206,6 @@ FfxResource ffxGetFrameinterpolationTexture(FfxSwapchain ffxSwapChain) return s_pFfxGetFrameinterpolationTextureFunc(ffxSwapChain); } -VkResult GraphicsQueueSubmit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) -{ - return cauldron::GetDevice()->GetImpl()->SubmitPassthrough(cauldron::RequestedQueue::Graphics, submitCount, pSubmits, fence); -} -VkResult AsyncComputeQueueSubmit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) -{ - return cauldron::GetDevice()->GetImpl()->SubmitPassthrough(cauldron::RequestedQueue::FIAsyncCompute, submitCount, pSubmits, fence); -} -VkResult PresentQueueSubmit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) -{ - return cauldron::GetDevice()->GetImpl()->SubmitPassthrough(cauldron::RequestedQueue::FIPresent, submitCount, pSubmits, fence); -} -VkResult ImageAcquireQueueSubmit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) -{ - return cauldron::GetDevice()->GetImpl()->SubmitPassthrough(cauldron::RequestedQueue::FIImageAcquire, submitCount, pSubmits, fence); -} - void ffxSetupFrameInterpolationSwapChain() { CAULDRON_ASSERT(s_pFfxGetSwapchainFunc); @@ -231,6 +214,12 @@ void ffxSetupFrameInterpolationSwapChain() CAULDRON_ASSERT(s_pFfxGetVKSwapchainFunc); CAULDRON_ASSERT(s_pFfxGetSwapchainReplacementFunctionsFunc); + const cauldron::FIQueue* pAsyncComputeQueue = cauldron::GetDevice()->GetImpl()->GetFIAsyncComputeQueue(); + const cauldron::FIQueue* pPresentQueue = cauldron::GetDevice()->GetImpl()->GetFIPresentQueue(); + const cauldron::FIQueue* pImageAcquireQueue = cauldron::GetDevice()->GetImpl()->GetFIImageAcquireQueue(); + cauldron::CauldronAssert(cauldron::ASSERT_CRITICAL, pPresentQueue->queue != VK_NULL_HANDLE, L"Cannot create FI Swapchain because there is no 
present queue."); + cauldron::CauldronAssert(cauldron::ASSERT_CRITICAL, pImageAcquireQueue->queue != VK_NULL_HANDLE, L"Cannot create FI Swapchain because there is no image acquire queue."); + // Create frameinterpolation swapchain cauldron::SwapChain* pSwapchain = cauldron::GetFramework()->GetSwapChain(); FfxSwapchain ffxSwapChain = s_pFfxGetSwapchainFunc(pSwapchain->GetImpl()->VKSwapChain()); @@ -245,27 +234,20 @@ void ffxSetupFrameInterpolationSwapChain() frameInterpolationInfo.physicalDevice = cauldron::GetDevice()->GetImpl()->VKPhysicalDevice(); frameInterpolationInfo.pAllocator = nullptr; frameInterpolationInfo.gameQueue.queue = cauldron::GetDevice()->GetImpl()->VKCmdQueue(cauldron::CommandQueue::Graphics); - frameInterpolationInfo.gameQueue.familyIndex = cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::Graphics]; + frameInterpolationInfo.gameQueue.familyIndex = cauldron::GetDevice()->GetImpl()->VKCmdQueueFamily(cauldron::CommandQueue::Graphics); frameInterpolationInfo.gameQueue.submitFunc = nullptr; // this queue is only used in vkQueuePresentKHR, hence doesn't need a callback - frameInterpolationInfo.asyncComputeQueue.queue = cauldron::GetDevice()->GetImpl()->GetFIAsyncComputeQueue()->queue; - frameInterpolationInfo.asyncComputeQueue.familyIndex = cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::FIAsyncCompute]; - if (cauldron::GetDevice()->GetImpl()->GetFIAsyncComputeQueue()->shared) - frameInterpolationInfo.asyncComputeQueue.submitFunc = AsyncComputeQueueSubmit; - else - frameInterpolationInfo.asyncComputeQueue.submitFunc = nullptr; - frameInterpolationInfo.presentQueue.queue = cauldron::GetDevice()->GetImpl()->GetFIPresentQueue()->queue; - frameInterpolationInfo.presentQueue.familyIndex = cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::FIPresent]; - if (cauldron::GetDevice()->GetImpl()->GetFIPresentQueue()->shared) - 
frameInterpolationInfo.presentQueue.submitFunc = PresentQueueSubmit; - else - frameInterpolationInfo.presentQueue.submitFunc = nullptr; - frameInterpolationInfo.imageAcquireQueue.queue = cauldron::GetDevice()->GetImpl()->GetFIImageAcquireQueue()->queue; - frameInterpolationInfo.imageAcquireQueue.familyIndex = - cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::FIImageAcquire]; - if (cauldron::GetDevice()->GetImpl()->GetFIImageAcquireQueue()->shared) - frameInterpolationInfo.imageAcquireQueue.submitFunc = ImageAcquireQueueSubmit; - else - frameInterpolationInfo.imageAcquireQueue.submitFunc = nullptr; + + frameInterpolationInfo.asyncComputeQueue.queue = pAsyncComputeQueue->queue; + frameInterpolationInfo.asyncComputeQueue.familyIndex = pAsyncComputeQueue->family; + frameInterpolationInfo.asyncComputeQueue.submitFunc = nullptr; + + frameInterpolationInfo.presentQueue.queue = pPresentQueue->queue; + frameInterpolationInfo.presentQueue.familyIndex = pPresentQueue->family; + frameInterpolationInfo.presentQueue.submitFunc = nullptr; + + frameInterpolationInfo.imageAcquireQueue.queue = pImageAcquireQueue->queue; + frameInterpolationInfo.imageAcquireQueue.familyIndex = pImageAcquireQueue->family; + frameInterpolationInfo.imageAcquireQueue.submitFunc = nullptr; VkSwapchainCreateInfoKHR createInfo = *cauldron::GetFramework()->GetSwapChain()->GetImpl()->GetCreateInfo(); FfxErrorCode errorCode = s_pFfxReplaceSwapchainForFrameinterpolationFunc(ffxGameQueue, ffxSwapChain, &createInfo, &frameInterpolationInfo); diff --git a/framework/cauldron/framework/src/core/framework.cpp b/framework/cauldron/framework/src/core/framework.cpp index fa7b870c..571f4905 100644 --- a/framework/cauldron/framework/src/core/framework.cpp +++ b/framework/cauldron/framework/src/core/framework.cpp @@ -88,24 +88,51 @@ namespace cauldron NLOHMANN_JSON_SERIALIZE_ENUM(cauldron::ResourceFormat, { {ResourceFormat::Unknown, "Unknown"}, + {ResourceFormat::R8_TYPELESS, 
"R8_TYPELESS"}, {ResourceFormat::R8_UNORM, "R8_UNORM"}, {ResourceFormat::R8_UINT, "R8_UINT"}, + + // 16-bit + {ResourceFormat::R16_TYPELESS, "R16_TYPELESS"}, {ResourceFormat::R16_FLOAT, "R16_FLOAT"}, + {ResourceFormat::RG8_TYPELESS, "RG8_TYPELESS"}, + {ResourceFormat::RG8_UNORM, "RG8_UNORM"}, + + + // 32-bit {ResourceFormat::RGBA8_UNORM, "RGBA8_UNORM"}, + {ResourceFormat::BGRA8_UNORM, "BGRA8_UNORM"}, {ResourceFormat::RGBA8_SNORM, "RGBA8_SNORM"}, {ResourceFormat::RGBA8_SRGB, "RGBA8_SRGB"}, + {ResourceFormat::BGRA8_SRGB, "BGRA8_SRGB"}, + {ResourceFormat::RGBA8_TYPELESS, "RGBA8_TYPELESS"}, + {ResourceFormat::BGRA8_TYPELESS, "BGRA8_TYPELESS"}, + {ResourceFormat::RGB10A2_TYPELESS, "RGB10A2_TYPELESS"}, {ResourceFormat::RGB10A2_UNORM, "RGB10A2_UNORM"}, {ResourceFormat::RG11B10_FLOAT, "RG11B10_FLOAT"}, {ResourceFormat::RGB9E5_SHAREDEXP, "RGB9E5_SHAREDEXP"}, + {ResourceFormat::RG16_TYPELESS, "RG16_TYPELESS"}, {ResourceFormat::RG16_FLOAT, "RG16_FLOAT"}, + {ResourceFormat::R32_TYPELESS, "R32_TYPELESS"}, {ResourceFormat::R32_FLOAT, "R32_FLOAT"}, + + // 64-bit {ResourceFormat::RGBA16_UNORM, "RGBA16_UNORM"}, - {ResourceFormat::RGBA16_SNORM, "RGBA16_SNORM"}, + {ResourceFormat::RGBA16_TYPELESS, "RGBA16_TYPELESS"}, {ResourceFormat::RGBA16_FLOAT, "RGBA16_FLOAT"}, + {ResourceFormat::RG32_TYPELESS, "RG32_TYPELESS"}, {ResourceFormat::RG32_FLOAT, "RG32_FLOAT"}, + + // 96-bit + {ResourceFormat::RGB32_FLOAT, "RGB32_FLOAT"}, + + //128-bit + {ResourceFormat::RGBA32_TYPELESS, "RGBA32_TYPELESS"}, {ResourceFormat::RGBA32_FLOAT, "RGBA32_FLOAT"}, + + //Depth {ResourceFormat::D16_UNORM, "D16_UNORM"}, - {ResourceFormat::D32_FLOAT, "D32_FLOAT"}, + {ResourceFormat::D32_FLOAT, "D32_FLOAT"} }) // map ShaderModel values to JSON as strings @@ -771,6 +798,8 @@ namespace cauldron m_Config.Width = presentationConfig.value("Width", m_Config.Width); m_Config.Height = presentationConfig.value("Height", m_Config.Height); m_Config.CurrentDisplayMode = presentationConfig.value("Mode", 
m_Config.CurrentDisplayMode); + if (presentationConfig.find("SwapchainFormat") != presentationConfig.end()) + m_Config.SwapChainFormat = presentationConfig.value("SwapchainFormat", m_Config.SwapChainFormat); } // Initialize allocation configuration @@ -1099,7 +1128,8 @@ namespace cauldron m_ResolutionInfo.DisplayHeight}; // Flush the GPU as this may have implications on resource creations - if (oldResolutionInfo.DisplayHeight != m_ResolutionInfo.DisplayHeight || oldResolutionInfo.DisplayWidth != m_ResolutionInfo.DisplayWidth) + if (oldResolutionInfo.DisplayHeight != m_ResolutionInfo.DisplayHeight || oldResolutionInfo.DisplayWidth != m_ResolutionInfo.DisplayWidth || + oldResolutionInfo.RenderHeight != m_ResolutionInfo.RenderHeight || oldResolutionInfo.RenderWidth != m_ResolutionInfo.RenderWidth) { ResizeEvent(); } @@ -2089,7 +2119,7 @@ namespace cauldron desc.Height = displayHeight; }; - std::vector uiTexNames = { L"SwapChainProxy",L"UITarget0", L"UITarget1", L"HudlessTarget0", L"HudlessTarget1" }; + std::vector uiTexNames = { L"SwapChainProxy", L"UITarget0", L"UITarget1" }; for (auto texName : uiTexNames) { uiTextureDesc.Name = texName; diff --git a/framework/cauldron/framework/src/render/dx12/device_dx12.h b/framework/cauldron/framework/src/render/dx12/device_dx12.h index 62685f12..e69aaac9 100644 --- a/framework/cauldron/framework/src/render/dx12/device_dx12.h +++ b/framework/cauldron/framework/src/render/dx12/device_dx12.h @@ -129,6 +129,7 @@ namespace cauldron MSComPtr m_pAdapter = nullptr; AMD::AntiLag2DX12::Context m_AntiLag2Context = {}; + }; } // namespace cauldron diff --git a/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.cpp b/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.cpp index 5a7778ba..24b99733 100644 --- a/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.cpp +++ b/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.cpp @@ -310,14 +310,14 @@ namespace cauldron static bool s_InvertedDepth = 
GetConfig()->InvertedDepth; D3D12_CLEAR_VALUE* pClearValue = NULL; D3D12_CLEAR_VALUE clearValue; - if (m_ResourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) + if (m_ResourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL && !IsTypelessDXGIFormat(m_ResourceDesc.Format)) { clearValue.Format = m_ResourceDesc.Format; clearValue.DepthStencil.Depth = s_InvertedDepth ? 0.f : 1.0f; clearValue.DepthStencil.Stencil = 0; pClearValue = &clearValue; } - else if (m_ResourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) + else if (m_ResourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET && !IsTypelessDXGIFormat(m_ResourceDesc.Format)) { clearValue.Format = m_ResourceDesc.Format; clearValue.Color[0] = 0.0f; @@ -347,6 +347,8 @@ namespace cauldron return DXGI_FORMAT_R8_SINT; case ResourceFormat::R8_UINT: return DXGI_FORMAT_R8_UINT; + case ResourceFormat::R8_TYPELESS: + return DXGI_FORMAT_R8_TYPELESS; case ResourceFormat::R8_UNORM: return DXGI_FORMAT_R8_UNORM; @@ -355,6 +357,8 @@ namespace cauldron return DXGI_FORMAT_R16_SINT; case ResourceFormat::R16_UINT: return DXGI_FORMAT_R16_UINT; + case ResourceFormat::R16_TYPELESS: + return DXGI_FORMAT_R16_TYPELESS; case ResourceFormat::R16_FLOAT: return DXGI_FORMAT_R16_FLOAT; case ResourceFormat::R16_UNORM: @@ -365,6 +369,8 @@ namespace cauldron return DXGI_FORMAT_R8G8_SINT; case ResourceFormat::RG8_UINT: return DXGI_FORMAT_R8G8_UINT; + case ResourceFormat::RG8_TYPELESS: + return DXGI_FORMAT_R8G8_TYPELESS; case ResourceFormat::RG8_UNORM: return DXGI_FORMAT_R8G8_UNORM; @@ -383,8 +389,16 @@ namespace cauldron return DXGI_FORMAT_R8G8B8A8_SNORM; case ResourceFormat::RGBA8_SRGB: return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + case ResourceFormat::BGRA8_TYPELESS: + return DXGI_FORMAT_B8G8R8A8_TYPELESS; + case ResourceFormat::BGRA8_UNORM: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case ResourceFormat::BGRA8_SRGB: + return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB; case ResourceFormat::RGBA8_TYPELESS: return DXGI_FORMAT_R8G8B8A8_TYPELESS; + 
case ResourceFormat::RGB10A2_TYPELESS: + return DXGI_FORMAT_R10G10B10A2_TYPELESS; case ResourceFormat::RGB10A2_UNORM: return DXGI_FORMAT_R10G10B10A2_UNORM; case ResourceFormat::RG11B10_FLOAT: @@ -395,8 +409,12 @@ namespace cauldron return DXGI_FORMAT_R16G16_SINT; case ResourceFormat::RG16_UINT: return DXGI_FORMAT_R16G16_UINT; + case ResourceFormat::RG16_TYPELESS: + return DXGI_FORMAT_R16G16_TYPELESS; case ResourceFormat::RG16_FLOAT: return DXGI_FORMAT_R16G16_FLOAT; + case ResourceFormat::R32_TYPELESS: + return DXGI_FORMAT_R32_TYPELESS; case ResourceFormat::R32_FLOAT: return DXGI_FORMAT_R32_FLOAT; @@ -408,12 +426,16 @@ namespace cauldron return DXGI_FORMAT_R16G16B16A16_UINT; case ResourceFormat::RGBA16_SNORM: return DXGI_FORMAT_R16G16B16A16_SNORM; + case ResourceFormat::RGBA16_TYPELESS: + return DXGI_FORMAT_R16G16B16A16_TYPELESS; case ResourceFormat::RGBA16_FLOAT: return DXGI_FORMAT_R16G16B16A16_FLOAT; case ResourceFormat::RG32_SINT: return DXGI_FORMAT_R32G32_SINT; case ResourceFormat::RG32_UINT: return DXGI_FORMAT_R32G32_UINT; + case ResourceFormat::RG32_TYPELESS: + return DXGI_FORMAT_R32G32_TYPELESS; case ResourceFormat::RG32_FLOAT: return DXGI_FORMAT_R32G32_FLOAT; @@ -477,6 +499,39 @@ namespace cauldron } } + // Override TYPELESS resources to prevent device removal + DXGI_FORMAT ConvertTypelessDXGIFormat(DXGI_FORMAT format) + { + switch (format) + { + case DXGI_FORMAT_R8_TYPELESS: + return DXGI_FORMAT_R8_UNORM; + case DXGI_FORMAT_R16_TYPELESS: + return DXGI_FORMAT_R16_FLOAT; + case DXGI_FORMAT_R8G8_TYPELESS: + return DXGI_FORMAT_R8G8_UNORM; + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + return DXGI_FORMAT_R10G10B10A2_UNORM; + case DXGI_FORMAT_R16G16_TYPELESS: + return DXGI_FORMAT_R16G16_FLOAT; + case DXGI_FORMAT_R32_TYPELESS: + return DXGI_FORMAT_R32_FLOAT; + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + return 
DXGI_FORMAT_R16G16B16A16_FLOAT; + case DXGI_FORMAT_R32G32_TYPELESS: + return DXGI_FORMAT_R32G32_FLOAT; + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + default: + return format; + } + + } + DXGI_FORMAT DXGIToGamma(DXGI_FORMAT format) { switch (format) @@ -640,7 +695,26 @@ namespace cauldron return resourceFlags; } - + bool IsTypelessDXGIFormat(DXGI_FORMAT format) + { + switch (format) + { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R32_TYPELESS: + return true; + default: + return false; + } + } } // namespace cauldron #endif // #if defined(_DX12) diff --git a/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.h b/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.h index edf54a7f..86621ec0 100644 --- a/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.h +++ b/framework/cauldron/framework/src/render/dx12/gpuresource_dx12.h @@ -32,11 +32,13 @@ namespace cauldron { DXGI_FORMAT GetDXGIFormat(ResourceFormat format); + DXGI_FORMAT ConvertTypelessDXGIFormat(DXGI_FORMAT format); DXGI_FORMAT DXGIToGamma(DXGI_FORMAT format); DXGI_FORMAT DXGIFromGamma(DXGI_FORMAT format); uint32_t GetDXGIFormatStride(ResourceFormat format); D3D12_RESOURCE_STATES GetDXResourceState(ResourceState state); D3D12_RESOURCE_FLAGS GetDXResourceFlags(ResourceFlags flags); + bool IsTypelessDXGIFormat(DXGI_FORMAT format); struct GPUResourceInitParams { diff --git a/framework/cauldron/framework/src/render/dx12/pipelinedesc_dx12.cpp b/framework/cauldron/framework/src/render/dx12/pipelinedesc_dx12.cpp index d15cd2f7..4ac0b4d3 100644 --- 
a/framework/cauldron/framework/src/render/dx12/pipelinedesc_dx12.cpp +++ b/framework/cauldron/framework/src/render/dx12/pipelinedesc_dx12.cpp @@ -370,7 +370,7 @@ namespace cauldron { m_PipelineImpl->m_GraphicsPipelineDesc.NumRenderTargets = static_cast(numColorFormats); for (uint32_t i = 0; i < numColorFormats; ++i) - m_PipelineImpl->m_GraphicsPipelineDesc.RTVFormats[i] = GetDXGIFormat(pColorFormats[i]); + m_PipelineImpl->m_GraphicsPipelineDesc.RTVFormats[i] = ConvertTypelessDXGIFormat(GetDXGIFormat(pColorFormats[i])); m_PipelineImpl->m_GraphicsPipelineDesc.DSVFormat = GetDXGIFormat(depthStencilFormat); } diff --git a/framework/cauldron/framework/src/render/dx12/resourceview_dx12.cpp b/framework/cauldron/framework/src/render/dx12/resourceview_dx12.cpp index 7096411c..d3b91453 100644 --- a/framework/cauldron/framework/src/render/dx12/resourceview_dx12.cpp +++ b/framework/cauldron/framework/src/render/dx12/resourceview_dx12.cpp @@ -84,7 +84,7 @@ namespace cauldron D3D12_RESOURCE_DESC renderTargetDesc = pResource->GetImpl()->DX12Desc(); D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; - rtvDesc.Format = GetDXGIFormat(textureDesc.Format); + rtvDesc.Format = ConvertTypelessDXGIFormat(GetDXGIFormat(textureDesc.Format)); if (renderTargetDesc.SampleDesc.Count == 1) { @@ -202,23 +202,21 @@ namespace cauldron D3D12_RESOURCE_DESC resourceDesc = pResource->GetImpl()->DX12Desc(); // use the format from the TextureDesc to allow overriding it, e.g. 
for reading SRGB surfaces - resourceDesc.Format = GetDXGIFormat(textureDesc.Format); + resourceDesc.Format = ConvertTypelessDXGIFormat(GetDXGIFormat(textureDesc.Format)); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - // Override TYPELESS resources to prevent device removal switch (resourceDesc.Format) { - case DXGI_FORMAT_D32_FLOAT: - case DXGI_FORMAT_R32_TYPELESS: srvDesc.Format = DXGI_FORMAT_R32_FLOAT; - break; - case DXGI_FORMAT_R16_TYPELESS: srvDesc.Format = DXGI_FORMAT_R16_FLOAT; - break; - case DXGI_FORMAT_D16_UNORM: srvDesc.Format = DXGI_FORMAT_R16_UNORM; - break; - default: - srvDesc.Format = resourceDesc.Format; - break; + case DXGI_FORMAT_D32_FLOAT: + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + break; + case DXGI_FORMAT_D16_UNORM: + srvDesc.Format = DXGI_FORMAT_R16_UNORM; + break; + default: + srvDesc.Format = resourceDesc.Format; + break; } switch (dimension) @@ -286,14 +284,15 @@ namespace cauldron D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; // Override TYPELESS resources to prevent device removal + resourceDesc.Format = ConvertTypelessDXGIFormat(resourceDesc.Format); + switch (resourceDesc.Format) { case DXGI_FORMAT_D32_FLOAT: - case DXGI_FORMAT_R32_TYPELESS: uavDesc.Format = DXGI_FORMAT_R32_FLOAT; - break; - case DXGI_FORMAT_R16_TYPELESS: uavDesc.Format = DXGI_FORMAT_R16_FLOAT; + uavDesc.Format = DXGI_FORMAT_R32_FLOAT; break; - case DXGI_FORMAT_D16_UNORM: uavDesc.Format = DXGI_FORMAT_R16_UNORM; + case DXGI_FORMAT_D16_UNORM: + uavDesc.Format = DXGI_FORMAT_R16_UNORM; break; default: // sRGB format aren't allowed for UAV diff --git a/framework/cauldron/framework/src/render/dx12/swapchain_dx12.cpp b/framework/cauldron/framework/src/render/dx12/swapchain_dx12.cpp index b308c8f9..7e4ab194 100644 --- a/framework/cauldron/framework/src/render/dx12/swapchain_dx12.cpp +++ b/framework/cauldron/framework/src/render/dx12/swapchain_dx12.cpp @@ -71,6 +71,24 @@ namespace cauldron // Set format based on display mode m_SwapChainFormat = 
GetFormat(m_CurrentDisplayMode); + // If config file provides a swapchainformat override, try to use it + if (pConfig->SwapChainFormat != ResourceFormat::Unknown && pConfig->SwapChainFormat != m_SwapChainFormat) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS FeatureData; + ZeroMemory(&FeatureData, sizeof(FeatureData)); + DXGI_FORMAT requestedSwapchainFormat = GetDXGIFormat(pConfig->SwapChainFormat); + D3D12_FEATURE_DATA_FORMAT_SUPPORT FormatSupport = {requestedSwapchainFormat, D3D12_FORMAT_SUPPORT1_NONE, D3D12_FORMAT_SUPPORT2_NONE}; + HRESULT hr = GetDevice()->GetImpl()->DX12Device()->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &FormatSupport, sizeof(FormatSupport)); + if (SUCCEEDED(hr) && (FormatSupport.Support1 & D3D12_FORMAT_SUPPORT1_DISPLAY) != 0) + { + m_SwapChainFormat = pConfig->SwapChainFormat; + } + else + { + CauldronWarning(L"The requested swapchain format from the config file cannot be used for present/display. Override is ignored."); + } + } + // Set primaries based on display mode PopulateHDRMetadataBasedOnDisplayMode(); diff --git a/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.cpp b/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.cpp index 35337993..e75f7370 100644 --- a/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.cpp +++ b/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.cpp @@ -32,6 +32,8 @@ #include "render/color_conversion.h" #include "render/swapchain.h" #include "render/dynamicresourcepool.h" +#include "render/rasterview.h" + #define FFX_CPU #include @@ -67,6 +69,9 @@ void ToneMappingRenderModule::Init(const json& InitData) m_pDistortionField[0] = GetFramework()->GetRenderTexture(L"DistortionField0"); m_pDistortionField[1] = GetFramework()->GetRenderTexture(L"DistortionField1"); + m_pDistortionFieldRasterView[0] = 
GetRasterViewAllocator()->RequestRasterView(m_pDistortionField[0], ViewDimension::Texture2D); + m_pDistortionFieldRasterView[1] = GetRasterViewAllocator()->RequestRasterView(m_pDistortionField[1], ViewDimension::Texture2D); + TextureDesc desc = TextureDesc::Tex2D(L"AutomaticExposureSpdAtomicCounter", ResourceFormat::R32_UINT, 1, 1, 1, 1, ResourceFlags::AllowUnorderedAccess); m_pAutomaticExposureSpdAtomicCounter = GetDynamicResourcePool()->CreateRenderTexture(&desc); @@ -337,6 +342,33 @@ void ToneMappingRenderModule::Execute(double deltaTime, CommandList* pCmdList) ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource); ResourceBarrier(pCmdList, 1, &barrier); } + + if (shouldClearRenderTargets) + { + GPUScopedProfileCapture distortionFieldMarker(pCmdList, L"Clear Distortion Field"); + std::vector barriers; + barriers.push_back(Barrier::Transition(m_pDistortionField[0]->GetResource(), + ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource, + ResourceState::RenderTargetResource)); + barriers.push_back(Barrier::Transition(m_pDistortionField[1]->GetResource(), + ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource, + ResourceState::RenderTargetResource)); + ResourceBarrier(pCmdList, static_cast (barriers.size()), barriers.data()); + + float clearColor[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + ClearRenderTarget(pCmdList, &m_pDistortionFieldRasterView[0]->GetResourceView(), clearColor); + ClearRenderTarget(pCmdList, &m_pDistortionFieldRasterView[1]->GetResourceView(), clearColor); + shouldClearRenderTargets = false; + + barriers.clear(); + barriers.push_back(Barrier::Transition(m_pDistortionField[0]->GetResource(), + ResourceState::RenderTargetResource, + ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource)); + barriers.push_back(Barrier::Transition(m_pDistortionField[1]->GetResource(), + ResourceState::RenderTargetResource, + ResourceState::NonPixelShaderResource | 
ResourceState::PixelShaderResource)); + ResourceBarrier(pCmdList, static_cast (barriers.size()), barriers.data()); + } if (m_TonemapperConstantData.LensDistortionEnabled) { @@ -373,3 +405,10 @@ void ToneMappingRenderModule::SetDoubleBufferedTextureIndex(uint32_t textureInde { m_curDoubleBufferedTextureIndex = textureIndex; } + +void ToneMappingRenderModule::OnResize(const cauldron::ResolutionInfo& resInfo) +{ + if (!ModuleEnabled()) + return; + shouldClearRenderTargets = true; +} diff --git a/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.h b/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.h index 6839546a..929a4760 100644 --- a/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.h +++ b/framework/cauldron/framework/src/render/rendermodules/tonemapping/tonemappingrendermodule.h @@ -49,6 +49,11 @@ class ToneMappingRenderModule : public cauldron::RenderModule virtual void Execute(double deltaTime, cauldron::CommandList* pCmdList) override; void SetDoubleBufferedTextureIndex(uint32_t textureIndex); + + /** + * @brief ClearRenderTarget() on render targets that may not be written to before being read. Called by the framework when the resolution changes. 
+ */ + void OnResize(const cauldron::ResolutionInfo& resInfo) override; private: // No copy, No move NO_COPY(ToneMappingRenderModule) @@ -84,6 +89,9 @@ class ToneMappingRenderModule : public cauldron::RenderModule const cauldron::Texture* m_pRenderTargetIn = nullptr; const cauldron::Texture* m_pRenderTargetOut = nullptr; const cauldron::Texture* m_pDistortionField[2] = {}; + const cauldron::RasterView* m_pDistortionFieldRasterView[2] = {}; uint32_t m_curDoubleBufferedTextureIndex = 0; + + bool shouldClearRenderTargets = true; }; diff --git a/framework/cauldron/framework/src/render/vk/buffer_vk.cpp b/framework/cauldron/framework/src/render/vk/buffer_vk.cpp index 67ff3e81..39eb1242 100644 --- a/framework/cauldron/framework/src/render/vk/buffer_vk.cpp +++ b/framework/cauldron/framework/src/render/vk/buffer_vk.cpp @@ -112,8 +112,9 @@ namespace cauldron CopyBufferRegion(pImmediateCopyCmdList, &desc); - QueueFamilies families = pDevice->GetQueueFamilies(); - bool needsQueueOwnershipTransfer = (families.familyIndices[RequestedQueue::Graphics] != families.familyIndices[RequestedQueue::Copy]); + uint32_t graphicsFamily = pDevice->VKCmdQueueFamily(CommandQueue::Graphics); + uint32_t copyFamily = pDevice->VKCmdQueueFamily(CommandQueue::Copy); + bool needsQueueOwnershipTransfer = (graphicsFamily != copyFamily); VkBufferMemoryBarrier bufferMemoryBarrier = {}; if (needsQueueOwnershipTransfer) { @@ -122,8 +123,8 @@ namespace cauldron bufferMemoryBarrier.pNext = nullptr; bufferMemoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; bufferMemoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - bufferMemoryBarrier.srcQueueFamilyIndex = families.familyIndices[RequestedQueue::Copy]; - bufferMemoryBarrier.dstQueueFamilyIndex = families.familyIndices[RequestedQueue::Graphics]; + bufferMemoryBarrier.srcQueueFamilyIndex = copyFamily; + bufferMemoryBarrier.dstQueueFamilyIndex = graphicsFamily; bufferMemoryBarrier.buffer = m_pResource->GetImpl()->GetBuffer(); 
bufferMemoryBarrier.offset = 0; bufferMemoryBarrier.size = static_cast(size); @@ -211,8 +212,9 @@ namespace cauldron CopyBufferRegion(pUploadContext->GetImpl()->GetCopyCmdList(), &desc); - QueueFamilies families = pDevice->GetQueueFamilies(); - bool needsQueueOwnershipTransfer = (families.familyIndices[RequestedQueue::Graphics] != families.familyIndices[RequestedQueue::Copy]); + uint32_t graphicsFamily = pDevice->VKCmdQueueFamily(CommandQueue::Graphics); + uint32_t copyFamily = pDevice->VKCmdQueueFamily(CommandQueue::Copy); + bool needsQueueOwnershipTransfer = (graphicsFamily != copyFamily); VkBufferMemoryBarrier bufferMemoryBarrier = {}; if (needsQueueOwnershipTransfer) { @@ -221,8 +223,8 @@ namespace cauldron bufferMemoryBarrier.pNext = nullptr; bufferMemoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; bufferMemoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - bufferMemoryBarrier.srcQueueFamilyIndex = families.familyIndices[RequestedQueue::Copy]; - bufferMemoryBarrier.dstQueueFamilyIndex = families.familyIndices[RequestedQueue::Graphics]; + bufferMemoryBarrier.srcQueueFamilyIndex = copyFamily; + bufferMemoryBarrier.dstQueueFamilyIndex = graphicsFamily; bufferMemoryBarrier.buffer = m_pResource->GetImpl()->GetBuffer(); bufferMemoryBarrier.offset = 0; bufferMemoryBarrier.size = static_cast(size); diff --git a/framework/cauldron/framework/src/render/vk/device_vk.cpp b/framework/cauldron/framework/src/render/vk/device_vk.cpp index c5fbd52e..acfff9b2 100644 --- a/framework/cauldron/framework/src/render/vk/device_vk.cpp +++ b/framework/cauldron/framework/src/render/vk/device_vk.cpp @@ -406,6 +406,29 @@ namespace cauldron Appender m_PropertiesAppender; }; + + enum RequestedQueue : uint32_t + { + Graphics = 0, + Compute, + Copy, + + // frame interpolation + FIAsyncCompute, + FIPresent, + FIImageAcquire, + + Count + }; + struct QueueFamilies + { + struct + { + uint32_t family = 0; + uint32_t index = 0; + } queues[RequestedQueue::Count]; + }; + 
QueueFamilies GetQueues(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface) { // Get queue/memory/device properties @@ -424,12 +447,7 @@ namespace cauldron // init for (uint32_t i = 0; i < RequestedQueue::Count; ++i) - families.familyIndices[i] = UINT32_MAX; - - // How to select a queue: - // - it should have the requested capabilities - // - it should have no more capabilities than the requested one - // - it shouldn't be used by another RequestedQueue + families.queues[i].family = UINT32_MAX; // Find a graphics device and a queue that can present to the above surface // We only support device where the graphics queue can present @@ -438,171 +456,135 @@ namespace cauldron if (queueProps[i].queueCount == 0) continue; VkBool32 supportsPresent; - vkGetPhysicalDeviceSurfaceSupportKHR(physicalDevice, i, surface, &supportsPresent); + VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(physicalDevice, i, surface, &supportsPresent); if (HAS_QUEUE_FAMILY_FLAG((VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) // VK_QUEUE_TRANSFER_BIT is implied - && supportsPresent) + && res == VK_SUCCESS && supportsPresent && queueAvailability[i].queueCount > 0) { - if (families.familyIndices[RequestedQueue::Graphics] == UINT32_MAX) - { - families.familyIndices[RequestedQueue::Graphics] = i; - families.properties[RequestedQueue::Graphics] = queueProps[i]; - break; - } + families.queues[RequestedQueue::Graphics].family = i; + --queueAvailability[i].queueCount; + break; } } - --queueAvailability[families.familyIndices[RequestedQueue::Graphics]].queueCount; + CauldronAssert(ASSERT_CRITICAL, families.queues[RequestedQueue::Graphics].family != UINT32_MAX, L"Unable to get a graphics queue that supports Present."); - CauldronAssert(ASSERT_CRITICAL, families.familyIndices[RequestedQueue::Graphics] != UINT32_MAX, L"Unable to get a graphics queue that supports Present."); - - // Get a compute queue that isn't the graphics one if possible + // Get an async compute queue for (uint32_t i = 0; i < 
queueFamilyCount; ++i) { - if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) && (queueAvailability[i].queueCount > 0)) // VK_QUEUE_TRANSFER_BIT is implied + if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) && !HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) && (queueAvailability[i].queueCount > 0)) // VK_QUEUE_TRANSFER_BIT is implied { - if (families.familyIndices[RequestedQueue::Compute] == UINT32_MAX) - { - families.familyIndices[RequestedQueue::Compute] = i; - families.properties[RequestedQueue::Compute] = queueProps[i]; - break; - } + families.queues[RequestedQueue::Compute].family = i; + --queueAvailability[i].queueCount; + break; } } - --queueAvailability[families.familyIndices[RequestedQueue::Compute]].queueCount; - - // chose the same queue for FI async compute as we use for app async compute. We don't mind to share that one as we won't use it much. - families.familyIndices[RequestedQueue::FIAsyncCompute] = families.familyIndices[RequestedQueue::Compute]; - families.properties[RequestedQueue::FIAsyncCompute] = families.properties[RequestedQueue::Compute]; + CauldronAssert(ASSERT_CRITICAL, families.queues[RequestedQueue::Compute].family != UINT32_MAX, L"Unable to get an async compute queue."); - // for present and image aquire select a queue family that has not been selected or supports more than 1 queue + // Get a copy queue for (uint32_t i = 0; i < queueFamilyCount; ++i) { - VkBool32 supportsPresent; - if (vkGetPhysicalDeviceSurfaceSupportKHR(physicalDevice, (uint32_t)i, surface, &supportsPresent) == VK_SUCCESS) + if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_TRANSFER_BIT) && !HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) && + !HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) && + (queueAvailability[i].queueCount > 0)) { - // if only one GFX and one compute queue are available try to avoid them - if (queueAvailability[i].queueCount > 0) - { - families.familyIndices[RequestedQueue::FIPresent] = i; - families.properties[RequestedQueue::FIPresent] = queueProps[i]; - 
families.familyIndices[RequestedQueue::FIImageAcquire] = i; - families.properties[RequestedQueue::FIImageAcquire] = queueProps[i]; - break; - } + families.queues[RequestedQueue::Copy].family = i; + --queueAvailability[i].queueCount; + break; } } + CauldronAssert(ASSERT_CRITICAL, families.queues[RequestedQueue::Copy].family != UINT32_MAX, L"Unable to get a copy queue."); - // if we don't have a queue for FIPresent so far, try compute queue family and use GFX as last resort - if (families.familyIndices[RequestedQueue::FIPresent] == UINT32_MAX) + // Queues for frame interpolation + + // frame interpolation present queue should have transfer capabilities and support present + for (uint32_t i = 0; i < queueFamilyCount; ++i) { + if (queueProps[i].queueCount == 0) + continue; VkBool32 supportsPresent; - if (vkGetPhysicalDeviceSurfaceSupportKHR(physicalDevice, (uint32_t)families.familyIndices[RequestedQueue::Compute], surface, &supportsPresent) == VK_SUCCESS) - { - families.familyIndices[RequestedQueue::FIPresent] = families.familyIndices[RequestedQueue::Compute]; - families.properties[RequestedQueue::FIPresent] = queueProps[families.familyIndices[RequestedQueue::Compute]]; - families.familyIndices[RequestedQueue::FIImageAcquire] = families.familyIndices[RequestedQueue::Compute]; - families.properties[RequestedQueue::FIImageAcquire] = queueProps[families.familyIndices[RequestedQueue::Compute]]; - } - else if (vkGetPhysicalDeviceSurfaceSupportKHR(physicalDevice, (uint32_t)families.familyIndices[RequestedQueue::Graphics], surface, &supportsPresent) == VK_SUCCESS) + VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(physicalDevice, i, surface, &supportsPresent); + + if (HAS_QUEUE_FAMILY_FLAG((VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) && // VK_QUEUE_TRANSFER_BIT is implied + res == VK_SUCCESS && supportsPresent && queueAvailability[i].queueCount > 0) { - // since RequestedQueue::Graphics supports present we should always at the very least have one now - 
families.familyIndices[RequestedQueue::FIPresent] = families.familyIndices[RequestedQueue::Graphics]; - families.properties[RequestedQueue::FIPresent] = queueProps[families.familyIndices[RequestedQueue::Graphics]]; - families.familyIndices[RequestedQueue::FIImageAcquire] = families.familyIndices[RequestedQueue::Graphics]; - families.properties[RequestedQueue::FIImageAcquire] = queueProps[families.familyIndices[RequestedQueue::Graphics]]; + families.queues[RequestedQueue::FIPresent].family = i; + --queueAvailability[i].queueCount; + break; } } - // Get a copy queue that isn't the graphics or compute one if possible + // image acquire queue doesn't need any capability for (uint32_t i = 0; i < queueFamilyCount; ++i) { - if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) || HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT)) - continue; - - if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_TRANSFER_BIT) && (queueAvailability[i].queueCount > 0)) + if (!HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) && !HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) && + !HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_TRANSFER_BIT) && + (queueAvailability[i].queueCount > 0)) { - if (families.familyIndices[RequestedQueue::Copy] == UINT32_MAX) - { - families.familyIndices[RequestedQueue::Copy] = i; - families.properties[RequestedQueue::Copy] = queueProps[i]; - } + families.queues[RequestedQueue::FIImageAcquire].family = i; + --queueAvailability[i].queueCount; + break; } } - // if no dedicated copy queue is available we use any queue that can copy - if (families.familyIndices[RequestedQueue::Copy] == UINT32_MAX) + if (families.queues[RequestedQueue::FIImageAcquire].family == UINT32_MAX) { + // no image acquire queue was found, look for a more general queue for (uint32_t i = 0; i < queueFamilyCount; ++i) { - if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_TRANSFER_BIT)) + if (!HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) && !HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) && + (queueAvailability[i].queueCount > 0)) { - if 
(families.familyIndices[RequestedQueue::Copy] == UINT32_MAX) - { - families.familyIndices[RequestedQueue::Copy] = i; - families.properties[RequestedQueue::Copy] = queueProps[i]; - } + families.queues[RequestedQueue::FIImageAcquire].family = i; + --queueAvailability[i].queueCount; + break; } } } -/* - // NOTE: we might want to choose the queue index in a given family to present overlapping queues. - // for example, if we only have one queue family with graphics, compute and copy, we should take the index 0, 1, 2 from this family for each queue. - // (providing this family has at least 3 queues) - - // Choose the queue with the following criterias in this order: - // - the queue hasn't been found yet - // - compatible family should has less important capabilities. Capabilities are in the order of importance: Graphics, Compute, Transfer, Video Decode, Video Encode (as of Vulkan 1.3) - // - compatible family should minimize the number of capabilities. - // - the family has more queues - // - importance of capabilities is in their VkQueueFlagBits order. So the lower the queueFlags value is, the more important the capabilities are. 
- auto canChoose = [&queueProps, &families](RequestedQueue requestedQueue, uint32_t tentative) { - bool chooseThisQueue = (families.familyIndices[requestedQueue] == UINT32_MAX) // queue not found yet - || (GetLowestBit(queueProps[tentative].queueFlags) > GetLowestBit(families.properties[requestedQueue].queueFlags)) // lower importance capabilities - || (CountBits(queueProps[tentative].queueFlags) < CountBits(families.properties[requestedQueue].queueFlags)) // less capabilities - || (queueProps[tentative].queueFlags == families.properties[requestedQueue].queueFlags && - queueProps[tentative].queueCount > families.properties[requestedQueue].queueCount); // if they have the same capabilities, more queues - - if (chooseThisQueue) + if (families.queues[RequestedQueue::FIImageAcquire].family == UINT32_MAX) + { + // no image acquire queue was found, look for a more general queue + for (uint32_t i = 0; i < queueFamilyCount; ++i) { - families.familyIndices[requestedQueue] = tentative; - families.properties[requestedQueue] = queueProps[tentative]; + if (!HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) && (queueAvailability[i].queueCount > 0)) + { + families.queues[RequestedQueue::FIImageAcquire].family = i; + --queueAvailability[i].queueCount; + break; + } } - }; + } - // TODO: choose better queues + // frame interpolation async compute queue should have compute capabilities for (uint32_t i = 0; i < queueFamilyCount; ++i) { - if (queueProps[i].queueCount == 0) - continue; - - // check compute & transfer - if (HAS_QUEUE_FAMILY_FLAG((VK_QUEUE_COMPUTE_BIT))) // VK_QUEUE_TRANSFER_BIT is implied + if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) && !HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) && + (queueAvailability[i].queueCount > 0)) // VK_QUEUE_TRANSFER_BIT is implied { - canChoose(RequestedQueue::FIAsyncCompute, i); + families.queues[RequestedQueue::FIAsyncCompute].family = i; + --queueAvailability[i].queueCount; + break; } - - // Present queue will only do present and some 
copies. Hence it needs at least one of the following capabilities - // VK_QUEUE_GRAPHICS_BIT, VK_QUEUE_COMPUTE_BIT, VK_QUEUE_TRANSFER_BIT - if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_GRAPHICS_BIT) || HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) || HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_TRANSFER_BIT)) + } + if (families.queues[RequestedQueue::FIAsyncCompute].family == UINT32_MAX) + { + // no async compute was found, look for a more general queue + for (uint32_t i = 0; i < queueFamilyCount; ++i) { - VkBool32 supportsPresent; - if (vkGetPhysicalDeviceSurfaceSupportKHR(physicalDevice, (uint32_t)i, surface, &supportsPresent) == VK_SUCCESS) + if (HAS_QUEUE_FAMILY_FLAG(VK_QUEUE_COMPUTE_BIT) && (queueAvailability[i].queueCount > 0)) // VK_QUEUE_TRANSFER_BIT is implied { - if (supportsPresent == VK_TRUE) - { - canChoose(RequestedQueue::FIPresent, i); - } + families.queues[RequestedQueue::FIAsyncCompute].family = i; + --queueAvailability[i].queueCount; + break; } } - - // image acquire queue doesn't need any capability - { - canChoose(RequestedQueue::FIImageAcquire, i); - } } -*/ - CauldronAssert(ASSERT_CRITICAL, families.familyIndices[RequestedQueue::FIAsyncCompute] != UINT32_MAX, L"Couldn't find an async compute queue for frame interpolation"); - CauldronAssert(ASSERT_CRITICAL, families.familyIndices[RequestedQueue::FIPresent] != UINT32_MAX, L"Couldn't find a present queue for frame interpolation"); - CauldronAssert(ASSERT_CRITICAL, families.familyIndices[RequestedQueue::FIImageAcquire] != UINT32_MAX, L"Couldn't find an image acquire queue"); + + CauldronAssert(ASSERT_WARNING, families.queues[RequestedQueue::FIPresent].family != UINT32_MAX, L"Couldn't find a present queue for frame interpolation. Please update your driver."); + CauldronAssert(ASSERT_WARNING, + families.queues[RequestedQueue::FIAsyncCompute].family != UINT32_MAX, + L"Couldn't find an async compute queue for frame interpolation. 
Please update your driver."); + CauldronAssert(ASSERT_WARNING, + families.queues[RequestedQueue::FIImageAcquire].family != UINT32_MAX, + L"Couldn't find an image acquire queue for frame interpolation. Please update your driver."); return families; } @@ -1101,7 +1083,7 @@ namespace cauldron } // Get all the queues we need - m_QueueFamilies = GetQueues(m_PhysicalDevice, m_Surface); + QueueFamilies queueFamilies = GetQueues(m_PhysicalDevice, m_Surface); float queuePriorities[RequestedQueue::Count][RequestedQueue::Count]; VkDeviceQueueCreateInfo queueCreateInfos[RequestedQueue::Count]; @@ -1119,46 +1101,30 @@ namespace cauldron } // helper - auto addQueueToCreateInfo = [this, &queueCreateInfos, &queueCreateInfoCount, &queuePriorities](RequestedQueue requestedQueue, const float priority) { - for (uint32_t i = 0; i < RequestedQueue::Count; ++i) + auto addQueueToCreateInfo = [this, &queueCreateInfos, &queueCreateInfoCount, &queuePriorities, &queueFamilies](RequestedQueue requestedQueue, + const float priority) { + if (queueFamilies.queues[requestedQueue].family == UINT32_MAX) + return; + + // find the index of the VkDeviceQueueCreateInfo + uint32_t infoIndex = 0; + for (; infoIndex < queueCreateInfoCount; ++infoIndex) { - // find the create info of the queue family - if (queueCreateInfos[i].queueFamilyIndex == m_QueueFamilies.familyIndices[requestedQueue] && queueCreateInfos[i].queueCount > 0) - { - if (queueCreateInfos[i].queueCount < m_QueueFamilies.properties[requestedQueue].queueCount) - { - uint32_t queueIndex = queueCreateInfos[i].queueCount; - queuePriorities[i][queueIndex] = priority; - ++queueCreateInfos[i].queueCount; - m_QueueFamilies.queueIndices[requestedQueue] = queueIndex; - return; - } - else - { - // check if there's a queue with the same priority - for (uint32_t j = 0; j < queueCreateInfos[i].queueCount; ++j) - { - if (queuePriorities[i][j] == priority) - { - m_QueueFamilies.queueIndices[requestedQueue] = j; - return; - } - } - - // priority cannot be 
set - CauldronWarning(L"Cannot set the priority for the given queue as it is used elsewhere with another priority."); - m_QueueFamilies.queueIndices[requestedQueue] = queueCreateInfos[i].queueCount - 1; - return; - } - } + if (queueCreateInfos[infoIndex].queueFamilyIndex == queueFamilies.queues[requestedQueue].family && + queueCreateInfos[infoIndex].queueCount > 0) + break; + } + + if (infoIndex == queueCreateInfoCount) + { + // first queue, initialize + ++queueCreateInfoCount; + queueCreateInfos[infoIndex].queueFamilyIndex = queueFamilies.queues[requestedQueue].family; } - // This queue family wasn't in the create info yet. Create a new entry - queueCreateInfos[queueCreateInfoCount].queueCount = 1; - queueCreateInfos[queueCreateInfoCount].queueFamilyIndex = m_QueueFamilies.familyIndices[requestedQueue]; - queuePriorities[queueCreateInfoCount][0] = priority; - ++queueCreateInfoCount; - m_QueueFamilies.queueIndices[requestedQueue] = 0; + queuePriorities[infoIndex][queueCreateInfos[infoIndex].queueCount] = priority; + queueFamilies.queues[requestedQueue].index = queueCreateInfos[infoIndex].queueCount; + ++queueCreateInfos[infoIndex].queueCount; }; addQueueToCreateInfo(RequestedQueue::Graphics, 1.0f); @@ -1166,9 +1132,21 @@ namespace cauldron addQueueToCreateInfo(RequestedQueue::Copy, 0.5f); // For frame interpolation - addQueueToCreateInfo(RequestedQueue::FIAsyncCompute, 1.0f); - addQueueToCreateInfo(RequestedQueue::FIPresent, 1.0f); - addQueueToCreateInfo(RequestedQueue::FIImageAcquire, 0.9f); + bool canRunFrameInterpolation = + queueFamilies.queues[RequestedQueue::FIPresent].family != UINT32_MAX && queueFamilies.queues[RequestedQueue::FIImageAcquire].family != UINT32_MAX; + if (canRunFrameInterpolation) + { + // no need to query the queues if frame interpolation cannot run + addQueueToCreateInfo(RequestedQueue::FIPresent, 1.0f); + addQueueToCreateInfo(RequestedQueue::FIImageAcquire, 0.9f); + addQueueToCreateInfo(RequestedQueue::FIAsyncCompute, 1.0f); + } + else + { 
+ queueFamilies.queues[RequestedQueue::FIPresent].family = UINT32_MAX; + queueFamilies.queues[RequestedQueue::FIImageAcquire].family = UINT32_MAX; + queueFamilies.queues[RequestedQueue::FIAsyncCompute].family = UINT32_MAX; + } // Create device m_Device = deviceCreator.Create(queueCreateInfos, queueCreateInfoCount); @@ -1232,41 +1210,28 @@ namespace cauldron SetResourceName(VK_OBJECT_TYPE_DEVICE, (uint64_t)m_Device, "CauldronDevice"); // create the queues - auto queueBuilder = [this](CommandQueue queueType, RequestedQueue requestedQueue, uint32_t numFramesInFlight, const char* name) + auto queueBuilder = [this, &queueFamilies](CommandQueue queueType, RequestedQueue requestedQueue, uint32_t numFramesInFlight, const char* name) { m_QueueSyncPrims[static_cast(queueType)].Init( - this, queueType, m_QueueFamilies.familyIndices[requestedQueue], m_QueueFamilies.queueIndices[requestedQueue], numFramesInFlight, name); + this, queueType, queueFamilies.queues[requestedQueue].family, queueFamilies.queues[requestedQueue].index, numFramesInFlight, name); }; queueBuilder(CommandQueue::Graphics, RequestedQueue::Graphics, pConfig->BackBufferCount, "CauldronGraphicsQueue"); queueBuilder(CommandQueue::Compute, RequestedQueue::Compute, pConfig->BackBufferCount, "CauldronComputeQueue" ); queueBuilder(CommandQueue::Copy, RequestedQueue::Copy, pConfig->BackBufferCount, "CauldronCopyQueue" ); // frame interpolation - auto getFIQueue = [this](FIQueue& fiQueue, RequestedQueue requestedQueue, const char* name) { - vkGetDeviceQueue(m_Device, m_QueueFamilies.familyIndices[requestedQueue], m_QueueFamilies.queueIndices[requestedQueue], &fiQueue.queue); - CauldronAssert(ASSERT_CRITICAL, fiQueue.queue != VK_NULL_HANDLE, L"Couldn't get the frame interpolation queue"); - SetResourceName(VK_OBJECT_TYPE_QUEUE, (uint64_t)fiQueue.queue, name); - fiQueue.family = m_QueueFamilies.familyIndices[requestedQueue]; - fiQueue.index = m_QueueFamilies.queueIndices[requestedQueue]; - - fiQueue.shared = false; - 
fiQueue.sharedWith = requestedQueue; - for (uint32_t i = 0; i < RequestedQueue::Count; ++i) + auto getFIQueue = [this, &queueFamilies](FIQueue& fiQueue, RequestedQueue requestedQueue, const char* name) { + if (queueFamilies.queues[requestedQueue].family != UINT32_MAX) { - if (i == requestedQueue) - continue; - if (m_QueueFamilies.familyIndices[requestedQueue] == m_QueueFamilies.familyIndices[i] && - m_QueueFamilies.queueIndices[requestedQueue] == m_QueueFamilies.queueIndices[i]) - { - fiQueue.shared = true; - if (i < requestedQueue) // only keeps the lowest requested queue - fiQueue.sharedWith = static_cast(i); - break; - } + vkGetDeviceQueue(m_Device, queueFamilies.queues[requestedQueue].family, queueFamilies.queues[requestedQueue].index, &fiQueue.queue); + CauldronAssert(ASSERT_CRITICAL, fiQueue.queue != VK_NULL_HANDLE, L"Couldn't get the frame interpolation queue"); + SetResourceName(VK_OBJECT_TYPE_QUEUE, (uint64_t)fiQueue.queue, name); + fiQueue.family = queueFamilies.queues[requestedQueue].family; + fiQueue.index = queueFamilies.queues[requestedQueue].index; } }; + getFIQueue(m_FIPresentQueue, RequestedQueue::FIPresent, "FrameInterpolationPresentQueue"); getFIQueue(m_FIAsyncComputeQueue, RequestedQueue::FIAsyncCompute, "FrameInterpolationAsyncComputeQueue"); - getFIQueue(m_FIPresentQueue, RequestedQueue::FIPresent, "FrameInterpolationPresentQueue"); getFIQueue(m_FIImageAcquireQueue, RequestedQueue::FIImageAcquire, "FrameInterpolationImageAcquireQueue"); m_DeviceName = StringToWString(physicalDeviceProperties.deviceName); @@ -1898,17 +1863,6 @@ namespace cauldron return m_LatestSemaphoreValue; } - VkResult DeviceInternal::QueueSyncPrimitive::SubmitPassthrough(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, DeviceRemovedCallback deviceRemovedCallback, void* deviceRemovedCustomData) - { - std::lock_guard lock(m_SubmitMutex); - VkResult res = vkQueueSubmit(m_Queue, submitCount, pSubmits, fence); - if (res == VK_ERROR_DEVICE_LOST && 
deviceRemovedCallback) - { - deviceRemovedCallback(deviceRemovedCustomData); - } - return res; - } - uint64_t DeviceInternal::QueueSyncPrimitive::Present(const DeviceInternal* pDevice, VkSwapchainKHR swapchain, uint32_t imageIndex, DeviceRemovedCallback deviceRemovedCallback, void* deviceRemovedCustomData) // only valid on the present queue { VkPresentInfoKHR presentInfo = {}; @@ -2092,56 +2046,6 @@ namespace cauldron return m_getLastPresentCountFFX(swapchain); return 0; } - - VkResult DeviceInternal::SubmitPassthrough(RequestedQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) - { - RequestedQueue realRequestedQueue = queue; - switch (queue) - { - case RequestedQueue::Graphics: - case RequestedQueue::Compute: - case RequestedQueue::Copy: - break; - - case RequestedQueue::FIPresent: - realRequestedQueue = m_FIPresentQueue.sharedWith; - break; - case RequestedQueue::FIAsyncCompute: - realRequestedQueue = m_FIAsyncComputeQueue.sharedWith; - break; - case RequestedQueue::FIImageAcquire: - realRequestedQueue = m_FIImageAcquireQueue.sharedWith; - break; - default: - return VK_INCOMPLETE; - } - - switch (realRequestedQueue) - { - case RequestedQueue::Graphics: - return m_QueueSyncPrims[static_cast(CommandQueue::Graphics)].SubmitPassthrough(submitCount, pSubmits, fence, m_DeviceRemovedCallback, m_DeviceRemovedCustomData); - case RequestedQueue::Compute: - return m_QueueSyncPrims[static_cast(CommandQueue::Compute)].SubmitPassthrough(submitCount, pSubmits, fence, m_DeviceRemovedCallback, m_DeviceRemovedCustomData); - case RequestedQueue::Copy: - return m_QueueSyncPrims[static_cast(CommandQueue::Copy)].SubmitPassthrough(submitCount, pSubmits, fence, m_DeviceRemovedCallback, m_DeviceRemovedCustomData); - - case RequestedQueue::FIPresent: - return m_FIPresentQueue.SubmitPassthrough(submitCount, pSubmits, fence); - case RequestedQueue::FIAsyncCompute: - return m_FIAsyncComputeQueue.SubmitPassthrough(submitCount, pSubmits, fence); - case 
RequestedQueue::FIImageAcquire: - return m_FIImageAcquireQueue.SubmitPassthrough(submitCount, pSubmits, fence); - default: - return VK_INCOMPLETE; - } - } - - VkResult FIQueue::SubmitPassthrough(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) - { - std::lock_guard lock(submitMutex); - return vkQueueSubmit(queue, submitCount, pSubmits, fence); - } - } // namespace cauldron #endif // #if defined(_VK) diff --git a/framework/cauldron/framework/src/render/vk/device_vk.h b/framework/cauldron/framework/src/render/vk/device_vk.h index e0354bf1..9dcb0a5c 100644 --- a/framework/cauldron/framework/src/render/vk/device_vk.h +++ b/framework/cauldron/framework/src/render/vk/device_vk.h @@ -38,36 +38,11 @@ namespace cauldron { - enum RequestedQueue : uint32_t - { - Graphics = 0, - Compute, - Copy, - - // frame interpolation - FIAsyncCompute, - FIPresent, - FIImageAcquire, - - Count - }; - struct QueueFamilies - { - uint32_t familyIndices[RequestedQueue::Count]; - VkQueueFamilyProperties properties[RequestedQueue::Count]; - uint32_t queueIndices[RequestedQueue::Count]; - }; - struct FIQueue { VkQueue queue = VK_NULL_HANDLE; uint32_t family = 0; uint32_t index = 0; - bool shared = false; - RequestedQueue sharedWith = RequestedQueue::Count; - std::mutex submitMutex; - - VkResult SubmitPassthrough(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); }; struct SwapChainCreationParams @@ -122,14 +97,16 @@ namespace cauldron const VkQueue VKCmdQueue(CommandQueue queueType) const { return m_QueueSyncPrims[static_cast(queueType)].GetQueue(); } VkQueue VKCmdQueue(CommandQueue queueType) { return m_QueueSyncPrims[static_cast(queueType)].GetQueue(); } + const uint32_t VKCmdQueueFamily(CommandQueue queueType) const + { + return m_QueueSyncPrims[static_cast(queueType)].GetQueueFamily(); + } VkSurfaceKHR GetSurface() { return m_Surface; } void SetResourceName(VkObjectType objectType, uint64_t handle, const char* name); void SetResourceName(VkObjectType 
objectType, uint64_t handle, const wchar_t* name); - QueueFamilies GetQueueFamilies() const { return m_QueueFamilies; } - const uint32_t GetMinAccelerationStructureScratchOffsetAlignment() { return m_MinAccelerationStructureScratchOffsetAlignment; } const uint32_t GetBreadcrumbsMemoryIndex() { return m_BreadcrumbsMemoryIndex; } const bool BreadcrumbsDedicatedAllocRequired() { return m_UseBreadcrumbsDedicatedAlloc; } @@ -200,8 +177,6 @@ namespace cauldron return &m_FIImageAcquireQueue; } - VkResult SubmitPassthrough(RequestedQueue queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence); - private: friend class Device; DeviceInternal(); @@ -218,10 +193,10 @@ namespace cauldron void ReleaseCommandPool(VkCommandPool commandPool); const VkQueue GetQueue() const { return m_Queue; } + const uint32_t GetQueueFamily() const { return m_FamilyIndex; } // thread safe uint64_t Submit(const std::vector& cmdLists, const VkSemaphore signalSemaphore, const VkSemaphore waitSemaphore, bool waitForSwapchainImage, bool useEndOfFrameSemaphore, DeviceRemovedCallback deviceRemovedCallback, void* deviceRemovedCustomData); - VkResult SubmitPassthrough(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, DeviceRemovedCallback deviceRemovedCallback, void* deviceRemovedCustomData); uint64_t Present(const DeviceInternal* pDevice, VkSwapchainKHR swapchain, uint32_t imageIndex, DeviceRemovedCallback deviceRemovedCallback, void* deviceRemovedCustomData); // only valid on the present queue void Wait(VkDevice device, uint64_t waitValue) const; @@ -257,8 +232,6 @@ namespace cauldron VkPhysicalDevice m_PhysicalDevice = VK_NULL_HANDLE; VkSurfaceKHR m_Surface = VK_NULL_HANDLE; - QueueFamilies m_QueueFamilies; - VmaAllocator m_VmaAllocator = nullptr; // minAccelerationStructureScratchOffsetAlignment diff --git a/framework/cauldron/framework/src/render/vk/gpuresource_vk.cpp b/framework/cauldron/framework/src/render/vk/gpuresource_vk.cpp index 77f7941c..08417a98 100644 --- 
a/framework/cauldron/framework/src/render/vk/gpuresource_vk.cpp +++ b/framework/cauldron/framework/src/render/vk/gpuresource_vk.cpp @@ -433,24 +433,6 @@ VkFormat GetVkFormatFromSurfaceFormat(FfxSurfaceFormat fmt) DeviceInternal* pDevice = GetDevice()->GetImpl(); - // adjust the image creation structure if mutable views are allowed - std::array formats; - VkImageFormatListCreateInfo imageFormatInfo = {}; - if ((m_ImageCreateInfo.flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) != 0) - { - formats[0] = m_ImageCreateInfo.format; - formats[1] = VKToGamma(m_ImageCreateInfo.format); - imageFormatInfo.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO; - imageFormatInfo.pNext = nullptr; - imageFormatInfo.viewFormatCount = static_cast(formats.size()); - imageFormatInfo.pViewFormats = formats.data(); - m_ImageCreateInfo.pNext = &imageFormatInfo; - - // Add some asserts to be sure that we are handling the case of a image with sRGB view on it - CauldronAssert(ASSERT_CRITICAL, formats[0] != formats[1], L"Image is already a sRGB one, this shouldn't happen if the VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT is set."); - CauldronAssert(ASSERT_CRITICAL, (m_ImageCreateInfo.usage & VK_IMAGE_USAGE_STORAGE_BIT) != 0, L"VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT should ony be set to handle storage views on a texture."); - } - m_MemoryUsage = VMA_MEMORY_USAGE_GPU_ONLY; VmaAllocationCreateInfo allocInfo = {}; diff --git a/framework/cauldron/framework/src/render/vk/swapchain_vk.cpp b/framework/cauldron/framework/src/render/vk/swapchain_vk.cpp index 9a90b1ea..c3ffb6ce 100644 --- a/framework/cauldron/framework/src/render/vk/swapchain_vk.cpp +++ b/framework/cauldron/framework/src/render/vk/swapchain_vk.cpp @@ -29,6 +29,7 @@ #include "render/vk/commandlist_vk.h" #include "render/vk/device_vk.h" #include "render/vk/gpuresource_vk.h" +#include "render/vk/helpers.h" #include "render/vk/resourceviewallocator_vk.h" #include "render/vk/swapchain_vk.h" #include "render/vk/texture_vk.h" @@ -107,58 +108,58 @@ namespace 
cauldron } } - std::unordered_map GetAvailableFormats(const std::vector& surfaceFormats2) + std::unordered_map GetAvailableFormats(const std::vector& surfaceFormats2, VkFormat preferredFormat) { std::unordered_map modes; - for (const auto& surfaceFormat2 : surfaceFormats2) - { - VkSurfaceFormatKHR surfaceFormat = surfaceFormat2.surfaceFormat; - if (surfaceFormat.format == VK_FORMAT_R8G8B8A8_UNORM && surfaceFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) + + // small utility + auto addSurfaceFormatToMode = [preferredFormat, &modes](const VkSurfaceFormatKHR surfaceFormat, DisplayMode mode, VkFormat expectedFormat) { + if (surfaceFormat.format == preferredFormat) { - modes[DisplayMode::DISPLAYMODE_LDR] = surfaceFormat; + modes[mode] = surfaceFormat; } - else if (surfaceFormat.format == VK_FORMAT_B8G8R8A8_UNORM && surfaceFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) + else if (surfaceFormat.format == expectedFormat) { - modes[DisplayMode::DISPLAYMODE_LDR] = surfaceFormat; + auto found = modes.find(mode); + if (found == modes.end() || found->second.format != preferredFormat) + { + // add only if the preferred format hasn't been added yet + modes[mode] = surfaceFormat; + } } - else if (surfaceFormat.format == VK_FORMAT_A2B10G10R10_UNORM_PACK32 - && surfaceFormat.colorSpace == VK_COLOR_SPACE_DISPLAY_NATIVE_AMD) + }; + + for (const auto& surfaceFormat2 : surfaceFormats2) + { + VkSurfaceFormatKHR surfaceFormat = surfaceFormat2.surfaceFormat; + bool isPreferredFormat = (surfaceFormat.format == preferredFormat); + + if (surfaceFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) { - modes[DisplayMode::DISPLAYMODE_FSHDR_2084] = surfaceFormat; + addSurfaceFormatToMode(surfaceFormat, DisplayMode::DISPLAYMODE_LDR, VK_FORMAT_R8G8B8A8_UNORM); + addSurfaceFormatToMode(surfaceFormat, DisplayMode::DISPLAYMODE_LDR, VK_FORMAT_B8G8R8A8_UNORM); } - else if (surfaceFormat.format == VK_FORMAT_R16G16B16A16_SFLOAT && surfaceFormat.colorSpace == 
VK_COLOR_SPACE_DISPLAY_NATIVE_AMD) + else if (surfaceFormat.colorSpace == VK_COLOR_SPACE_DISPLAY_NATIVE_AMD) { - modes[DisplayMode::DISPLAYMODE_FSHDR_SCRGB] = surfaceFormat; + // no override possible here because colorspace and format are linked + if (surfaceFormat.format == VK_FORMAT_A2B10G10R10_UNORM_PACK32) + modes[DisplayMode::DISPLAYMODE_FSHDR_2084] = surfaceFormat; + else if (surfaceFormat.format == VK_FORMAT_R16G16B16A16_SFLOAT) + modes[DisplayMode::DISPLAYMODE_FSHDR_SCRGB] = surfaceFormat; } - else if (surfaceFormat.format == VK_FORMAT_A2B10G10R10_UNORM_PACK32 - && surfaceFormat.colorSpace == VK_COLOR_SPACE_HDR10_ST2084_EXT) + else if (surfaceFormat.colorSpace == VK_COLOR_SPACE_HDR10_ST2084_EXT) { - // NOTE: we don't have any preference for the format between the two - modes[DisplayMode::DISPLAYMODE_HDR10_2084] = surfaceFormat; + addSurfaceFormatToMode(surfaceFormat, DisplayMode::DISPLAYMODE_HDR10_2084, VK_FORMAT_A2B10G10R10_UNORM_PACK32); } - else if (surfaceFormat.format == VK_FORMAT_R16G16B16A16_SFLOAT && surfaceFormat.colorSpace == VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT) + else if (surfaceFormat.colorSpace == VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT) { - modes[DisplayMode::DISPLAYMODE_HDR10_SCRGB] = surfaceFormat; + addSurfaceFormatToMode(surfaceFormat, DisplayMode::DISPLAYMODE_HDR10_SCRGB, VK_FORMAT_R16G16B16A16_SFLOAT); } } return modes; } - VkSurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector& availableFormats) - { - for (const auto& availableFormat : availableFormats) - { - if (availableFormat.format == VK_FORMAT_R8G8B8A8_UNORM && availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) - return availableFormat; - } - - VkSurfaceFormatKHR errorFormat; - errorFormat.format = VK_FORMAT_UNDEFINED; - errorFormat.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; - return errorFormat; - } - VkPresentModeKHR ChooseSwapPresentMode(const std::vector& availablePresentModes, bool vsync) { auto search = [&availablePresentModes](VkPresentModeKHR 
Mode) { @@ -680,7 +681,14 @@ namespace cauldron const CauldronConfig* pConfig = GetConfig(); - std::unordered_map modes = GetAvailableFormats(formats2); + // get the requested override swapchain format + VkFormat overrideFormat = VK_FORMAT_UNDEFINED; + if (pConfig->SwapChainFormat != ResourceFormat::Unknown) + { + overrideFormat = GetVkFormat(pConfig->SwapChainFormat); + } + + std::unordered_map modes = GetAvailableFormats(formats2, overrideFormat); for (const auto& it : modes) { @@ -696,6 +704,10 @@ namespace cauldron m_CurrentDisplayMode = found->first; m_SurfaceFormat = found->second; + CauldronAssert(ASSERT_WARNING, + overrideFormat == VK_FORMAT_UNDEFINED || m_SurfaceFormat.format == overrideFormat, + L"The requested swapchain format from the config file cannot be used for present/display. Override is ignored."); + // Set format based on display mode m_SwapChainFormat = ConvertFormat(m_SurfaceFormat.format); } diff --git a/framework/cauldron/framework/src/render/vk/texture_vk.cpp b/framework/cauldron/framework/src/render/vk/texture_vk.cpp index 8f73defa..1db4151c 100644 --- a/framework/cauldron/framework/src/render/vk/texture_vk.cpp +++ b/framework/cauldron/framework/src/render/vk/texture_vk.cpp @@ -128,8 +128,9 @@ namespace cauldron } } - QueueFamilies families = pDevice->GetQueueFamilies(); - bool needsQueueOwnershipTransfer = (families.familyIndices[RequestedQueue::Graphics] != families.familyIndices[RequestedQueue::Copy]); + uint32_t graphicsFamily = pDevice->VKCmdQueueFamily(CommandQueue::Graphics); + uint32_t copyFamily = pDevice->VKCmdQueueFamily(CommandQueue::Copy); + bool needsQueueOwnershipTransfer = (graphicsFamily != copyFamily); VkImageMemoryBarrier imageMemoryBarrier = {}; if (needsQueueOwnershipTransfer) { @@ -140,8 +141,8 @@ namespace cauldron imageMemoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - 
imageMemoryBarrier.srcQueueFamilyIndex = families.familyIndices[RequestedQueue::Copy]; - imageMemoryBarrier.dstQueueFamilyIndex = families.familyIndices[RequestedQueue::Graphics]; + imageMemoryBarrier.srcQueueFamilyIndex = copyFamily; + imageMemoryBarrier.dstQueueFamilyIndex = graphicsFamily; imageMemoryBarrier.image = m_pResource->GetImpl()->GetImage(); imageMemoryBarrier.subresourceRange.aspectMask = GetImageAspectMask(m_pResource->GetImpl()->GetImageCreateInfo().format); imageMemoryBarrier.subresourceRange.baseMipLevel = 0; diff --git a/readme.md b/readme.md index 7093b7ad..4ab73abd 100644 --- a/readme.md +++ b/readme.md @@ -1,4 +1,4 @@ -

Welcome to the AMD FidelityFX™ SDK 1.1.2

+

Welcome to the AMD FidelityFX™ SDK 1.1.3

![alt text](/docs/media/fidelityfxsdk-logo-rescaled.png) @@ -18,10 +18,10 @@ The FidelityFX SDK includes: | [Stochastic Screen-Space Reflections](/docs/techniques/stochastic-screen-space-reflections.md) 1.5 | [SSSR sample](/docs/samples/stochastic-screen-space-reflections.md) | [FidelityFX Screen Space Reflections](https://gpuopen.com/fidelityfx-sssr/) | Provides high-fidelity screen-spaced reflections in your scene, without a hefty performance price tag. | | [Super Resolution (Spatial)](/docs/techniques/super-resolution-spatial.md) 1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution](https://gpuopen.com/fidelityfx-superresolution/) | Offers a spatial single-frame solution for producing higher resolution frames from lower resolution inputs. | | [Super Resolution (Temporal)](/docs/techniques/super-resolution-temporal.md) 2.3.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 2](https://gpuopen.com/fidelityfx-superresolution-2/) | Offers both spatial single-frame and temporal multi-frame solutions for producing high resolution frames from lower resolution inputs. | -| [Super Resolution 3](/docs/techniques/super-resolution-interpolation.md) 3.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames in combination with our temporal multi-frame solution for producing high resolution frames from lower resolution inputs. | -| [Super Resolution (Upscaler)](/docs/techniques/super-resolution-upscaler.md) 3.1.2 | [Super Resolution sample](/docs.samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers temporal multi-frame solutions for producing high resolution frames from lower resolution inputs. 
| -| [Frame Interpolation](techniques/frame-interpolation.md) 1.1.1 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames from multiple real input frames, and multiple sources of motion vector data. | -| [Frame Interpolation SwapChain](/docs/techniques/frame-interpolation-swap-chain.md) 1.1.1 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | A replacement DXGI Swapchain implementation for DX12 which allows for additional frames to be presented along with real game frames, with relevant frame pacing. | +| [Super Resolution 3](/docs/techniques/super-resolution-interpolation.md) 3.1.3 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames in combination with our temporal multi-frame solution for producing high resolution frames from lower resolution inputs. | +| [Super Resolution (Upscaler)](/docs/techniques/super-resolution-upscaler.md) 3.1.3 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers temporal multi-frame solutions for producing high resolution frames from lower resolution inputs. | +| [Frame Interpolation](/docs/techniques/frame-interpolation.md) 1.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers generation of interpolated frames from multiple real input frames, and multiple sources of motion vector data. 
| +| [Frame Interpolation SwapChain](/docs/techniques/frame-interpolation-swap-chain.md) 1.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | A replacement DXGI Swapchain implementation for DX12 which allows for additional frames to be presented along with real game frames, with relevant frame pacing. | | [Optical Flow](/docs/techniques/optical-flow.md) 1.1.2 | [Super Resolution sample](/docs/samples/super-resolution.md) | [FidelityFX Super Resolution 3](https://gpuopen.com/fidelityfx-superresolution-3/) | Offers a motion-estimation algorithm which is useful for generating block-based motion vectors from temporal image inputs. | | [Variable Shading](/docs/techniques/variable-shading.md) 1.2 | [Variable Shading sample](/docs/samples/variable-shading.md) | [FidelityFX Variable Shading](https://gpuopen.com/fidelityfx-variable-shading/) | Helps you to drive Variable Rate Shading hardware introduced in RDNA2-based and contemporary GPUs, by analyzing the luminance of pixels in a tile to determine where the shading rate can be lowered to increase performance. | | [Blur](/docs/samples/blur.md) 1.1 | [Blur sample](/docs/samples/blur.md) | [FidelityFX Blur](https://gpuopen.com/fidelityfx-blur/) | A library of highly optimized functions which perform common blurring operations such as Gaussian blur, radial blurs, and others. | @@ -36,7 +36,7 @@ The FidelityFX SDK includes:

Further information

- [What's new in AMD FidelityFX SDK](/docs/whats-new/index.md) - - [FidelityFX SDK 1.1.2](/docs/whats-new/index.md) + - [FidelityFX SDK 1.1.2](/docs/whats-new/version_1_1_2.md) - [FidelityFX SDK 1.1.1](/docs/whats-new/version_1_1_1.md) - [FidelityFX SDK 1.1](/docs/whats-new/version_1_1.md) - [FidelityFX SDK 1.0](/docs/whats-new/version_1_0.md) @@ -50,7 +50,7 @@ The FidelityFX SDK includes: - [Tools](/docs/tools/index.md) - [Shader Precompiler](/docs/tools/ffx-sc.md) - - [FidelityFX SDK Media Delivery System](/docs/media-delivery.md) + - [FidelityFX SDK Media Delivery System](/docs/tools/media-delivery.md)

Known issues

@@ -65,12 +65,13 @@ The FidelityFX SDK includes: | FidelityFX DOF | All APIs / All Configs | Some artifacts may occur on some Intel Arc GPUs. | | All FidelityFX SDK Samples | All APIs / All Configs | There is a resource leak in the UploadContext used to load glTF content. | | All FidelityFX SDK Samples | All APIs / All Configs | Windows path length restrictions may cause compile issues. It is recommended to place the SDK close to the root of a drive or use subst or a mklink to shorten the path. | +| All FidelityFX SDK Samples | All APIs / All Configs | There is a build error when using CMake 3.31 or newer. |

Open source

AMD FidelityFX SDK is open source, and available under the MIT license. -For more information on the license terms please refer to [license](/sdk/license.txt). +For more information on the license terms please refer to [license](/sdk/LICENSE.txt).

Disclaimer

@@ -98,4 +99,4 @@ Microsoft is a registered trademark of Microsoft Corporation in the US and other Windows is a registered trademark of Microsoft Corporation in the US and other jurisdictions. -© 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +© 2022-2024 Advanced Micro Devices, Inc. All rights reserved. diff --git a/samples/fsrapi/CMakeLists.txt b/samples/fsrapi/CMakeLists.txt index 110b863e..bd00492c 100644 --- a/samples/fsrapi/CMakeLists.txt +++ b/samples/fsrapi/CMakeLists.txt @@ -77,7 +77,7 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD ${api_dlls} ${BIN_OUTPUT}) # Add compile definitions to identify the sample -target_compile_definitions(${PROJECT_NAME} PRIVATE SampleName=L"FidelityFX FSR FFXAPI v1.2" +target_compile_definitions(${PROJECT_NAME} PRIVATE SampleName=L"FidelityFX FSR FFXAPI v1.3" ConfigFileName=L"configs/fsrapiconfig.json" ConfigSampleName="FidelityFX FSR FFXAPI" RenderModuleName=FSRApi) diff --git a/samples/fsrapi/fsrapirendermodule.cpp b/samples/fsrapi/fsrapirendermodule.cpp index 6a26fb49..393a558e 100644 --- a/samples/fsrapi/fsrapirendermodule.cpp +++ b/samples/fsrapi/fsrapirendermodule.cpp @@ -68,8 +68,8 @@ void FSRRenderModule::Init(const json& initData) m_pTonemappedColorTarget = GetFramework()->GetRenderTexture(L"SwapChainProxy"); m_pDepthTarget = GetFramework()->GetRenderTexture(L"DepthTarget"); m_pMotionVectors = GetFramework()->GetRenderTexture(L"GBufferMotionVectorRT"); - m_pDistortionField[0] = GetFramework()->GetRenderTexture(L"DistortionField0"); - m_pDistortionField[1] = GetFramework()->GetRenderTexture(L"DistortionField1"); + m_pDistortionField[0] = GetFramework()->GetRenderTexture(L"DistortionField0"); + m_pDistortionField[1] = GetFramework()->GetRenderTexture(L"DistortionField1"); m_pReactiveMask = GetFramework()->GetRenderTexture(L"ReactiveMask"); m_pCompositionMask = GetFramework()->GetRenderTexture(L"TransCompMask"); CauldronAssert(ASSERT_CRITICAL, m_pMotionVectors && m_pDistortionField[0] && 
m_pDistortionField[1] && m_pReactiveMask && m_pCompositionMask, L"Could not get one of the needed resources for FSR Rendermodule."); @@ -153,108 +153,148 @@ void FSRRenderModule::Init(const json& initData) // Separate from FSR generation so it can be done when the engine creates the swapchain // should not be created and destroyed with FSR, as it requires a switch to windowed mode -#if defined(FFX_API_DX12) - IDXGISwapChain4* dxgiSwapchain = GetSwapChain()->GetImpl()->DX12SwapChain(); - dxgiSwapchain->AddRef(); - cauldron::GetSwapChain()->GetImpl()->SetDXGISwapChain(nullptr); - - ffx::CreateContextDescFrameGenerationSwapChainForHwndDX12 createSwapChainDesc{}; - dxgiSwapchain->GetHwnd(&createSwapChainDesc.hwnd); - DXGI_SWAP_CHAIN_DESC1 desc1; - dxgiSwapchain->GetDesc1(&desc1); - createSwapChainDesc.desc = &desc1; - DXGI_SWAP_CHAIN_FULLSCREEN_DESC fullscreenDesc; - dxgiSwapchain->GetFullscreenDesc(&fullscreenDesc); - createSwapChainDesc.fullscreenDesc = &fullscreenDesc; - dxgiSwapchain->GetParent(IID_PPV_ARGS(&createSwapChainDesc.dxgiFactory)); - createSwapChainDesc.gameQueue = GetDevice()->GetImpl()->DX12CmdQueue(cauldron::CommandQueue::Graphics); - - dxgiSwapchain->Release(); - dxgiSwapchain = nullptr; - createSwapChainDesc.swapchain = &dxgiSwapchain; - - ffx::ReturnCode retCode = ffx::CreateContext(m_SwapChainContext, nullptr, createSwapChainDesc); - CauldronAssert(ASSERT_CRITICAL, retCode == ffx::ReturnCode::Ok, L"Couldn't create the ffxapi fg swapchain (dx12): %d", (uint32_t)retCode); - createSwapChainDesc.dxgiFactory->Release(); - cauldron::GetSwapChain()->GetImpl()->SetDXGISwapChain(dxgiSwapchain); +#if defined(FFX_API_DX12) + + m_FrameInterpolationAvailable = true; + m_AsyncComputeAvailable = true; + +#elif defined(FFX_API_VK) - // In case the app is handling Alt-Enter manually we need to update the window association after creating a different swapchain - IDXGIFactory7* factory = nullptr; - if (SUCCEEDED(dxgiSwapchain->GetParent(IID_PPV_ARGS(&factory)))) + 
const cauldron::FIQueue* pAsyncComputeQueue = cauldron::GetDevice()->GetImpl()->GetFIAsyncComputeQueue(); + const cauldron::FIQueue* pPresentQueue = cauldron::GetDevice()->GetImpl()->GetFIPresentQueue(); + const cauldron::FIQueue* pImageAcquireQueue = cauldron::GetDevice()->GetImpl()->GetFIImageAcquireQueue(); + + m_FrameInterpolationAvailable = pPresentQueue->queue != VK_NULL_HANDLE && pImageAcquireQueue->queue != VK_NULL_HANDLE; + m_AsyncComputeAvailable = m_FrameInterpolationAvailable && pAsyncComputeQueue->queue != VK_NULL_HANDLE; + +#endif // defined(FFX_API_DX12) + + if (!m_FrameInterpolationAvailable) { - factory->MakeWindowAssociation(cauldron::GetFramework()->GetImpl()->GetHWND(), DXGI_MWA_NO_WINDOW_CHANGES); - factory->Release(); + m_FrameInterpolation = false; + s_uiRenderMode = 0; // no UI handling + CauldronWarning(L"Frame interpolation isn't available on this device."); } + if (!m_AsyncComputeAvailable) + { + m_EnableAsyncCompute = false; + m_PendingEnableAsyncCompute = false; + m_AllowAsyncCompute = false; + + CauldronWarning(L"Async compute Frame interpolation isn't available on this device."); + } + + if (m_FrameInterpolationAvailable) + { +#if defined(FFX_API_DX12) + IDXGISwapChain4* dxgiSwapchain = GetSwapChain()->GetImpl()->DX12SwapChain(); + dxgiSwapchain->AddRef(); + cauldron::GetSwapChain()->GetImpl()->SetDXGISwapChain(nullptr); - dxgiSwapchain->Release(); + ffx::CreateContextDescFrameGenerationSwapChainForHwndDX12 createSwapChainDesc{}; + dxgiSwapchain->GetHwnd(&createSwapChainDesc.hwnd); + DXGI_SWAP_CHAIN_DESC1 desc1; + dxgiSwapchain->GetDesc1(&desc1); + createSwapChainDesc.desc = &desc1; + DXGI_SWAP_CHAIN_FULLSCREEN_DESC fullscreenDesc; + dxgiSwapchain->GetFullscreenDesc(&fullscreenDesc); + createSwapChainDesc.fullscreenDesc = &fullscreenDesc; + dxgiSwapchain->GetParent(IID_PPV_ARGS(&createSwapChainDesc.dxgiFactory)); + createSwapChainDesc.gameQueue = GetDevice()->GetImpl()->DX12CmdQueue(cauldron::CommandQueue::Graphics); + + 
dxgiSwapchain->Release(); + dxgiSwapchain = nullptr; + createSwapChainDesc.swapchain = &dxgiSwapchain; + + ffx::ReturnCode retCode = ffx::CreateContext(m_SwapChainContext, nullptr, createSwapChainDesc); + CauldronAssert(ASSERT_CRITICAL, retCode == ffx::ReturnCode::Ok, L"Couldn't create the ffxapi fg swapchain (dx12): %d", (uint32_t)retCode); + createSwapChainDesc.dxgiFactory->Release(); + + cauldron::GetSwapChain()->GetImpl()->SetDXGISwapChain(dxgiSwapchain); + + // In case the app is handling Alt-Enter manually we need to update the window association after creating a different swapchain + IDXGIFactory7* factory = nullptr; + if (SUCCEEDED(dxgiSwapchain->GetParent(IID_PPV_ARGS(&factory)))) + { + factory->MakeWindowAssociation(cauldron::GetFramework()->GetImpl()->GetHWND(), DXGI_MWA_NO_WINDOW_CHANGES); + factory->Release(); + } + + dxgiSwapchain->Release(); + + // Lets do the same for HDR as well as it will need to be re initialized since swapchain was re created + cauldron::GetSwapChain()->SetHDRMetadataAndColorspace(); - // Lets do the same for HDR as well as it will need to be re initialized since swapchain was re created - cauldron::GetSwapChain()->SetHDRMetadataAndColorspace(); #elif defined(FFX_API_VK) - // Create frameinterpolation swapchain - cauldron::SwapChain* pSwapchain = cauldron::GetFramework()->GetSwapChain(); - VkSwapchainKHR currentSwapchain = pSwapchain->GetImpl()->VKSwapChain(); - - ffx::CreateContextDescFrameGenerationSwapChainVK createSwapChainDesc{}; - createSwapChainDesc.physicalDevice = cauldron::GetDevice()->GetImpl()->VKPhysicalDevice(); - createSwapChainDesc.device = cauldron::GetDevice()->GetImpl()->VKDevice(); - createSwapChainDesc.swapchain = ¤tSwapchain; - createSwapChainDesc.createInfo = *cauldron::GetFramework()->GetSwapChain()->GetImpl()->GetCreateInfo(); - createSwapChainDesc.allocator = nullptr; - createSwapChainDesc.gameQueue.queue = cauldron::GetDevice()->GetImpl()->VKCmdQueue(cauldron::CommandQueue::Graphics); - 
createSwapChainDesc.gameQueue.familyIndex = cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::Graphics]; - createSwapChainDesc.gameQueue.submitFunc = nullptr; // this queue is only used in vkQueuePresentKHR, hence doesn't need a callback - createSwapChainDesc.asyncComputeQueue.queue = cauldron::GetDevice()->GetImpl()->GetFIAsyncComputeQueue()->queue; - createSwapChainDesc.asyncComputeQueue.familyIndex = cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::FIAsyncCompute]; - createSwapChainDesc.asyncComputeQueue.submitFunc = nullptr; - createSwapChainDesc.presentQueue.queue = cauldron::GetDevice()->GetImpl()->GetFIPresentQueue()->queue; - createSwapChainDesc.presentQueue.familyIndex = cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::FIPresent]; - createSwapChainDesc.presentQueue.submitFunc = nullptr; - createSwapChainDesc.imageAcquireQueue.queue = cauldron::GetDevice()->GetImpl()->GetFIImageAcquireQueue()->queue; - createSwapChainDesc.imageAcquireQueue.familyIndex = cauldron::GetDevice()->GetImpl()->GetQueueFamilies().familyIndices[cauldron::RequestedQueue::FIImageAcquire]; - createSwapChainDesc.imageAcquireQueue.submitFunc = nullptr; - - // make sure swapchain is not holding a ref to real swapchain - cauldron::GetFramework()->GetSwapChain()->GetImpl()->SetVKSwapChain(VK_NULL_HANDLE); - - auto convertQueueInfo = [](VkQueueInfoFFXAPI queueInfo) { - VkQueueInfoFFX info; - info.queue = queueInfo.queue; - info.familyIndex = queueInfo.familyIndex; - info.submitFunc = queueInfo.submitFunc; - return info; - }; - VkFrameInterpolationInfoFFX frameInterpolationInfo = {}; - frameInterpolationInfo.device = createSwapChainDesc.device; - frameInterpolationInfo.physicalDevice = createSwapChainDesc.physicalDevice; - frameInterpolationInfo.pAllocator = createSwapChainDesc.allocator; - frameInterpolationInfo.gameQueue = 
convertQueueInfo(createSwapChainDesc.gameQueue); - frameInterpolationInfo.asyncComputeQueue = convertQueueInfo(createSwapChainDesc.asyncComputeQueue); - frameInterpolationInfo.presentQueue = convertQueueInfo(createSwapChainDesc.presentQueue); - frameInterpolationInfo.imageAcquireQueue = convertQueueInfo(createSwapChainDesc.imageAcquireQueue); - - ffx::ReturnCode retCode = ffx::CreateContext(m_SwapChainContext, nullptr, createSwapChainDesc); - - ffx::QueryDescSwapchainReplacementFunctionsVK replacementFunctions{}; - ffx::Query(m_SwapChainContext, replacementFunctions); - cauldron::GetDevice()->GetImpl()->SetSwapchainMethodsAndContext(nullptr, - nullptr, - replacementFunctions.pOutGetSwapchainImagesKHR, - replacementFunctions.pOutAcquireNextImageKHR, - replacementFunctions.pOutQueuePresentKHR, - replacementFunctions.pOutSetHdrMetadataEXT, - replacementFunctions.pOutCreateSwapchainFFXAPI, - replacementFunctions.pOutDestroySwapchainFFXAPI, - nullptr, - replacementFunctions.pOutGetLastPresentCountFFXAPI, - m_SwapChainContext, - &frameInterpolationInfo); - - // Set frameinterpolation swapchain to engine - cauldron::GetFramework()->GetSwapChain()->GetImpl()->SetVKSwapChain(currentSwapchain, true); + // Create frameinterpolation swapchain + cauldron::SwapChain* pSwapchain = cauldron::GetFramework()->GetSwapChain(); + VkSwapchainKHR currentSwapchain = pSwapchain->GetImpl()->VKSwapChain(); + + ffx::CreateContextDescFrameGenerationSwapChainVK createSwapChainDesc{}; + createSwapChainDesc.physicalDevice = cauldron::GetDevice()->GetImpl()->VKPhysicalDevice(); + createSwapChainDesc.device = cauldron::GetDevice()->GetImpl()->VKDevice(); + createSwapChainDesc.swapchain = ¤tSwapchain; + createSwapChainDesc.createInfo = *cauldron::GetFramework()->GetSwapChain()->GetImpl()->GetCreateInfo(); + createSwapChainDesc.allocator = nullptr; + createSwapChainDesc.gameQueue.queue = cauldron::GetDevice()->GetImpl()->VKCmdQueue(cauldron::CommandQueue::Graphics); + 
createSwapChainDesc.gameQueue.familyIndex = cauldron::GetDevice()->GetImpl()->VKCmdQueueFamily(cauldron::CommandQueue::Graphics); + createSwapChainDesc.gameQueue.submitFunc = nullptr; // this queue is only used in vkQueuePresentKHR, hence doesn't need a callback + + createSwapChainDesc.asyncComputeQueue.queue = pAsyncComputeQueue->queue; + createSwapChainDesc.asyncComputeQueue.familyIndex = pAsyncComputeQueue->family; + createSwapChainDesc.asyncComputeQueue.submitFunc = nullptr; + + createSwapChainDesc.presentQueue.queue = pPresentQueue->queue; + createSwapChainDesc.presentQueue.familyIndex = pPresentQueue->family; + createSwapChainDesc.presentQueue.submitFunc = nullptr; + + createSwapChainDesc.imageAcquireQueue.queue = pImageAcquireQueue->queue; + createSwapChainDesc.imageAcquireQueue.familyIndex = pImageAcquireQueue->family; + createSwapChainDesc.imageAcquireQueue.submitFunc = nullptr; + + // make sure swapchain is not holding a ref to real swapchain + cauldron::GetFramework()->GetSwapChain()->GetImpl()->SetVKSwapChain(VK_NULL_HANDLE); + + auto convertQueueInfo = [](VkQueueInfoFFXAPI queueInfo) { + VkQueueInfoFFX info; + info.queue = queueInfo.queue; + info.familyIndex = queueInfo.familyIndex; + info.submitFunc = queueInfo.submitFunc; + return info; + }; + + VkFrameInterpolationInfoFFX frameInterpolationInfo = {}; + frameInterpolationInfo.device = createSwapChainDesc.device; + frameInterpolationInfo.physicalDevice = createSwapChainDesc.physicalDevice; + frameInterpolationInfo.pAllocator = createSwapChainDesc.allocator; + frameInterpolationInfo.gameQueue = convertQueueInfo(createSwapChainDesc.gameQueue); + frameInterpolationInfo.asyncComputeQueue = convertQueueInfo(createSwapChainDesc.asyncComputeQueue); + frameInterpolationInfo.presentQueue = convertQueueInfo(createSwapChainDesc.presentQueue); + frameInterpolationInfo.imageAcquireQueue = convertQueueInfo(createSwapChainDesc.imageAcquireQueue); + + ffx::ReturnCode retCode = ffx::CreateContext(m_SwapChainContext, 
nullptr, createSwapChainDesc); + + ffx::QueryDescSwapchainReplacementFunctionsVK replacementFunctions{}; + ffx::Query(m_SwapChainContext, replacementFunctions); + cauldron::GetDevice()->GetImpl()->SetSwapchainMethodsAndContext(nullptr, + nullptr, + replacementFunctions.pOutGetSwapchainImagesKHR, + replacementFunctions.pOutAcquireNextImageKHR, + replacementFunctions.pOutQueuePresentKHR, + replacementFunctions.pOutSetHdrMetadataEXT, + replacementFunctions.pOutCreateSwapchainFFXAPI, + replacementFunctions.pOutDestroySwapchainFFXAPI, + nullptr, + replacementFunctions.pOutGetLastPresentCountFFXAPI, + m_SwapChainContext, + &frameInterpolationInfo); + + // Set frameinterpolation swapchain to engine + cauldron::GetFramework()->GetSwapChain()->GetImpl()->SetVKSwapChain(currentSwapchain, true); #endif // defined(FFX_API_DX12) + } // Fetch hudless texture resources m_pHudLessTexture[0] = GetFramework()->GetRenderTexture(L"HudlessTarget0"); @@ -282,9 +322,12 @@ FSRRenderModule::~FSRRenderModule() // Destroy the FSR context UpdateFSRContext(false); - // Restore the application's swapchain - ffx::DestroyContext(m_SwapChainContext); - RestoreApplicationSwapChain(false); + if (m_SwapChainContext != nullptr) + { + // Restore the application's swapchain + ffx::DestroyContext(m_SwapChainContext); + RestoreApplicationSwapChain(false); + } } void FSRRenderModule::EnableModule(bool enabled) @@ -436,7 +479,7 @@ void FSRRenderModule::InitUI(UISection* pUISection) m_UIElements.emplace_back(pUISection->RegisterUIElement>( "Letterbox size", m_LetterboxRatio, 0.1f, 1.f, [this](float cur, float old) { UpdateUpscaleRatio(&old); }, false)); - m_UIElements.emplace_back(pUISection->RegisterUIElement("Reset Upscaling", m_FrameInterpolation, [this]() { m_ResetUpscale = true; })); + m_UIElements.emplace_back(pUISection->RegisterUIElement("Reset Upscaling", [this]() { m_ResetUpscale = true; })); m_UIElements.emplace_back(pUISection->RegisterUIElement("Draw upscaler debug view", 
m_DrawUpscalerDebugView, nullptr, false)); // Reactive mask @@ -446,9 +489,6 @@ void FSRRenderModule::InitUI(UISection* pUISection) // Use mask m_UIElements.emplace_back(pUISection->RegisterUIElement("Use Transparency and Composition Mask", m_UseMask, m_EnableMaskOptions, nullptr, false)); - // Use distortion field - m_UIElements.emplace_back(pUISection->RegisterUIElement("Use Distortion Field Input", m_UseDistortionField, nullptr, false)); - // Sharpening m_UIElements.emplace_back(pUISection->RegisterUIElement("RCAS Sharpening", m_RCASSharpen, nullptr, false, false)); m_UIElements.emplace_back(pUISection->RegisterUIElement>("Sharpness", m_Sharpness, 0.f, 1.f, m_RCASSharpen, nullptr, false)); @@ -472,7 +512,7 @@ void FSRRenderModule::InitUI(UISection* pUISection) m_EnableMaskOptions)); // Frame interpolation - m_UIElements.emplace_back(pUISection->RegisterUIElement("Frame Interpolation", m_FrameInterpolation, + m_UIElements.emplace_back(pUISection->RegisterUIElement("Frame Interpolation", m_FrameInterpolation, m_FrameInterpolationAvailable, [this](bool, bool) { m_OfUiEnabled = m_FrameInterpolation && s_enableSoftwareMotionEstimation; @@ -485,6 +525,7 @@ void FSRRenderModule::InitUI(UISection* pUISection) false)); m_UIElements.emplace_back(pUISection->RegisterUIElement("Support Async Compute", m_PendingEnableAsyncCompute, + m_AsyncComputeAvailable, [this](bool, bool) { // Ask main loop to re-initialize. 
@@ -494,7 +535,9 @@ void FSRRenderModule::InitUI(UISection* pUISection) m_UIElements.emplace_back(pUISection->RegisterUIElement("Allow async compute", m_AllowAsyncCompute, m_PendingEnableAsyncCompute, nullptr, false)); m_UIElements.emplace_back(pUISection->RegisterUIElement("Use callback", m_UseCallback, m_FrameInterpolation, nullptr, false)); + m_UIElements.emplace_back(pUISection->RegisterUIElement("Use Distortion Field Input", m_UseDistortionField, m_FrameInterpolation, nullptr, false)); m_UIElements.emplace_back(pUISection->RegisterUIElement("Draw frame generation tear lines", m_DrawFrameGenerationDebugTearLines, m_FrameInterpolation, nullptr, false)); + m_UIElements.emplace_back(pUISection->RegisterUIElement("Draw frame generation pacing lines", m_DrawFrameGenerationDebugPacingLines, m_FrameInterpolation, nullptr, false)); m_UIElements.emplace_back(pUISection->RegisterUIElement("Draw frame generation reset indicators", m_DrawFrameGenerationDebugResetIndicators, m_FrameInterpolation, nullptr, false)); m_UIElements.emplace_back(pUISection->RegisterUIElement("Draw frame generation debug view", m_DrawFrameGenerationDebugView, m_FrameInterpolation, nullptr, false)); m_UIElements.emplace_back(pUISection->RegisterUIElement("Present interpolated only", m_PresentInterpolatedOnly, m_FrameInterpolation, nullptr, false)); @@ -513,8 +556,9 @@ void FSRRenderModule::InitUI(UISection* pUISection) uimod->SetAsyncRender(s_uiRenderMode == 2); uimod->SetRenderToTexture(s_uiRenderMode == 1); uimod->SetCopyHudLessTexture(s_uiRenderMode == 3); - - m_NeedReInit = true; + // Need to recreate the FSR context + EnableModule(false); + EnableModule(true); }, false)); m_UIElements.emplace_back(pUISection->RegisterUIElement("DoubleBuffer UI resource in swapchain", m_DoublebufferInSwapchain, m_FrameInterpolation, nullptr, false)); @@ -524,7 +568,7 @@ void FSRRenderModule::InitUI(UISection* pUISection) "WaitCallback Mode", m_waitCallbackMode, waitCallbackModeLabels, - m_EnableMaskOptions, + 
m_EnableWaitCallbackModeUI, [this](int32_t, int32_t) { #if defined(FFX_API_DX12) @@ -539,13 +583,105 @@ void FSRRenderModule::InitUI(UISection* pUISection) } else if (m_waitCallbackMode == 1) { - //FuncWithinStruct waitCallbackStruct = { &waitCallback }; m_swapchainKeyValueConfig.ptr = waitCallback; } ffx::Configure(m_SwapChainContext, m_swapchainKeyValueConfig); }, m_EnableMaskOptions)); + + + m_UIElements.emplace_back(pUISection->RegisterUIElement>( + "Frame Pacing safetyMarginInMs", + m_SafetyMarginInMs, + 0.0f, 1.0f, + m_FrameInterpolation, + [this](float, float) { +#if defined(FFX_API_DX12) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueDX12 m_swapchainKeyValueConfig{}; +#elif defined(FFX_API_VK) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueVK m_swapchainKeyValueConfig{}; +#endif + m_swapchainKeyValueConfig.key = FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_FRAMEPACINGTUNING; + m_swapchainKeyValueConfig.ptr = &framePacingTuning; + + framePacingTuning.safetyMarginInMs = m_SafetyMarginInMs; + + ffx::Configure(m_SwapChainContext, m_swapchainKeyValueConfig); + }, + m_FrameInterpolation)); + m_UIElements.emplace_back(pUISection->RegisterUIElement>( + "Frame Pacing varianceFactor", + m_VarianceFactor, + 0.0f, 1.0f, + m_FrameInterpolation, + [this](float, float) { +#if defined(FFX_API_DX12) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueDX12 m_swapchainKeyValueConfig{}; +#elif defined(FFX_API_VK) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueVK m_swapchainKeyValueConfig{}; +#endif + m_swapchainKeyValueConfig.key = FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_FRAMEPACINGTUNING; + m_swapchainKeyValueConfig.ptr = &framePacingTuning; + + framePacingTuning.varianceFactor = m_VarianceFactor; + + ffx::Configure(m_SwapChainContext, m_swapchainKeyValueConfig); + }, + m_FrameInterpolation)); + m_UIElements.emplace_back(pUISection->RegisterUIElement( + "Frame Pacing allowHybridSpin", + m_AllowHybridSpin, + m_FrameInterpolation, + [this](bool, bool) { +#if 
defined(FFX_API_DX12) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueDX12 m_swapchainKeyValueConfig{}; +#elif defined(FFX_API_VK) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueVK m_swapchainKeyValueConfig{}; +#endif + m_swapchainKeyValueConfig.key = FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_FRAMEPACINGTUNING; + m_swapchainKeyValueConfig.ptr = &framePacingTuning; + + framePacingTuning.allowHybridSpin = m_AllowHybridSpin; + + ffx::Configure(m_SwapChainContext, m_swapchainKeyValueConfig); + })); + m_UIElements.emplace_back(pUISection->RegisterUIElement>( + "hybridSpinTime in timer resolution units", + (int32_t&) m_HybridSpinTime, + 0, 10, + m_FrameInterpolation, + [this](int32_t, int32_t) { +#if defined(FFX_API_DX12) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueDX12 m_swapchainKeyValueConfig{}; +#elif defined(FFX_API_VK) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueVK m_swapchainKeyValueConfig{}; +#endif + m_swapchainKeyValueConfig.key = FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_FRAMEPACINGTUNING; + m_swapchainKeyValueConfig.ptr = &framePacingTuning; + + framePacingTuning.hybridSpinTime = m_HybridSpinTime; + + ffx::Configure(m_SwapChainContext, m_swapchainKeyValueConfig); + }, + m_FrameInterpolation)); + m_UIElements.emplace_back(pUISection->RegisterUIElement( + "allowWaitForSingleObjectOnFence", + m_AllowWaitForSingleObjectOnFence, + m_FrameInterpolation, + [this](bool, bool) { +#if defined(FFX_API_DX12) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueDX12 m_swapchainKeyValueConfig{}; +#elif defined(FFX_API_VK) + ffx::ConfigureDescFrameGenerationSwapChainKeyValueVK m_swapchainKeyValueConfig{}; +#endif + m_swapchainKeyValueConfig.key = FFX_API_CONFIGURE_FG_SWAPCHAIN_KEY_FRAMEPACINGTUNING; + m_swapchainKeyValueConfig.ptr = &framePacingTuning; + + framePacingTuning.allowWaitForSingleObjectOnFence = m_AllowWaitForSingleObjectOnFence; + + ffx::Configure(m_SwapChainContext, m_swapchainKeyValueConfig); + })); EnableModule(true); } @@ -578,6 +714,8 @@ void 
FSRRenderModule::SwitchUpscaler(int32_t newUpscaler) break; } + m_EnableWaitCallbackModeUI = m_EnableMaskOptions && m_FrameInterpolationAvailable; + m_UpscaleMethod = newUpscaler; // Enable the new one @@ -780,14 +918,15 @@ void FSRRenderModule::UpdateFSRContext(bool enabled) } // Create the FrameGen context + if (m_FrameInterpolationAvailable) { ffx::CreateContextDescFrameGeneration createFg{}; - createFg.displaySize = {resInfo.DisplayWidth, resInfo.DisplayHeight}; - createFg.maxRenderSize = {resInfo.DisplayWidth, resInfo.DisplayHeight}; + createFg.displaySize = { resInfo.DisplayWidth, resInfo.DisplayHeight }; + createFg.maxRenderSize = { resInfo.DisplayWidth, resInfo.DisplayHeight }; if (s_InvertedDepth) createFg.flags |= FFX_FRAMEGENERATION_ENABLE_DEPTH_INVERTED | FFX_FRAMEGENERATION_ENABLE_DEPTH_INFINITE; createFg.flags |= FFX_FRAMEGENERATION_ENABLE_HIGH_DYNAMIC_RANGE; - + m_EnableAsyncCompute = m_PendingEnableAsyncCompute; if (m_EnableAsyncCompute) { @@ -795,9 +934,20 @@ void FSRRenderModule::UpdateFSRContext(bool enabled) } createFg.backBufferFormat = SDKWrapper::GetFfxSurfaceFormat(GetFramework()->GetSwapChain()->GetSwapChainFormat()); - - // create the context. We can reuse the backend description. TODO: this relies on an implementation detail we may not want to expose. - ffx::ReturnCode retCode = ffx::CreateContext(m_FrameGenContext, nullptr, createFg, backendDesc); + ffx::ReturnCode retCode; + if (s_uiRenderMode == 3) + { + ffx::CreateContextDescFrameGenerationHudless createFgHudless{}; + createFgHudless.hudlessBackBufferFormat = SDKWrapper::GetFfxSurfaceFormat(m_pHudLessTexture[0]->GetResource()->GetTextureResource()->GetFormat()); + // create the context. We can reuse the backend description. TODO: this relies on an implementation detail we may not want to expose. + retCode = ffx::CreateContext(m_FrameGenContext, nullptr, createFg, backendDesc, createFgHudless); + } + else + { + // create the context. We can reuse the backend description. 
TODO: this relies on an implementation detail we may not want to expose. + retCode = ffx::CreateContext(m_FrameGenContext, nullptr, createFg, backendDesc); + } + CauldronAssert(ASSERT_CRITICAL, retCode == ffx::ReturnCode::Ok, L"Couldn't create the ffxapi framegen context: %d", (uint32_t)retCode); void* ffxSwapChain; @@ -841,16 +991,21 @@ void FSRRenderModule::UpdateFSRContext(bool enabled) CAUDRON_LOG_INFO(L"FrameGeneration Context VRAM totalUsageInBytes %f MB aliasableUsageInBytes %f MB", gpuMemoryUsageFrameGeneration.totalUsageInBytes / 1048576.f, gpuMemoryUsageFrameGeneration.aliasableUsageInBytes / 1048576.f); - } + + FfxApiEffectMemoryUsage gpuMemoryUsageFrameGenerationSwapchain; #if defined(FFX_API_DX12) - FfxApiEffectMemoryUsage gpuMemoryUsageFrameGenerationSwapchain; - ffx::QueryFrameGenerationSwapChainGetGPUMemoryUsageDX12 frameGenSwapchainGetGPUMemoryUsage{}; - frameGenSwapchainGetGPUMemoryUsage.gpuMemoryUsageFrameGenerationSwapchain = &gpuMemoryUsageFrameGenerationSwapchain; - ffx::Query(m_SwapChainContext, frameGenSwapchainGetGPUMemoryUsage); - CAUDRON_LOG_INFO(L"Swapchain Context VRAM totalUsageInBytes %f MB aliasableUsageInBytes %f MB", gpuMemoryUsageFrameGenerationSwapchain.totalUsageInBytes / 1048576.f, gpuMemoryUsageFrameGenerationSwapchain.aliasableUsageInBytes / 1048576.f); + ffx::QueryFrameGenerationSwapChainGetGPUMemoryUsageDX12 frameGenSwapchainGetGPUMemoryUsage{}; + frameGenSwapchainGetGPUMemoryUsage.gpuMemoryUsageFrameGenerationSwapchain = &gpuMemoryUsageFrameGenerationSwapchain; + ffx::Query(m_SwapChainContext, frameGenSwapchainGetGPUMemoryUsage); +#elif defined(FFX_API_VK) + ffx::QueryFrameGenerationSwapChainGetGPUMemoryUsageVK frameGenSwapchainGetGPUMemoryUsage{}; + frameGenSwapchainGetGPUMemoryUsage.gpuMemoryUsageFrameGenerationSwapchain = &gpuMemoryUsageFrameGenerationSwapchain; + ffx::Query(m_SwapChainContext, frameGenSwapchainGetGPUMemoryUsage); #endif // defined(FFX_API_DX12) + CAUDRON_LOG_INFO(L"Swapchain Context VRAM 
totalUsageInBytes %f MB aliasableUsageInBytes %f MB", gpuMemoryUsageFrameGenerationSwapchain.totalUsageInBytes / 1048576.f, gpuMemoryUsageFrameGenerationSwapchain.aliasableUsageInBytes / 1048576.f); + } } - else + else if (m_FrameInterpolationAvailable) { void* ffxSwapChain; #if defined(FFX_API_DX12) @@ -1068,116 +1223,118 @@ void FSRRenderModule::Execute(double deltaTime, CommandList* pCmdList) CauldronAssert(ASSERT_CRITICAL, !!retCode, L"Dispatching FSR upscaling failed: %d", (uint32_t)retCode); } - ffx::DispatchDescFrameGenerationPrepare dispatchFgPrep{}; + if (m_FrameInterpolationAvailable) + { + ffx::DispatchDescFrameGenerationPrepare dispatchFgPrep{}; #if defined(FFX_API_DX12) - dispatchFgPrep.commandList = pCmdList->GetImpl()->DX12CmdList(); + dispatchFgPrep.commandList = pCmdList->GetImpl()->DX12CmdList(); #elif defined(FFX_API_VK) - dispatchFgPrep.commandList = pCmdList->GetImpl()->VKCmdBuffer(); + dispatchFgPrep.commandList = pCmdList->GetImpl()->VKCmdBuffer(); #endif // defined(FFX_API_DX12) - dispatchFgPrep.depth = SDKWrapper::ffxGetResourceApi(m_pDepthTarget->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ); - dispatchFgPrep.motionVectors = SDKWrapper::ffxGetResourceApi(m_pMotionVectors->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ); - dispatchFgPrep.flags = 0; - - dispatchFgPrep.jitterOffset.x = -m_JitterX; - dispatchFgPrep.jitterOffset.y = -m_JitterY; - dispatchFgPrep.motionVectorScale.x = resInfo.fRenderWidth(); - dispatchFgPrep.motionVectorScale.y = resInfo.fRenderHeight(); + dispatchFgPrep.depth = SDKWrapper::ffxGetResourceApi(m_pDepthTarget->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ); + dispatchFgPrep.motionVectors = SDKWrapper::ffxGetResourceApi(m_pMotionVectors->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ); + dispatchFgPrep.flags = 0; - // Cauldron keeps time in seconds, but FSR expects milliseconds - dispatchFgPrep.frameTimeDelta = static_cast(deltaTime * 1000.f); - - 
dispatchFgPrep.renderSize.width = resInfo.RenderWidth; - dispatchFgPrep.renderSize.height = resInfo.RenderHeight; - dispatchFgPrep.cameraFovAngleVertical = pCamera->GetFovY(); + dispatchFgPrep.jitterOffset.x = -m_JitterX; + dispatchFgPrep.jitterOffset.y = -m_JitterY; + dispatchFgPrep.motionVectorScale.x = resInfo.fRenderWidth(); + dispatchFgPrep.motionVectorScale.y = resInfo.fRenderHeight(); - if (s_InvertedDepth) - { - dispatchFgPrep.cameraFar = pCamera->GetNearPlane(); - dispatchFgPrep.cameraNear = FLT_MAX; - } - else - { - dispatchFgPrep.cameraFar = pCamera->GetFarPlane(); - dispatchFgPrep.cameraNear = pCamera->GetNearPlane(); - } - dispatchFgPrep.viewSpaceToMetersFactor = 0.f; - dispatchFgPrep.frameID = m_FrameID; - - // Update frame generation config - FfxApiResource hudLessResource = SDKWrapper::ffxGetResourceApi(m_pHudLessTexture[m_curUiTextureIndex]->GetResource(), - FFX_API_RESOURCE_STATE_COMPUTE_READ); - - m_FrameGenerationConfig.frameGenerationEnabled = m_FrameInterpolation; - m_FrameGenerationConfig.flags = 0; - m_FrameGenerationConfig.flags |= m_DrawFrameGenerationDebugTearLines ? FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_TEAR_LINES : 0; - m_FrameGenerationConfig.flags |= m_DrawFrameGenerationDebugResetIndicators ? FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_RESET_INDICATORS : 0; - m_FrameGenerationConfig.flags |= m_DrawFrameGenerationDebugView ? FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_VIEW : 0; - dispatchFgPrep.flags = m_FrameGenerationConfig.flags; // TODO: maybe these should be distinct flags? - m_FrameGenerationConfig.HUDLessColor = (s_uiRenderMode == 3) ? 
hudLessResource : FfxApiResource({}); - m_FrameGenerationConfig.allowAsyncWorkloads = m_AllowAsyncCompute && m_EnableAsyncCompute; - // assume symmetric letterbox - m_FrameGenerationConfig.generationRect.left = (resInfo.DisplayWidth - resInfo.UpscaleWidth) / 2; - m_FrameGenerationConfig.generationRect.top = (resInfo.DisplayHeight - resInfo.UpscaleHeight) / 2; - m_FrameGenerationConfig.generationRect.width = resInfo.UpscaleWidth; - m_FrameGenerationConfig.generationRect.height = resInfo.UpscaleHeight; - if (m_UseCallback) - { - m_FrameGenerationConfig.frameGenerationCallback = [](ffxDispatchDescFrameGeneration* params, void* pUserCtx) -> ffxReturnCode_t + // Cauldron keeps time in seconds, but FSR expects milliseconds + dispatchFgPrep.frameTimeDelta = static_cast<float>(deltaTime * 1000.f); + + dispatchFgPrep.renderSize.width = resInfo.RenderWidth; + dispatchFgPrep.renderSize.height = resInfo.RenderHeight; + dispatchFgPrep.cameraFovAngleVertical = pCamera->GetFovY(); + + if (s_InvertedDepth) { - return ffxDispatch(reinterpret_cast<ffxContext*>(pUserCtx), &params->header); - }; - m_FrameGenerationConfig.frameGenerationCallbackUserContext = &m_FrameGenContext; - } - else - { - m_FrameGenerationConfig.frameGenerationCallback = nullptr; - m_FrameGenerationConfig.frameGenerationCallbackUserContext = nullptr; - } - m_FrameGenerationConfig.onlyPresentGenerated = m_PresentInterpolatedOnly; - m_FrameGenerationConfig.frameID = m_FrameID; + dispatchFgPrep.cameraFar = pCamera->GetNearPlane(); + dispatchFgPrep.cameraNear = FLT_MAX; + } + else + { + dispatchFgPrep.cameraFar = pCamera->GetFarPlane(); + dispatchFgPrep.cameraNear = pCamera->GetNearPlane(); + } + dispatchFgPrep.viewSpaceToMetersFactor = 0.f; + dispatchFgPrep.frameID = m_FrameID; + + // Update frame generation config + FfxApiResource hudLessResource = + SDKWrapper::ffxGetResourceApi(m_pHudLessTexture[m_curUiTextureIndex]->GetResource(), FFX_API_RESOURCE_STATE_COMPUTE_READ); + + m_FrameGenerationConfig.frameGenerationEnabled =
m_FrameInterpolation; + m_FrameGenerationConfig.flags = 0; + m_FrameGenerationConfig.flags |= m_DrawFrameGenerationDebugTearLines ? FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_TEAR_LINES : 0; + m_FrameGenerationConfig.flags |= m_DrawFrameGenerationDebugResetIndicators ? FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_RESET_INDICATORS : 0; + m_FrameGenerationConfig.flags |= m_DrawFrameGenerationDebugPacingLines ? FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_PACING_LINES : 0; + m_FrameGenerationConfig.flags |= m_DrawFrameGenerationDebugView ? FFX_FRAMEGENERATION_FLAG_DRAW_DEBUG_VIEW : 0; + dispatchFgPrep.flags = m_FrameGenerationConfig.flags; // TODO: maybe these should be distinct flags? + m_FrameGenerationConfig.HUDLessColor = (s_uiRenderMode == 3) ? hudLessResource : FfxApiResource({}); + m_FrameGenerationConfig.allowAsyncWorkloads = m_AllowAsyncCompute && m_EnableAsyncCompute; + // assume symmetric letterbox + m_FrameGenerationConfig.generationRect.left = (resInfo.DisplayWidth - resInfo.UpscaleWidth) / 2; + m_FrameGenerationConfig.generationRect.top = (resInfo.DisplayHeight - resInfo.UpscaleHeight) / 2; + m_FrameGenerationConfig.generationRect.width = resInfo.UpscaleWidth; + m_FrameGenerationConfig.generationRect.height = resInfo.UpscaleHeight; + if (m_UseCallback) + { + m_FrameGenerationConfig.frameGenerationCallback = [](ffxDispatchDescFrameGeneration* params, void* pUserCtx) -> ffxReturnCode_t { + return ffxDispatch(reinterpret_cast<ffxContext*>(pUserCtx), &params->header); + }; + m_FrameGenerationConfig.frameGenerationCallbackUserContext = &m_FrameGenContext; + } + else + { + m_FrameGenerationConfig.frameGenerationCallback = nullptr; + m_FrameGenerationConfig.frameGenerationCallbackUserContext = nullptr; + } + m_FrameGenerationConfig.onlyPresentGenerated = m_PresentInterpolatedOnly; + m_FrameGenerationConfig.frameID = m_FrameID; - void* ffxSwapChain; + void* ffxSwapChain; #if defined(FFX_API_DX12) - ffxSwapChain = GetSwapChain()->GetImpl()->DX12SwapChain(); + ffxSwapChain =
GetSwapChain()->GetImpl()->DX12SwapChain(); #elif defined(FFX_API_VK) - ffxSwapChain = GetSwapChain()->GetImpl()->VKSwapChain(); + ffxSwapChain = GetSwapChain()->GetImpl()->VKSwapChain(); #endif // defined(FFX_API_DX12) - m_FrameGenerationConfig.swapChain = ffxSwapChain; - ffx::ReturnCode retCode = ffx::ReturnCode::ErrorParameter; - if (m_UseDistortionField) - { - ffx::ConfigureDescFrameGenerationRegisterDistortionFieldResource dfConfig{}; - dfConfig.distortionField = SDKWrapper::ffxGetResourceApi(m_pDistortionField[m_curUiTextureIndex]->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ); - retCode = ffx::Configure(m_FrameGenContext, m_FrameGenerationConfig, dfConfig); - } - else - { - retCode = ffx::Configure(m_FrameGenContext, m_FrameGenerationConfig); - } - - CauldronAssert(ASSERT_CRITICAL, !!retCode, L"Configuring FSR FG failed: %d", (uint32_t)retCode); + m_FrameGenerationConfig.swapChain = ffxSwapChain; + ffx::ReturnCode retCode = ffx::ReturnCode::ErrorParameter; + if (m_UseDistortionField) + { + ffx::ConfigureDescFrameGenerationRegisterDistortionFieldResource dfConfig{}; + dfConfig.distortionField = + SDKWrapper::ffxGetResourceApi(m_pDistortionField[m_curUiTextureIndex]->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ); + retCode = ffx::Configure(m_FrameGenContext, m_FrameGenerationConfig, dfConfig); + } + else + { + retCode = ffx::Configure(m_FrameGenContext, m_FrameGenerationConfig); + } - retCode = ffx::Dispatch(m_FrameGenContext, dispatchFgPrep); - CauldronAssert(ASSERT_CRITICAL, !!retCode, L"Dispatching FSR FG (upscaling data) failed: %d", (uint32_t)retCode); + CauldronAssert(ASSERT_CRITICAL, !!retCode, L"Configuring FSR FG failed: %d", (uint32_t)retCode); - FfxApiResource uiColor = (s_uiRenderMode == 1) - ? 
SDKWrapper::ffxGetResourceApi(m_pUiTexture[m_curUiTextureIndex]->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ) - : FfxApiResource({}); + retCode = ffx::Dispatch(m_FrameGenContext, dispatchFgPrep); + CauldronAssert(ASSERT_CRITICAL, !!retCode, L"Dispatching FSR FG (upscaling data) failed: %d", (uint32_t)retCode); + + FfxApiResource uiColor = + (s_uiRenderMode == 1) ? SDKWrapper::ffxGetResourceApi(m_pUiTexture[m_curUiTextureIndex]->GetResource(), FFX_API_RESOURCE_STATE_PIXEL_COMPUTE_READ) + : FfxApiResource({}); #if defined(FFX_API_DX12) - ffx::ConfigureDescFrameGenerationSwapChainRegisterUiResourceDX12 uiConfig{}; - uiConfig.uiResource = uiColor; - uiConfig.flags = m_DoublebufferInSwapchain ? FFX_FRAMEGENERATION_UI_COMPOSITION_FLAG_ENABLE_INTERNAL_UI_DOUBLE_BUFFERING : 0; - ffx::Configure(m_SwapChainContext, uiConfig); + ffx::ConfigureDescFrameGenerationSwapChainRegisterUiResourceDX12 uiConfig{}; + uiConfig.uiResource = uiColor; + uiConfig.flags = m_DoublebufferInSwapchain ? FFX_FRAMEGENERATION_UI_COMPOSITION_FLAG_ENABLE_INTERNAL_UI_DOUBLE_BUFFERING : 0; + ffx::Configure(m_SwapChainContext, uiConfig); #elif defined(FFX_API_VK) - ffx::ConfigureDescFrameGenerationSwapChainRegisterUiResourceVK uiConfig{}; - uiConfig.uiResource = uiColor; - uiConfig.flags = m_DoublebufferInSwapchain ? FFX_FRAMEGENERATION_UI_COMPOSITION_FLAG_ENABLE_INTERNAL_UI_DOUBLE_BUFFERING : 0; - ffx::Configure(m_SwapChainContext, uiConfig); + ffx::ConfigureDescFrameGenerationSwapChainRegisterUiResourceVK uiConfig{}; + uiConfig.uiResource = uiColor; + uiConfig.flags = m_DoublebufferInSwapchain ? 
FFX_FRAMEGENERATION_UI_COMPOSITION_FLAG_ENABLE_INTERNAL_UI_DOUBLE_BUFFERING : 0; + ffx::Configure(m_SwapChainContext, uiConfig); #endif // defined(FFX_API_DX12) - - + } // Dispatch frame generation, if not using the callback if (m_FrameInterpolation && !m_UseCallback) @@ -1214,7 +1371,7 @@ void FSRRenderModule::Execute(double deltaTime, CommandList* pCmdList) dispatchFg.frameID = m_FrameID; dispatchFg.reset = m_ResetFrameInterpolation; - retCode = ffx::Dispatch(m_FrameGenContext, dispatchFg); + ffx::ReturnCode retCode = ffx::Dispatch(m_FrameGenContext, dispatchFg); CauldronAssert(ASSERT_CRITICAL, !!retCode, L"Dispatching Frame Generation failed: %d", (uint32_t)retCode); } @@ -1356,6 +1513,7 @@ void RestoreApplicationSwapChain(bool recreateSwapchain) } return; } + #elif defined(FFX_API_VK) void RestoreApplicationSwapChain(bool recreateSwapchain) { diff --git a/samples/fsrapi/fsrapirendermodule.h b/samples/fsrapi/fsrapirendermodule.h index b519010e..a3f885bd 100644 --- a/samples/fsrapi/fsrapirendermodule.h +++ b/samples/fsrapi/fsrapirendermodule.h @@ -56,7 +56,15 @@ class FSRRenderModule : public cauldron::RenderModule } UpscalerType; public: - FSRRenderModule() : RenderModule(L"FSRApiRenderModule") {} + FSRRenderModule() + : RenderModule(L"FSRApiRenderModule"), + m_SafetyMarginInMs(0.1f), + m_VarianceFactor (0.1f), + m_AllowHybridSpin (false), + m_HybridSpinTime(2), + m_AllowWaitForSingleObjectOnFence(false), + framePacingTuning { m_SafetyMarginInMs, m_VarianceFactor, m_AllowHybridSpin, m_HybridSpinTime, m_AllowWaitForSingleObjectOnFence } + {} virtual ~FSRRenderModule(); void Init(const json& initData); @@ -166,7 +174,10 @@ class FSRRenderModule : public cauldron::RenderModule bool m_SharpnessEnabled = false; bool m_NeedReInit = false; + bool m_FrameInterpolationAvailable = false; + bool m_AsyncComputeAvailable = false; bool m_EnableMaskOptions = true; + bool m_EnableWaitCallbackModeUI = true; bool m_FrameInterpolation = true; bool m_EnableAsyncCompute = true; bool 
m_AllowAsyncCompute = true; @@ -174,6 +185,7 @@ class FSRRenderModule : public cauldron::RenderModule bool m_UseCallback = true; bool m_DrawFrameGenerationDebugTearLines = true; bool m_DrawFrameGenerationDebugResetIndicators = true; + bool m_DrawFrameGenerationDebugPacingLines = false; bool m_DrawFrameGenerationDebugView = false; bool m_DrawUpscalerDebugView = false; bool m_PresentInterpolatedOnly = false; @@ -188,9 +200,9 @@ class FSRRenderModule : public cauldron::RenderModule uint32_t m_FsrVersionIndex = 0; bool m_ffxBackendInitialized = false; - ffx::Context m_UpscalingContext; - ffx::Context m_FrameGenContext; - ffx::Context m_SwapChainContext; + ffx::Context m_UpscalingContext = nullptr; + ffx::Context m_FrameGenContext = nullptr; + ffx::Context m_SwapChainContext = nullptr; ffx::ConfigureDescFrameGeneration m_FrameGenerationConfig{}; // Backup UI elements @@ -234,6 +246,13 @@ class FSRRenderModule : public cauldron::RenderModule //Set Swapchain waitcallback via Configure Context KeyValue API int32_t m_waitCallbackMode = 0; + //Set Swapchain Frame pacing Tuning + float m_SafetyMarginInMs; // in Millisecond + float m_VarianceFactor; // valid range [0.0,1.0] + bool m_AllowHybridSpin; + uint32_t m_HybridSpinTime; + bool m_AllowWaitForSingleObjectOnFence; + FfxApiSwapchainFramePacingTuning framePacingTuning; }; // alias to get sample.cpp to use this class. 
diff --git a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h index 11d280be..20c5c296 100644 --- a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h +++ b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h @@ -472,9 +472,9 @@ layout (set = 0, binding = 1000) uniform sampler s_LinearClamp; #if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD) layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD) uniform texture2D r_input_distortion_field; - FfxFloat32x4 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv) + FfxFloat32x2 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv) { - return textureLod(sampler2D(r_input_distortion_field, s_LinearClamp), fUv, 0.0); + return textureLod(sampler2D(r_input_distortion_field, s_LinearClamp), fUv, 0.0).xy; } #endif diff --git a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h index ae5f437e..a58743d2 100644 --- a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h +++ b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h @@ -511,8 +511,8 @@ FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) #endif #if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD) - Texture2D r_input_distortion_field : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD); - FfxFloat32x4 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv) + Texture2D r_input_distortion_field : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD); + FfxFloat32x2 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv) { return r_input_distortion_field.SampleLevel(s_LinearClamp, fUv, 0); } diff --git 
a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h index 7a2cb3b2..03adc96f 100644 --- a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h +++ b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h @@ -47,7 +47,7 @@ FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousD FfxFloat32 LoadEstimatedDepth(FfxUInt32 estimatedIndex, FfxInt32x2 iSamplePos) { const FfxFloat32x2 fUv = FfxFloat32x2(iSamplePos + 0.5f) / RenderSize(); - const FfxFloat32x4 fDistortionFieldUv = SampleDistortionField(fUv); + const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUv); FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize()); if (estimatedIndex == 0) diff --git a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h index 88c91714..1968ec76 100644 --- a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h +++ b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h @@ -36,7 +36,7 @@ void computeGameFieldMvs(FfxInt32x2 iPxPos) { const FfxFloat32x2 fUvInScreenSpace = (FfxFloat32x2(iPxPos) + 0.5f) / RenderSize(); - const FfxFloat32x4 fDistortionFieldUv = SampleDistortionField(fUvInScreenSpace); + const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUvInScreenSpace); FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize()); const FfxFloat32x2 fUvInInterpolationRectStart = FfxFloat32x2(InterpolationRectBase()) / DisplaySize(); @@ -51,8 +51,9 @@ void computeGameFieldMvs(FfxInt32x2 iPxPos) const FfxFloat32 fViewSpaceDepth = 
ConvertFromDeviceDepthToViewSpace(fDepthSample); const FfxUInt32 uHighPriorityFactorPrimary = getPriorityFactorFromViewSpaceDepth(fViewSpaceDepth); - FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUvInInterpolationRect).xyz; - FfxFloat32x3 curBackbufferCol = SamplePreviousBackbuffer(fUvInInterpolationRect + fGameMotionVector * fUvLetterBoxScale).xyz; + // pixel position in current frame + Game Motion Vector -> pixel position in previous frame + FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUvInInterpolationRect+ fGameMotionVector * fUvLetterBoxScale).xyz; // returns previous backbuffer color of current frame pixel position in previous frame + FfxFloat32x3 curBackbufferCol = SampleCurrentBackbuffer(fUvInInterpolationRect).xyz; // returns current backbuffer color at current frame pixel position FfxFloat32 prevLuma = 0.001f + RawRGBToLuminance(prevBackbufferCol); FfxFloat32 currLuma = 0.001f + RawRGBToLuminance(curBackbufferCol); diff --git a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h index 35fc19a8..84321326 100644 --- a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h +++ b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h @@ -30,8 +30,9 @@ void computeOpticalFlowFieldMvs(FfxUInt32x2 dtID, FfxFloat32x2 fOpticalFlowVecto const FfxFloat32 scaleFactor = 1.0f; FfxFloat32x2 fMotionVectorHalf = fOpticalFlowVector * 0.5f; - FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUv).xyz; - FfxFloat32x3 curBackbufferCol = SampleCurrentBackbuffer(fUv + fOpticalFlowVector).xyz; + // pixel position in current frame + fOpticalFlowVector-> pixel position in previous frame + FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUv + fOpticalFlowVector).xyz; // returns previous backbuffer color of current 
frame pixel position in previous frame + FfxFloat32x3 curBackbufferCol = SampleCurrentBackbuffer(fUv).xyz; // returns current backbuffer color at current frame pixel position FfxFloat32 prevLuma = 0.001f + RawRGBToLuminance(prevBackbufferCol); FfxFloat32 currLuma = 0.001f + RawRGBToLuminance(curBackbufferCol); diff --git a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h index 143449f6..d85f6731 100644 --- a/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h +++ b/sdk/include/FidelityFX/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h @@ -51,7 +51,7 @@ void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxUInt32 depthTarget, FfxFloat32 f void reconstructPreviousDepth(FfxInt32x2 iPxPos) { const FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5f)) / RenderSize(); - const FfxFloat32x4 fDistortionFieldUv = SampleDistortionField(fUv); + const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUv); FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize()); FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos + iDistortionPixelOffset); diff --git a/sdk/include/FidelityFX/host/backends/dx12/ffx_dx12.h b/sdk/include/FidelityFX/host/backends/dx12/ffx_dx12.h index e1471a5c..38d2c2ff 100644 --- a/sdk/include/FidelityFX/host/backends/dx12/ffx_dx12.h +++ b/sdk/include/FidelityFX/host/backends/dx12/ffx_dx12.h @@ -309,10 +309,11 @@ FFX_API FfxResource ffxGetFrameinterpolationTextureDX12(FfxSwapchain gameSwapCha /// @ingroup DX12FrameInterpolation FFX_API FfxErrorCode ffxSetFrameGenerationConfigToSwapchainDX12(FfxFrameGenerationConfig const* config); -//enum values should match enum FfxApiConfigureFrameGenerationSwapChainKeyDX12 +//enum value matches enum FfxApiConfigureFrameGenerationSwapChainKeyDX12 typedef enum 
FfxFrameInterpolationSwapchainConfigureKey { - FFX_FI_SWAPCHAIN_CONFIGURE_KEY_WAITCALLBACK = 0 + FFX_FI_SWAPCHAIN_CONFIGURE_KEY_WAITCALLBACK = 0, + FFX_FI_SWAPCHAIN_CONFIGURE_KEY_FRAMEPACINGTUNING = 2, } FfxFrameInterpolationSwapchainConfigureKey; /// Configures FfxSwapchain via KeyValue API post FfxSwapchain context creation diff --git a/sdk/include/FidelityFX/host/backends/vk/ffx_vk.h b/sdk/include/FidelityFX/host/backends/vk/ffx_vk.h index eef5bda0..b31d5892 100644 --- a/sdk/include/FidelityFX/host/backends/vk/ffx_vk.h +++ b/sdk/include/FidelityFX/host/backends/vk/ffx_vk.h @@ -44,6 +44,14 @@ typedef struct VkQueueInfoFFX PFN_vkQueueSubmitFFX submitFunc; } VkQueueInfoFFX; + +typedef enum VkCompositonModeFFX +{ + VK_COMPOSITION_MODE_NOT_FORCED_FFX, + VK_COMPOSITION_MODE_GAME_QUEUE_FFX, + VK_COMPOSITION_MODE_PRESENT_QUEUE_FFX, +} VkCompositonModeFFX; + /// Structure holding additional information to effectively replace the game swapchain by the frame interpolation one. /// Some notes on the queues: /// - please pass the queue, its family (for queue family ownership transfer purposes) and an optional function if you want to control concurrent submissions @@ -61,6 +69,7 @@ typedef struct VkFrameInterpolationInfoFFX VkQueueInfoFFX asyncComputeQueue; VkQueueInfoFFX presentQueue; VkQueueInfoFFX imageAcquireQueue; + VkCompositonModeFFX compositionMode; const VkAllocationCallbacks* pAllocator; } VkFrameInterpolationInfoFFX; @@ -303,7 +312,8 @@ FFX_API FfxErrorCode ffxSetFrameGenerationConfigToSwapchainVK(FfxFrameGeneration //enum values should match enum FfxApiConfigureFrameGenerationSwapChainKeyVK typedef enum FfxFrameInterpolationSwapchainConfigureKey { - FFX_FI_SWAPCHAIN_CONFIGURE_KEY_WAITCALLBACK = 0 + FFX_FI_SWAPCHAIN_CONFIGURE_KEY_WAITCALLBACK = 0, + FFX_FI_SWAPCHAIN_CONFIGURE_KEY_FRAMEPACINGTUNING = 2, } FfxFrameInterpolationSwapchainConfigureKey; /// Configures FfxSwapchain via KeyValue API post FfxSwapchain context creation @@ -320,6 +330,18 @@ typedef enum 
FfxFrameInterpolationSwapchainConfigureKey /// @ingroup VKFrameInterpolation FFX_API FfxErrorCode ffxConfigureFrameInterpolationSwapchainVK(FfxSwapchain gameSwapChain, FfxFrameInterpolationSwapchainConfigureKey key, void* valuePtr); +/// Query how much GPU memory was created by the FfxSwapchain. This excludes GPU memory created by the VkSwapchain (i.e. size of backbuffers). +/// +/// @param [in] gameSwapChain The FfxSwapchain to query +/// @param [in,out] vramUsage The FfxEffectMemoryUsage that receives the GPU memory created by FrameInterpolationSwapchain +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_ARGUMENT Could not query the interface for the frame interpolation swap chain. +/// +/// @ingroup VKFrameInterpolation +FFX_API FfxErrorCode ffxFrameInterpolationSwapchainGetGpuMemoryUsageVK(FfxSwapchain gameSwapChain, FfxEffectMemoryUsage* vramUsage); typedef VkResult (*PFN_vkCreateSwapchainFFX)(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain, const VkFrameInterpolationInfoFFX* pFrameInterpolationInfo); diff --git a/sdk/include/FidelityFX/host/ffx_frameinterpolation.h b/sdk/include/FidelityFX/host/ffx_frameinterpolation.h index 11c2af76..a0c94e62 100644 --- a/sdk/include/FidelityFX/host/ffx_frameinterpolation.h +++ b/sdk/include/FidelityFX/host/ffx_frameinterpolation.h @@ -40,7 +40,7 @@ /// FidelityFX Frameinterpolation patch version. /// /// @ingroup FRAMEINTERPOLATIONFRAMEINTERPOLATION -#define FFX_FRAMEINTERPOLATION_VERSION_PATCH (1) +#define FFX_FRAMEINTERPOLATION_VERSION_PATCH (2) /// FidelityFX Frame Interpolation context count /// @@ -114,11 +114,12 @@ typedef enum FfxFrameInterpolationInitializationFlagBits { /// /// @ingroup FRAMEINTERPOLATION typedef struct FfxFrameInterpolationContextDescription { - uint32_t flags; ///< A collection of FfxFrameInterpolationInitializationFlagBits. 
- FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at. - FfxDimensions2D displaySize; ///< The size of the presentation resolution - FfxSurfaceFormat backBufferFormat; - FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK + uint32_t flags; ///< A collection of FfxFrameInterpolationInitializationFlagBits. + FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at. + FfxDimensions2D displaySize; ///< The size of the presentation resolution + FfxSurfaceFormat backBufferFormat; ///< the format of the backbuffer + FfxSurfaceFormat previousInterpolationSourceFormat; ///< the format of the texture that will store the interpolation source for the next frame. Can be different than the backbuffer one, especially when using hudless + FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK } FfxFrameInterpolationContextDescription; /// A structure encapsulating the resource descriptions for shared resources for this effect. @@ -231,6 +232,7 @@ typedef enum FfxFrameInterpolationDispatchFlags FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES = (1 << 0), ///< A bit indicating that the debug tear lines will be drawn to the interpolated output. FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_RESET_INDICATORS = (1 << 1), ///< A bit indicating that the debug reset indicators will be drawn to the generated output. FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW = (1 << 2), ///< A bit indicating that the interpolated output resource will contain debug views with relevant information. + FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_PACING_LINES = (1 << 3), ///< A bit indicating that the debug pacing lines will be drawn to the generated output. 
} FfxFrameInterpolationDispatchFlags; typedef struct FfxFrameInterpolationDispatchDescription { @@ -268,7 +270,6 @@ typedef struct FfxFrameInterpolationDispatchDescription { FfxResource reconstructedPrevDepth; ///< The reconstructed depth buffer data FfxResource distortionField; ///< A resource containing distortion offset data used when distortion post effects are enabled. - } FfxFrameInterpolationDispatchDescription; FFX_API FfxErrorCode ffxFrameInterpolationDispatch(FfxFrameInterpolationContext* context, const FfxFrameInterpolationDispatchDescription* params); diff --git a/sdk/include/FidelityFX/host/ffx_fsr3.h b/sdk/include/FidelityFX/host/ffx_fsr3.h index 7825a64a..aba7664e 100644 --- a/sdk/include/FidelityFX/host/ffx_fsr3.h +++ b/sdk/include/FidelityFX/host/ffx_fsr3.h @@ -43,7 +43,7 @@ /// FidelityFX Super Resolution 0 patch version. /// /// @ingroup FSR3 -#define FFX_FSR3_VERSION_PATCH (2) +#define FFX_FSR3_VERSION_PATCH (3) /// FidelityFX Super Resolution 3 context count /// @@ -139,6 +139,7 @@ typedef enum FfxFsr3FrameGenerationFlags { FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_TEAR_LINES = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES, ///< A bit indicating that the debug tear lines will be drawn to the interpolated output. FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_VIEW = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW, ///< A bit indicating that the interpolated output resource will contain debug views with relevant information. + FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_PACING_LINES = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_PACING_LINES ///< A bit indicating that the debug pacing lines will be drawn to the generated output. 
} FfxFsr3FrameGenerationFlags; typedef enum FfxFsr3UpscalingFlags diff --git a/sdk/include/FidelityFX/host/ffx_fsr3upscaler.h b/sdk/include/FidelityFX/host/ffx_fsr3upscaler.h index fe782289..2cef2634 100644 --- a/sdk/include/FidelityFX/host/ffx_fsr3upscaler.h +++ b/sdk/include/FidelityFX/host/ffx_fsr3upscaler.h @@ -43,7 +43,7 @@ /// FidelityFX Super Resolution 3 patch version. /// /// @ingroup ffxFsr3Upscaler -#define FFX_FSR3UPSCALER_VERSION_PATCH (2) +#define FFX_FSR3UPSCALER_VERSION_PATCH (3) /// FidelityFX Super Resolution 3 context count /// diff --git a/sdk/include/FidelityFX/host/ffx_interface.h b/sdk/include/FidelityFX/host/ffx_interface.h index 5b81d6dd..88e6572f 100644 --- a/sdk/include/FidelityFX/host/ffx_interface.h +++ b/sdk/include/FidelityFX/host/ffx_interface.h @@ -474,7 +474,7 @@ typedef FfxErrorCode(*FfxPresentCallbackFunc)(const FfxPresentCallbackDescriptio typedef FfxErrorCode(*FfxFrameGenerationDispatchFunc)(const FfxFrameGenerationDispatchDescription* params, void*); typedef FfxErrorCode(*FfxWaitCallbackFunc)(wchar_t* fenceName, uint64_t fenceValueToWaitFor); -/// A structure representing the configuration options to pass to FfxFrameInterpolation. +/// A structure representing the configuration options to pass to FrameInterpolationSwapChain /// /// @ingroup FfxInterface typedef struct FfxFrameGenerationConfig @@ -492,6 +492,7 @@ typedef struct FfxFrameGenerationConfig bool onlyPresentInterpolated; ///< Set to true to only present interpolated frame FfxRect2D interpolationRect; ///< Set the area in the backbuffer that will be interpolated uint64_t frameID; ///< A frame identifier used to synchronize resource usage in workloads + bool drawDebugPacingLines; ///< Sets the state of pacing debug lines. 
Set to true to display debug lines } FfxFrameGenerationConfig; typedef FfxErrorCode (*FfxSwapChainConfigureFrameGenerationFunc)(FfxFrameGenerationConfig const* config); diff --git a/sdk/include/FidelityFX/host/ffx_types.h b/sdk/include/FidelityFX/host/ffx_types.h index 1b34a400..f171397a 100644 --- a/sdk/include/FidelityFX/host/ffx_types.h +++ b/sdk/include/FidelityFX/host/ffx_types.h @@ -272,7 +272,7 @@ typedef int32_t FfxInt32x4[4]; /// @ingroup ffxHost -/// An enumeration of surface formats. +/// An enumeration of surface formats. Needs to match enum FfxApiSurfaceFormat /// /// @ingroup SDKTypes typedef enum FfxSurfaceFormat { @@ -307,6 +307,15 @@ typedef enum FfxSurfaceFormat { FFX_SURFACE_FORMAT_R8G8_UINT, ///< 8 bit per channel, 2 channel unsigned integer format FFX_SURFACE_FORMAT_R32_FLOAT, ///< 32 bit per channel, 1 channel float format FFX_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP, ///< 9 bit per channel, 5 bit exponent format + + FFX_SURFACE_FORMAT_R16G16B16A16_TYPELESS, ///< 16 bit per channel, 4 channel typeless format + FFX_SURFACE_FORMAT_R32G32_TYPELESS, ///< 32 bit per channel, 2 channel typeless format + FFX_SURFACE_FORMAT_R10G10B10A2_TYPELESS, ///< 10 bit per 3 channel, 2 bit for 1 channel typeless format + FFX_SURFACE_FORMAT_R16G16_TYPELESS, ///< 16 bit per channel, 2 channel typeless format + FFX_SURFACE_FORMAT_R16_TYPELESS, ///< 16 bit per channel, 1 channel typeless format + FFX_SURFACE_FORMAT_R8_TYPELESS, ///< 8 bit per channel, 1 channel typeless format + FFX_SURFACE_FORMAT_R8G8_TYPELESS, ///< 8 bit per channel, 2 channel typeless format + FFX_SURFACE_FORMAT_R32_TYPELESS, ///< 32 bit per channel, 1 channel typeless format } FfxSurfaceFormat; typedef enum FfxIndexFormat @@ -1269,12 +1278,23 @@ typedef struct FfxFrameGenerationDispatchDescription { uint64_t frameID; } FfxFrameGenerationDispatchDescription; +//struct definition matches FfxApiEffectMemoryUsage typedef struct FfxEffectMemoryUsage { uint64_t totalUsageInBytes; uint64_t 
aliasableUsageInBytes; } FfxEffectMemoryUsage; +//struct definition matches FfxApiSwapchainFramePacingTuning +typedef struct FfxSwapchainFramePacingTuning +{ + float safetyMarginInMs; // in Millisecond + float varianceFactor; // valid range [0.0,1.0] + bool allowHybridSpin; //Allows pacing spinlock to sleep. + uint32_t hybridSpinTime; //How long to spin when hybridSpin is enabled. Measured in timer resolution units. Not recommended to go below 2. Will result in frequent overshoots. + bool allowWaitForSingleObjectOnFence; //Allows to call WaitForSingleObject() instead of spinning for fence value. +} FfxSwapchainFramePacingTuning; + #ifdef __cplusplus } #endif // #ifdef __cplusplus diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.cpp b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.cpp index 1d3d394d..90cf7772 100644 --- a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.cpp +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.cpp @@ -26,8 +26,12 @@ #include #include "FrameInterpolationSwapchainDX12_UiComposition.h" +#include "FrameInterpolationSwapchainDX12_DebugPacing.h" #include "antilag2/ffx_antilag2_dx12.h" +#pragma comment(lib, "winmm.lib") +#include + FfxErrorCode ffxRegisterFrameinterpolationUiResourceDX12(FfxSwapchain gameSwapChain, FfxResource uiResource, uint32_t flags) { IDXGISwapChain4* swapChain = ffxGetDX12SwapchainPtr(gameSwapChain); @@ -78,6 +82,12 @@ FfxErrorCode ffxConfigureFrameInterpolationSwapchainDX12(FfxSwapchain gameSwapCh case FFX_FI_SWAPCHAIN_CONFIGURE_KEY_WAITCALLBACK: framinterpolationSwapchain->setWaitCallback(static_cast(valuePtr)); break; + case FFX_FI_SWAPCHAIN_CONFIGURE_KEY_FRAMEPACINGTUNING: + if (valuePtr != nullptr) + { + framinterpolationSwapchain->setFramePacingTuning(static_cast(valuePtr)); + } + break; } SafeRelease(framinterpolationSwapchain); @@ -305,6 +315,32 @@ HRESULT 
compositeSwapChainFrame(FrameinterpolationPresentInfo* presenter, Pacing presenter->presentQueue->Wait(presenter->interpolationFence, frameInfo.interpolationCompletedFenceValue); + if (pacingEntry->drawDebugPacingLines) + { + auto gpuCommands = presenter->commandPool.get(presenter->presentQueue, L"compositeSwapChainFrame"); + + uint32_t currBackbufferIndex = presenter->swapChain->GetCurrentBackBufferIndex(); + ID3D12Resource* swapchainBackbuffer = nullptr; + presenter->swapChain->GetBuffer(currBackbufferIndex, IID_PPV_ARGS(&swapchainBackbuffer)); + + FfxPresentCallbackDescription desc{}; + desc.commandList = ffxGetCommandListDX12(gpuCommands->reset()); + desc.device = presenter->device; + desc.isInterpolatedFrame = frameID != PacingData::FrameType::Real; + desc.outputSwapChainBuffer = ffxGetResourceDX12(swapchainBackbuffer, ffxGetResourceDescriptionDX12(swapchainBackbuffer), nullptr, FFX_RESOURCE_STATE_PRESENT); + desc.currentBackBuffer = frameInfo.resource; + desc.currentUI = pacingEntry->uiSurface; + desc.usePremulAlpha = pacingEntry->usePremulAlphaComposite; + desc.frameID = pacingEntry->currentFrameID; + + ffxFrameInterpolationDebugPacing(&desc); + + gpuCommands->execute(true); + + SafeRelease(swapchainBackbuffer); + } + + if (pacingEntry->presentCallback) { auto gpuCommands = presenter->commandPool.get(presenter->presentQueue, L"compositeSwapChainFrame"); @@ -330,7 +366,8 @@ HRESULT compositeSwapChainFrame(FrameinterpolationPresentInfo* presenter, Pacing SafeRelease(swapchainBackbuffer); } - presenter->presentQueue->Signal(presenter->compositionFence, frameInfo.presentIndex); + presenter->presentQueue->Signal(presenter->compositionFenceGPU, frameInfo.presentIndex); + presenter->compositionFenceCPU->Signal(frameInfo.presentIndex); return S_OK; } @@ -378,7 +415,16 @@ DWORD WINAPI presenterThread(LPVOID param) if (presenter) { UINT64 numFramesSentForPresentation = 0; - int64_t previousPresentQpc = 0; + int64_t qpcFrequency = 0; + + LARGE_INTEGER freq; + 
QueryPerformanceFrequency(&freq); + qpcFrequency = freq.QuadPart; + + TIMECAPS timerCaps; + timerCaps.wPeriodMin = UNKNOWN_TIMER_RESOlUTION; //Default to unknown to prevent sleep without guarantees. + + presenter->previousPresentQpc = 0; while (!presenter->shutdown) { @@ -413,10 +459,26 @@ DWORD WINAPI presenterThread(LPVOID param) presenter->presentQueue->Signal(presenter->replacementBufferFence, entry.replacementBufferFenceSignal); } + + MMRESULT result = timeGetDevCaps(&timerCaps, sizeof(timerCaps)); + if (result != MMSYSERR_NOERROR || !presenter->allowHybridSpin) + { + timerCaps.wPeriodMin = UNKNOWN_TIMER_RESOlUTION; + } + else + { + timerCaps.wPeriodMin = FFX_MAXIMUM(1, timerCaps.wPeriodMin); + } + // pacing without composition - waitForFenceValue(presenter->compositionFence, frameInfo.presentIndex); - waitForPerformanceCount(previousPresentQpc + frameInfo.presentQpcDelta); - QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(&previousPresentQpc)); + waitForFenceValue(presenter->compositionFenceGPU, frameInfo.presentIndex); + uint64_t targetQpc = presenter->previousPresentQpc + frameInfo.presentQpcDelta; + waitForPerformanceCount(targetQpc, qpcFrequency, timerCaps.wPeriodMin, presenter->hybridSpinTime); + + int64_t currentPresentQPC; + QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(&currentPresentQPC)); + presenter->previousPresentQpc = currentPresentQPC; + presentToSwapChain(presenter, &entry, (PacingData::FrameType)frameType); } } @@ -450,6 +512,9 @@ DWORD WINAPI interpolationThread(LPVOID param) SimpleMovingAverage<10, double> frameTime{}; int64_t previousQpc = 0; + int64_t previousDelta = 0; + int64_t qpcFrequency; + QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(&qpcFrequency)); while (!presenter->shutdown) { @@ -463,9 +528,36 @@ DWORD WINAPI interpolationThread(LPVOID param) presenter->scheduledInterpolations.invalidate(); LeaveCriticalSection(&presenter->criticalSectionScheduledFrame); - - waitForFenceValue(presenter->interpolationFence, - 
entry.frames[PacingData::FrameType::Interpolated_1].interpolationCompletedFenceValue); + + int64_t preWaitQPC = 0; + QueryPerformanceCounter(reinterpret_cast(&preWaitQPC)); + int64_t previousPresentQPC = presenter->previousPresentQpc; + int64_t targetDelta = (previousPresentQPC + previousDelta) - preWaitQPC; + + //Risk of late wake if overthreading. If allowed, use WaitForSingleObject to wait for interpolationFence if the target is more than 2ms later. + if (previousPresentQPC && (targetDelta * 1000000) / qpcFrequency > 2000) + { + waitForFenceValue( + presenter->interpolationFence, + entry.frames[PacingData::FrameType::Interpolated_1].interpolationCompletedFenceValue, + INFINITE, + nullptr, + presenter->allowWaitForSingleObjectOnFence + ); + + } + else + { + // spin to wait for interpolationFence if the target is less than 2ms. + waitForFenceValue( + presenter->interpolationFence, + entry.frames[PacingData::FrameType::Interpolated_1].interpolationCompletedFenceValue, + INFINITE, + nullptr, + false + ); + } + SetEvent(presenter->interpolationEvent); int64_t currentQpc = 0; @@ -475,8 +567,6 @@ DWORD WINAPI interpolationThread(LPVOID param) previousQpc = currentQpc; // reset pacing averaging if delta > 10 fps, - int64_t qpcFrequency; - QueryPerformanceFrequency(reinterpret_cast(&qpcFrequency)); const float fTimeoutInSeconds = 0.1f; double deltaQpcResetThreashold = double(qpcFrequency * fTimeoutInSeconds); if ((deltaQpc > deltaQpcResetThreashold) || presenter->resetTimer) @@ -488,14 +578,14 @@ DWORD WINAPI interpolationThread(LPVOID param) frameTime.update(deltaQpc); } - // set presentation time: reduce based on variance and subract 0.5ms as safety margin so we don't lock on a framerate lower than necessary - double safetyMarginInSec = 0.0001; // = 0.1ms - int64_t qpcSafetyMargin = int64_t(qpcFrequency * safetyMarginInSec); - const int64_t conservativeAvg = int64_t(frameTime.getAverage() * 0.5 - frameTime.getVariance() * 0.1); + // set presentation time: reduce based 
on variance and subtract safety margin so we don't lock on a framerate lower than necessary + int64_t qpcSafetyMargin = int64_t(qpcFrequency * presenter->safetyMarginInSec); + const int64_t conservativeAvg = int64_t(frameTime.getAverage() * 0.5 - frameTime.getVariance() * presenter->varianceFactor); const int64_t deltaToUse = conservativeAvg > qpcSafetyMargin ? (conservativeAvg - qpcSafetyMargin) : 0; entry.frames[PacingData::FrameType::Interpolated_1].presentQpcDelta = deltaToUse; entry.frames[PacingData::FrameType::Real].presentQpcDelta = deltaToUse; - + previousDelta = deltaToUse; + // schedule presents EnterCriticalSection(&presenter->criticalSectionScheduledFrame); presenter->scheduledPresents = entry; @@ -673,8 +763,11 @@ HRESULT FrameInterpolationSwapChainDX12::init(HWND presentInfo.device->CreateFence(framesSentForPresentation, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&presentInfo.replacementBufferFence)); presentInfo.replacementBufferFence->SetName(L"AMD FSR ReplacementBufferFence"); - presentInfo.device->CreateFence(framesSentForPresentation, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&presentInfo.compositionFence)); - presentInfo.compositionFence->SetName(L"AMD FSR CompositionFence"); + presentInfo.device->CreateFence(framesSentForPresentation, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&presentInfo.compositionFenceGPU)); + presentInfo.compositionFenceGPU->SetName(L"AMD FSR CompositionFence GPU"); + + presentInfo.device->CreateFence(framesSentForPresentation, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&presentInfo.compositionFenceCPU)); + presentInfo.compositionFenceCPU->SetName(L"AMD FSR CompositionFence CPU"); replacementFrameLatencyWaitableObjectHandle = CreateEvent(0, FALSE, TRUE, nullptr); @@ -759,7 +852,7 @@ HRESULT FrameInterpolationSwapChainDX12::shutdown() if (presentInfo.interpolationFence) { presentInfo.interpolationQueue->Signal(presentInfo.interpolationFence, ++interpolationFenceValue); - waitForFenceValue(presentInfo.interpolationFence, interpolationFenceValue, 
INFINITE, waitCallback); + waitForFenceValue(presentInfo.interpolationFence, interpolationFenceValue, INFINITE, presentInfo.waitCallback, presentInfo.interpolationFence); } } @@ -769,13 +862,14 @@ HRESULT FrameInterpolationSwapChainDX12::shutdown() SafeRelease(presentInfo.interpolationFence); SafeRelease(presentInfo.presentFence); SafeRelease(presentInfo.replacementBufferFence); - SafeRelease(presentInfo.compositionFence); + SafeRelease(presentInfo.compositionFenceGPU); + SafeRelease(presentInfo.compositionFenceCPU); std::ignore = SafeRelease(presentInfo.swapChain); if (presentInfo.gameFence) { - waitForFenceValue(presentInfo.gameFence, gameFenceValue, INFINITE, waitCallback); + waitForFenceValue(presentInfo.gameFence, gameFenceValue, INFINITE, presentInfo.waitCallback); } SafeRelease(presentInfo.gameFence); @@ -861,9 +955,10 @@ void FrameInterpolationSwapChainDX12::setFrameGenerationConfig(FfxFrameGeneratio if ( presentInfo.interpolationQueue != inputInterpolationQueue || interpolationEnabled != config->frameGenerationEnabled || presentCallback != inputPresentCallback - || presentCallbackContext != inputPresentCallbackCtx + || presentCallbackContext != inputPresentCallbackCtx || frameGenerationCallback != config->frameGenerationCallback - || frameGenerationCallbackContext != config->frameGenerationCallbackContext) + || frameGenerationCallbackContext != config->frameGenerationCallbackContext + || drawDebugPacingLines != config->drawDebugPacingLines) { applyChangesNow = true; } @@ -876,6 +971,7 @@ void FrameInterpolationSwapChainDX12::setFrameGenerationConfig(FfxFrameGeneratio currentFrameID = config->frameID; presentInterpolatedOnly = config->onlyPresentInterpolated; interpolationRect = config->interpolationRect; + drawDebugPacingLines = config->drawDebugPacingLines; if (presentInfo.interpolationQueue != inputInterpolationQueue) { @@ -939,8 +1035,6 @@ bool FrameInterpolationSwapChainDX12::destroyReplacementResources() discardOutstandingInterpolationCommandLists(); 
{ - ID3D12Device8* device = nullptr; - for (size_t i = 0; i < _countof(replacementSwapBuffers); i++) { uint64_t resourceSize = GetResourceGpuMemorySize(replacementSwapBuffers[i].resource); @@ -964,9 +1058,6 @@ bool FrameInterpolationSwapChainDX12::destroyReplacementResources() } uiReplacementBuffer.destroy(); - - SafeRelease(device); - } // reset counters used in buffer management @@ -998,9 +1089,14 @@ bool FrameInterpolationSwapChainDX12::destroyReplacementResources() presentInfo.replacementBufferFence->Signal(framesSentForPresentation); } - if (presentInfo.compositionFence) + if (presentInfo.compositionFenceGPU) { - presentInfo.compositionFence->Signal(framesSentForPresentation); + presentInfo.compositionFenceGPU->Signal(framesSentForPresentation); + } + + if (presentInfo.compositionFenceCPU) + { + presentInfo.compositionFenceCPU->Signal(framesSentForPresentation); } frameInterpolationResetCondition = true; @@ -1020,9 +1116,9 @@ bool FrameInterpolationSwapChainDX12::destroyReplacementResources() bool FrameInterpolationSwapChainDX12::waitForPresents() { // wait for interpolation to finish - waitForFenceValue(presentInfo.gameFence, gameFenceValue, INFINITE, waitCallback); - waitForFenceValue(presentInfo.interpolationFence, interpolationFenceValue, INFINITE, waitCallback); - waitForFenceValue(presentInfo.presentFence, framesSentForPresentation, INFINITE, waitCallback); + waitForFenceValue(presentInfo.gameFence, gameFenceValue, INFINITE, presentInfo.waitCallback); + waitForFenceValue(presentInfo.interpolationFence, interpolationFenceValue, INFINITE, presentInfo.waitCallback); + waitForFenceValue(presentInfo.presentFence, framesSentForPresentation, INFINITE, presentInfo.waitCallback); return true; } @@ -1126,7 +1222,16 @@ void FrameInterpolationSwapChainDX12::registerUiResource(FfxResource uiResource, void FrameInterpolationSwapChainDX12::setWaitCallback(FfxWaitCallbackFunc waitCallbackFunc) { - waitCallback = waitCallbackFunc; + presentInfo.waitCallback = 
waitCallbackFunc; +} + +void FrameInterpolationSwapChainDX12::setFramePacingTuning(const FfxSwapchainFramePacingTuning* framePacingTuning) +{ + presentInfo.safetyMarginInSec = static_cast (framePacingTuning->safetyMarginInMs) / 1000.0; + presentInfo.varianceFactor = static_cast (framePacingTuning->varianceFactor); + presentInfo.allowHybridSpin = framePacingTuning->allowHybridSpin; + presentInfo.hybridSpinTime = framePacingTuning->hybridSpinTime; + presentInfo.allowWaitForSingleObjectOnFence = framePacingTuning->allowWaitForSingleObjectOnFence; } void FrameInterpolationSwapChainDX12::GetGpuMemoryUsage(FfxEffectMemoryUsage* vramUsage) @@ -1183,7 +1288,8 @@ void FrameInterpolationSwapChainDX12::presentPassthrough(UINT SyncInterval, UINT passthroughList->execute(true); presentInfo.presentQueue->Signal(presentInfo.replacementBufferFence, ++framesSentForPresentation); - presentInfo.presentQueue->Signal(presentInfo.compositionFence, framesSentForPresentation); + presentInfo.presentQueue->Signal(presentInfo.compositionFenceGPU, framesSentForPresentation); + presentInfo.compositionFenceCPU->Signal(framesSentForPresentation); setSwapChainBufferResourceInfo(presentInfo.swapChain, false); presentInfo.swapChain->Present(SyncInterval, Flags); @@ -1229,7 +1335,8 @@ void FrameInterpolationSwapChainDX12::presentWithUiComposition(UINT SyncInterval uiCompositionList->execute(true); presentInfo.presentQueue->Signal(presentInfo.replacementBufferFence, ++framesSentForPresentation); - presentInfo.presentQueue->Signal(presentInfo.compositionFence, framesSentForPresentation); + presentInfo.presentQueue->Signal(presentInfo.compositionFenceGPU, framesSentForPresentation); + presentInfo.compositionFenceCPU->Signal(framesSentForPresentation); setSwapChainBufferResourceInfo(presentInfo.swapChain, false); presentInfo.swapChain->Present(SyncInterval, Flags); @@ -1253,7 +1360,7 @@ void FrameInterpolationSwapChainDX12::dispatchInterpolationCommands(FfxResource* *pRealFrame = backbuffer; // 
interpolation queue must wait for output resource to become available - presentInfo.interpolationQueue->Wait(presentInfo.compositionFence, interpolationOutputs[interpolationBufferIndex].availabilityFenceValue); + presentInfo.interpolationQueue->Wait(presentInfo.compositionFenceGPU, interpolationOutputs[interpolationBufferIndex].availabilityFenceValue); auto pRegisteredCommandList = registeredInterpolationCommandLists[currentBackBufferIndex]; if (pRegisteredCommandList != nullptr) @@ -1313,6 +1420,8 @@ void FrameInterpolationSwapChainDX12::presentInterpolated(UINT SyncInterval, UIN PacingData entry{}; entry.presentCallback = presentCallback; entry.presentCallbackContext = presentCallbackContext; + entry.drawDebugPacingLines = drawDebugPacingLines; + if (presentInfo.uiCompositionFlags & FFX_UI_COMPOSITION_FLAG_ENABLE_INTERNAL_UI_DOUBLE_BUFFERING) { FfxResourceDescription uiBufferDesc = ffxGetResourceDescriptionDX12(uiReplacementBuffer.resource); @@ -1384,7 +1493,7 @@ bool FrameInterpolationSwapChainDX12::verifyUiDuplicateResource() { uint64_t resourceSize = GetResourceGpuMemorySize(uiReplacementBuffer.resource); totalUsageInBytes -= resourceSize; - waitForFenceValue(presentInfo.compositionFence, framesSentForPresentation, INFINITE, waitCallback); + waitForFenceValue(presentInfo.compositionFenceGPU, framesSentForPresentation, INFINITE, presentInfo.waitCallback); SafeRelease(uiReplacementBuffer.resource); uiReplacementBuffer = {}; } @@ -1399,7 +1508,7 @@ bool FrameInterpolationSwapChainDX12::verifyUiDuplicateResource() if (uiResourceDesc.Format != internalDesc.Format || uiResourceDesc.Width != internalDesc.Width || uiResourceDesc.Height != internalDesc.Height) { - waitForFenceValue(presentInfo.compositionFence, framesSentForPresentation, INFINITE, waitCallback); + waitForFenceValue(presentInfo.compositionFenceGPU, framesSentForPresentation, INFINITE, presentInfo.waitCallback); SafeRelease(uiReplacementBuffer.resource); } } @@ -1510,7 +1619,9 @@ HRESULT 
STDMETHODCALLTYPE FrameInterpolationSwapChainDX12::Present(UINT SyncInte const bool fgCommandListConfigured = registeredInterpolationCommandLists[currentBackBufferIndex] != nullptr; const bool runInterpolation = interpolationEnabled && (fgCallbackConfigured || fgCommandListConfigured); - presentInfo.gameQueue->Wait(presentInfo.compositionFence, previousFramesSentForPresentation); + // Ensure presenter thread has signaled before applying any wait to the game queue + waitForFenceValue(presentInfo.compositionFenceCPU, previousFramesSentForPresentation); + presentInfo.gameQueue->Wait(presentInfo.compositionFenceGPU, previousFramesSentForPresentation); // Verify integrity of internal Ui resource if (verifyUiDuplicateResource()) @@ -1526,9 +1637,6 @@ HRESULT STDMETHODCALLTYPE FrameInterpolationSwapChainDX12::Present(UINT SyncInte { WaitForSingleObject(presentInfo.interpolationEvent, INFINITE); - // NOTE: why signal here when it's also signaled in presentInterpolated? - presentInfo.gameQueue->Signal(presentInfo.gameFence, ++gameFenceValue); - presentInterpolated(SyncInterval, Flags); } else @@ -1563,7 +1671,7 @@ HRESULT STDMETHODCALLTYPE FrameInterpolationSwapChainDX12::Present(UINT SyncInte LeaveCriticalSection(&criticalSection); - waitForFenceValue(presentInfo.replacementBufferFence, replacementSwapBuffers[replacementSwapBufferIndex].availabilityFenceValue, INFINITE, waitCallback); + waitForFenceValue(presentInfo.replacementBufferFence, replacementSwapBuffers[replacementSwapBufferIndex].availabilityFenceValue, INFINITE, presentInfo.waitCallback); return S_OK; } diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.h b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.h index a1956005..b5c82a7a 100644 --- a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.h +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12.h @@ -47,6 +47,8 @@ 
typedef struct PacingData bool vsync; bool tearingSupported; bool usePremulAlphaComposite; + bool drawDebugPacingLines; + UINT64 interpolationCompletedFenceValue; UINT64 replacementBufferFenceSignal; @@ -100,14 +102,25 @@ typedef struct FrameinterpolationPresentInfo ID3D12Fence* interpolationFence = nullptr; ID3D12Fence* presentFence = nullptr; ID3D12Fence* replacementBufferFence = nullptr; - ID3D12Fence* compositionFence = nullptr; + ID3D12Fence* compositionFenceCPU = nullptr; + ID3D12Fence* compositionFenceGPU = nullptr; HANDLE presentEvent = 0; HANDLE interpolationEvent = 0; HANDLE pacerEvent = 0; - bool resetTimer = false; + volatile bool resetTimer = false; volatile bool shutdown = false; + + volatile double safetyMarginInSec = 0.0001; //0.1ms + volatile double varianceFactor = 0.1; + volatile bool allowHybridSpin = false; + volatile uint32_t hybridSpinTime = 2; //Measured in system timer resolution units. Default is 2. Below 1 will frequently result in overshoot. Overshoots stop showing up >=2. 
+ volatile bool allowWaitForSingleObjectOnFence = false; + + FfxWaitCallbackFunc waitCallback = nullptr; + + volatile int64_t previousPresentQpc = 0; } FrameinterpolationPresentInfo; typedef struct ReplacementResource @@ -173,6 +186,7 @@ class DECLSPEC_UUID("BEED74B2-282E-4AA3-BBF7-534560507A45") FrameInterpolationSw bool interpolationEnabled = false; bool presentInterpolatedOnly = false; bool previousFrameWasInterpolated = false; + bool drawDebugPacingLines = false; UINT64 currentFrameID = 0; @@ -189,7 +203,6 @@ class DECLSPEC_UUID("BEED74B2-282E-4AA3-BBF7-534560507A45") FrameInterpolationSw void* presentCallbackContext = nullptr; FfxFrameGenerationDispatchFunc frameGenerationCallback = nullptr; void* frameGenerationCallbackContext = nullptr; - FfxWaitCallbackFunc waitCallback = nullptr; void presentPassthrough(UINT SyncInterval, UINT Flags); void presentWithUiComposition(UINT SyncInterval, UINT Flags); @@ -220,6 +233,7 @@ class DECLSPEC_UUID("BEED74B2-282E-4AA3-BBF7-534560507A45") FrameInterpolationSw void registerUiResource(FfxResource uiResource, uint32_t flags); void setWaitCallback(FfxWaitCallbackFunc waitCallbackFunc); + void setFramePacingTuning(const FfxSwapchainFramePacingTuning* framePacingTuning); void GetGpuMemoryUsage(FfxEffectMemoryUsage * vramUsage); diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_DebugPacing.cpp b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_DebugPacing.cpp new file mode 100644 index 00000000..862c9b53 --- /dev/null +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_DebugPacing.cpp @@ -0,0 +1,365 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "d3d12.h" +#include "FrameInterpolationSwapchainDX12.h" +#include "FrameInterpolationSwapchainDX12_Helpers.h" +#include "FrameInterpolationSwapchainDX12_DebugPacing.h" + +namespace DebugPacingshaders +{ +#include "FrameInterpolationSwapchainDebugPacingVS.h" +#include "FrameInterpolationSwapchainDebugPacingPS.h" +} // namespace DebugPacingshaders; + +typedef HRESULT(__stdcall* D3D12SerializeVersionedRootSignatureType)(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC* pRootSignature, + ID3DBlob** ppBlob, + ID3DBlob** ppErrorBlob); + +const uint32_t s_debugPacingDescRingBufferSize = FFX_FRAME_INTERPOLATION_SWAP_CHAIN_MAX_BUFFER_COUNT * 2 * 1; // FFX_FRAME_INTERPOLATION_SWAP_CHAIN_MAX_BUFFER_COUNT real frames (i.e. 
* 2), 1 CBV each should be enough +const uint32_t s_debugPacingDescHeapRtvSize = FFX_FRAME_INTERPOLATION_SWAP_CHAIN_MAX_BUFFER_COUNT * 2; +ID3D12RootSignature* s_debugPacingRootSignature; +ID3D12PipelineState* s_debugPacingPipeline; +uint32_t s_debugPacingDescRingBufferBase; +ID3D12DescriptorHeap* s_debugPacingDescRingBuffer; +uint32_t s_debugPacingNextRtvDescriptor; +ID3D12DescriptorHeap* s_debugPacingDescHeapRtvCpu; +ID3D12Resource* s_debugPacingConstantBuffer[FFX_FRAME_INTERPOLATION_SWAP_CHAIN_MAX_BUFFER_COUNT]; +uint32_t s_debugPacingFrameIndex; +const unsigned int s_debugPacingConstantBufferSize = 256u; + +// create the pipeline state to use for UI composition +// pretty similar to FfxCreatePipelineFunc +FfxErrorCodes CreateDebugPacingPipeline(ID3D12Device* dx12Device, DXGI_FORMAT fmt) +{ + D3D12_DESCRIPTOR_RANGE1 range; + range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + range.NumDescriptors = 1; + range.BaseShaderRegister = 0; + range.RegisterSpace = 0; + range.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; + range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + D3D12_ROOT_PARAMETER1 rootParameters; + rootParameters.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameters.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + rootParameters.DescriptorTable.NumDescriptorRanges = 1; + rootParameters.DescriptorTable.pDescriptorRanges = ⦥ + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + rootSignatureDesc.Desc_1_1.NumParameters = 1; + rootSignatureDesc.Desc_1_1.pParameters = &rootParameters; + rootSignatureDesc.Desc_1_1.NumStaticSamplers = 0; + rootSignatureDesc.Desc_1_1.pStaticSamplers = nullptr; + rootSignatureDesc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + ID3D12RootSignature* dx12RootSignature; + ID3DBlob* signature = nullptr; + ID3DBlob* error = nullptr; + HMODULE d3d12ModuleHandle = 
GetModuleHandleW(L"D3D12.dll"); + if (NULL != d3d12ModuleHandle) + { + D3D12SerializeVersionedRootSignatureType d3d12SerializeVersionedRootSignatureFunc = + (D3D12SerializeVersionedRootSignatureType)GetProcAddress(d3d12ModuleHandle, "D3D12SerializeVersionedRootSignature"); + + if (nullptr != d3d12SerializeVersionedRootSignatureFunc) + { + HRESULT result = d3d12SerializeVersionedRootSignatureFunc(&rootSignatureDesc, &signature, &error); + if (FAILED(result)) + { + return FFX_ERROR_BACKEND_API_ERROR; + } + + result = dx12Device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&dx12RootSignature)); + SafeRelease(signature); + SafeRelease(error); + + if (FAILED(result)) + { + return FFX_ERROR_BACKEND_API_ERROR; + } + } + else + { + return FFX_ERROR_BACKEND_API_ERROR; + } + } + else + { + return FFX_ERROR_BACKEND_API_ERROR; + } + + // create the PSO + D3D12_RASTERIZER_DESC rasterDesc; + rasterDesc.FillMode = D3D12_FILL_MODE_SOLID; + rasterDesc.CullMode = D3D12_CULL_MODE_BACK; + rasterDesc.FrontCounterClockwise = FALSE; + rasterDesc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + rasterDesc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + rasterDesc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + rasterDesc.DepthClipEnable = FALSE; + rasterDesc.MultisampleEnable = FALSE; + rasterDesc.AntialiasedLineEnable = FALSE; + rasterDesc.ForcedSampleCount = 0; + rasterDesc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + D3D12_BLEND_DESC blendDesc; + blendDesc.AlphaToCoverageEnable = FALSE; + blendDesc.IndependentBlendEnable = FALSE; + const D3D12_RENDER_TARGET_BLEND_DESC defaultRenderTargetBlendDesc = { + FALSE, + FALSE, + D3D12_BLEND_ONE, + D3D12_BLEND_ZERO, + D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, + D3D12_BLEND_ZERO, + D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, + }; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) + blendDesc.RenderTarget[i] = 
defaultRenderTargetBlendDesc; + + D3D12_DEPTH_STENCIL_DESC depthStencilDesc; + depthStencilDesc.DepthEnable = FALSE; + depthStencilDesc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + depthStencilDesc.DepthFunc = D3D12_COMPARISON_FUNC_LESS; + depthStencilDesc.StencilEnable = FALSE; + depthStencilDesc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + depthStencilDesc.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = {D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS}; + depthStencilDesc.FrontFace = defaultStencilOp; + depthStencilDesc.BackFace = defaultStencilOp; + + D3D12_GRAPHICS_PIPELINE_STATE_DESC dx12PipelineStateDescription = {}; + dx12PipelineStateDescription.RasterizerState = rasterDesc; + dx12PipelineStateDescription.BlendState = blendDesc; + dx12PipelineStateDescription.DepthStencilState = depthStencilDesc; + dx12PipelineStateDescription.DepthStencilState.DepthEnable = FALSE; + dx12PipelineStateDescription.SampleMask = UINT_MAX; + dx12PipelineStateDescription.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + dx12PipelineStateDescription.SampleDesc = {1, 0}; + dx12PipelineStateDescription.NumRenderTargets = 1; + dx12PipelineStateDescription.RTVFormats[0] = fmt; + + dx12PipelineStateDescription.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + dx12PipelineStateDescription.pRootSignature = dx12RootSignature; + + s_debugPacingRootSignature = dx12RootSignature; + + { + dx12PipelineStateDescription.VS.pShaderBytecode = DebugPacingshaders::g_mainVS; + dx12PipelineStateDescription.VS.BytecodeLength = sizeof(DebugPacingshaders::g_mainVS); + dx12PipelineStateDescription.PS.pShaderBytecode = DebugPacingshaders::g_mainPS; + dx12PipelineStateDescription.PS.BytecodeLength = sizeof(DebugPacingshaders::g_mainPS); + + if (FAILED(dx12Device->CreateGraphicsPipelineState(&dx12PipelineStateDescription, + IID_PPV_ARGS(reinterpret_cast(&s_debugPacingPipeline))))) + 
return FFX_ERROR_BACKEND_API_ERROR; + } + + return FFX_OK; +} + +FfxErrorCodes verifyDebugPacingGpuResources(ID3D12Device* dx12Device, DXGI_FORMAT fmt) +{ + FFX_ASSERT(nullptr != dx12Device); + + if (nullptr == s_debugPacingPipeline) + { + FfxErrorCodes res = CreateDebugPacingPipeline(dx12Device, fmt); + if (res != FFX_OK) + return res; + } + + if (nullptr == s_debugPacingDescRingBuffer) + { + D3D12_DESCRIPTOR_HEAP_DESC descHeap = {}; + descHeap.NumDescriptors = s_debugPacingDescRingBufferSize; + descHeap.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + descHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + descHeap.NodeMask = 0; + s_debugPacingDescRingBufferBase = 0; + if(FAILED(dx12Device->CreateDescriptorHeap(&descHeap, IID_PPV_ARGS(&s_debugPacingDescRingBuffer)))) + return FFX_ERROR_BACKEND_API_ERROR; + } + + if (nullptr == s_debugPacingDescHeapRtvCpu) + { + D3D12_DESCRIPTOR_HEAP_DESC descHeap = {}; + descHeap.NumDescriptors = s_debugPacingDescHeapRtvSize; + descHeap.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + descHeap.NodeMask = 0; + descHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + if(FAILED(dx12Device->CreateDescriptorHeap(&descHeap, IID_PPV_ARGS(&s_debugPacingDescHeapRtvCpu)))) + return FFX_ERROR_BACKEND_API_ERROR; + } + + if (nullptr == s_debugPacingConstantBuffer[0]) + { + D3D12_RESOURCE_DESC constantBufferDesc = {}; + constantBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + constantBufferDesc.Width = s_debugPacingConstantBufferSize; + constantBufferDesc.Height = 1; + constantBufferDesc.DepthOrArraySize = 1; + constantBufferDesc.MipLevels = 1; + constantBufferDesc.SampleDesc.Count = 1; + constantBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + constantBufferDesc.Format = DXGI_FORMAT_UNKNOWN; + + D3D12_HEAP_PROPERTIES heapProperties = {}; + heapProperties.Type = D3D12_HEAP_TYPE_UPLOAD; + + for (int i = 0; i < FFX_FRAME_INTERPOLATION_SWAP_CHAIN_MAX_BUFFER_COUNT; ++i) + { + if(FAILED(dx12Device->CreateCommittedResource( + &heapProperties, 
+ D3D12_HEAP_FLAG_NONE, + &constantBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&s_debugPacingConstantBuffer[i])))) + { + return FFX_ERROR_BACKEND_API_ERROR; + } + } + + s_debugPacingFrameIndex = 0; + } + + return FFX_OK; +} + +void releasePacingDebugGpuResources() +{ + for (int i = 0; i < FFX_FRAME_INTERPOLATION_SWAP_CHAIN_MAX_BUFFER_COUNT; ++i) + { + SafeRelease(s_debugPacingConstantBuffer[i]); + } + s_debugPacingFrameIndex = 0; + + SafeRelease(s_debugPacingRootSignature); + SafeRelease(s_debugPacingPipeline); + + SafeRelease(s_debugPacingDescRingBuffer); + s_debugPacingDescRingBufferBase = 0; + + SafeRelease(s_debugPacingDescHeapRtvCpu); + s_debugPacingNextRtvDescriptor = 0; +} + +FFX_API FfxErrorCode ffxFrameInterpolationDebugPacing(const FfxPresentCallbackDescription* params) +{ + ID3D12Device* dx12Device = reinterpret_cast(params->device); + ID3D12Resource* pRtResource = (ID3D12Resource*)(params->currentBackBuffer.resource); + + FFX_ASSERT(nullptr != dx12Device); + FFX_ASSERT(nullptr != pRtResource); + + // blit backbuffer and composit UI using a VS/PS pass + D3D12_RESOURCE_DESC desc = pRtResource->GetDesc(); + + FfxErrorCode res = verifyDebugPacingGpuResources(dx12Device, desc.Format); + if (res != FFX_OK) + return res; + + ID3D12CommandList* pCommandList = reinterpret_cast(params->commandList); + ID3D12GraphicsCommandList* pCmdList = (ID3D12GraphicsCommandList*)pCommandList; + ID3D12PipelineState* dx12PipelineStateObject = nullptr; + + dx12PipelineStateObject = s_debugPacingPipeline; + + FFX_ASSERT(nullptr != pCommandList); + FFX_ASSERT(nullptr != pCmdList); + FFX_ASSERT(nullptr != dx12PipelineStateObject); + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = pRtResource; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + if (barrier.Transition.StateBefore != 
barrier.Transition.StateAfter) + { + pCmdList->ResourceBarrier(1, &barrier); + } + + // set root signature + pCmdList->SetGraphicsRootSignature(s_debugPacingRootSignature); + + // set descriptor heap + ID3D12DescriptorHeap* dx12DescriptorHeap = reinterpret_cast(s_debugPacingDescRingBuffer); + pCmdList->SetDescriptorHeaps(1, &dx12DescriptorHeap); + + // set up the descriptor table + D3D12_GPU_DESCRIPTOR_HANDLE gpuView = dx12DescriptorHeap->GetGPUDescriptorHandleForHeapStart(); + gpuView.ptr += s_debugPacingDescRingBufferBase * dx12Device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + D3D12_CPU_DESCRIPTOR_HANDLE cpuView = dx12DescriptorHeap->GetCPUDescriptorHandleForHeapStart(); + cpuView.ptr += s_debugPacingDescRingBufferBase * dx12Device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + UINT8* gpuData; + s_debugPacingConstantBuffer[s_debugPacingFrameIndex]->Map(0, nullptr, reinterpret_cast(&gpuData)); + memcpy(gpuData, &s_debugPacingFrameIndex, sizeof(s_debugPacingFrameIndex)); + s_debugPacingConstantBuffer[s_debugPacingFrameIndex]->Unmap(0, nullptr); + + D3D12_CONSTANT_BUFFER_VIEW_DESC dx12CbvDescription = {}; + dx12CbvDescription.BufferLocation = s_debugPacingConstantBuffer[s_debugPacingFrameIndex]->GetGPUVirtualAddress(); + dx12CbvDescription.SizeInBytes = s_debugPacingConstantBufferSize; + dx12Device->CreateConstantBufferView(&dx12CbvDescription, cpuView); + + s_debugPacingFrameIndex = (s_debugPacingFrameIndex + 1) % FFX_FRAME_INTERPOLATION_SWAP_CHAIN_MAX_BUFFER_COUNT; + + s_debugPacingDescRingBufferBase = (s_debugPacingDescRingBufferBase + 1) % s_debugPacingDescRingBufferSize; + pCmdList->SetGraphicsRootDescriptorTable(0, gpuView); + + D3D12_RENDER_TARGET_VIEW_DESC colorDesc = {}; + colorDesc.Format = desc.Format; + colorDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + colorDesc.Texture2D.MipSlice = 0; + colorDesc.Texture2D.PlaneSlice = 0; + + D3D12_CPU_DESCRIPTOR_HANDLE backbufferRTV = 
s_debugPacingDescHeapRtvCpu->GetCPUDescriptorHandleForHeapStart(); + backbufferRTV.ptr += s_debugPacingNextRtvDescriptor * dx12Device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + s_debugPacingNextRtvDescriptor = (s_debugPacingNextRtvDescriptor + 1) % s_debugPacingDescHeapRtvSize; + dx12Device->CreateRenderTargetView(pRtResource, &colorDesc, backbufferRTV); + + D3D12_RESOURCE_DESC backBufferDesc = pRtResource->GetDesc(); + D3D12_VIEWPORT vpd = {0.0f, 0.0f, 32, static_cast(backBufferDesc.Height), 0.0f, 1.0f}; + D3D12_RECT srd = {0, 0, (LONG)backBufferDesc.Width, (LONG)backBufferDesc.Height}; + + pCmdList->OMSetRenderTargets(1, &backbufferRTV, true, NULL); + pCmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pCmdList->SetPipelineState(dx12PipelineStateObject); + pCmdList->RSSetViewports(1, &vpd); + pCmdList->RSSetScissorRects(1, &srd); + pCmdList->DrawInstanced(3, 1, 0, 0); + + D3D12_RESOURCE_STATES tmpStateBefore = barrier.Transition.StateBefore; + barrier.Transition.StateBefore = barrier.Transition.StateAfter; + barrier.Transition.StateAfter = tmpStateBefore; + + pCmdList->ResourceBarrier(1, &barrier); + + return FFX_OK; +} diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_DebugPacing.h b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_DebugPacing.h new file mode 100644 index 00000000..45c12208 --- /dev/null +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_DebugPacing.h @@ -0,0 +1,30 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#include +void releasePacingDebugGpuResources(); + +D3D12_RESOURCE_STATES ffxGetDX12StateFromResourceState(FfxResourceStates state); + +FFX_API FfxErrorCode ffxFrameInterpolationDebugPacing(const FfxPresentCallbackDescription* params); diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.cpp b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.cpp index 882f0ba2..03f52750 100644 --- a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.cpp +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.cpp @@ -25,10 +25,14 @@ #include "FrameInterpolationSwapchainDX12_Helpers.h" +#include +#pragma comment(lib, "winmm.lib") #include #pragma comment(lib, "Dwmapi.lib") +#include //needed for std::to_wstring + IDXGIFactory* getDXGIFactoryFromSwapChain(IDXGISwapChain* swapChain) { IDXGIFactory* factory = nullptr; @@ -39,49 +43,78 @@ IDXGIFactory* getDXGIFactoryFromSwapChain(IDXGISwapChain* swapChain) return factory; } -void waitForPerformanceCount(const int64_t targetCount) +void waitForPerformanceCount(const int64_t targetCount, const int64_t frequency, const UINT timerResolution, const UINT spinTime) { - int64_t currentCount = 0; + int64_t currentCount; + QueryPerformanceCounter(reinterpret_cast(¤tCount)); + if (currentCount >= targetCount) + return; + + double millis = static_cast(((targetCount - currentCount) * 1000000) / frequency) / 1000.; + + //Sleep if safe, to free up cores. + while (timerResolution != UNKNOWN_TIMER_RESOlUTION && millis > spinTime * timerResolution) + { + MMRESULT result = timeBeginPeriod(timerResolution); //Request 1ms timer resolution from OS. Necessary to prevent overshooting sleep. + if (result != TIMERR_NOERROR) + break; //Can't guarantee sleep precision. 
+ Sleep(static_cast((millis - timerResolution*spinTime))); //End sleep a few timer resolution units early to prevent overshooting. + timeEndPeriod(timerResolution); + + QueryPerformanceCounter(reinterpret_cast(¤tCount)); + + millis = static_cast(((targetCount - currentCount) * 1000000) / frequency) / 1000.; + } + do { QueryPerformanceCounter(reinterpret_cast(¤tCount)); } while (currentCount < targetCount); } -bool waitForFenceValue(ID3D12Fence* fence, UINT64 value, DWORD dwMilliseconds, FfxWaitCallbackFunc waitCallback) +bool waitForFenceValue(ID3D12Fence* fence, UINT64 value, DWORD dwMilliseconds, FfxWaitCallbackFunc waitCallback, const bool waitForSingleObjectOnFence) { bool status = false; if (fence) { - if (dwMilliseconds == INFINITE) + int64_t originalQpc = 0; + QueryPerformanceCounter(reinterpret_cast(&originalQpc)); + int64_t currentQpc = originalQpc; + int64_t qpcFrequency; + QueryPerformanceFrequency(reinterpret_cast(&qpcFrequency)); + const DWORD waitCallbackIntervalInMs = 1; + int64_t deltaQpcWaitCallback = qpcFrequency * waitCallbackIntervalInMs / 1000; + int64_t deltaQpcTimeout = qpcFrequency * dwMilliseconds /1000; + wchar_t fenceName[64]; + uint32_t fenceNameLen = sizeof(fenceName); + fence->GetPrivateData(WKPDID_D3DDebugObjectNameW, &fenceNameLen, &fenceName); + + if (waitForSingleObjectOnFence == false) { - int64_t previousQpc = 0; - int64_t currentQpc = 0; - QueryPerformanceCounter(reinterpret_cast(&previousQpc)); - - // call waitCallback every fTimeoutInSeconds - int64_t qpcFrequency; - QueryPerformanceFrequency(reinterpret_cast(&qpcFrequency)); - const float fTimeoutInSeconds = 0.001f; //1ms - double deltaQpcResetThreashold = double(qpcFrequency * fTimeoutInSeconds); - wchar_t fenceName[64]; - uint32_t fenceNameLen = sizeof(fenceName); - fence->GetPrivateData(WKPDID_D3DDebugObjectNameW, &fenceNameLen, &fenceName); - while (fence->GetCompletedValue() < value) + int64_t previousQpc = originalQpc; + while (status != true) { + status = 
fence->GetCompletedValue() >= value; + QueryPerformanceCounter(reinterpret_cast(¤tQpc)); if (waitCallback) { - QueryPerformanceCounter(reinterpret_cast(¤tQpc)); - double deltaQpc = double(currentQpc - previousQpc); - if ((deltaQpc > deltaQpcResetThreashold)) + int64_t deltaQpc = currentQpc - previousQpc; + if ((deltaQpc > deltaQpcWaitCallback)) { waitCallback(fenceName, value); previousQpc = currentQpc; } } + if (dwMilliseconds != INFINITE) + { + int64_t deltaQpc = currentQpc - originalQpc; + if (deltaQpc > deltaQpcTimeout) + { + break; + } + } } - status = true; } else { @@ -93,15 +126,23 @@ bool waitForFenceValue(ID3D12Fence* fence, UINT64 value, DWORD dwMilliseconds, F if (isValidHandle(handle)) { - //Wait until command queue is done. - if (!status) + if (SUCCEEDED(fence->SetEventOnCompletion(value, handle))) { - if (SUCCEEDED(fence->SetEventOnCompletion(value, handle))) + while (status != true) { - status = (WaitForSingleObject(handle, dwMilliseconds) == WAIT_OBJECT_0); + QueryPerformanceCounter(reinterpret_cast(¤tQpc)); + int64_t deltaQpc = currentQpc - originalQpc; + if (deltaQpc > deltaQpcTimeout && dwMilliseconds != INFINITE) + { + break; + } + status = (WaitForSingleObject(handle, waitCallbackIntervalInMs) == WAIT_OBJECT_0); + if (waitCallback) + { + waitCallback(fenceName, value); + } } } - CloseHandle(handle); } } diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.h b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.h index 792c7a3d..855b5024 100644 --- a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.h +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_Helpers.h @@ -34,10 +34,12 @@ typedef int32_t FfxErrorCode; typedef FfxErrorCode(*FfxWaitCallbackFunc)(wchar_t* fenceName, uint64_t fenceValueToWaitFor); +constexpr UINT UNKNOWN_TIMER_RESOlUTION = 0; //Timer resolution is not known. 
+ IDXGIFactory* getDXGIFactoryFromSwapChain(IDXGISwapChain* swapChain); bool isExclusiveFullscreen(IDXGISwapChain* swapChain); -void waitForPerformanceCount(const int64_t targetCount); -bool waitForFenceValue(ID3D12Fence* fence, UINT64 value, DWORD dwMilliseconds = INFINITE, FfxWaitCallbackFunc waitCallback = nullptr); +void waitForPerformanceCount(const int64_t targetCount, const int64_t frequency, const UINT timerResolution, const UINT spinTime); +bool waitForFenceValue(ID3D12Fence* fence, UINT64 value, DWORD dwMilliseconds = INFINITE, FfxWaitCallbackFunc waitCallback = nullptr, const bool waitForSingleObjectOnFence = false); bool isTearingSupported(IDXGIFactory* dxgiFactory); bool getMonitorLuminanceRange(IDXGISwapChain* swapChain, float* outMinLuminance, float* outMaxLuminance); inline bool isValidHandle(HANDLE handle); @@ -68,6 +70,56 @@ inline void SafeCloseHandle(HANDLE& handle) } } +// fix up format in case resource passed for SRV cannot be mapped +static DXGI_FORMAT convertFormatSrv(DXGI_FORMAT format) +{ + switch (format) + { + // Handle Depth + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + case DXGI_FORMAT_D32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + case DXGI_FORMAT_D16_UNORM: + return DXGI_FORMAT_R16_UNORM; + + // Handle TYPELESS format for color: assume FLOAT for 16 and 32 bit channels, else UNORM + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + case DXGI_FORMAT_R32G32B32_TYPELESS: + return DXGI_FORMAT_R32G32B32_FLOAT; + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case DXGI_FORMAT_R32G32_TYPELESS: + return DXGI_FORMAT_R32G32_FLOAT; + case DXGI_FORMAT_R16G16_TYPELESS: + 
return DXGI_FORMAT_R16G16_FLOAT; + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + return DXGI_FORMAT_R10G10B10A2_UNORM; + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB; + case DXGI_FORMAT_R32_TYPELESS: + return DXGI_FORMAT_R32_FLOAT; + case DXGI_FORMAT_R8G8_TYPELESS: + return DXGI_FORMAT_R8G8_UNORM; + case DXGI_FORMAT_R16_TYPELESS: + return DXGI_FORMAT_R16_FLOAT; + case DXGI_FORMAT_R8_TYPELESS: + return DXGI_FORMAT_R8_UNORM; + default: + return format; + } +} + class Dx12Commands { ID3D12CommandQueue* queue = nullptr; diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_UiComposition.cpp b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_UiComposition.cpp index 5e70c0d5..aa6dbab6 100644 --- a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_UiComposition.cpp +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDX12_UiComposition.cpp @@ -247,35 +247,6 @@ void releaseUiBlitGpuResources() s_uiCompositionNextRtvDescriptor = 0; } -// fix up format in case resource passed for SRV cannot be mapped -static DXGI_FORMAT convertFormatSrv(DXGI_FORMAT format) -{ - switch (format) { - // Handle Depth - case DXGI_FORMAT_R32G8X24_TYPELESS: - case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: - return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; - case DXGI_FORMAT_D32_FLOAT: - return DXGI_FORMAT_R32_FLOAT; - case DXGI_FORMAT_R24G8_TYPELESS: - case DXGI_FORMAT_X24_TYPELESS_G8_UINT: - case DXGI_FORMAT_D24_UNORM_S8_UINT: - return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; - case DXGI_FORMAT_D16_UNORM: - return DXGI_FORMAT_R16_UNORM; - - // Handle Color - case DXGI_FORMAT_B8G8R8A8_TYPELESS: - return DXGI_FORMAT_B8G8R8A8_UNORM; - case DXGI_FORMAT_R8G8B8A8_TYPELESS: - return DXGI_FORMAT_R8G8B8A8_UNORM; - - // Others can map as is - default: - return format; - } -} - FFX_API 
FfxErrorCode ffxFrameInterpolationUiComposition(const FfxPresentCallbackDescription* params, void* unusedUserCtx) { (void)unusedUserCtx; diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDebugPacing.hlsl b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDebugPacing.hlsl new file mode 100644 index 00000000..52d3eeaf --- /dev/null +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDebugPacing.hlsl @@ -0,0 +1,50 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +cbuffer input : register(b0) +{ + uint frameIndex; +}; + +float4 mainVS(uint vertexId : SV_VertexID) : SV_POSITION +{ + return float4((int) (vertexId & 1) * 4 - 1, (int) (vertexId & 2) * (-2) + 1, 0.5, 1); +} + +float4 mainPS(float4 vPosition: SV_POSITION) : SV_Target +{ + float a = frameIndex & 1; + float b = (~frameIndex) & 1; + + if (vPosition.x < 16.0f) + { + // Alternate between magenta and green + return float4(b, a, b, 1.0f); + } + else if (vPosition.x < 32.0f) + { + // Alternate between black and white + return float4(a, a, a, 1); + } + + return float4(0.0f, 0.0f, 0.0f, 0.0f); +} diff --git a/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDebugPacingPS.h b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDebugPacingPS.h new file mode 100644 index 00000000..90db5da6 --- /dev/null +++ b/sdk/src/backends/dx12/FrameInterpolationSwapchain/FrameInterpolationSwapchainDebugPacingPS.h @@ -0,0 +1,459 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +#if 0 +; +; Input signature: +; +; Name Index Mask Register SysValue Format Used +; -------------------- ----- ------ -------- -------- ------- ------ +; SV_Position 0 xyzw 0 POS float x +; +; +; Output signature: +; +; Name Index Mask Register SysValue Format Used +; -------------------- ----- ------ -------- -------- ------- ------ +; SV_Target 0 xyzw 0 TARGET float xyzw +; +; shader hash: 19decc2ff08fd3cbd2edcf26536e91df +; +; Pipeline Runtime Information: +; +; Pixel Shader +; DepthOutput=0 +; SampleFrequency=0 +; +; +; Input signature: +; +; Name Index InterpMode DynIdx +; -------------------- ----- ---------------------- ------ +; SV_Position 0 noperspective +; +; Output signature: +; +; Name Index InterpMode DynIdx +; -------------------- ----- ---------------------- ------ +; SV_Target 0 +; +; Buffer Definitions: +; +; cbuffer input +; { +; +; struct input +; { +; +; uint frameIndex; ; Offset: 0 +; +; } input; ; Offset: 0 Size: 4 +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; input cbuffer NA NA CB0 cb0 1 +; +; +; ViewId state: +; +; Number of inputs: 4, outputs: 4 +; Outputs dependent on ViewId: { } +; Inputs contributing to computation of Outputs: +; output 0 depends on inputs: { 0 } +; output 1 depends on inputs: { 0 } +; output 2 depends on inputs: { 0 } +; output 3 depends on inputs: { 0 } +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 } +%input = type { i32 } + +define void 
@mainPS() { + %1 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 0, i1 false) ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex) + %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis) + %3 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %1, i32 0) ; CBufferLoadLegacy(handle,regIndex) + %4 = extractvalue %dx.types.CBufRet.i32 %3, 0 + %5 = and i32 %4, 1 + %6 = uitofp i32 %5 to float + %7 = xor i32 %5, 1 + %8 = uitofp i32 %7 to float + %9 = fcmp fast olt float %2, 1.600000e+01 + br i1 %9, label %13, label %10 + +;