Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

possible naive RCAS negative output fix #116

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h
Original file line number Diff line number Diff line change
Expand Up @@ -748,9 +748,14 @@ void FsrEasuH(
// Immediate constants for peak range.
FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
// Limiters, these need to be high precision RCPs.
FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R);
FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G);
FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B);
#ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
const FfxFloat32 lowerLimiterMultiplier = ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL));
#else
const FfxFloat32 lowerLimiterMultiplier = 1.f;
#endif
FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R) * lowerLimiterMultiplier;
FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G) * lowerLimiterMultiplier;
FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B) * lowerLimiterMultiplier;
FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y);
FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y);
FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y);
Expand Down Expand Up @@ -848,9 +853,14 @@ void FsrEasuH(
// Immediate constants for peak range.
FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
// Limiters, these need to be high precision RCPs.
FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
#ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
const FfxFloat16 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
#else
const FfxFloat16 lowerLimiterMultiplier = 1.f;
#endif
FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R)*lowerLimiterMultiplier;
FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G)*lowerLimiterMultiplier;
FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B)*lowerLimiterMultiplier;
FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
Expand Down Expand Up @@ -967,9 +977,14 @@ void FsrEasuH(
// Immediate constants for peak range.
FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
// Limiters, these need to be high precision RCPs.
FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
#ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
const FfxFloat16x2 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
#else
const FfxFloat16x2 lowerLimiterMultiplier = 1.f;
#endif
FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R)*lowerLimiterMultiplier;
FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G)*lowerLimiterMultiplier;
FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B)*lowerLimiterMultiplier;
FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);
Expand Down
1 change: 1 addition & 0 deletions sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#define GROUP_SIZE 8
#define FSR_RCAS_DENOISE 1
#define FSR_RCAS_LOWER_LIMITER_COMPENSATION 1

#include "ffx_core.h"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#define GROUP_SIZE 8
#define FSR_RCAS_DENOISE 1
#define FSR_RCAS_LOWER_LIMITER_COMPENSATION 1

#include "ffx_core.h"

Expand Down