From 24aef9a98267ee07f3771f010b9e35de392e8355 Mon Sep 17 00:00:00 2001 From: Rye Date: Fri, 22 Aug 2025 13:12:43 -0400 Subject: Fix large performance drop when enabling AA on macOS/lower end GPU hardware --- .../app_settings/shaders/class1/deferred/SMAA.glsl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'indra/newview/app_settings/shaders/class1') diff --git a/indra/newview/app_settings/shaders/class1/deferred/SMAA.glsl b/indra/newview/app_settings/shaders/class1/deferred/SMAA.glsl index fdb77cce6e..5837308965 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/SMAA.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/SMAA.glsl @@ -1351,6 +1351,10 @@ float4 SMAABlendingWeightCalculationPS(float2 texcoord, //----------------------------------------------------------------------------- // Neighborhood Blending Pixel Shader (Third Pass) +vec3 srgb_to_linear(vec3 cs); +vec4 srgb_to_linear4(vec4 cs); +vec3 linear_to_srgb(vec3 cl); + float4 SMAANeighborhoodBlendingPS(float2 texcoord, float4 offset, SMAATexture2D(colorTex), @@ -1369,6 +1373,7 @@ float4 SMAANeighborhoodBlendingPS(float2 texcoord, SMAA_BRANCH if (dot(a, float4(1.0, 1.0, 1.0, 1.0)) < 1e-5) { float4 color = SMAASampleLevelZero(colorTex, texcoord); + color.rgb = srgb_to_linear(color.rgb); #if SMAA_REPROJECTION float2 velocity = SMAA_DECODE_VELOCITY(SMAASampleLevelZero(velocityTex, texcoord)); @@ -1377,6 +1382,7 @@ float4 SMAANeighborhoodBlendingPS(float2 texcoord, color.a = sqrt(5.0 * length(velocity)); #endif + color.rgb = linear_to_srgb(color.rgb); return color; } else { bool h = max(a.x, a.z) > max(a.y, a.w); // max(horizontal) > max(vertical) @@ -1393,8 +1399,13 @@ float4 SMAANeighborhoodBlendingPS(float2 texcoord, // We exploit bilinear filtering to mix current pixel with the chosen // neighbor: - float4 color = blendingWeight.x * SMAASampleLevelZero(colorTex, blendingCoord.xy); - color += blendingWeight.y * SMAASampleLevelZero(colorTex, blendingCoord.zw); + float4 color = SMAASampleLevelZero(colorTex, blendingCoord.xy); + color.rgb = srgb_to_linear(color.rgb); + color = blendingWeight.x * color; + + float4 color2 = SMAASampleLevelZero(colorTex, blendingCoord.zw); + color2.rgb = srgb_to_linear(color2.rgb); + color += blendingWeight.y * color2; #if SMAA_REPROJECTION // Antialias velocity for proper reprojection in a later stage: @@ -1405,6 +1416,7 @@ float4 SMAANeighborhoodBlendingPS(float2 texcoord, color.a = sqrt(5.0 * length(velocity)); #endif + color.rgb = linear_to_srgb(color.rgb); return color; } } -- cgit v1.2.3 From 452c8e0ea40da1de684c80b84ffa9ec712f72ed2 Mon Sep 17 00:00:00 2001 From: Rye Date: Thu, 11 Sep 2025 20:54:32 -0400 Subject: Follow up fixes for Apple Silicon (#4662) * Remove GLM sse flag from cmake that was moved to llpreprocessor.h * Further reduce performance loss of HDR and Sharpening on bandwith-constrained gpu by combining gamma correction into tonemap/sharpening shader passes * Update SSE2NEON to 1.8.0 to fix random render nans * Fix occasional startup crash from LLCachedControl being declared in global scope --- .../app_settings/shaders/class1/deferred/CASF.glsl | 19 +++++++++++++++ .../class1/deferred/postDeferredGammaCorrect.glsl | 4 +--- .../class1/deferred/postDeferredTonemap.glsl | 28 ++++++++++++++++++++-- 3 files changed, 46 insertions(+), 5 deletions(-) (limited to 'indra/newview/app_settings/shaders/class1') diff --git a/indra/newview/app_settings/shaders/class1/deferred/CASF.glsl b/indra/newview/app_settings/shaders/class1/deferred/CASF.glsl index 017855325c..8e12d09443 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/CASF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/CASF.glsl @@ -2545,12 +2545,31 @@ A_STATIC void CasSetup( #endif #ifdef A_GPU + +#ifdef LEGACY_GAMMA +uniform float gamma; + +vec3 legacyGamma(vec3 color) +{ + vec3 c = 1. - clamp(color, vec3(0.), vec3(1.)); + c = 1. - pow(c, vec3(gamma)); // s/b inverted already CPU-side + + return c; +} +#endif + void main() { vec4 diff = vec4(0.f); uvec2 point = uvec2(vary_fragcoord * out_screen_res.xy); CasFilter(diff.r, diff.g, diff.b, point, cas_param_0, cas_param_1, true); diff.a = texture(diffuseRect, vary_fragcoord).a; + diff.rgb = linear_to_srgb(diff.rgb); + +#ifdef LEGACY_GAMMA + diff.rgb = legacyGamma(diff.rgb); +#endif + frag_color = diff; } #endif diff --git a/indra/newview/app_settings/shaders/class1/deferred/postDeferredGammaCorrect.glsl b/indra/newview/app_settings/shaders/class1/deferred/postDeferredGammaCorrect.glsl index 4ccc6f54a8..d7a47ef8cd 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/postDeferredGammaCorrect.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/postDeferredGammaCorrect.glsl @@ -43,8 +43,6 @@ vec3 legacyGamma(vec3 color) return c; } -vec3 clampHDRRange(vec3 color); - void main() { //this is the one of the rare spots where diffuseRect contains linear color values (not sRGB) @@ -55,7 +53,7 @@ void main() diff.rgb = legacyGamma(diff.rgb); #endif - diff.rgb = clampHDRRange(diff.rgb); + diff.rgb = clamp(diff.rgb, vec3(0.0), vec3(1.0)); frag_color = diff; } diff --git a/indra/newview/app_settings/shaders/class1/deferred/postDeferredTonemap.glsl b/indra/newview/app_settings/shaders/class1/deferred/postDeferredTonemap.glsl index 1f01c7f16a..b1218d61af 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/postDeferredTonemap.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/postDeferredTonemap.glsl @@ -31,11 +31,25 @@ uniform sampler2D diffuseRect; in vec2 vary_fragcoord; +#ifdef GAMMA_CORRECT +uniform float gamma; +#endif + vec3 linear_to_srgb(vec3 cl); vec3 toneMap(vec3 color); vec3 clampHDRRange(vec3 color); +#ifdef GAMMA_CORRECT +vec3 legacyGamma(vec3 color) +{ + vec3 c = 1. - clamp(color, vec3(0.), vec3(1.)); + c = 1. - pow(c, vec3(gamma)); // s/b inverted already CPU-side + + return c; +} +#endif + void main() { //this is the one of the rare spots where diffuseRect contains linear color values (not sRGB) @@ -47,8 +61,18 @@ void main() diff.rgb = clamp(diff.rgb, vec3(0.0), vec3(1.0)); #endif - diff.rgb = clampHDRRange(diff.rgb); +#ifdef GAMMA_CORRECT + diff.rgb = linear_to_srgb(diff.rgb); + +#ifdef LEGACY_GAMMA + diff.rgb = legacyGamma(diff.rgb); +#endif + +#endif + + diff.rgb = clamp(diff.rgb, vec3(0.0), vec3(1.0)); // We should always be 0-1 past this point + //debugExposure(diff.rgb); - frag_color = max(diff, vec4(0)); + frag_color = diff; } -- cgit v1.2.3