diff options
| author | Dave Parks <davep@lindenlab.com> | 2011-08-21 16:23:04 -0500 | 
|---|---|---|
| committer | Dave Parks <davep@lindenlab.com> | 2011-08-21 16:23:04 -0500 | 
| commit | ee4fdd2c18c722164d78a7305777fad6e49cba8b (patch) | |
| tree | 62ca05fea249f510152dc339ea333eeb248e17c0 | |
| parent | 233e42b3314c17b2e24334587d960af6e3dc963c (diff) | |
SH-2242 Work in progress on FXAA/glVertexAttrib -- DoF works, physics shape display still doesn't.
| -rw-r--r-- | indra/llrender/llrender.cpp | 20 | ||||
| -rw-r--r-- | indra/newview/app_settings/shaders/class1/deferred/postDeferredF.glsl | 2099 | ||||
| -rw-r--r-- | indra/newview/app_settings/shaders/class1/interface/debugF.glsl | 31 | ||||
| -rw-r--r-- | indra/newview/app_settings/shaders/class1/interface/debugV.glsl | 32 | ||||
| -rw-r--r-- | indra/newview/app_settings/shaders/class1/interface/splattexturerectF.glsl | 33 | ||||
| -rw-r--r-- | indra/newview/app_settings/shaders/class1/interface/splattexturerectV.glsl | 36 | ||||
| -rw-r--r-- | indra/newview/llspatialpartition.cpp | 6 | ||||
| -rw-r--r-- | indra/newview/llviewershadermgr.cpp | 31 | ||||
| -rw-r--r-- | indra/newview/llviewershadermgr.h | 2 | ||||
| -rw-r--r-- | indra/newview/pipeline.cpp | 34 | 
10 files changed, 2286 insertions, 38 deletions
| diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index da85bc202c..03a45c35dc 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -1586,12 +1586,12 @@ void LLRender::diffuseColor3f(F32 r, F32 g, F32 b)  	S32 loc = -1;  	if (shader)  	{ -		loc = shader->getAttribLocation(LLVertexBuffer::TYPE_COLOR); +		loc = shader->getUniformLocation("color");  	}  	if (loc >= 0)  	{ -		glVertexAttrib3fARB(loc, r,g,b); +		shader->uniform4f(loc, r,g,b,1.f);  	}  	else  	{ @@ -1605,12 +1605,12 @@ void LLRender::diffuseColor3fv(const F32* c)  	S32 loc = -1;  	if (shader)  	{ -		loc = shader->getAttribLocation(LLVertexBuffer::TYPE_COLOR); +		loc = shader->getUniformLocation("color");  	}  	if (loc >= 0)  	{ -		glVertexAttrib3fvARB(loc, c); +		shader->uniform4f(loc, c[0], c[1], c[2], 1.f);  	}  	else  	{ @@ -1624,12 +1624,12 @@ void LLRender::diffuseColor4f(F32 r, F32 g, F32 b, F32 a)  	S32 loc = -1;  	if (shader)  	{ -		loc = shader->getAttribLocation(LLVertexBuffer::TYPE_COLOR); +		loc = shader->getUniformLocation("color");  	}  	if (loc >= 0)  	{ -		glVertexAttrib4fARB(loc, r,g,b,a); +		shader->uniform4f(loc, r,g,b,a);  	}  	else  	{ @@ -1644,12 +1644,12 @@ void LLRender::diffuseColor4fv(const F32* c)  	S32 loc = -1;  	if (shader)  	{ -		loc = shader->getAttribLocation(LLVertexBuffer::TYPE_COLOR); +		loc = shader->getUniformLocation("color");  	}  	if (loc >= 0)  	{ -		glVertexAttrib4fvARB(loc, c); +		shader->uniform4fv(loc, 1, c);  	}  	else  	{ @@ -1663,12 +1663,12 @@ void LLRender::diffuseColor4ubv(const U8* c)  	S32 loc = -1;  	if (shader)  	{ -		loc = shader->getAttribLocation(LLVertexBuffer::TYPE_COLOR); +		loc = shader->getUniformLocation("color");  	}  	if (loc >= 0)  	{ -		glVertexAttrib4ubvARB(loc, c); +		shader->uniform4f(loc, c[0]/255.f, c[1]/255.f, c[2]/255.f, c[3]/255.f);  	}  	else  	{ diff --git a/indra/newview/app_settings/shaders/class1/deferred/postDeferredF.glsl 
b/indra/newview/app_settings/shaders/class1/deferred/postDeferredF.glsl index 29f5f899ba..cfcd8585f1 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/postDeferredF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/postDeferredF.glsl @@ -22,16 +22,2070 @@   * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA   * $/LicenseInfo$   */ + +#extension GL_ARB_texture_rectangle : enable + +#define FXAA_PC 1 +#define FXAA_GLSL_130 1 +#define FXAA_QUALITY__PRESET 12 + +/*============================================================================ + + +                    NVIDIA FXAA 3.11 by TIMOTHY LOTTES + + +------------------------------------------------------------------------------ +COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED. +------------------------------------------------------------------------------ +TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED +*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA +OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR +CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR +LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, +OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE +THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + +------------------------------------------------------------------------------ +                           INTEGRATION CHECKLIST +------------------------------------------------------------------------------ +(1.) +In the shader source, setup defines for the desired configuration. +When providing multiple shaders (for different presets), +simply setup the defines differently in multiple files. 
+Example, + +  #define FXAA_PC 1 +  #define FXAA_HLSL_5 1 +  #define FXAA_QUALITY__PRESET 12 + +Or, + +  #define FXAA_360 1 +   +Or, + +  #define FXAA_PS3 1 +   +Etc. + +(2.) +Then include this file, + +  #include "Fxaa3_11.h" + +(3.) +Then call the FXAA pixel shader from within your desired shader. +Look at the FXAA Quality FxaaPixelShader() for docs on inputs. +As of FXAA 3.11 all inputs for all shaders are the same  +to enable easy porting between platforms. + +  return FxaaPixelShader(...); + +(4.) +Ensure pass prior to FXAA outputs RGBL (see next section). +Or use, + +  #define FXAA_GREEN_AS_LUMA 1 + +(5.) +Setup engine to provide the following constants +which are used in the FxaaPixelShader() inputs, + +  FxaaFloat2 fxaaQualityRcpFrame, +  FxaaFloat4 fxaaConsoleRcpFrameOpt, +  FxaaFloat4 fxaaConsoleRcpFrameOpt2, +  FxaaFloat4 fxaaConsole360RcpFrameOpt2, +  FxaaFloat fxaaQualitySubpix, +  FxaaFloat fxaaQualityEdgeThreshold, +  FxaaFloat fxaaQualityEdgeThresholdMin, +  FxaaFloat fxaaConsoleEdgeSharpness, +  FxaaFloat fxaaConsoleEdgeThreshold, +  FxaaFloat fxaaConsoleEdgeThresholdMin, +  FxaaFloat4 fxaaConsole360ConstDir + +Look at the FXAA Quality FxaaPixelShader() for docs on inputs. + +(6.) +Have FXAA vertex shader run as a full screen triangle, +and output "pos" and "fxaaConsolePosPos"  +such that inputs in the pixel shader provide, + +  // {xy} = center of pixel +  FxaaFloat2 pos, + +  // {xy__} = upper left of pixel +  // {__zw} = lower right of pixel +  FxaaFloat4 fxaaConsolePosPos, + +(7.) +Ensure the texture sampler(s) used by FXAA are set to bilinear filtering. 
+ + +------------------------------------------------------------------------------ +                    INTEGRATION - RGBL AND COLORSPACE +------------------------------------------------------------------------------ +FXAA3 requires RGBL as input unless the following is set,  + +  #define FXAA_GREEN_AS_LUMA 1 + +In which case the engine uses green in place of luma, +and requires RGB input is in a non-linear colorspace. + +RGB should be LDR (low dynamic range). +Specifically do FXAA after tonemapping. + +RGB data as returned by a texture fetch can be non-linear, +or linear when FXAA_GREEN_AS_LUMA is not set. +Note an "sRGB format" texture counts as linear, +because the result of a texture fetch is linear data. +Regular "RGBA8" textures in the sRGB colorspace are non-linear. + +If FXAA_GREEN_AS_LUMA is not set, +luma must be stored in the alpha channel prior to running FXAA. +This luma should be in a perceptual space (could be gamma 2.0). +Example pass before FXAA where output is gamma 2.0 encoded, + +  color.rgb = ToneMap(color.rgb); // linear color output +  color.rgb = sqrt(color.rgb);    // gamma 2.0 color output +  return color; + +To use FXAA, + +  color.rgb = ToneMap(color.rgb);  // linear color output +  color.rgb = sqrt(color.rgb);     // gamma 2.0 color output +  color.a = dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114)); // compute luma +  return color; + +Another example where output is linear encoded, +say for instance writing to an sRGB formatted render target, +where the render target does the conversion back to sRGB after blending, + +  color.rgb = ToneMap(color.rgb); // linear color output +  return color; + +To use FXAA, + +  color.rgb = ToneMap(color.rgb); // linear color output +  color.a = sqrt(dot(color.rgb, FxaaFloat3(0.299, 0.587, 0.114))); // compute luma +  return color; + +Getting luma correct is required for the algorithm to work correctly. 
+ + +------------------------------------------------------------------------------ +                          BEING LINEARLY CORRECT? +------------------------------------------------------------------------------ +Applying FXAA to a framebuffer with linear RGB color will look worse. +This is very counter intuitive, but happens to be true in this case. +The reason is because dithering artifacts will be more visible  +in a linear colorspace. + + +------------------------------------------------------------------------------ +                             COMPLEX INTEGRATION +------------------------------------------------------------------------------ +Q. What if the engine is blending into RGB before wanting to run FXAA? + +A. In the last opaque pass prior to FXAA, +   have the pass write out luma into alpha. +   Then blend into RGB only. +   FXAA should be able to run ok +   assuming the blending pass did not add any aliasing. +   This should be the common case for particles and common blending passes. + +A. Or use FXAA_GREEN_AS_LUMA. + +============================================================================*/ + +/*============================================================================ + +                             INTEGRATION KNOBS + +============================================================================*/ +// +// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE). +// FXAA_360_OPT is a prototype for the new optimized 360 version. +// +// 1 = Use API. +// 0 = Don't use API. 
+// +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_PS3 +    #define FXAA_PS3 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_360 +    #define FXAA_360 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_360_OPT +    #define FXAA_360_OPT 0 +#endif +/*==========================================================================*/ +#ifndef FXAA_PC +    // +    // FXAA Quality +    // The high quality PC algorithm. +    // +    #define FXAA_PC 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_PC_CONSOLE +    // +    // The console algorithm for PC is included +    // for developers targeting really low spec machines. +    // Likely better to just run FXAA_PC, and use a really low preset. +    // +    #define FXAA_PC_CONSOLE 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GLSL_120 +    #define FXAA_GLSL_120 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GLSL_130 +    #define FXAA_GLSL_130 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_3 +    #define FXAA_HLSL_3 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_4 +    #define FXAA_HLSL_4 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_5 +    #define FXAA_HLSL_5 0 +#endif +/*==========================================================================*/ +#ifndef FXAA_GREEN_AS_LUMA +    // +    // For those using non-linear color, +    // and either not able to get luma in alpha, or not wanting to, +    // this enables FXAA to run using green as a proxy for luma. +    // So with this enabled, no need to pack luma in alpha. 
+    // +    // This will turn off AA on anything which lacks some amount of green. +    // Pure red and blue or combination of only R and B, will get no AA. +    // +    // Might want to lower the settings for both, +    //    fxaaConsoleEdgeThresholdMin +    //    fxaaQualityEdgeThresholdMin +    // In order to insure AA does not get turned off on colors  +    // which contain a minor amount of green. +    // +    // 1 = On. +    // 0 = Off. +    // +    #define FXAA_GREEN_AS_LUMA 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_EARLY_EXIT +    // +    // Controls algorithm's early exit path. +    // On PS3 turning this ON adds 2 cycles to the shader. +    // On 360 turning this OFF adds 10ths of a millisecond to the shader. +    // Turning this off on console will result in a more blurry image. +    // So this defaults to on. +    // +    // 1 = On. +    // 0 = Off. +    // +    #define FXAA_EARLY_EXIT 1 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_DISCARD +    // +    // Only valid for PC OpenGL currently. +    // Probably will not work when FXAA_GREEN_AS_LUMA = 1. +    // +    // 1 = Use discard on pixels which don't need AA. +    //     For APIs which enable concurrent TEX+ROP from same surface. +    // 0 = Return unchanged color on pixels which don't need AA. +    // +    #define FXAA_DISCARD 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_FAST_PIXEL_OFFSET +    // +    // Used for GLSL 120 only. 
+    // +    // 1 = GL API supports fast pixel offsets +    // 0 = do not use fast pixel offsets +    // +    #ifdef GL_EXT_gpu_shader4 +        #define FXAA_FAST_PIXEL_OFFSET 1 +    #endif +    #ifdef GL_NV_gpu_shader5 +        #define FXAA_FAST_PIXEL_OFFSET 1 +    #endif +    #ifdef GL_ARB_gpu_shader5 +        #define FXAA_FAST_PIXEL_OFFSET 1 +    #endif +    #ifndef FXAA_FAST_PIXEL_OFFSET +        #define FXAA_FAST_PIXEL_OFFSET 0 +    #endif +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GATHER4_ALPHA +    // +    // 1 = API supports gather4 on alpha channel. +    // 0 = API does not support gather4 on alpha channel. +    // +    #if (FXAA_HLSL_5 == 1) +        #define FXAA_GATHER4_ALPHA 1 +    #endif +    #ifdef GL_ARB_gpu_shader5 +        #define FXAA_GATHER4_ALPHA 1 +    #endif +    #ifdef GL_NV_gpu_shader5 +        #define FXAA_GATHER4_ALPHA 1 +    #endif +    #ifndef FXAA_GATHER4_ALPHA +        #define FXAA_GATHER4_ALPHA 0 +    #endif +#endif + +/*============================================================================ +                      FXAA CONSOLE PS3 - TUNING KNOBS +============================================================================*/ +#ifndef FXAA_CONSOLE__PS3_EDGE_SHARPNESS +    // +    // Consoles the sharpness of edges on PS3 only. +    // Non-PS3 tuning is done with shader input. +    // +    // Due to the PS3 being ALU bound, +    // there are only two safe values here: 4 and 8. +    // These options use the shaders ability to a free *|/ by 2|4|8. 
+    // +    // 8.0 is sharper +    // 4.0 is softer +    // 2.0 is really soft (good for vector graphics inputs) +    // +    #if 1 +        #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 8.0 +    #endif +    #if 0 +        #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 4.0 +    #endif +    #if 0 +        #define FXAA_CONSOLE__PS3_EDGE_SHARPNESS 2.0 +    #endif +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_CONSOLE__PS3_EDGE_THRESHOLD +    // +    // Only effects PS3. +    // Non-PS3 tuning is done with shader input. +    // +    // The minimum amount of local contrast required to apply algorithm. +    // The console setting has a different mapping than the quality setting. +    // +    // This only applies when FXAA_EARLY_EXIT is 1. +    // +    // Due to the PS3 being ALU bound, +    // there are only two safe values here: 0.25 and 0.125. +    // These options use the shaders ability to a free *|/ by 2|4|8. +    // +    // 0.125 leaves less aliasing, but is softer +    // 0.25 leaves more aliasing, and is sharper +    // +    #if 1 +        #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.125 +    #else +        #define FXAA_CONSOLE__PS3_EDGE_THRESHOLD 0.25 +    #endif +#endif + +/*============================================================================ +                        FXAA QUALITY - TUNING KNOBS +------------------------------------------------------------------------------ +NOTE the other tuning knobs are now in the shader function inputs! +============================================================================*/ +#ifndef FXAA_QUALITY__PRESET +    // +    // Choose the quality preset. +    // This needs to be compiled into the shader as it effects code. +    // Best option to include multiple presets is to  +    // in each shader define the preset, then include this file. 
+    //  +    // OPTIONS +    // ----------------------------------------------------------------------- +    // 10 to 15 - default medium dither (10=fastest, 15=highest quality) +    // 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality) +    // 39       - no dither, very expensive  +    // +    // NOTES +    // ----------------------------------------------------------------------- +    // 12 = slightly faster then FXAA 3.9 and higher edge quality (default) +    // 13 = about same speed as FXAA 3.9 and better than 12 +    // 23 = closest to FXAA 3.9 visually and performance wise +    //  _ = the lowest digit is directly related to performance +    // _  = the highest digit is directly related to style +    //  +    #define FXAA_QUALITY__PRESET 12 +#endif + + +/*============================================================================ + +                           FXAA QUALITY - PRESETS + +============================================================================*/ + +/*============================================================================ +                     FXAA QUALITY - MEDIUM DITHER PRESETS +============================================================================*/ +#if (FXAA_QUALITY__PRESET == 10) +    #define FXAA_QUALITY__PS 3 +    #define FXAA_QUALITY__P0 1.5 +    #define FXAA_QUALITY__P1 3.0 +    #define FXAA_QUALITY__P2 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 11) +    #define FXAA_QUALITY__PS 4 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 3.0 +    #define FXAA_QUALITY__P3 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 12) +    #define FXAA_QUALITY__PS 5 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 4.0 +    #define 
FXAA_QUALITY__P4 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 13) +    #define FXAA_QUALITY__PS 6 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 4.0 +    #define FXAA_QUALITY__P5 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 14) +    #define FXAA_QUALITY__PS 7 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 4.0 +    #define FXAA_QUALITY__P6 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 15) +    #define FXAA_QUALITY__PS 8 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 2.0 +    #define FXAA_QUALITY__P6 4.0 +    #define FXAA_QUALITY__P7 12.0 +#endif + +/*============================================================================ +                     FXAA QUALITY - LOW DITHER PRESETS +============================================================================*/ +#if (FXAA_QUALITY__PRESET == 20) +    #define FXAA_QUALITY__PS 3 +    #define FXAA_QUALITY__P0 1.5 +    #define FXAA_QUALITY__P1 2.0 +    #define FXAA_QUALITY__P2 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 21) +    #define FXAA_QUALITY__PS 4 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET 
== 22) +    #define FXAA_QUALITY__PS 5 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 23) +    #define FXAA_QUALITY__PS 6 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 24) +    #define FXAA_QUALITY__PS 7 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 3.0 +    #define FXAA_QUALITY__P6 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 25) +    #define FXAA_QUALITY__PS 8 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 2.0 +    #define FXAA_QUALITY__P6 4.0 +    #define FXAA_QUALITY__P7 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 26) +    #define FXAA_QUALITY__PS 9 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 2.0 +    #define FXAA_QUALITY__P6 2.0 +    #define FXAA_QUALITY__P7 4.0 +    #define FXAA_QUALITY__P8 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 27) +    #define FXAA_QUALITY__PS 10 
+    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 2.0 +    #define FXAA_QUALITY__P6 2.0 +    #define FXAA_QUALITY__P7 2.0 +    #define FXAA_QUALITY__P8 4.0 +    #define FXAA_QUALITY__P9 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 28) +    #define FXAA_QUALITY__PS 11 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 2.0 +    #define FXAA_QUALITY__P6 2.0 +    #define FXAA_QUALITY__P7 2.0 +    #define FXAA_QUALITY__P8 2.0 +    #define FXAA_QUALITY__P9 4.0 +    #define FXAA_QUALITY__P10 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY__PRESET == 29) +    #define FXAA_QUALITY__PS 12 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.5 +    #define FXAA_QUALITY__P2 2.0 +    #define FXAA_QUALITY__P3 2.0 +    #define FXAA_QUALITY__P4 2.0 +    #define FXAA_QUALITY__P5 2.0 +    #define FXAA_QUALITY__P6 2.0 +    #define FXAA_QUALITY__P7 2.0 +    #define FXAA_QUALITY__P8 2.0 +    #define FXAA_QUALITY__P9 2.0 +    #define FXAA_QUALITY__P10 4.0 +    #define FXAA_QUALITY__P11 8.0 +#endif + +/*============================================================================ +                     FXAA QUALITY - EXTREME QUALITY +============================================================================*/ +#if (FXAA_QUALITY__PRESET == 39) +    #define FXAA_QUALITY__PS 12 +    #define FXAA_QUALITY__P0 1.0 +    #define FXAA_QUALITY__P1 1.0 +    #define FXAA_QUALITY__P2 1.0 +    #define FXAA_QUALITY__P3 1.0 +    #define FXAA_QUALITY__P4 1.0 +    #define FXAA_QUALITY__P5 1.5 +    #define FXAA_QUALITY__P6 2.0 +    #define FXAA_QUALITY__P7 2.0 +  
  #define FXAA_QUALITY__P8 2.0 +    #define FXAA_QUALITY__P9 2.0 +    #define FXAA_QUALITY__P10 4.0 +    #define FXAA_QUALITY__P11 8.0 +#endif + + + +/*============================================================================ + +                                API PORTING + +============================================================================*/ +#if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1) +    #define FxaaBool bool +    #define FxaaDiscard discard +    #define FxaaFloat float +    #define FxaaFloat2 vec2 +    #define FxaaFloat3 vec3 +    #define FxaaFloat4 vec4 +    #define FxaaHalf float +    #define FxaaHalf2 vec2 +    #define FxaaHalf3 vec3 +    #define FxaaHalf4 vec4 +    #define FxaaInt2 ivec2 +    #define FxaaSat(x) clamp(x, 0.0, 1.0) +    #define FxaaTex sampler2D +#else +    #define FxaaBool bool +    #define FxaaDiscard clip(-1) +    #define FxaaFloat float +    #define FxaaFloat2 float2 +    #define FxaaFloat3 float3 +    #define FxaaFloat4 float4 +    #define FxaaHalf half +    #define FxaaHalf2 half2 +    #define FxaaHalf3 half3 +    #define FxaaHalf4 half4 +    #define FxaaSat(x) saturate(x) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_GLSL_120 == 1) +    // Requires, +    //  #version 120 +    // And at least, +    //  #extension GL_EXT_gpu_shader4 : enable +    //  (or set FXAA_FAST_PIXEL_OFFSET 1 to work like DX9) +    #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0) +    #if (FXAA_FAST_PIXEL_OFFSET == 1) +        #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o) +    #else +        #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0) +    #endif +    #if (FXAA_GATHER4_ALPHA == 1) +        // use #extension GL_ARB_gpu_shader5 : enable +        #define FxaaTexAlpha4(t, p) textureGather(t, p, 3) +        #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3) +        #define FxaaTexGreen4(t, p) textureGather(t, p, 1) +        #define 
FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1) +    #endif +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_GLSL_130 == 1) +    // Requires "#version 130" or better +    #define FxaaTexTop(t, p) textureLod(t, p, 0.0) +    #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o) +    #if (FXAA_GATHER4_ALPHA == 1) +        // use #extension GL_ARB_gpu_shader5 : enable +        #define FxaaTexAlpha4(t, p) textureGather(t, p, 3) +        #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3) +        #define FxaaTexGreen4(t, p) textureGather(t, p, 1) +        #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1) +    #endif +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) || (FXAA_PS3 == 1) +    #define FxaaInt2 float2 +    #define FxaaTex sampler2D +    #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0)) +    #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0)) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_4 == 1) +    #define FxaaInt2 int2 +    struct FxaaTex { SamplerState smpl; Texture2D tex; }; +    #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) +    #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_5 == 1) +    #define FxaaInt2 int2 +    struct FxaaTex { SamplerState smpl; Texture2D tex; }; +    #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) +    #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) +    #define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p) +    #define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o) +    #define FxaaTexGreen4(t, p) t.tex.GatherGreen(t.smpl, p) +    #define FxaaTexOffGreen4(t, p, o) t.tex.GatherGreen(t.smpl, p, 
o) +#endif + + +/*============================================================================ +                   GREEN AS LUMA OPTION SUPPORT FUNCTION +============================================================================*/ /* FxaaLuma() returns the luma-carrying channel of a sample: alpha (.w) when FXAA_GREEN_AS_LUMA is 0, green (.y) otherwise. */ +#if (FXAA_GREEN_AS_LUMA == 0) +    FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.w; } +#else +    FxaaFloat FxaaLuma(FxaaFloat4 rgba) { return rgba.y; } +#endif     + + + + +/*============================================================================ + +                             FXAA3 QUALITY - PC + +============================================================================*/ +#if (FXAA_PC == 1) +/*--------------------------------------------------------------------------*/ +FxaaFloat4 FxaaPixelShader( +    // +    // Use noperspective interpolation here (turn off perspective interpolation). +    // {xy} = center of pixel +    FxaaFloat2 pos, +    // +    // Used only for FXAA Console, and not used on the 360 version. +    // Use noperspective interpolation here (turn off perspective interpolation). +    // {xy__} = upper left of pixel +    // {__zw} = lower right of pixel +    FxaaFloat4 fxaaConsolePosPos, +    // +    // Input color texture. +    // {rgb_} = color in linear or perceptual color space +    // if (FXAA_GREEN_AS_LUMA == 0) +    //     {___a} = luma in perceptual color space (not linear) +    FxaaTex tex, +    // +    // Only used on the optimized 360 version of FXAA Console. +    // For everything but 360, just use the same input here as for "tex". +    // For 360, same texture, just alias with a 2nd sampler. +    // This sampler needs to have an exponent bias of -1. +    FxaaTex fxaaConsole360TexExpBiasNegOne, +    // +    // Only used on the optimized 360 version of FXAA Console. +    // For everything but 360, just use the same input here as for "tex". +    // For 360, same texture, just alias with a 3rd sampler. +    // This sampler needs to have an exponent bias of -2. 
+    FxaaTex fxaaConsole360TexExpBiasNegTwo, +    // +    // Only used on FXAA Quality. +    // This must be from a constant/uniform. +    // {x_} = 1.0/screenWidthInPixels +    // {_y} = 1.0/screenHeightInPixels +    FxaaFloat2 fxaaQualityRcpFrame, +    // +    // Only used on FXAA Console. +    // This must be from a constant/uniform. +    // This affects sub-pixel AA quality and inversely sharpness. +    //   Where N ranges between, +    //     N = 0.50 (default) +    //     N = 0.33 (sharper) +    // {x___} = -N/screenWidthInPixels   +    // {_y__} = -N/screenHeightInPixels +    // {__z_} =  N/screenWidthInPixels   +    // {___w} =  N/screenHeightInPixels  +    FxaaFloat4 fxaaConsoleRcpFrameOpt, +    // +    // Only used on FXAA Console. +    // Not used on 360, but used on PS3 and PC. +    // This must be from a constant/uniform. +    // {x___} = -2.0/screenWidthInPixels   +    // {_y__} = -2.0/screenHeightInPixels +    // {__z_} =  2.0/screenWidthInPixels   +    // {___w} =  2.0/screenHeightInPixels  +    FxaaFloat4 fxaaConsoleRcpFrameOpt2, +    // +    // Only used on FXAA Console. +    // Only used on 360 in place of fxaaConsoleRcpFrameOpt2. +    // This must be from a constant/uniform. +    // {x___} =  8.0/screenWidthInPixels   +    // {_y__} =  8.0/screenHeightInPixels +    // {__z_} = -4.0/screenWidthInPixels   +    // {___w} = -4.0/screenHeightInPixels  +    FxaaFloat4 fxaaConsole360RcpFrameOpt2, +    // +    // Only used on FXAA Quality. +    // This used to be the FXAA_QUALITY__SUBPIX define. +    // It is here now to allow easier tuning. +    // Choose the amount of sub-pixel aliasing removal. +    // This can affect sharpness. +    //   1.00 - upper limit (softer) +    //   0.75 - default amount of filtering +    //   0.50 - lower limit (sharper, less sub-pixel aliasing removal) +    //   0.25 - almost off +    //   0.00 - completely off +    FxaaFloat fxaaQualitySubpix, +    // +    // Only used on FXAA Quality. 
+    // This used to be the FXAA_QUALITY__EDGE_THRESHOLD define. +    // It is here now to allow easier tuning. +    // The minimum amount of local contrast required to apply algorithm. +    //   0.333 - too little (faster) +    //   0.250 - low quality +    //   0.166 - default +    //   0.125 - high quality  +    //   0.063 - overkill (slower) +    FxaaFloat fxaaQualityEdgeThreshold, +    // +    // Only used on FXAA Quality. +    // This used to be the FXAA_QUALITY__EDGE_THRESHOLD_MIN define. +    // It is here now to allow easier tuning. +    // Trims the algorithm from processing darks. +    //   0.0833 - upper limit (default, the start of visible unfiltered edges) +    //   0.0625 - high quality (faster) +    //   0.0312 - visible limit (slower) +    // Special notes when using FXAA_GREEN_AS_LUMA, +    //   Likely want to set this to zero. +    //   As colors that are mostly not-green +    //   will appear very dark in the green channel! +    //   Tune by looking at mostly non-green content, +    //   then start at zero and increase until aliasing is a problem. +    FxaaFloat fxaaQualityEdgeThresholdMin, +    //  +    // Only used on FXAA Console. +    // This used to be the FXAA_CONSOLE__EDGE_SHARPNESS define. +    // It is here now to allow easier tuning. +    // This does not affect PS3, as this needs to be compiled in. +    //   Use FXAA_CONSOLE__PS3_EDGE_SHARPNESS for PS3. +    //   Due to the PS3 being ALU bound, +    //   there are only three safe values here: 2 and 4 and 8. +    //   These options use the shader's ability to do a free *|/ by 2|4|8. +    // For all other platforms can be a non-power of two. +    //   8.0 is sharper (default!!!) +    //   4.0 is softer +    //   2.0 is really soft (good only for vector graphics inputs) +    FxaaFloat fxaaConsoleEdgeSharpness, +    // +    // Only used on FXAA Console. +    // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD define. +    // It is here now to allow easier tuning. 
+    // This does not affect PS3, as this needs to be compiled in. +    //   Use FXAA_CONSOLE__PS3_EDGE_THRESHOLD for PS3. +    //   Due to the PS3 being ALU bound, +    //   there are only two safe values here: 1/4 and 1/8. +    //   These options use the shader's ability to do a free *|/ by 2|4|8. +    // The console setting has a different mapping than the quality setting. +    // Other platforms can use other values. +    //   0.125 leaves less aliasing, but is softer (default!!!) +    //   0.25 leaves more aliasing, and is sharper +    FxaaFloat fxaaConsoleEdgeThreshold, +    // +    // Only used on FXAA Console. +    // This used to be the FXAA_CONSOLE__EDGE_THRESHOLD_MIN define. +    // It is here now to allow easier tuning. +    // Trims the algorithm from processing darks. +    // The console setting has a different mapping than the quality setting. +    // This only applies when FXAA_EARLY_EXIT is 1. +    // This does not apply to PS3,  +    // PS3 was simplified to avoid more shader instructions. +    //   0.06 - faster but more aliasing in darks +    //   0.05 - default +    //   0.04 - slower and less aliasing in darks +    // Special notes when using FXAA_GREEN_AS_LUMA, +    //   Likely want to set this to zero. +    //   As colors that are mostly not-green +    //   will appear very dark in the green channel! +    //   Tune by looking at mostly non-green content, +    //   then start at zero and increase until aliasing is a problem. +    FxaaFloat fxaaConsoleEdgeThresholdMin, +    //     +    // Extra constants for 360 FXAA Console only. +    // Use zeros or anything else for other platforms. +    // These must be in physical constant registers and NOT immediates. +    // Immediates will result in compiler un-optimizing. 
+    // {xyzw} = float4(1.0, -1.0, 0.25, -0.25) +    FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ +    FxaaFloat2 posM; +    posM.x = pos.x; +    posM.y = pos.y; +    #if (FXAA_GATHER4_ALPHA == 1) +        #if (FXAA_DISCARD == 0) +            FxaaFloat4 rgbyM = FxaaTexTop(tex, posM); +            #if (FXAA_GREEN_AS_LUMA == 0) +                #define lumaM rgbyM.w +            #else +                #define lumaM rgbyM.y +            #endif +        #endif +        #if (FXAA_GREEN_AS_LUMA == 0) +            FxaaFloat4 luma4A = FxaaTexAlpha4(tex, posM); +            FxaaFloat4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1)); +        #else +            FxaaFloat4 luma4A = FxaaTexGreen4(tex, posM); +            FxaaFloat4 luma4B = FxaaTexOffGreen4(tex, posM, FxaaInt2(-1, -1)); +        #endif +        #if (FXAA_DISCARD == 1) +            #define lumaM luma4A.w +        #endif +        #define lumaE luma4A.z +        #define lumaS luma4A.x +        #define lumaSE luma4A.y +        #define lumaNW luma4B.w +        #define lumaN luma4B.z +        #define lumaW luma4B.x +    #else +        FxaaFloat4 rgbyM = FxaaTexTop(tex, posM); +        #if (FXAA_GREEN_AS_LUMA == 0) +            #define lumaM rgbyM.w +        #else +            #define lumaM rgbyM.y +        #endif +        FxaaFloat lumaS = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0, 1), fxaaQualityRcpFrame.xy)); +        FxaaFloat lumaE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 0), fxaaQualityRcpFrame.xy)); +        FxaaFloat lumaN = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 0,-1), fxaaQualityRcpFrame.xy)); +        FxaaFloat lumaW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 0), fxaaQualityRcpFrame.xy)); +    #endif +/*--------------------------------------------------------------------------*/ +    FxaaFloat maxSM = max(lumaS, lumaM); +    FxaaFloat minSM = min(lumaS, lumaM); +    FxaaFloat maxESM = max(lumaE, maxSM); +    
FxaaFloat minESM = min(lumaE, minSM); +    FxaaFloat maxWN = max(lumaN, lumaW); +    FxaaFloat minWN = min(lumaN, lumaW); +    FxaaFloat rangeMax = max(maxWN, maxESM); +    FxaaFloat rangeMin = min(minWN, minESM); +    FxaaFloat rangeMaxScaled = rangeMax * fxaaQualityEdgeThreshold; +    FxaaFloat range = rangeMax - rangeMin; +    FxaaFloat rangeMaxClamped = max(fxaaQualityEdgeThresholdMin, rangeMaxScaled); +    FxaaBool earlyExit = range < rangeMaxClamped; +/*--------------------------------------------------------------------------*/ /* Early exit: the luma range of the center and its N/S/E/W neighbours is under the threshold, so skip filtering (discard, or return the center sample unchanged). */ +    if(earlyExit) +        #if (FXAA_DISCARD == 1) +            FxaaDiscard; +        #else +            return rgbyM; +        #endif +/*--------------------------------------------------------------------------*/ +    #if (FXAA_GATHER4_ALPHA == 0) +        FxaaFloat lumaNW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1,-1), fxaaQualityRcpFrame.xy)); +        FxaaFloat lumaSE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1, 1), fxaaQualityRcpFrame.xy)); +        FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2( 1,-1), fxaaQualityRcpFrame.xy)); +        FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy)); +    #else +        FxaaFloat lumaNE = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(1, -1), fxaaQualityRcpFrame.xy)); +        FxaaFloat lumaSW = FxaaLuma(FxaaTexOff(tex, posM, FxaaInt2(-1, 1), fxaaQualityRcpFrame.xy)); +    #endif +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaNS = lumaN + lumaS; +    FxaaFloat lumaWE = lumaW + lumaE; +    FxaaFloat subpixRcpRange = 1.0/range; +    FxaaFloat subpixNSWE = lumaNS + lumaWE; +    FxaaFloat edgeHorz1 = (-2.0 * lumaM) + lumaNS; +    FxaaFloat edgeVert1 = (-2.0 * lumaM) + lumaWE; +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaNESE = lumaNE + lumaSE; +    FxaaFloat lumaNWNE = lumaNW + lumaNE; +    FxaaFloat edgeHorz2 = (-2.0 * lumaE) + lumaNESE; +  
  FxaaFloat edgeVert2 = (-2.0 * lumaN) + lumaNWNE; +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaNWSW = lumaNW + lumaSW; +    FxaaFloat lumaSWSE = lumaSW + lumaSE; +    FxaaFloat edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2); +    FxaaFloat edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2); +    FxaaFloat edgeHorz3 = (-2.0 * lumaW) + lumaNWSW; +    FxaaFloat edgeVert3 = (-2.0 * lumaS) + lumaSWSE; +    FxaaFloat edgeHorz = abs(edgeHorz3) + edgeHorz4; +    FxaaFloat edgeVert = abs(edgeVert3) + edgeVert4; +/*--------------------------------------------------------------------------*/ +    FxaaFloat subpixNWSWNESE = lumaNWSW + lumaNESE; +    FxaaFloat lengthSign = fxaaQualityRcpFrame.x; +    FxaaBool horzSpan = edgeHorz >= edgeVert; +    FxaaFloat subpixA = subpixNSWE * 2.0 + subpixNWSWNESE; +/*--------------------------------------------------------------------------*/ +    if(!horzSpan) lumaN = lumaW; +    if(!horzSpan) lumaS = lumaE; +    if(horzSpan) lengthSign = fxaaQualityRcpFrame.y; +    FxaaFloat subpixB = (subpixA * (1.0/12.0)) - lumaM; +/*--------------------------------------------------------------------------*/ +    FxaaFloat gradientN = lumaN - lumaM; +    FxaaFloat gradientS = lumaS - lumaM; +    FxaaFloat lumaNN = lumaN + lumaM; +    FxaaFloat lumaSS = lumaS + lumaM; +    FxaaBool pairN = abs(gradientN) >= abs(gradientS); +    FxaaFloat gradient = max(abs(gradientN), abs(gradientS)); +    if(pairN) lengthSign = -lengthSign; +    FxaaFloat subpixC = FxaaSat(abs(subpixB) * subpixRcpRange); +/*--------------------------------------------------------------------------*/ +    FxaaFloat2 posB; +    posB.x = posM.x; +    posB.y = posM.y; +    FxaaFloat2 offNP; +    offNP.x = (!horzSpan) ? 0.0 : fxaaQualityRcpFrame.x; +    offNP.y = ( horzSpan) ? 
0.0 : fxaaQualityRcpFrame.y; +    if(!horzSpan) posB.x += lengthSign * 0.5; +    if( horzSpan) posB.y += lengthSign * 0.5; +/*--------------------------------------------------------------------------*/ +    FxaaFloat2 posN; +    posN.x = posB.x - offNP.x * FXAA_QUALITY__P0; +    posN.y = posB.y - offNP.y * FXAA_QUALITY__P0; +    FxaaFloat2 posP; +    posP.x = posB.x + offNP.x * FXAA_QUALITY__P0; +    posP.y = posB.y + offNP.y * FXAA_QUALITY__P0; +    FxaaFloat subpixD = ((-2.0)*subpixC) + 3.0; +    FxaaFloat lumaEndN = FxaaLuma(FxaaTexTop(tex, posN)); +    FxaaFloat subpixE = subpixC * subpixC; +    FxaaFloat lumaEndP = FxaaLuma(FxaaTexTop(tex, posP)); +/*--------------------------------------------------------------------------*/ +    if(!pairN) lumaNN = lumaSS; +    FxaaFloat gradientScaled = gradient * 1.0/4.0; +    FxaaFloat lumaMM = lumaM - lumaNN * 0.5; +    FxaaFloat subpixF = subpixD * subpixE; +    FxaaBool lumaMLTZero = lumaMM < 0.0; +/*--------------------------------------------------------------------------*/ +    lumaEndN -= lumaNN * 0.5; +    lumaEndP -= lumaNN * 0.5; +    FxaaBool doneN = abs(lumaEndN) >= gradientScaled; +    FxaaBool doneP = abs(lumaEndP) >= gradientScaled; +    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1; +    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1; +    FxaaBool doneNP = (!doneN) || (!doneP); +    if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1; +    if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1; +/*--------------------------------------------------------------------------*/ /* NOTE: despite its name, doneNP is true while at least one direction (N or P) is still NOT done, i.e. another search step is needed. Each nested level below repeats the same step with the next FXAA_QUALITY__Pn step size. */ +    if(doneNP) { +        if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +        if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +        doneN = abs(lumaEndN) >= gradientScaled; +        doneP = abs(lumaEndP) >= gradientScaled; +        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2; +        if(!doneN) 
posN.y -= offNP.y * FXAA_QUALITY__P2; +        doneNP = (!doneN) || (!doneP); +        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2; +        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2; +/*--------------------------------------------------------------------------*/ +        #if (FXAA_QUALITY__PS > 3) +        if(doneNP) { +            if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +            if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +            doneN = abs(lumaEndN) >= gradientScaled; +            doneP = abs(lumaEndP) >= gradientScaled; +            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3; +            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3; +            doneNP = (!doneN) || (!doneP); +            if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3; +            if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3; +/*--------------------------------------------------------------------------*/ +            #if (FXAA_QUALITY__PS > 4) +            if(doneNP) { +                if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +                if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +                if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +                if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +                doneN = abs(lumaEndN) >= gradientScaled; +                doneP = abs(lumaEndP) >= gradientScaled; +                if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4; +                if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4; +                doneNP = (!doneN) || (!doneP); +                if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4; +                if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4; +/*--------------------------------------------------------------------------*/ +                #if (FXAA_QUALITY__PS > 5) +                if(doneNP) { 
+                    if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +                    if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +                    if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +                    if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +                    doneN = abs(lumaEndN) >= gradientScaled; +                    doneP = abs(lumaEndP) >= gradientScaled; +                    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5; +                    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5; +                    doneNP = (!doneN) || (!doneP); +                    if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5; +                    if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5; +/*--------------------------------------------------------------------------*/ +                    #if (FXAA_QUALITY__PS > 6) +                    if(doneNP) { +                        if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +                        if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +                        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +                        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +                        doneN = abs(lumaEndN) >= gradientScaled; +                        doneP = abs(lumaEndP) >= gradientScaled; +                        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6; +                        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6; +                        doneNP = (!doneN) || (!doneP); +                        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6; +                        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6; +/*--------------------------------------------------------------------------*/ +                        #if (FXAA_QUALITY__PS > 7) +                        if(doneNP) { +                            if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +                            
if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +                            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +                            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +                            doneN = abs(lumaEndN) >= gradientScaled; +                            doneP = abs(lumaEndP) >= gradientScaled; +                            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7; +                            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7; +                            doneNP = (!doneN) || (!doneP); +                            if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7; +                            if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7; +/*--------------------------------------------------------------------------*/ +    #if (FXAA_QUALITY__PS > 8) +    if(doneNP) { +        if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +        if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +        doneN = abs(lumaEndN) >= gradientScaled; +        doneP = abs(lumaEndP) >= gradientScaled; +        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8; +        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8; +        doneNP = (!doneN) || (!doneP); +        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8; +        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8; +/*--------------------------------------------------------------------------*/ +        #if (FXAA_QUALITY__PS > 9) +        if(doneNP) { +            if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +            if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +            doneN = abs(lumaEndN) >= gradientScaled; +            doneP = abs(lumaEndP) >= gradientScaled; +            
if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9; +            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9; +            doneNP = (!doneN) || (!doneP); +            if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9; +            if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9; +/*--------------------------------------------------------------------------*/ +            #if (FXAA_QUALITY__PS > 10) +            if(doneNP) { +                if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +                if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +                if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +                if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +                doneN = abs(lumaEndN) >= gradientScaled; +                doneP = abs(lumaEndP) >= gradientScaled; +                if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10; +                if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10; +                doneNP = (!doneN) || (!doneP); +                if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10; +                if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10; +/*--------------------------------------------------------------------------*/ +                #if (FXAA_QUALITY__PS > 11) +                if(doneNP) { +                    if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +                    if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +                    if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +                    if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +                    doneN = abs(lumaEndN) >= gradientScaled; +                    doneP = abs(lumaEndP) >= gradientScaled; +                    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11; +                    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11; +                    doneNP = (!doneN) || (!doneP); +                    if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11; +                    
if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11; +/*--------------------------------------------------------------------------*/ +                    #if (FXAA_QUALITY__PS > 12) +                    if(doneNP) { +                        if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy)); +                        if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy)); +                        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5; +                        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5; +                        doneN = abs(lumaEndN) >= gradientScaled; +                        doneP = abs(lumaEndP) >= gradientScaled; +                        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12; +                        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12; +                        doneNP = (!doneN) || (!doneP); +                        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12; +                        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12; +/*--------------------------------------------------------------------------*/ +                    } +                    #endif +/*--------------------------------------------------------------------------*/ +                } +                #endif +/*--------------------------------------------------------------------------*/ +            } +            #endif +/*--------------------------------------------------------------------------*/ +        } +        #endif +/*--------------------------------------------------------------------------*/ +    } +    #endif +/*--------------------------------------------------------------------------*/ +                        } +                        #endif +/*--------------------------------------------------------------------------*/ +                    } +                    #endif +/*--------------------------------------------------------------------------*/ +                } +                #endif 
+/*--------------------------------------------------------------------------*/ +            } +            #endif +/*--------------------------------------------------------------------------*/ +        } +        #endif +/*--------------------------------------------------------------------------*/ +    } +/*--------------------------------------------------------------------------*/ /* Distance from the center sample to each end position of the search, measured along x for a horizontal span and along y otherwise. */ +    FxaaFloat dstN = posM.x - posN.x; +    FxaaFloat dstP = posP.x - posM.x; +    if(!horzSpan) dstN = posM.y - posN.y; +    if(!horzSpan) dstP = posP.y - posM.y; +/*--------------------------------------------------------------------------*/ +    FxaaBool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero; +    FxaaFloat spanLength = (dstP + dstN); +    FxaaBool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero; +    FxaaFloat spanLengthRcp = 1.0/spanLength; +/*--------------------------------------------------------------------------*/ +    FxaaBool directionN = dstN < dstP; +    FxaaFloat dst = min(dstN, dstP); +    FxaaBool goodSpan = directionN ? goodSpanN : goodSpanP; +    FxaaFloat subpixG = subpixF * subpixF; +    FxaaFloat pixelOffset = (dst * (-spanLengthRcp)) + 0.5; +    FxaaFloat subpixH = subpixG * fxaaQualitySubpix; +/*--------------------------------------------------------------------------*/ +    FxaaFloat pixelOffsetGood = goodSpan ? 
pixelOffset : 0.0; +    FxaaFloat pixelOffsetSubpix = max(pixelOffsetGood, subpixH); +    if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign; +    if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign; +    #if (FXAA_DISCARD == 1) +        return FxaaTexTop(tex, posM); +    #else +        return FxaaFloat4(FxaaTexTop(tex, posM).xyz, lumaM); +    #endif +} +/*==========================================================================*/ +#endif + + + + +/*============================================================================ + +                         FXAA3 CONSOLE - PC VERSION +                          +------------------------------------------------------------------------------ +Instead of using this on PC, I'd suggest just using FXAA Quality with +    #define FXAA_QUALITY__PRESET 10 +Or  +    #define FXAA_QUALITY__PRESET 20 +Either are higher quality and almost as fast as this on modern PC GPUs. +============================================================================*/ +#if (FXAA_PC_CONSOLE == 1) +/*--------------------------------------------------------------------------*/ +FxaaFloat4 FxaaPixelShader( +    // See FXAA Quality FxaaPixelShader() source for docs on Inputs! 
+    FxaaFloat2 pos, +    FxaaFloat4 fxaaConsolePosPos, +    FxaaTex tex, +    FxaaTex fxaaConsole360TexExpBiasNegOne, +    FxaaTex fxaaConsole360TexExpBiasNegTwo, +    FxaaFloat2 fxaaQualityRcpFrame, +    FxaaFloat4 fxaaConsoleRcpFrameOpt, +    FxaaFloat4 fxaaConsoleRcpFrameOpt2, +    FxaaFloat4 fxaaConsole360RcpFrameOpt2, +    FxaaFloat fxaaQualitySubpix, +    FxaaFloat fxaaQualityEdgeThreshold, +    FxaaFloat fxaaQualityEdgeThresholdMin, +    FxaaFloat fxaaConsoleEdgeSharpness, +    FxaaFloat fxaaConsoleEdgeThreshold, +    FxaaFloat fxaaConsoleEdgeThresholdMin, +    FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaNw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xy)); +    FxaaFloat lumaSw = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.xw)); +    FxaaFloat lumaNe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zy)); +    FxaaFloat lumaSe = FxaaLuma(FxaaTexTop(tex, fxaaConsolePosPos.zw)); +/*--------------------------------------------------------------------------*/ +    FxaaFloat4 rgbyM = FxaaTexTop(tex, pos.xy); +    #if (FXAA_GREEN_AS_LUMA == 0) +        FxaaFloat lumaM = rgbyM.w; +    #else +        FxaaFloat lumaM = rgbyM.y; +    #endif +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaMaxNwSw = max(lumaNw, lumaSw); +    lumaNe += 1.0/384.0; +    FxaaFloat lumaMinNwSw = min(lumaNw, lumaSw); +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaMaxNeSe = max(lumaNe, lumaSe); +    FxaaFloat lumaMinNeSe = min(lumaNe, lumaSe); +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaMax = max(lumaMaxNeSe, lumaMaxNwSw); +    FxaaFloat lumaMin = min(lumaMinNeSe, lumaMinNwSw); +/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaMaxScaled = lumaMax * fxaaConsoleEdgeThreshold; 
+/*--------------------------------------------------------------------------*/ +    FxaaFloat lumaMinM = min(lumaMin, lumaM); +    FxaaFloat lumaMaxScaledClamped = max(fxaaConsoleEdgeThresholdMin, lumaMaxScaled); +    FxaaFloat lumaMaxM = max(lumaMax, lumaM); +    FxaaFloat dirSwMinusNe = lumaSw - lumaNe; +    FxaaFloat lumaMaxSubMinM = lumaMaxM - lumaMinM; +    FxaaFloat dirSeMinusNw = lumaSe - lumaNw; +    if(lumaMaxSubMinM < lumaMaxScaledClamped) return rgbyM; +/*--------------------------------------------------------------------------*/ +    FxaaFloat2 dir; +    dir.x = dirSwMinusNe + dirSeMinusNw; +    dir.y = dirSwMinusNe - dirSeMinusNw; +/*--------------------------------------------------------------------------*/ +    FxaaFloat2 dir1 = normalize(dir.xy); +    FxaaFloat4 rgbyN1 = FxaaTexTop(tex, pos.xy - dir1 * fxaaConsoleRcpFrameOpt.zw); +    FxaaFloat4 rgbyP1 = FxaaTexTop(tex, pos.xy + dir1 * fxaaConsoleRcpFrameOpt.zw); +/*--------------------------------------------------------------------------*/ +    FxaaFloat dirAbsMinTimesC = min(abs(dir1.x), abs(dir1.y)) * fxaaConsoleEdgeSharpness; +    FxaaFloat2 dir2 = clamp(dir1.xy / dirAbsMinTimesC, -2.0, 2.0); +/*--------------------------------------------------------------------------*/ +    FxaaFloat4 rgbyN2 = FxaaTexTop(tex, pos.xy - dir2 * fxaaConsoleRcpFrameOpt2.zw); +    FxaaFloat4 rgbyP2 = FxaaTexTop(tex, pos.xy + dir2 * fxaaConsoleRcpFrameOpt2.zw); +/*--------------------------------------------------------------------------*/ +    FxaaFloat4 rgbyA = rgbyN1 + rgbyP1; +    FxaaFloat4 rgbyB = ((rgbyN2 + rgbyP2) * 0.25) + (rgbyA * 0.25); +/*--------------------------------------------------------------------------*/ +    #if (FXAA_GREEN_AS_LUMA == 0) +        FxaaBool twoTap = (rgbyB.w < lumaMin) || (rgbyB.w > lumaMax); +    #else +        FxaaBool twoTap = (rgbyB.y < lumaMin) || (rgbyB.y > lumaMax); +    #endif +    if(twoTap) rgbyB.xyz = rgbyA.xyz * 0.5; +    return rgbyB; } 
+/*==========================================================================*/ +#endif + + + +/*============================================================================ + +                      FXAA3 CONSOLE - 360 PIXEL SHADER  + +------------------------------------------------------------------------------ +This optimized version thanks to suggestions from Andy Luedke. +Should be fully tex bound in all cases. +As of the FXAA 3.11 release, I have still not tested this code, +however I fixed a bug which was in both FXAA 3.9 and FXAA 3.10. +And note this is replacing the old unoptimized version. +If it does not work, please let me know so I can fix it. +============================================================================*/ +#if (FXAA_360 == 1) +/*--------------------------------------------------------------------------*/ /* FXAA Console pixel shader for Xbox 360. Fetches four diagonal luma taps at half-pixel offsets around pos, returns the unfiltered center sample when the local luma range is under the edge threshold, and otherwise returns a 2-tap or 4-tap blend taken along the direction computed from those taps. Parameters are documented on the FXAA Quality FxaaPixelShader(). */ +[reduceTempRegUsage(4)] +float4 FxaaPixelShader( +    // See FXAA Quality FxaaPixelShader() source for docs on Inputs! +    FxaaFloat2 pos, +    FxaaFloat4 fxaaConsolePosPos, +    FxaaTex tex, +    FxaaTex fxaaConsole360TexExpBiasNegOne, +    FxaaTex fxaaConsole360TexExpBiasNegTwo, +    FxaaFloat2 fxaaQualityRcpFrame, +    FxaaFloat4 fxaaConsoleRcpFrameOpt, +    FxaaFloat4 fxaaConsoleRcpFrameOpt2, +    FxaaFloat4 fxaaConsole360RcpFrameOpt2, +    FxaaFloat fxaaQualitySubpix, +    FxaaFloat fxaaQualityEdgeThreshold, +    FxaaFloat fxaaQualityEdgeThresholdMin, +    FxaaFloat fxaaConsoleEdgeSharpness, +    FxaaFloat fxaaConsoleEdgeThreshold, +    FxaaFloat fxaaConsoleEdgeThresholdMin, +    FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ +    float4 lumaNwNeSwSe; +    #if (FXAA_GREEN_AS_LUMA == 0) +        asm {  +            tfetch2D lumaNwNeSwSe.w___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false +            tfetch2D lumaNwNeSwSe._w__, tex, pos.xy, OffsetX =  0.5, OffsetY = -0.5, UseComputedLOD=false +            tfetch2D lumaNwNeSwSe.__w_, tex, 
pos.xy, OffsetX = -0.5, OffsetY =  0.5, UseComputedLOD=false +            tfetch2D lumaNwNeSwSe.___w, tex, pos.xy, OffsetX =  0.5, OffsetY =  0.5, UseComputedLOD=false +        }; +    #else +        asm {  +            tfetch2D lumaNwNeSwSe.y___, tex, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false +            tfetch2D lumaNwNeSwSe._y__, tex, pos.xy, OffsetX =  0.5, OffsetY = -0.5, UseComputedLOD=false +            tfetch2D lumaNwNeSwSe.__y_, tex, pos.xy, OffsetX = -0.5, OffsetY =  0.5, UseComputedLOD=false +            tfetch2D lumaNwNeSwSe.___y, tex, pos.xy, OffsetX =  0.5, OffsetY =  0.5, UseComputedLOD=false +        }; +    #endif +/*--------------------------------------------------------------------------*/ +    lumaNwNeSwSe.y += 1.0/384.0; +    float2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw); +    float2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw); +    float lumaMin = min(lumaMinTemp.x, lumaMinTemp.y); +    float lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y); +/*--------------------------------------------------------------------------*/ +    float4 rgbyM = tex2Dlod(tex, float4(pos.xy, 0.0, 0.0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        float lumaMinM = min(lumaMin, rgbyM.w); +        float lumaMaxM = max(lumaMax, rgbyM.w); +    #else +        float lumaMinM = min(lumaMin, rgbyM.y); +        float lumaMaxM = max(lumaMax, rgbyM.y); +    #endif         +    if((lumaMaxM - lumaMinM) < max(fxaaConsoleEdgeThresholdMin, lumaMax * fxaaConsoleEdgeThreshold)) return rgbyM; +/*--------------------------------------------------------------------------*/ +    float2 dir; +    dir.x = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.yyxx); +    dir.y = dot(lumaNwNeSwSe, fxaaConsole360ConstDir.xyxy); +    dir = normalize(dir); +/*--------------------------------------------------------------------------*/ +    float4 dir1 = dir.xyxy * fxaaConsoleRcpFrameOpt.xyzw; +/*--------------------------------------------------------------------------*/ 
+    float4 dir2; +    float dirAbsMinTimesC = min(abs(dir.x), abs(dir.y)) * fxaaConsoleEdgeSharpness; +    dir2 = saturate(fxaaConsole360ConstDir.zzww * dir.xyxy / dirAbsMinTimesC + 0.5); +    dir2 = dir2 * fxaaConsole360RcpFrameOpt2.xyxy + fxaaConsole360RcpFrameOpt2.zwzw; +/*--------------------------------------------------------------------------*/ +    float4 rgbyN1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.xy, 0.0, 0.0)); +    float4 rgbyP1 = tex2Dlod(fxaaConsole360TexExpBiasNegOne, float4(pos.xy + dir1.zw, 0.0, 0.0)); +    float4 rgbyN2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.xy, 0.0, 0.0)); +    float4 rgbyP2 = tex2Dlod(fxaaConsole360TexExpBiasNegTwo, float4(pos.xy + dir2.zw, 0.0, 0.0)); +/*--------------------------------------------------------------------------*/ /* Assuming the exponent-bias samplers pre-scale their taps (bias -1 => x0.5, bias -2 => x0.25; TODO confirm on hardware), rgbyA is already the 2-tap average and the 4-tap average is N2 + P2 + 0.5 * rgbyA, which matches ((N2 + P2) * 0.25) + (A * 0.25) in the PC console path. */ +    float4 rgbyA = rgbyN1 + rgbyP1; +    float4 rgbyB = rgbyN2 + rgbyP2 + rgbyA * 0.5; +/*--------------------------------------------------------------------------*/ /* Compare through FxaaLuma() so a green-as-luma build tests the same channel that lumaMin/lumaMax were built from; fall back to the 2-tap result when the 4-tap luma leaves the [lumaMin, lumaMax] range. */ +    float4 rgbyR = ((FxaaLuma(rgbyB) - lumaMax) > 0.0) ? rgbyA : rgbyB; +    rgbyR = ((FxaaLuma(rgbyB) - lumaMin) > 0.0) ? rgbyR : rgbyA; +    return rgbyR; } +/*==========================================================================*/ +#endif + + + +/*============================================================================ + +         FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT) + +============================================================================== +The code below does not exactly match the assembly. +I have a feeling that 12 cycles is possible, but was not able to get there. +Might have to increase register count to get full performance. +Note this shader does not use perspective interpolation. 
+ +Use the following cgc options, + +  --fenable-bx2 --fastmath --fastprecision --nofloatbindings + +------------------------------------------------------------------------------ +                             NVSHADERPERF OUTPUT +------------------------------------------------------------------------------ +For reference and to aid in debug, output of NVShaderPerf should match this, + +Shader to schedule: +  0: texpkb h0.w(TRUE), v5.zyxx, #0 +  2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x +  4: texpkb h0.w(TRUE), v5.xwxx, #0 +  6: addh h0.z(TRUE), -h2, h0.w +  7: texpkb h1.w(TRUE), v5, #0 +  9: addh h0.x(TRUE), h0.z, -h1.w + 10: addh h3.w(TRUE), h0.z, h1 + 11: texpkb h2.w(TRUE), v5.zwzz, #0 + 13: addh h0.z(TRUE), h3.w, -h2.w + 14: addh h0.x(TRUE), h2.w, h0 + 15: nrmh h1.xz(TRUE), h0_n + 16: minh_m8 h0.x(TRUE), |h1|, |h1.z| + 17: maxh h4.w(TRUE), h0, h1 + 18: divx h2.xy(TRUE), h1_n.xzzw, h0_n + 19: movr r1.zw(TRUE), v4.xxxy + 20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww + 22: minh h5.w(TRUE), h0, h1 + 23: texpkb h0(TRUE), r2.xzxx, #0 + 25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1 + 27: maxh h4.x(TRUE), h2.z, h2.w + 28: texpkb h1(TRUE), r0.zwzz, #0 + 30: addh_d2 h1(TRUE), h0, h1 + 31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 33: texpkb h0(TRUE), r0, #0 + 35: minh h4.z(TRUE), h2, h2.w + 36: fenct TRUE + 37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 39: texpkb h2(TRUE), r1, #0 + 41: addh_d2 h0(TRUE), h0, h2 + 42: maxh h2.w(TRUE), h4, h4.x + 43: minh h2.x(TRUE), h5.w, h4.z + 44: addh_d2 h0(TRUE), h0, h1 + 45: slth h2.x(TRUE), h0.w, h2 + 46: sgth h2.w(TRUE), h0, h2 + 47: movh h0(TRUE), h0 + 48: addx.c0 rc(TRUE), h2, h2.w + 49: movh h0(c0.NE.x), h1 + +IPU0 ------ Simplified schedule: -------- +Pass |  Unit  |  uOp |  PC:  Op 
+-----+--------+------+------------------------- +   1 | SCT0/1 |  mov |   0:  TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; +     |    TEX |  txl |   0:  TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; +     |   SCB1 |  add |   2:  ADDh h2.z, h0.--w-, const.--x-; +     |        |      | +   2 | SCT0/1 |  mov |   4:  TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0; +     |    TEX |  txl |   4:  TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0; +     |   SCB1 |  add |   6:  ADDh h0.z,-h2, h0.--w-; +     |        |      | +   3 | SCT0/1 |  mov |   7:  TXLr h1.w, g[TEX1], const.xxxx, TEX0; +     |    TEX |  txl |   7:  TXLr h1.w, g[TEX1], const.xxxx, TEX0; +     |   SCB0 |  add |   9:  ADDh h0.x, h0.z---,-h1.w---; +     |   SCB1 |  add |  10:  ADDh h3.w, h0.---z, h1; +     |        |      | +   4 | SCT0/1 |  mov |  11:  TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; +     |    TEX |  txl |  11:  TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; +     |   SCB0 |  add |  14:  ADDh h0.x, h2.w---, h0; +     |   SCB1 |  add |  13:  ADDh h0.z, h3.--w-,-h2.--w-; +     |        |      | +   5 |   SCT1 |  mov |  15:  NRMh h1.xz, h0; +     |    SRB |  nrm |  15:  NRMh h1.xz, h0; +     |   SCB0 |  min |  16:  MINh*8 h0.x, |h1|, |h1.z---|; +     |   SCB1 |  max |  17:  MAXh h4.w, h0, h1; +     |        |      | +   6 |   SCT0 |  div |  18:  DIVx h2.xy, h1.xz--, h0; +     |   SCT1 |  mov |  19:  MOVr r1.zw, g[TEX0].--xy; +     |   SCB0 |  mad |  20:  MADr r2.xz,-h1, const.z-w-, r1.z-w-; +     |   SCB1 |  min |  22:  MINh h5.w, h0, h1; +     |        |      | +   7 | SCT0/1 |  mov |  23:  TXLr h0, r2.xzxx, const.xxxx, TEX0; +     |    TEX |  txl |  23:  TXLr h0, r2.xzxx, const.xxxx, TEX0; +     |   SCB0 |  max |  27:  MAXh h4.x, h2.z---, h2.w---; +     |   SCB1 |  mad |  25:  MADr r0.zw, h1.--xz, const, r1; +     |        |      | +   8 | SCT0/1 |  mov |  28:  TXLr h1, r0.zwzz, const.xxxx, TEX0; +     |    TEX |  txl |  28:  TXLr h1, r0.zwzz, const.xxxx, TEX0; +     | SCB0/1 |  add |  30:  ADDh/2 h1, h0, h1; +    
 |        |      | +   9 |   SCT0 |  mad |  31:  MADr r0.xy,-h2, const.xy--, r1.zw--; +     |   SCT1 |  mov |  33:  TXLr h0, r0, const.zzzz, TEX0; +     |    TEX |  txl |  33:  TXLr h0, r0, const.zzzz, TEX0; +     |   SCB1 |  min |  35:  MINh h4.z, h2, h2.--w-; +     |        |      | +  10 |   SCT0 |  mad |  37:  MADr r1.xy, h2, const.xy--, r1.zw--; +     |   SCT1 |  mov |  39:  TXLr h2, r1, const.zzzz, TEX0; +     |    TEX |  txl |  39:  TXLr h2, r1, const.zzzz, TEX0; +     | SCB0/1 |  add |  41:  ADDh/2 h0, h0, h2; +     |        |      | +  11 |   SCT0 |  min |  43:  MINh h2.x, h5.w---, h4.z---; +     |   SCT1 |  max |  42:  MAXh h2.w, h4, h4.---x; +     | SCB0/1 |  add |  44:  ADDh/2 h0, h0, h1; +     |        |      | +  12 |   SCT0 |  set |  45:  SLTh h2.x, h0.w---, h2; +     |   SCT1 |  set |  46:  SGTh h2.w, h0, h2; +     | SCB0/1 |  mul |  47:  MOVh h0, h0; +     |        |      | +  13 |   SCT0 |  mad |  48:  ADDxc0_s rc, h2, h2.w---; +     | SCB0/1 |  mul |  49:  MOVh h0(NE0.xxxx), h1; +Pass   SCT  TEX  SCB +  1:   0% 100%  25% +  2:   0% 100%  25% +  3:   0% 100%  50% +  4:   0% 100%  50% +  5:   0%   0%  50% +  6: 100%   0%  75% +  7:   0% 100%  75% +  8:   0% 100% 100% +  9:   0% 100%  25% + 10:   0% 100% 100% + 11:  50%   0% 100% + 12:  50%   0% 100% + 13:  25%   0% 100% +MEAN:  17%  61%  67% -#extension GL_ARB_texture_rectangle : enable +Pass   SCT0  SCT1   TEX  SCB0  SCB1 +  1:    0%    0%  100%    0%  100% +  2:    0%    0%  100%    0%  100% +  3:    0%    0%  100%  100%  100% +  4:    0%    0%  100%  100%  100% +  5:    0%    0%    0%  100%  100% +  6:  100%  100%    0%  100%  100% +  7:    0%    0%  100%  100%  100% +  8:    0%    0%  100%  100%  100% +  9:    0%    0%  100%    0%  100% + 10:    0%    0%  100%  100%  100% + 11:  100%  100%    0%  100%  100% + 12:  100%  100%    0%  100%  100% + 13:  100%    0%    0%  100%  100% + +MEAN:   30%   23%   61%   76%  100% +Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5 +Results 
13 cycles, 3 r regs, 923,076,923 pixels/s +============================================================================*/ +#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0) +/*--------------------------------------------------------------------------*/ +#pragma regcount 7 +#pragma disablepc all +#pragma option O3 +#pragma option OutColorPrec=fp16 +#pragma texformat default RGBA8 +/*==========================================================================*/ +half4 FxaaPixelShader( +    // See FXAA Quality FxaaPixelShader() source for docs on Inputs! +    FxaaFloat2 pos, +    FxaaFloat4 fxaaConsolePosPos, +    FxaaTex tex, +    FxaaTex fxaaConsole360TexExpBiasNegOne, +    FxaaTex fxaaConsole360TexExpBiasNegTwo, +    FxaaFloat2 fxaaQualityRcpFrame, +    FxaaFloat4 fxaaConsoleRcpFrameOpt, +    FxaaFloat4 fxaaConsoleRcpFrameOpt2, +    FxaaFloat4 fxaaConsole360RcpFrameOpt2, +    FxaaFloat fxaaQualitySubpix, +    FxaaFloat fxaaQualityEdgeThreshold, +    FxaaFloat fxaaQualityEdgeThresholdMin, +    FxaaFloat fxaaConsoleEdgeSharpness, +    FxaaFloat fxaaConsoleEdgeThreshold, +    FxaaFloat fxaaConsoleEdgeThresholdMin, +    FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ +// (1) +    half4 dir; +    half4 lumaNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        lumaNe.w += half(1.0/512.0); +        dir.x = -lumaNe.w; +        dir.z = -lumaNe.w; +    #else +        lumaNe.y += half(1.0/512.0); +        dir.x = -lumaNe.y; +        dir.z = -lumaNe.y; +    #endif +/*--------------------------------------------------------------------------*/ +// (2) +    half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        dir.x += lumaSw.w; +        dir.z += lumaSw.w; +    #else +        dir.x += lumaSw.y; +        dir.z += lumaSw.y; +    #endif         
+/*--------------------------------------------------------------------------*/ +// (3) +    half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        dir.x -= lumaNw.w; +        dir.z += lumaNw.w; +    #else +        dir.x -= lumaNw.y; +        dir.z += lumaNw.y; +    #endif +/*--------------------------------------------------------------------------*/ +// (4) +    half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        dir.x += lumaSe.w; +        dir.z -= lumaSe.w; +    #else +        dir.x += lumaSe.y; +        dir.z -= lumaSe.y; +    #endif +/*--------------------------------------------------------------------------*/ +// (5) +    half4 dir1_pos; +    dir1_pos.xy = normalize(dir.xyz).xz; +    half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS); +/*--------------------------------------------------------------------------*/ +// (6) +    half4 dir2_pos; +    dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0)); +    dir1_pos.zw = pos.xy; +    dir2_pos.zw = pos.xy; +    half4 temp1N; +    temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; +/*--------------------------------------------------------------------------*/ +// (7) +    temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0)); +    half4 rgby1; +    rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; +/*--------------------------------------------------------------------------*/ +// (8) +    rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0)); +    rgby1 = (temp1N + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (9) +    half4 temp2N; +    temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; +    temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0)); +/*--------------------------------------------------------------------------*/ +// (10) 
+    half4 rgby2; +    rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; +    rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0)); +    rgby2 = (temp2N + rgby2) * 0.5; +/*--------------------------------------------------------------------------*/ +// (11) +    // compilier moves these scalar ops up to other cycles +    #if (FXAA_GREEN_AS_LUMA == 0) +        half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w)); +        half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w)); +    #else +        half lumaMin = min(min(lumaNw.y, lumaSw.y), min(lumaNe.y, lumaSe.y)); +        half lumaMax = max(max(lumaNw.y, lumaSw.y), max(lumaNe.y, lumaSe.y)); +    #endif         +    rgby2 = (rgby2 + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (12) +    #if (FXAA_GREEN_AS_LUMA == 0) +        bool twoTapLt = rgby2.w < lumaMin; +        bool twoTapGt = rgby2.w > lumaMax; +    #else +        bool twoTapLt = rgby2.y < lumaMin; +        bool twoTapGt = rgby2.y > lumaMax; +    #endif +/*--------------------------------------------------------------------------*/ +// (13) +    if(twoTapLt || twoTapGt) rgby2 = rgby1; +/*--------------------------------------------------------------------------*/ +    return rgby2; } +/*==========================================================================*/ +#endif + + + +/*============================================================================ + +       FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT) + +============================================================================== +The code mostly matches the assembly. +I have a feeling that 14 cycles is possible, but was not able to get there. +Might have to increase register count to get full performance. +Note this shader does not use perspective interpolation. 
+ +Use the following cgc options, + + --fenable-bx2 --fastmath --fastprecision --nofloatbindings + +Use of FXAA_GREEN_AS_LUMA currently adds a cycle (16 clks). +Will look at fixing this for FXAA 3.12. +------------------------------------------------------------------------------ +                             NVSHADERPERF OUTPUT +------------------------------------------------------------------------------ +For reference and to aid in debug, output of NVShaderPerf should match this, + +Shader to schedule: +  0: texpkb h0.w(TRUE), v5.zyxx, #0 +  2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x +  4: texpkb h1.w(TRUE), v5.xwxx, #0 +  6: addh h0.x(TRUE), h1.w, -h2.y +  7: texpkb h2.w(TRUE), v5.zwzz, #0 +  9: minh h4.w(TRUE), h2.y, h2 + 10: maxh h5.x(TRUE), h2.y, h2.w + 11: texpkb h0.w(TRUE), v5, #0 + 13: addh h3.w(TRUE), -h0, h0.x + 14: addh h0.x(TRUE), h0.w, h0 + 15: addh h0.z(TRUE), -h2.w, h0.x + 16: addh h0.x(TRUE), h2.w, h3.w + 17: minh h5.y(TRUE), h0.w, h1.w + 18: nrmh h2.xz(TRUE), h0_n + 19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z| + 20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w + 21: movr r1.zw(TRUE), v4.xxxy + 22: maxh h2.w(TRUE), h0, h1 + 23: fenct TRUE + 24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz + 26: texpkb h0(TRUE), r0, #0 + 28: maxh h5.x(TRUE), h2.w, h5 + 29: minh h5.w(TRUE), h5.y, h4 + 30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz + 32: texpkb h2(TRUE), r1, #0 + 34: addh_d2 h2(TRUE), h0, h2 + 35: texpkb h1(TRUE), v4, #0 + 37: maxh h5.y(TRUE), h5.x, h1.w + 38: minh h4.w(TRUE), h1, h5 + 39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 41: texpkb h0(TRUE), r0, #0 + 43: addh_m8 h5.z(TRUE), h5.y, -h4.w + 44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz + 46: texpkb h3(TRUE), r2, #0 + 48: addh_d2 h0(TRUE), h0, h3 + 49: addh_d2 h3(TRUE), h0, h2 + 
50: movh h0(TRUE), h3 + 51: slth h3.x(TRUE), h3.w, h5.w + 52: sgth h3.w(TRUE), h3, h5.x + 53: addx.c0 rc(TRUE), h3.x, h3 + 54: slth.c0 rc(TRUE), h5.z, h5 + 55: movh h0(c0.NE.w), h2 + 56: movh h0(c0.NE.x), h1 + +IPU0 ------ Simplified schedule: -------- +Pass |  Unit  |  uOp |  PC:  Op +-----+--------+------+------------------------- +   1 | SCT0/1 |  mov |   0:  TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; +     |    TEX |  txl |   0:  TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0; +     |   SCB0 |  add |   2:  ADDh h2.y, h0.-w--, const.-x--; +     |        |      | +   2 | SCT0/1 |  mov |   4:  TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0; +     |    TEX |  txl |   4:  TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0; +     |   SCB0 |  add |   6:  ADDh h0.x, h1.w---,-h2.y---; +     |        |      | +   3 | SCT0/1 |  mov |   7:  TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; +     |    TEX |  txl |   7:  TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0; +     |   SCB0 |  max |  10:  MAXh h5.x, h2.y---, h2.w---; +     |   SCB1 |  min |   9:  MINh h4.w, h2.---y, h2; +     |        |      | +   4 | SCT0/1 |  mov |  11:  TXLr h0.w, g[TEX1], const.xxxx, TEX0; +     |    TEX |  txl |  11:  TXLr h0.w, g[TEX1], const.xxxx, TEX0; +     |   SCB0 |  add |  14:  ADDh h0.x, h0.w---, h0; +     |   SCB1 |  add |  13:  ADDh h3.w,-h0, h0.---x; +     |        |      | +   5 |   SCT0 |  mad |  16:  ADDh h0.x, h2.w---, h3.w---; +     |   SCT1 |  mad |  15:  ADDh h0.z,-h2.--w-, h0.--x-; +     |   SCB0 |  min |  17:  MINh h5.y, h0.-w--, h1.-w--; +     |        |      | +   6 |   SCT1 |  mov |  18:  NRMh h2.xz, h0; +     |    SRB |  nrm |  18:  NRMh h2.xz, h0; +     |   SCB1 |  min |  19:  MINh*8 h2.w, |h2.---x|, |h2.---z|; +     |        |      | +   7 |   SCT0 |  div |  20:  DIVx h4.xy, h2.xz--, h2.ww--; +     |   SCT1 |  mov |  21:  MOVr r1.zw, g[TEX0].--xy; +     |   SCB1 |  max |  22:  MAXh h2.w, h0, h1; +     |        |      | +   8 |   SCT0 |  mad |  24:  MADr r0.xy,-h2.xz--, const.zw--, r1.zw--; +     |   
SCT1 |  mov |  26:  TXLr h0, r0, const.xxxx, TEX0; +     |    TEX |  txl |  26:  TXLr h0, r0, const.xxxx, TEX0; +     |   SCB0 |  max |  28:  MAXh h5.x, h2.w---, h5; +     |   SCB1 |  min |  29:  MINh h5.w, h5.---y, h4; +     |        |      | +   9 |   SCT0 |  mad |  30:  MADr r1.xy, h2.xz--, const.zw--, r1.zw--; +     |   SCT1 |  mov |  32:  TXLr h2, r1, const.xxxx, TEX0; +     |    TEX |  txl |  32:  TXLr h2, r1, const.xxxx, TEX0; +     | SCB0/1 |  add |  34:  ADDh/2 h2, h0, h2; +     |        |      | +  10 | SCT0/1 |  mov |  35:  TXLr h1, g[TEX0], const.xxxx, TEX0; +     |    TEX |  txl |  35:  TXLr h1, g[TEX0], const.xxxx, TEX0; +     |   SCB0 |  max |  37:  MAXh h5.y, h5.-x--, h1.-w--; +     |   SCB1 |  min |  38:  MINh h4.w, h1, h5; +     |        |      | +  11 |   SCT0 |  mad |  39:  MADr r0.xy,-h4, const.xy--, r1.zw--; +     |   SCT1 |  mov |  41:  TXLr h0, r0, const.zzzz, TEX0; +     |    TEX |  txl |  41:  TXLr h0, r0, const.zzzz, TEX0; +     |   SCB0 |  mad |  44:  MADr r2.xy, h4, const.xy--, r1.zw--; +     |   SCB1 |  add |  43:  ADDh*8 h5.z, h5.--y-,-h4.--w-; +     |        |      | +  12 | SCT0/1 |  mov |  46:  TXLr h3, r2, const.xxxx, TEX0; +     |    TEX |  txl |  46:  TXLr h3, r2, const.xxxx, TEX0; +     | SCB0/1 |  add |  48:  ADDh/2 h0, h0, h3; +     |        |      | +  13 | SCT0/1 |  mad |  49:  ADDh/2 h3, h0, h2; +     | SCB0/1 |  mul |  50:  MOVh h0, h3; +     |        |      | +  14 |   SCT0 |  set |  51:  SLTh h3.x, h3.w---, h5.w---; +     |   SCT1 |  set |  52:  SGTh h3.w, h3, h5.---x; +     |   SCB0 |  set |  54:  SLThc0 rc, h5.z---, h5; +     |   SCB1 |  add |  53:  ADDxc0_s rc, h3.---x, h3; +     |        |      | +  15 | SCT0/1 |  mul |  55:  MOVh h0(NE0.wwww), h2; +     | SCB0/1 |  mul |  56:  MOVh h0(NE0.xxxx), h1; +  +Pass   SCT  TEX  SCB +  1:   0% 100%  25% +  2:   0% 100%  25% +  3:   0% 100%  50% +  4:   0% 100%  50% +  5:  50%   0%  25% +  6:   0%   0%  25% +  7: 100%   0%  25% +  8:   0% 100%  50% +  9:   0% 100% 100% + 10: 
  0% 100%  50% + 11:   0% 100%  75% + 12:   0% 100% 100% + 13: 100%   0% 100% + 14:  50%   0%  50% + 15: 100%   0% 100% + +MEAN:  26%  60%  56% + +Pass   SCT0  SCT1   TEX  SCB0  SCB1 +  1:    0%    0%  100%  100%    0% +  2:    0%    0%  100%  100%    0% +  3:    0%    0%  100%  100%  100% +  4:    0%    0%  100%  100%  100% +  5:  100%  100%    0%  100%    0% +  6:    0%    0%    0%    0%  100% +  7:  100%  100%    0%    0%  100% +  8:    0%    0%  100%  100%  100% +  9:    0%    0%  100%  100%  100% + 10:    0%    0%  100%  100%  100% + 11:    0%    0%  100%  100%  100% + 12:    0%    0%  100%  100%  100% + 13:  100%  100%    0%  100%  100% + 14:  100%  100%    0%  100%  100% + 15:  100%  100%    0%  100%  100% + +MEAN:   33%   33%   60%   86%   80% +Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5 +Results 15 cycles, 3 r regs, 800,000,000 pixels/s +============================================================================*/ +#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1) +/*--------------------------------------------------------------------------*/ +#pragma regcount 7 +#pragma disablepc all +#pragma option O2 +#pragma option OutColorPrec=fp16 +#pragma texformat default RGBA8 +/*==========================================================================*/ +half4 FxaaPixelShader( +    // See FXAA Quality FxaaPixelShader() source for docs on Inputs! 
+    FxaaFloat2 pos, +    FxaaFloat4 fxaaConsolePosPos, +    FxaaTex tex, +    FxaaTex fxaaConsole360TexExpBiasNegOne, +    FxaaTex fxaaConsole360TexExpBiasNegTwo, +    FxaaFloat2 fxaaQualityRcpFrame, +    FxaaFloat4 fxaaConsoleRcpFrameOpt, +    FxaaFloat4 fxaaConsoleRcpFrameOpt2, +    FxaaFloat4 fxaaConsole360RcpFrameOpt2, +    FxaaFloat fxaaQualitySubpix, +    FxaaFloat fxaaQualityEdgeThreshold, +    FxaaFloat fxaaQualityEdgeThresholdMin, +    FxaaFloat fxaaConsoleEdgeSharpness, +    FxaaFloat fxaaConsoleEdgeThreshold, +    FxaaFloat fxaaConsoleEdgeThresholdMin, +    FxaaFloat4 fxaaConsole360ConstDir +) { +/*--------------------------------------------------------------------------*/ +// (1) +    half4 rgbyNe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zy, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        half lumaNe = rgbyNe.w + half(1.0/512.0); +    #else +        half lumaNe = rgbyNe.y + half(1.0/512.0); +    #endif +/*--------------------------------------------------------------------------*/ +// (2) +    half4 lumaSw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xw, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        half lumaSwNegNe = lumaSw.w - lumaNe; +    #else +        half lumaSwNegNe = lumaSw.y - lumaNe; +    #endif +/*--------------------------------------------------------------------------*/ +// (3) +    half4 lumaNw = h4tex2Dlod(tex, half4(fxaaConsolePosPos.xy, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        half lumaMaxNwSw = max(lumaNw.w, lumaSw.w); +        half lumaMinNwSw = min(lumaNw.w, lumaSw.w); +    #else +        half lumaMaxNwSw = max(lumaNw.y, lumaSw.y); +        half lumaMinNwSw = min(lumaNw.y, lumaSw.y); +    #endif +/*--------------------------------------------------------------------------*/ +// (4) +    half4 lumaSe = h4tex2Dlod(tex, half4(fxaaConsolePosPos.zw, 0, 0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        half dirZ =  lumaNw.w + lumaSwNegNe; +        half dirX = -lumaNw.w + lumaSwNegNe; +    #else +        half dirZ =  
lumaNw.y + lumaSwNegNe; +        half dirX = -lumaNw.y + lumaSwNegNe; +    #endif +/*--------------------------------------------------------------------------*/ +// (5) +    half3 dir; +    dir.y = 0.0; +    #if (FXAA_GREEN_AS_LUMA == 0) +        dir.x =  lumaSe.w + dirX; +        dir.z = -lumaSe.w + dirZ; +        half lumaMinNeSe = min(lumaNe, lumaSe.w); +    #else +        dir.x =  lumaSe.y + dirX; +        dir.z = -lumaSe.y + dirZ; +        half lumaMinNeSe = min(lumaNe, lumaSe.y); +    #endif +/*--------------------------------------------------------------------------*/ +// (6) +    half4 dir1_pos; +    dir1_pos.xy = normalize(dir).xz; +    half dirAbsMinTimes8 = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__PS3_EDGE_SHARPNESS); +/*--------------------------------------------------------------------------*/ +// (7) +    half4 dir2_pos; +    dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimes8, half(-2.0), half(2.0)); +    dir1_pos.zw = pos.xy; +    dir2_pos.zw = pos.xy; +    #if (FXAA_GREEN_AS_LUMA == 0) +        half lumaMaxNeSe = max(lumaNe, lumaSe.w); +    #else +        half lumaMaxNeSe = max(lumaNe, lumaSe.y); +    #endif +/*--------------------------------------------------------------------------*/ +// (8) +    half4 temp1N; +    temp1N.xy = dir1_pos.zw - dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; +    temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0)); +    half lumaMax = max(lumaMaxNwSw, lumaMaxNeSe); +    half lumaMin = min(lumaMinNwSw, lumaMinNeSe); +/*--------------------------------------------------------------------------*/ +// (9) +    half4 rgby1; +    rgby1.xy = dir1_pos.zw + dir1_pos.xy * fxaaConsoleRcpFrameOpt.zw; +    rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0)); +    rgby1 = (temp1N + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (10) +    half4 rgbyM = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0)); +    #if (FXAA_GREEN_AS_LUMA == 0) +        half lumaMaxM = max(lumaMax, 
rgbyM.w); +        half lumaMinM = min(lumaMin, rgbyM.w); +    #else +        half lumaMaxM = max(lumaMax, rgbyM.y); +        half lumaMinM = min(lumaMin, rgbyM.y); +    #endif +/*--------------------------------------------------------------------------*/ +// (11) +    half4 temp2N; +    temp2N.xy = dir2_pos.zw - dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; +    temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0)); +    half4 rgby2; +    rgby2.xy = dir2_pos.zw + dir2_pos.xy * fxaaConsoleRcpFrameOpt2.zw; +    half lumaRangeM = (lumaMaxM - lumaMinM) / FXAA_CONSOLE__PS3_EDGE_THRESHOLD; +/*--------------------------------------------------------------------------*/ +// (12) +    rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0)); +    rgby2 = (temp2N + rgby2) * 0.5; +/*--------------------------------------------------------------------------*/ +// (13) +    rgby2 = (rgby2 + rgby1) * 0.5; +/*--------------------------------------------------------------------------*/ +// (14) +    #if (FXAA_GREEN_AS_LUMA == 0) +        bool twoTapLt = rgby2.w < lumaMin; +        bool twoTapGt = rgby2.w > lumaMax; +    #else +        bool twoTapLt = rgby2.y < lumaMin; +        bool twoTapGt = rgby2.y > lumaMax; +    #endif +    bool earlyExit = lumaRangeM < lumaMax; +    bool twoTap = twoTapLt || twoTapGt; +/*--------------------------------------------------------------------------*/ +// (15) +    if(twoTap) rgby2 = rgby1; +    if(earlyExit) rgby2 = rgbyM; +/*--------------------------------------------------------------------------*/ +    return rgby2; } +/*==========================================================================*/ +#endif -uniform sampler2DRect diffuseRect; -uniform sampler2DRect edgeMap; +uniform sampler2D diffuseMap;  uniform sampler2DRect depthMap;  uniform sampler2DRect normalMap; -uniform sampler2D bloomMap; + +uniform vec2 tc_scale; +uniform vec2 rcp_screen_res; +uniform vec4 rcp_frame_opt; +uniform vec4 rcp_frame_opt2; +uniform vec2 screen_res;  uniform float 
depth_cutoff;  uniform float norm_cutoff; @@ -41,9 +2095,10 @@ uniform float tan_pixel_angle;  uniform float magnification;  uniform mat4 inv_proj; -uniform vec2 screen_res;  varying vec2 vary_fragcoord; +varying vec2 vary_tc; +  float getDepth(vec2 pos_screen)  { @@ -76,8 +2131,8 @@ void dofSampleNear(inout vec4 diff, inout float w, float cur_sc, vec2 tc)  	float sc = calc_cof(d);  	float wg = 0.25; -		 -	vec4 s = texture2DRect(diffuseRect, tc); +	 +	vec4 s = texture2D(diffuseMap, tc*tc_scale/screen_res);  	// de-weight dull areas to make highlights 'pop'  	wg += s.r+s.g+s.b; @@ -97,7 +2152,7 @@ void dofSample(inout vec4 diff, inout float w, float min_sc, float cur_depth, ve  	{  		float wg = 0.25; -		vec4 s = texture2DRect(diffuseRect, tc); +		vec4 s = texture2D(diffuseMap, tc*tc_scale/screen_res);  		// de-weight dull areas to make highlights 'pop'  		wg += s.r+s.g+s.b; @@ -107,7 +2162,6 @@ void dofSample(inout vec4 diff, inout float w, float min_sc, float cur_depth, ve  	}  } -  void main()   {  	vec3 norm = texture2DRect(normalMap, vary_fragcoord.xy).xyz; @@ -117,7 +2171,7 @@ void main()  	float depth = getDepth(tc); -	vec4 diff = texture2DRect(diffuseRect, vary_fragcoord.xy); +	vec4 diff = texture2D(diffuseMap, vary_fragcoord.xy*tc_scale/screen_res);  	{   		float w = 1.0; @@ -131,6 +2185,7 @@ void main()  		// sample quite uniformly spaced points within a circle, for a circular 'bokeh'		  		//if (depth < focal_distance) +		if (sc > 0.5)  		{  			while (sc > 0.5)  			{ @@ -146,10 +2201,30 @@ void main()  				sc -= 1.0;  			}  		} +		else +		{ +				diff =			FxaaPixelShader(vary_tc,			//pos +										vec4(vary_fragcoord.xy, 0, 0), //fxaaConsolePosPos +										diffuseMap,					//tex +										diffuseMap,					 +										diffuseMap, +										rcp_screen_res,				//fxaaQualityRcpFrame +										vec4(0,0,0,0),				//fxaaConsoleRcpFrameOpt +										rcp_frame_opt,				//fxaaConsoleRcpFrameOpt2 +										rcp_frame_opt2,				//fxaaConsole360RcpFrameOpt2 +										
0.75,						//fxaaQualitySubpix +										0.166,						//fxaaQualityEdgeThreshold +										0.0833,						//fxaaQualityEdgeThresholdMin +										8.0,						//fxaaConsoleEdgeSharpness +										0.125,						//fxaaConsoleEdgeThreshold +										0.05,						//fxaaConsoleEdgeThresholdMin +										vec4(0,0,0,0));				//fxaaConsole360ConstDir + + +		}  		diff /= w;  	} -	vec4 bloom = texture2D(bloomMap, vary_fragcoord.xy/screen_res); -	gl_FragColor = diff + bloom; +	gl_FragColor = diff;  } diff --git a/indra/newview/app_settings/shaders/class1/interface/debugF.glsl b/indra/newview/app_settings/shaders/class1/interface/debugF.glsl new file mode 100644 index 0000000000..d43bf3fb50 --- /dev/null +++ b/indra/newview/app_settings/shaders/class1/interface/debugF.glsl @@ -0,0 +1,31 @@ +/**  + * @file debugF.glsl + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ +  +uniform vec4 color; + +void main()  +{ +	gl_FragColor = color; +} diff --git a/indra/newview/app_settings/shaders/class1/interface/debugV.glsl b/indra/newview/app_settings/shaders/class1/interface/debugV.glsl new file mode 100644 index 0000000000..2f64fdb7bc --- /dev/null +++ b/indra/newview/app_settings/shaders/class1/interface/debugV.glsl @@ -0,0 +1,32 @@ +/**  + * @file debugV.glsl + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +attribute vec3 position; + +void main() +{ +	gl_Position = gl_ModelViewProjectionMatrix * vec4(position.xyz, 1.0); +} + diff --git a/indra/newview/app_settings/shaders/class1/interface/splattexturerectF.glsl b/indra/newview/app_settings/shaders/class1/interface/splattexturerectF.glsl new file mode 100644 index 0000000000..c263f4dc6a --- /dev/null +++ b/indra/newview/app_settings/shaders/class1/interface/splattexturerectF.glsl @@ -0,0 +1,33 @@ +/**  + * @file splattexturerectF.glsl + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ +  +#extension GL_ARB_texture_rectangle : enable + +uniform sampler2DRect screenMap; + +void main()  +{ +	gl_FragColor = 	texture2DRect(screenMap, gl_TexCoord[0].xy) * gl_Color; +} diff --git a/indra/newview/app_settings/shaders/class1/interface/splattexturerectV.glsl b/indra/newview/app_settings/shaders/class1/interface/splattexturerectV.glsl new file mode 100644 index 0000000000..085970f549 --- /dev/null +++ b/indra/newview/app_settings/shaders/class1/interface/splattexturerectV.glsl @@ -0,0 +1,36 @@ +/**  + * @file splattexturerectV.glsl + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + *  + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + *  + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ *  + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + *  + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ +  +attribute vec3 position; +attribute vec2 texcoord0; +attribute vec4 diffuse_color; + +void main() +{ +	gl_Position = gl_ModelViewProjectionMatrix * vec4(position.xyz, 1.0); +	gl_TexCoord[0] = vec4(texcoord0,0,1); +	gl_FrontColor = diffuse_color; +} + diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index b1c7b7f159..d7d5e5f432 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -2876,10 +2876,9 @@ void renderNormals(LLDrawable* drawablep)  		{  			const LLVolumeFace& face = volume->getVolumeFace(i); -			gGL.begin(LLRender::LINES); -			  			for (S32 j = 0; j < face.mNumVertices; ++j)  			{ +				gGL.begin(LLRender::LINES);  				LLVector4a n,p;  				n.setMul(face.mNormals[j], scale); @@ -2898,9 +2897,8 @@ void renderNormals(LLDrawable* drawablep)  					gGL.vertex3fv(face.mPositions[j].getF32ptr());  					gGL.vertex3fv(p.getF32ptr());  				}	 +				gGL.end();  			} - -			gGL.end();  		}  		gGL.popMatrix(); diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index de9d853c7c..9fac986bf1 100644 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -65,9 +65,11 @@ LLVector4			gShinyOrigin;  LLGLSLShader	gOcclusionProgram;  LLGLSLShader	gCustomAlphaProgram;  LLGLSLShader	gGlowCombineProgram; +LLGLSLShader	gSplatTextureRectProgram;  LLGLSLShader	gGlowCombineFXAAProgram;  LLGLSLShader	gTwoTextureAddProgram;  LLGLSLShader	gOneTextureNoColorProgram; +LLGLSLShader	gDebugProgram;  //object shaders  LLGLSLShader		gObjectSimpleProgram; @@ -216,6 +218,7 @@ LLViewerShaderMgr::LLViewerShaderMgr() :  	
mShaderList.push_back(&gOneTextureNoColorProgram);  	mShaderList.push_back(&gSolidColorProgram);  	mShaderList.push_back(&gOcclusionProgram); +	mShaderList.push_back(&gDebugProgram);  	mShaderList.push_back(&gObjectEmissiveProgram);  	mShaderList.push_back(&gObjectEmissiveWaterProgram);  	mShaderList.push_back(&gObjectFullbrightProgram); @@ -670,9 +673,11 @@ void LLViewerShaderMgr::setShaders()  void LLViewerShaderMgr::unloadShaders()  {  	gOcclusionProgram.unload(); +	gDebugProgram.unload();  	gUIProgram.unload();  	gCustomAlphaProgram.unload();  	gGlowCombineProgram.unload(); +	gSplatTextureRectProgram.unload();  	gGlowCombineFXAAProgram.unload();  	gTwoTextureAddProgram.unload();  	gOneTextureNoColorProgram.unload(); @@ -2715,6 +2720,22 @@ BOOL LLViewerShaderMgr::loadShadersInterface()  	if (success)  	{ +		gSplatTextureRectProgram.mName = "Splat Texture Rect Shader"; +		gSplatTextureRectProgram.mShaderFiles.clear(); +		gSplatTextureRectProgram.mShaderFiles.push_back(make_pair("interface/splattexturerectV.glsl", GL_VERTEX_SHADER_ARB)); +		gSplatTextureRectProgram.mShaderFiles.push_back(make_pair("interface/splattexturerectF.glsl", GL_FRAGMENT_SHADER_ARB)); +		gSplatTextureRectProgram.mShaderLevel = mVertexShaderLevel[SHADER_INTERFACE]; +		success = gSplatTextureRectProgram.createShader(NULL, NULL); +		if (success) +		{ +			gSplatTextureRectProgram.bind(); +			gSplatTextureRectProgram.uniform1i("screenMap", 0); +			gSplatTextureRectProgram.unbind(); +		} +	} + +	if (success) +	{  		gGlowCombineProgram.mName = "Glow Combine Shader";  		gGlowCombineProgram.mShaderFiles.clear();  		gGlowCombineProgram.mShaderFiles.push_back(make_pair("interface/glowcombineV.glsl", GL_VERTEX_SHADER_ARB)); @@ -2805,6 +2826,16 @@ BOOL LLViewerShaderMgr::loadShadersInterface()  		success = gOcclusionProgram.createShader(NULL, NULL);  	} +	if (success) +	{ +		gDebugProgram.mName = "Debug Shader"; +		gDebugProgram.mShaderFiles.clear(); +		
gDebugProgram.mShaderFiles.push_back(make_pair("interface/debugV.glsl", GL_VERTEX_SHADER_ARB)); +		gDebugProgram.mShaderFiles.push_back(make_pair("interface/debugF.glsl", GL_FRAGMENT_SHADER_ARB)); +		gDebugProgram.mShaderLevel = mVertexShaderLevel[SHADER_INTERFACE]; +		success = gDebugProgram.createShader(NULL, NULL); +	} +  	if( !success )  	{  		mVertexShaderLevel[SHADER_INTERFACE] = 0; diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h index c63260fb2e..1c9d7f8453 100644 --- a/indra/newview/llviewershadermgr.h +++ b/indra/newview/llviewershadermgr.h @@ -273,7 +273,9 @@ extern LLVector4			gShinyOrigin;  extern LLGLSLShader			gOcclusionProgram;  extern LLGLSLShader			gCustomAlphaProgram;  extern LLGLSLShader			gGlowCombineProgram; +extern LLGLSLShader			gSplatTextureRectProgram;  extern LLGLSLShader			gGlowCombineFXAAProgram; +extern LLGLSLShader		gDebugProgram;  //output tex0[tc0] + tex1[tc1]  extern LLGLSLShader			gTwoTextureAddProgram; diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 45268d203d..27672a05cb 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -4126,6 +4126,11 @@ void LLPipeline::renderPhysicsDisplay()  	gGL.setColorMask(true, false); +	if (LLGLSLShader::sNoFixedFunction) +	{ +		gDebugProgram.bind(); +	} +  	for (LLWorld::region_list_t::const_iterator iter = LLWorld::getInstance()->getRegionList().begin();   			iter != LLWorld::getInstance()->getRegionList().end(); ++iter)  	{ @@ -4155,8 +4160,13 @@ void LLPipeline::renderPhysicsDisplay()  		}  	} -  	gGL.flush(); + +	if (LLGLSLShader::sNoFixedFunction) +	{ +		gDebugProgram.unbind(); +	} +  	mPhysicsDisplay.flush();  } @@ -6137,8 +6147,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield)  {  	LLMemType mt_ru(LLMemType::MTYPE_PIPELINE_RENDER_BLOOM);  	if (!(gPipeline.canUseVertexShaders() && -		sRenderGlow) || -		(!sRenderDeferred && 
hasRenderDebugMask(LLPipeline::RENDER_DEBUG_PHYSICS_SHAPES))) +		sRenderGlow))  	{  		return;  	} @@ -6569,19 +6578,13 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield)  	} -	if (LLRenderTarget::sUseFBO) -	{ //copy depth buffer from mScreen to framebuffer -		LLRenderTarget::copyContentsToFramebuffer(mScreen, 0, 0, mScreen.getWidth(), mScreen.getHeight(),  -			0, 0, mScreen.getWidth(), mScreen.getHeight(), GL_DEPTH_BUFFER_BIT, GL_NEAREST); -	} -	  	gGL.setSceneBlendType(LLRender::BT_ALPHA);  	if (hasRenderDebugMask(LLPipeline::RENDER_DEBUG_PHYSICS_SHAPES))  	{  		if (LLGLSLShader::sNoFixedFunction)  		{ -			gUIProgram.bind(); +			gSplatTextureRectProgram.bind();  		}  		gGL.setColorMask(true, false); @@ -6595,7 +6598,7 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield)  		gGL.getTexUnit(0)->bind(&mPhysicsDisplay); -		gGL.begin(LLRender::TRIANGLE_STRIP); +		gGL.begin(LLRender::TRIANGLES);  		gGL.texCoord2f(tc1.mV[0], tc1.mV[1]);  		gGL.vertex2f(-1,-1); @@ -6610,10 +6613,17 @@ void LLPipeline::renderBloom(BOOL for_snapshot, F32 zoom_factor, int subfield)  		if (LLGLSLShader::sNoFixedFunction)  		{ -			gUIProgram.unbind(); +			gSplatTextureRectProgram.unbind();  		} +	} +	 +	if (LLRenderTarget::sUseFBO) +	{ //copy depth buffer from mScreen to framebuffer +		LLRenderTarget::copyContentsToFramebuffer(mScreen, 0, 0, mScreen.getWidth(), mScreen.getHeight(),  +			0, 0, mScreen.getWidth(), mScreen.getHeight(), GL_DEPTH_BUFFER_BIT, GL_NEAREST);  	} +	  	glMatrixMode(GL_PROJECTION);  	glPopMatrix(); | 
