diff options
author | Dave Parks <davep@lindenlab.com> | 2022-09-19 17:27:33 -0500 |
---|---|---|
committer | Dave Parks <davep@lindenlab.com> | 2022-09-19 17:27:33 -0500 |
commit | 04d3a29a699cd0a4c08ab096bfbab153e65c1fd1 (patch) | |
tree | e3c5211bdca91d237486f261c8c82d6bf2763e36 | |
parent | 4f7c86a145877bdaae3e74900076b6790b7abc0e (diff) |
SL-18190 Faster better stronger radiance/irradiance maps
7 files changed, 218 insertions, 77 deletions
diff --git a/indra/llrender/llglheaders.h b/indra/llrender/llglheaders.h index 0aacf3bf0e..b80680a3d2 100644 --- a/indra/llrender/llglheaders.h +++ b/indra/llrender/llglheaders.h @@ -1061,13 +1061,6 @@ extern void glGetBufferPointervARB (GLenum, GLenum, GLvoid* *); #endif #if defined(TRACY_ENABLE) && LL_PROFILER_ENABLE_TRACY_OPENGL - // Tracy uses the following: - // glGenQueries - // glGetQueryiv - // glGetQueryObjectiv - #define glGenQueries glGenQueriesARB - #define glGetQueryiv glGetQueryivARB - #define glGetQueryObjectiv glGetQueryObjectivARB #include <tracy/TracyOpenGL.hpp> #endif diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp index 015312e570..2179b441e5 100644 --- a/indra/llrender/llrendertarget.cpp +++ b/indra/llrender/llrendertarget.cpp @@ -471,6 +471,7 @@ void LLRenderTarget::release() void LLRenderTarget::bindTarget() { + LL_PROFILE_GPU_ZONE("bindTarget"); llassert(mFBO); if (mFBO) @@ -577,6 +578,7 @@ void LLRenderTarget::bindTexture(U32 index, S32 channel, LLTexUnit::eTextureFilt void LLRenderTarget::flush(bool fetch_depth) { + LL_PROFILE_GPU_ZONE("rt flush"); gGL.flush(); llassert(mFBO); if (!mFBO) diff --git a/indra/newview/app_settings/shaders/class1/deferred/deferredUtil.glsl b/indra/newview/app_settings/shaders/class1/deferred/deferredUtil.glsl index 950776aa47..49f85af53b 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/deferredUtil.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/deferredUtil.glsl @@ -366,7 +366,7 @@ vec3 pbrIbl(vec3 diffuseColor, vec2 brdf = BRDF(clamp(nv, 0, 1), 1.0-perceptualRough); vec3 diffuseLight = irradiance; vec3 specularLight = radiance; - + vec3 diffuse = diffuseLight * diffuseColor; vec3 specular = specularLight * (specularColor * brdf.x + brdf.y); diff --git a/indra/newview/app_settings/shaders/class1/interface/irradianceGenF.glsl b/indra/newview/app_settings/shaders/class1/interface/irradianceGenF.glsl index 4681fa1abd..63e2fce40f 100644 --- a/indra/newview/app_settings/shaders/class1/interface/irradianceGenF.glsl +++ b/indra/newview/app_settings/shaders/class1/interface/irradianceGenF.glsl @@ -38,63 +38,190 @@ uniform int sourceIdx; VARYING vec3 vary_dir; -// ============================================================================================================= -// Parts of this file are (c) 2018 Sascha Willems -// SNIPPED FROM https://github.com/SaschaWillems/Vulkan-glTF-PBR/blob/master/data/shaders/irradiancecube.frag -/* -MIT License -Copyright (c) 2018 Sascha Willems +// Code below is derived from the Khronos GLTF Sample viewer: +// https://github.com/KhronosGroup/glTF-Sample-Viewer/blob/master/source/shaders/ibl_filtering.frag -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +#define MATH_PI 3.1415926535897932384626433832795 -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ -// ============================================================================================================= +float u_roughness = 1.0; +int u_sampleCount = 16; +float u_lodBias = 2.0; +int u_width = 64; +// Hammersley Points on the Hemisphere +// CC BY 3.0 (Holger Dammertz) +// http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html +// with adapted interface +float radicalInverse_VdC(uint bits) +{ + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + return float(bits) * 2.3283064365386963e-10; // / 0x100000000 +} + +// hammersley2d describes a sequence of points in the 2d unit square [0,1)^2 +// that can be used for quasi Monte Carlo integration +vec2 hammersley2d(int i, int N) { + return vec2(float(i)/float(N), radicalInverse_VdC(uint(i))); +} + +// Hemisphere Sample + +// TBN generates a tangent bitangent normal coordinate frame from the normal +// (the normal must be normalized) +mat3 generateTBN(vec3 normal) +{ + vec3 bitangent = vec3(0.0, 1.0, 0.0); + + float NdotUp = dot(normal, vec3(0.0, 1.0, 0.0)); + float epsilon = 0.0000001; + /*if (1.0 - abs(NdotUp) <= epsilon) + { + // Sampling +Y or -Y, so we need a more robust bitangent. + if (NdotUp > 0.0) + { + bitangent = vec3(0.0, 0.0, 1.0); + } + else + { + bitangent = vec3(0.0, 0.0, -1.0); + } + }*/ + + vec3 tangent = normalize(cross(bitangent, normal)); + bitangent = cross(normal, tangent); + + return mat3(tangent, bitangent, normal); +} + +struct MicrofacetDistributionSample +{ + float pdf; + float cosTheta; + float sinTheta; + float phi; +}; + +MicrofacetDistributionSample Lambertian(vec2 xi, float roughness) +{ + MicrofacetDistributionSample lambertian; + + // Cosine weighted hemisphere sampling + // http://www.pbr-book.org/3ed-2018/Monte_Carlo_Integration/2D_Sampling_with_Multidimensional_Transformations.html#Cosine-WeightedHemisphereSampling + lambertian.cosTheta = sqrt(1.0 - xi.y); + lambertian.sinTheta = sqrt(xi.y); // equivalent to `sqrt(1.0 - cosTheta*cosTheta)`; + lambertian.phi = 2.0 * MATH_PI * xi.x; + + lambertian.pdf = lambertian.cosTheta / MATH_PI; // evaluation for solid angle, therefore drop the sinTheta + + return lambertian; +} + + +// getImportanceSample returns an importance sample direction with pdf in the .w component +vec4 getImportanceSample(int sampleIndex, vec3 N, float roughness) +{ + // generate a quasi monte carlo point in the unit square [0.1)^2 + vec2 xi = hammersley2d(sampleIndex, u_sampleCount); + + MicrofacetDistributionSample importanceSample; + + // generate the points on the hemisphere with a fitting mapping for + // the distribution (e.g. lambertian uses a cosine importance) + importanceSample = Lambertian(xi, roughness); + + // transform the hemisphere sample to the normal coordinate frame + // i.e. rotate the hemisphere to the normal direction + vec3 localSpaceDirection = normalize(vec3( + importanceSample.sinTheta * cos(importanceSample.phi), + importanceSample.sinTheta * sin(importanceSample.phi), + importanceSample.cosTheta + )); + mat3 TBN = generateTBN(N); + vec3 direction = TBN * localSpaceDirection; + + return vec4(direction, importanceSample.pdf); +} + +// Mipmap Filtered Samples (GPU Gems 3, 20.4) +// https://developer.nvidia.com/gpugems/gpugems3/part-iii-rendering/chapter-20-gpu-based-importance-sampling +// https://cgg.mff.cuni.cz/~jaroslav/papers/2007-sketch-fis/Final_sap_0073.pdf +float computeLod(float pdf) +{ + // // Solid angle of current sample -- bigger for less likely samples + // float omegaS = 1.0 / (float(u_sampleCount) * pdf); + // // Solid angle of texel + // // note: the factor of 4.0 * MATH_PI + // float omegaP = 4.0 * MATH_PI / (6.0 * float(u_width) * float(u_width)); + // // Mip level is determined by the ratio of our sample's solid angle to a texel's solid angle + // // note that 0.5 * log2 is equivalent to log4 + // float lod = 0.5 * log2(omegaS / omegaP); + + // babylon introduces a factor of K (=4) to the solid angle ratio + // this helps to avoid undersampling the environment map + // this does not appear in the original formulation by Jaroslav Krivanek and Mark Colbert + // log4(4) == 1 + // lod += 1.0; + + // We achieved good results by using the original formulation from Krivanek & Colbert adapted to cubemaps + // https://cgg.mff.cuni.cz/~jaroslav/papers/2007-sketch-fis/Final_sap_0073.pdf + float lod = 0.5 * log2( 6.0 * float(u_width) * float(u_width) / (float(u_sampleCount) * pdf)); + + + return lod; +} + +vec3 filterColor(vec3 N) +{ + //return textureLod(uCubeMap, N, 3.0).rgb; + vec3 color = vec3(0.f); + float weight = 0.0f; -#define PI 3.1415926535897932384626433832795 + for(int i = 0; i < u_sampleCount; ++i) + { + vec4 importanceSample = getImportanceSample(i, N, 1.0); + vec3 H = vec3(importanceSample.xyz); + float pdf = importanceSample.w; + + // mipmap filtered samples (GPU Gems 3, 20.4) + float lod = computeLod(pdf); + + // apply the bias to the lod + lod += u_lodBias; + + lod = clamp(lod, 0, 7); + // sample lambertian at a lower resolution to avoid fireflies + vec3 lambertian = textureLod(reflectionProbes, vec4(H, sourceIdx), lod).rgb; + + color += lambertian; + } + + if(weight != 0.0f) + { + color /= weight; + } + else + { + color /= float(u_sampleCount); + } + + return color.rgb ; +} + +// entry point void main() { - float deltaPhi = (2.0 * PI) / 11.25; - float deltaTheta = (0.5 * PI) / 4.0; - float mipLevel = 2; - - vec3 N = normalize(vary_dir); - vec3 up = vec3(0.0, 1.0, 0.0); - vec3 right = normalize(cross(up, N)); - up = normalize(cross(N, right)); - - const float TWO_PI = PI * 2.0; - const float HALF_PI = PI * 0.5; - - vec3 color = vec3(0.0); - uint sampleCount = 0u; - for (float phi = 0.0; phi < TWO_PI; phi += deltaPhi) { - for (float theta = 0.0; theta < HALF_PI; theta += deltaTheta) { - vec3 tempVec = cos(phi) * right + sin(phi) * up; - vec3 sampleVector = cos(theta) * N + sin(theta) * tempVec; - color += textureLod(reflectionProbes, vec4(sampleVector, sourceIdx), mipLevel).rgb * cos(theta) * sin(theta); - sampleCount++; - } - } - frag_color = vec4(PI * color / float(sampleCount), 1.0); + vec3 color = vec3(0); + + color = filterColor(vary_dir); + + frag_color = vec4(color,1.0); } -// ============================================================================================================= diff --git a/indra/newview/app_settings/shaders/class1/interface/radianceGenF.glsl b/indra/newview/app_settings/shaders/class1/interface/radianceGenF.glsl index 94fedce243..7c175eab5f 100644 --- a/indra/newview/app_settings/shaders/class1/interface/radianceGenF.glsl +++ b/indra/newview/app_settings/shaders/class1/interface/radianceGenF.glsl @@ -66,7 +66,7 @@ SOFTWARE. // ============================================================================================================= -uniform float roughness; +//uniform float roughness; uniform float mipLevel; @@ -123,14 +123,18 @@ float D_GGX(float dotNH, float roughness) return (alpha2)/(PI * denom*denom); } -vec3 prefilterEnvMap(vec3 R, float roughness) +vec3 prefilterEnvMap(vec3 R) { vec3 N = R; vec3 V = R; vec3 color = vec3(0.0); float totalWeight = 0.0; float envMapDim = 256.0; - int numSamples = 32/max(int(mipLevel), 1); + int numSamples = 8; + + float numMips = 7.0; + + float roughness = (mipLevel+1)/numMips; for(uint i = 0u; i < numSamples; i++) { vec2 Xi = hammersley2d(i, numSamples); @@ -150,8 +154,9 @@ vec3 prefilterEnvMap(vec3 R, float roughness) // Solid angle of 1 pixel across all cube faces float omegaP = 4.0 * PI / (6.0 * envMapDim * envMapDim); // Biased (+1.0) mip level for better result - //float mipLevel = roughness == 0.0 ? 0.0 : max(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f); - color += textureLod(reflectionProbes, vec4(L,sourceIdx), mipLevel).rgb * dotNL; + //float mip = roughness == 0.0 ? 0.0 : max(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f); + float mip = clamp(0.5 * log2(omegaS / omegaP) + 1.0, 0.0f, 7.f); + color += textureLod(reflectionProbes, vec4(L,sourceIdx), mip).rgb * dotNL; totalWeight += dotNL; } @@ -162,7 +167,7 @@ vec3 prefilterEnvMap(vec3 R, float roughness) void main() { vec3 N = normalize(vary_dir); - frag_color = vec4(prefilterEnvMap(N, roughness), 1.0); + frag_color = vec4(prefilterEnvMap(N), 1.0); } // ============================================================================================================= diff --git a/indra/newview/app_settings/shaders/class1/interface/reflectionmipF.glsl b/indra/newview/app_settings/shaders/class1/interface/reflectionmipF.glsl index ea687aab4f..e8452a9c14 100644 --- a/indra/newview/app_settings/shaders/class1/interface/reflectionmipF.glsl +++ b/indra/newview/app_settings/shaders/class1/interface/reflectionmipF.glsl @@ -39,6 +39,7 @@ VARYING vec2 vary_texcoord0; void main() { +#if 0 float w[9]; float c = 1.0/16.0; //corner weight @@ -72,4 +73,7 @@ void main() //color /= wsum; frag_color = vec4(color, 1.0); +#else + frag_color = vec4(texture2DRect(screenMap, vary_texcoord0.xy).rgb, 1.0); +#endif } diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp index 97277ee798..57ec51221e 100644 --- a/indra/newview/llreflectionmapmanager.cpp +++ b/indra/newview/llreflectionmapmanager.cpp @@ -410,8 +410,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) S32 mips = log2((F32)LL_REFLECTION_PROBE_RESOLUTION) + 0.5f; - //for (int i = 0; i < mMipChain.size(); ++i) - for (int i = 0; i < 1; ++i) + for (int i = 0; i < mMipChain.size(); ++i) { LL_PROFILE_GPU_ZONE("probe mip"); mMipChain[i].bindTarget(); @@ -447,10 +446,14 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) if (mip >= 0) { + LL_PROFILE_GPU_ZONE("probe mip copy"); mTexture->bind(0); //glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, probe->mCubeIndex * 6 + face, 0, 0, res, res); glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, targetIdx * 6 + face, 0, 0, res, res); - glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, probe->mCubeIndex * 6 + face, 0, 0, res, res); + if (i == 0) + { + glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, probe->mCubeIndex * 6 + face, 0, 0, res, res); + } mTexture->unbind(); } mMipChain[i].flush(); @@ -474,8 +477,12 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) static LLStaticHashedString sMipLevel("mipLevel"); + mMipChain[1].bindTarget(); + U32 res = mMipChain[1].getWidth(); + for (int i = 1; i < mMipChain.size(); ++i) { + LL_PROFILE_GPU_ZONE("probe radiance gen"); for (int cf = 0; cf < 6; ++cf) { // for each cube face LLCoordFrame frame; @@ -485,15 +492,11 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) frame.getOpenGLRotation(mat); gGL.loadMatrix(mat); - mMipChain[i].bindTarget(); static LLStaticHashedString sRoughness("roughness"); gRadianceGenProgram.uniform1f(sRoughness, (F32)i / (F32)(mMipChain.size() - 1)); gRadianceGenProgram.uniform1f(sMipLevel, llmax((F32)(i - 1), 0.f)); - if (i > 0) - { - gRadianceGenProgram.uniform1i(sSourceIdx, probe->mCubeIndex); - } + gGL.begin(gGL.QUADS); gGL.vertex3f(-1, -1, -1); gGL.vertex3f(1, -1, -1); @@ -501,12 +504,17 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) gGL.vertex3f(-1, 1, -1); gGL.end(); gGL.flush(); - - S32 res = mMipChain[i].getWidth(); + glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, i, 0, 0, probe->mCubeIndex * 6 + cf, 0, 0, res, res); - mMipChain[i].flush(); + } + + if (i != mMipChain.size() - 1) + { + res /= 2; + glViewport(0, 0, res, res); } } + gRadianceGenProgram.unbind(); //generate irradiance map @@ -514,7 +522,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) channel = gIrradianceGenProgram.enableTexture(LLShaderMgr::REFLECTION_PROBES, LLTexUnit::TT_CUBE_MAP_ARRAY); mTexture->bind(channel); - gIrradianceGenProgram.uniform1i(sSourceIdx, probe->mCubeIndex); + gIrradianceGenProgram.uniform1i(sSourceIdx, targetIdx); int start_mip = 0; // find the mip target to start with based on irradiance map resolution @@ -528,6 +536,8 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) for (int i = start_mip; i < mMipChain.size(); ++i) { + LL_PROFILE_GPU_ZONE("probe irradiance gen"); + glViewport(0, 0, mMipChain[i].getWidth(), mMipChain[i].getHeight()); for (int cf = 0; cf < 6; ++cf) { // for each cube face LLCoordFrame frame; @@ -537,8 +547,6 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) frame.getOpenGLRotation(mat); gGL.loadMatrix(mat); - mMipChain[i].bindTarget(); - gGL.begin(gGL.QUADS); gGL.vertex3f(-1, -1, -1); gGL.vertex3f(1, -1, -1); @@ -551,9 +559,11 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) mIrradianceMaps->bind(channel); glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, i - start_mip, 0, 0, probe->mCubeIndex * 6 + cf, 0, 0, res, res); mTexture->bind(channel); - mMipChain[i].flush(); } } + + mMipChain[1].flush(); + gIrradianceGenProgram.unbind(); } } |