SL-18869 Optimizations -- Use _mm_prefetch to cut down on cache misses when iterating over render batches.

author: Dave Parks <davep@lindenlab.com> 2023-01-09 15:18:57 -0600
committer: Dave Parks <davep@lindenlab.com> 2023-01-09 15:18:57 -0600
commit: a710bf9067bd4c4217b9febc0ad277a1636ec882 (patch)
tree: df6b4105f0e7d518f8fecfe3ab654f17abd1ac7c /indra/newview
parent: b3fc82ff1da0c869f0b1dd841647a120a1ae56af (diff)
10 files changed, 89 insertions, 112 deletions
diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp
index dd6b914783..2abbe2f2f8 100644
--- a/indra/newview/lldrawpool.cpp
+++ b/indra/newview/lldrawpool.cpp
@@ -449,10 +449,14 @@ void teardown_texture_matrix(LLDrawInfo& params)
 void LLRenderPass::pushGLTFBatches(U32 type, U32 mask)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("pushGLTFBatch");
         LLDrawInfo& params = **i;
+        LLCullResult::increment_iterator(i, end);
+
         auto& mat = params.mGLTFMaterial;
 
         mat->bind();
@@ -476,10 +480,14 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type, U32 mask)
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
     mask |= LLVertexBuffer::MAP_WEIGHT4;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("pushRiggedGLTFBatch");
         LLDrawInfo& params = **i;
+        LLCullResult::increment_iterator(i, end);
+
         auto& mat = params.mGLTFMaterial;
 
         mat->bind();
@@ -507,13 +515,14 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type, U32 mask)
 void LLRenderPass::pushBatches(U32 type, U32 mask, BOOL texture, BOOL batch_textures)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-	for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)	
-	{
-		LLDrawInfo* pparams = *i;
-		if (pparams) 
-		{
-			pushBatch(*pparams, mask, texture, batch_textures);
-		}
+    auto* begin = gPipeline.beginRenderMap(type); // calls pushBatch on every entry of the render map for `type`; bounds are read once up front
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; ) // no ++i here: increment_iterator advances i inside the body
+    {
+        LLDrawInfo* pparams = *i; // take the current entry before the iterator moves on
+        LLCullResult::increment_iterator(i, end); // advances i and issues prefetch hints for the entries that follow
+        // NOTE(review): pparams is dereferenced unconditionally below; assumes the render map never holds null entries -- confirm.
+		pushBatch(*pparams, mask, texture, batch_textures);
 	}
 }
 
@@ -523,34 +532,35 @@ void LLRenderPass::pushRiggedBatches(U32 type, U32 mask, BOOL texture, BOOL batc
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
     mask |= LLVertexBuffer::MAP_WEIGHT4;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LLDrawInfo* pparams = *i;
-        if (pparams)
-        {
-            if (pparams->mAvatar.notNull() && (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash))
-            {
-                uploadMatrixPalette(*pparams);
-                lastAvatar = pparams->mAvatar;
-                lastMeshId = pparams->mSkinInfo->mHash;
-            }
+        LLCullResult::increment_iterator(i, end);
 
-            pushBatch(*pparams, mask, texture, batch_textures);
+        if (pparams->mAvatar.notNull() && (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash))
+        {
+            uploadMatrixPalette(*pparams);
+            lastAvatar = pparams->mAvatar;
+            lastMeshId = pparams->mSkinInfo->mHash;
         }
+
+        pushBatch(*pparams, mask, texture, batch_textures);
     }
 }
 
 void LLRenderPass::pushMaskBatches(U32 type, U32 mask, BOOL texture, BOOL batch_textures)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-	for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)	
+    auto* begin = gPipeline.beginRenderMap(type); // per entry of the render map for `type`: set the shader's minimum alpha, then pushBatch
+    auto* end = gPipeline.endRenderMap(type); // bounds are read once, before the loop
+	for (LLCullResult::drawinfo_iterator i = begin; i != end; ) // no ++i here: increment_iterator advances i inside the body
 	{
-		LLDrawInfo* pparams = *i;
-		if (pparams) 
-		{
-			LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
-			pushBatch(*pparams, mask, texture, batch_textures);
-		}
+        LLDrawInfo* pparams = *i; // NOTE(review): dereferenced below without a null check; assumes no null entries -- confirm
+        LLCullResult::increment_iterator(i, end); // advances i and issues prefetch hints for the entries that follow
+		LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff); // NOTE(review): sCurBoundShaderPtr is not null-checked here -- confirm a shader is always bound
+		pushBatch(*pparams, mask, texture, batch_textures);
 	}
 }
 
@@ -559,29 +569,31 @@ void LLRenderPass::pushRiggedMaskBatches(U32 type, U32 mask, BOOL texture, BOOL
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LLDrawInfo* pparams = *i;
-        if (pparams)
-        {
-            if (LLGLSLShader::sCurBoundShaderPtr)
-            {
-                LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
-            }
-            else
-            {
-                gGL.flush();
-            }
 
-            if (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash)
-            {
-                uploadMatrixPalette(*pparams);
-                lastAvatar = pparams->mAvatar;
-                lastMeshId = pparams->mSkinInfo->mHash;
-            }
+        LLCullResult::increment_iterator(i, end);
 
-            pushBatch(*pparams, mask | LLVertexBuffer::MAP_WEIGHT4, texture, batch_textures);
+        if (LLGLSLShader::sCurBoundShaderPtr)
+        {
+            LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
+        }
+        else
+        {
+            gGL.flush();
         }
+
+        if (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash)
+        {
+            uploadMatrixPalette(*pparams);
+            lastAvatar = pparams->mAvatar;
+            lastMeshId = pparams->mSkinInfo->mHash;
+        }
+
+        pushBatch(*pparams, mask | LLVertexBuffer::MAP_WEIGHT4, texture, batch_textures);
     }
 }
 
diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp
index dc7e5f51df..3ce2ced255 100644
--- a/indra/newview/lldrawpoolalpha.cpp
+++ b/indra/newview/lldrawpoolalpha.cpp
@@ -634,7 +634,7 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, bool depth_only, bool rigged)
                     continue;
                 }
 
-                LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("ra - push batch")
+                LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("ra - push batch");
 
                 U32 have_mask = params.mVertexBuffer->getTypeMask() & mask;
 				if (have_mask != mask)
diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp
index 6af4e2274c..4379fdc603 100644
--- a/indra/newview/lldrawpoolbump.cpp
+++ b/indra/newview/lldrawpoolbump.cpp
@@ -724,10 +724,12 @@ void LLDrawPoolBump::renderDeferred(S32 pass)
         LLVOAvatar* avatar = nullptr;
         U64 skin = 0;
 
-        for (LLCullResult::drawinfo_iterator i = begin; i != end; ++i)
+        for (LLCullResult::drawinfo_iterator i = begin; i != end; )
         {
             LLDrawInfo& params = **i;
 
+            LLCullResult::increment_iterator(i, end);
+
             LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(params.mAlphaMaskCutoff);
             LLDrawPoolBump::bindBumpMap(params, bump_channel);
 
diff --git a/indra/newview/lldrawpoolmaterials.cpp b/indra/newview/lldrawpoolmaterials.cpp
index ec1ac6a88c..858fb871d3 100644
--- a/indra/newview/lldrawpoolmaterials.cpp
+++ b/indra/newview/lldrawpoolmaterials.cpp
@@ -203,11 +203,13 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass)
 
     LLVOAvatar* lastAvatar = nullptr;
 
-	for (LLCullResult::drawinfo_iterator i = begin; i != end; ++i)
+	for (LLCullResult::drawinfo_iterator i = begin; i != end; )
 	{
         LL_PROFILE_ZONE_NAMED_CATEGORY_MATERIAL("materials draw loop");
 		LLDrawInfo& params = **i;
 		
+        LLCullResult::increment_iterator(i, end);
+
         if (specular > -1 && params.mSpecColor != lastSpecular)
         {
             lastSpecular = params.mSpecColor;
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 113cd98164..7653913c2b 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -2872,42 +2872,6 @@ void renderBatchSize(LLDrawInfo* params)
     }
 }
 
-void renderShadowFrusta(LLDrawInfo* params)
-{
-	LLGLEnable blend(GL_BLEND);
-	gGL.setSceneBlendType(LLRender::BT_ADD);
-
-	LLVector4a center;
-	center.setAdd(params->mExtents[1], params->mExtents[0]);
-	center.mul(0.5f);
-	LLVector4a size;
-	size.setSub(params->mExtents[1],params->mExtents[0]);
-	size.mul(0.5f);
-
-	if (gPipeline.mShadowCamera[4].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(1,0,0);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-	if (gPipeline.mShadowCamera[5].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(0,1,0);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-	if (gPipeline.mShadowCamera[6].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(0,0,1);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-	if (gPipeline.mShadowCamera[7].AABBInFrustum(center, size))
-	{
-		gGL.diffuseColor3f(1,0,1);
-		pushVerts(params, LLVertexBuffer::MAP_VERTEX);
-	}
-
-	gGL.setSceneBlendType(LLRender::BT_ALPHA);
-}
-
 void renderTexelDensity(LLDrawable* drawable)
 {
 	if (LLViewerTexture::sDebugTexelsMode == LLViewerTexture::DEBUG_TEXELS_OFF
@@ -3492,10 +3456,6 @@ public:
 				{
 					renderBatchSize(draw_info);
 				}
-				if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_SHADOW_FRUSTA))
-				{
-					renderShadowFrusta(draw_info);
-				}
 			}
 		}
 	}
diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h
index 020a010405..b765bd1632 100644
--- a/indra/newview/llspatialpartition.h
+++ b/indra/newview/llspatialpartition.h
@@ -87,8 +87,6 @@ public:
     // return mSkinHash->mHash, or 0 if mSkinHash is null
     U64 getSkinHash();
 
-	LLVector4a mExtents[2];
-	
 	LLPointer<LLVertexBuffer> mVertexBuffer;
 	LLPointer<LLViewerTexture>     mTexture;
 	std::vector<LLPointer<LLViewerTexture> > mTextureList;
@@ -505,6 +503,23 @@ public:
 	typedef LLDrawInfo** drawinfo_iterator;
 	typedef LLDrawable** drawable_iterator;
 
+    // Helper for iterating over cull results: advances `i` by one element and, unless it has reached `end`, issues _mm_prefetch hints for the entries the next iterations will touch.
+    static inline void increment_iterator(LLCullResult::drawinfo_iterator& i, const LLCullResult::drawinfo_iterator& end)
+    {
+        ++i; // stands in for the ++i of the for header; the call sites in this change read *i before calling
+
+        if (i != end) // nothing to prefetch once the range is exhausted
+        {
+            _mm_prefetch((char*)(*i)->mVertexBuffer.get(), _MM_HINT_NTA); // reads (*i)->mVertexBuffer, so *i must be non-null here
+
+            auto* ni = i + 1; // look one entry further ahead
+            if (ni != end)
+            {
+                _mm_prefetch((char*)*ni, _MM_HINT_NTA); // hints the LLDrawInfo itself; *ni is only passed as an address
+            }
+        }
+    }
+
 	void clear();
 	
 	sg_iterator beginVisibleGroups();
diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp
index 36d66cccef..b4b2db5d51 100644
--- a/indra/newview/llvograss.cpp
+++ b/indra/newview/llvograss.cpp
@@ -731,9 +731,6 @@ void LLGrassPartition::getGeometry(LLSpatialGroup* group)
 				//facep->getTexture(),
 				buffer, object->isSelected(), fullbright);
 
-			const LLVector4a* exts = group->getObjectExtents();
-			info->mExtents[0] = exts[0];
-			info->mExtents[1] = exts[1];
 			info->mVSize = vsize;
 			draw_vec.push_back(info);
 			//for alpha sorting
diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp
index 0c09cbf2c2..a5c65d6ed4 100644
--- a/indra/newview/llvopartgroup.cpp
+++ b/indra/newview/llvopartgroup.cpp
@@ -963,9 +963,6 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group)
 			LLDrawInfo* info = new LLDrawInfo(start,end,count,offset,facep->getTexture(), 
 				buffer, object->isSelected(), fullbright);
 
-			const LLVector4a* exts = group->getObjectExtents();
-			info->mExtents[0] = exts[0];
-			info->mExtents[1] = exts[1];
 			info->mVSize = vsize;
 			info->mBlendFuncDst = bf_dst;
 			info->mBlendFuncSrc = bf_src;
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index afebf27d8b..53158ee66f 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -5305,8 +5305,6 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
             draw_vec[idx]->mTextureListVSize[index] = vsize;
 		}
 		draw_vec[idx]->validate();
-		update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[0]);
-		update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[1]);
 	}
 	else
 	{
@@ -5385,8 +5383,6 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
 		{ //for alpha sorting
 			facep->setDrawInfo(draw_info);
 		}
-		draw_info->mExtents[0] = facep->mExtents[0];
-		draw_info->mExtents[1] = facep->mExtents[1];
 
 		if (index < FACE_DO_NOT_BATCH_TEXTURES)
 		{ //initialize texture list for texture batching
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 10c271cddc..9851d4bc6a 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -2863,6 +2863,8 @@ void LLPipeline::clearRebuildDrawables()
 void LLPipeline::rebuildPriorityGroups()
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
+    LL_PROFILE_GPU_ZONE("rebuildPriorityGroups");
+
 	LLTimer update_timer;
 	assertInitialized();
 
@@ -7412,18 +7414,7 @@ void LLPipeline::renderShadowSimple(U32 type)
     {
         LLDrawInfo& params = **i;
 
-        ++i;
-
-        if (i != end)
-        {
-            _mm_prefetch((char*) (*i)->mVertexBuffer.get(), _MM_HINT_NTA);
-
-            auto* ni = i + 1;
-            if (ni != end)
-            {
-                _mm_prefetch((char*)*ni, _MM_HINT_NTA);
-            }
-        }
+        LLCullResult::increment_iterator(i, end);
 
         LLVertexBuffer* vb = params.mVertexBuffer;
         if (vb != last_vb)
@@ -7448,11 +7439,16 @@ void LLPipeline::renderAlphaObjects(U32 mask, bool texture, bool batch_texture,
     U32 type = LLRenderPass::PASS_ALPHA;
     LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
-    for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
+    auto* begin = gPipeline.beginRenderMap(type);
+    auto* end = gPipeline.endRenderMap(type);
+
+    for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
         LLDrawInfo* pparams = *i;
         if (pparams)
         {
+            LLCullResult::increment_iterator(i, end);
+
             if (rigged)
             {
                 if (pparams->mAvatar != nullptr)
author	Dave Parks <davep@lindenlab.com>	2023-01-09 15:18:57 -0600
committer	Dave Parks <davep@lindenlab.com>	2023-01-09 15:18:57 -0600
commit	a710bf9067bd4c4217b9febc0ad277a1636ec882 (patch)
tree	df6b4105f0e7d518f8fecfe3ab654f17abd1ac7c /indra/newview
parent	b3fc82ff1da0c869f0b1dd841647a120a1ae56af (diff)