SL-16239 Fix for slowdown on AMD GPUs (disable core profile and remove volatile members from LLVertexBuffer)

author: Dave Parks <davep@lindenlab.com> 2021-11-23 20:46:27 +0000
committer: Dave Parks <davep@lindenlab.com> 2021-11-23 20:46:27 +0000
commit: 724193e5b01e5c1ae879a6364f54601cdb22c2c1 (patch)
tree: ceeeeae88309e1a22b17adafd5f156db40791271 /indra/llrender
parent: 408ac8f18cfb8f1c29381b0285f9cae91f5b685a (diff)
2 files changed, 42 insertions, 34 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 5ea07ddcb1..46654cc5b9 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -157,12 +157,12 @@ LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType)
 	std::fill(mMissCount.begin(), mMissCount.end(), 0);
 }
 
-volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
+U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
 {
 	LL_PROFILE_ZONE_SCOPED
 	llassert(vbo_block_size(size) == size);
 	
-	volatile U8* ret = NULL;
+	U8* ret = NULL;
 
 	U32 i = vbo_block_index(size);
 
@@ -256,7 +256,7 @@ volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
 	return ret;
 }
 
-void LLVBOPool::release(U32 name, volatile U8* buffer, U32 size)
+void LLVBOPool::release(U32 name, U8* buffer, U32 size)
 {
 	llassert(vbo_block_size(size) == size);
 
@@ -749,7 +749,7 @@ void LLVertexBuffer::drawRangeFast(U32 mode, U32 start, U32 end, U32 count, U32
     mMappable = false;
     gGL.syncMatrices();
 
-    U16* idx = ((U16*)(U8*)mAlignedIndexOffset) + indices_offset;
+    U16* idx = ((U16*)getIndicesPointer()) + indices_offset;
 
     LL_PROFILER_GPU_ZONEC("gl.DrawRangeElements", 0xFFFF00)
         glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT,
@@ -1539,7 +1539,7 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, S32 index, S32 count)
 
 
 // Map for data access
-volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range)
+U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range)
 {
     LL_PROFILE_ZONE_SCOPED;
 	bindGLBuffer(true);
@@ -1601,7 +1601,7 @@ volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, boo
 			}
 			else
 			{
-				volatile U8* src = NULL;
+				U8* src = NULL;
 				waitFence();
 				if (gGLManager.mHasMapBufferRange)
 				{
@@ -1660,7 +1660,7 @@ volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, boo
 
 				llassert(src != NULL);
 
-				mMappedData = LL_NEXT_ALIGNED_ADDRESS<volatile U8>(src);
+				mMappedData = LL_NEXT_ALIGNED_ADDRESS<U8>(src);
 				mAlignedOffset = mMappedData - src;
 			
 				stop_glerror();
@@ -1716,7 +1716,7 @@ volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, boo
 }
 
 
-volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
+U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
 {
     LL_PROFILE_ZONE_SCOPED;
 	bindGLIndices(true);
@@ -1786,7 +1786,7 @@ volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range
 			}
 			else
 			{
-				volatile U8* src = NULL;
+				U8* src = NULL;
 				waitFence();
 				if (gGLManager.mHasMapBufferRange)
 				{
@@ -2068,7 +2068,7 @@ template <class T,S32 type> struct VertexBufferStrider
 	{
 		if (type == LLVertexBuffer::TYPE_INDEX)
 		{
-			volatile U8* ptr = vbo.mapIndexBuffer(index, count, map_range);
+			U8* ptr = vbo.mapIndexBuffer(index, count, map_range);
 
 			if (ptr == NULL)
 			{
@@ -2084,7 +2084,7 @@ template <class T,S32 type> struct VertexBufferStrider
 		{
 			S32 stride = LLVertexBuffer::sTypeSize[type];
 
-			volatile U8* ptr = vbo.mapVertexBuffer(type, index, count, map_range);
+			U8* ptr = vbo.mapVertexBuffer(type, index, count, map_range);
 
 			if (ptr == NULL)
 			{
@@ -2454,29 +2454,37 @@ void LLVertexBuffer::setBuffer(U32 data_mask)
 
 void LLVertexBuffer::setBufferFast(U32 data_mask)
 {
-    //set up pointers if the data mask is different ...
-    bool setup = (sLastMask != data_mask);
+    if (useVBOs())
+    {
+        //set up pointers if the data mask is different ...
+        bool setup = (sLastMask != data_mask);
 
-    
-    const bool bindBuffer = bindGLBufferFast();
-    const bool bindIndices = bindGLIndicesFast();
+        const bool bindBuffer = bindGLBufferFast();
+        const bool bindIndices = bindGLIndicesFast();
 
-    setup = setup || bindBuffer || bindIndices;
+        setup = setup || bindBuffer || bindIndices;
+        
+        setupClientArrays(data_mask);
 
-    setupClientArrays(data_mask);
-  
-    if (data_mask && setup)
+        if (data_mask && setup)
+        {
+            setupVertexBufferFast(data_mask);
+            sSetCount++;
+        }
+    }
+    else
     {
-        setupVertexBufferFast(data_mask);
-        sSetCount++;
+        //fallback to slow path when not using VBOs
+        setBuffer(data_mask);
     }
 }
 
+
 // virtual (default)
 void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
 {
 	stop_glerror();
-	volatile U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
+	U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
 
 	if (gDebugGL && ((data_mask & mTypeMask) != data_mask))
 	{
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index 1b400b3aad..baf8407fc6 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -64,10 +64,10 @@ public:
 	const U32 mType;
 
 	//size MUST be a power of 2
-	volatile U8* allocate(U32& name, U32 size, bool for_seed = false);
+	U8* allocate(U32& name, U32 size, bool for_seed = false);
 	
 	//size MUST be the size provided to allocate that returned the given name
-	void release(U32 name, volatile U8* buffer, U32 size);
+	void release(U32 name, U8* buffer, U32 size);
 	
 	//batch allocate buffers to be provided to the application on demand
 	void seedPool();
@@ -82,7 +82,7 @@ public:
 	{
 	public:
 		U32 mGLName;
-		volatile U8* mClientData;
+		U8* mClientData;
 	};
 
 	typedef std::list<Record> record_list_t;
@@ -234,8 +234,8 @@ public:
 	LLVertexBuffer(U32 typemask, S32 usage);
 	
 	// map for data access
-	volatile U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range);
-	volatile U8*		mapIndexBuffer(S32 index, S32 count, bool map_range);
+	U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range);
+	U8*		mapIndexBuffer(S32 index, S32 count, bool map_range);
 
 	void bindForFeedback(U32 channel, U32 type, U32 index, U32 count);
 
@@ -278,14 +278,14 @@ public:
 	S32 getNumVerts() const					{ return mNumVerts; }
 	S32 getNumIndices() const				{ return mNumIndices; }
 	
-	volatile U8* getIndicesPointer() const			{ return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; }
-	volatile U8* getVerticesPointer() const			{ return useVBOs() ? (U8*) mAlignedOffset : mMappedData; }
+	U8* getIndicesPointer() const			{ return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; }
+	U8* getVerticesPointer() const			{ return useVBOs() ? (U8*) mAlignedOffset : mMappedData; }
 	U32 getTypeMask() const					{ return mTypeMask; }
 	bool hasDataType(S32 type) const		{ return ((1 << type) & getTypeMask()); }
 	S32 getSize() const;
 	S32 getIndicesSize() const				{ return mIndicesSize; }
-	volatile U8* getMappedData() const				{ return mMappedData; }
-	volatile U8* getMappedIndices() const			{ return mMappedIndexData; }
+	U8* getMappedData() const				{ return mMappedData; }
+	U8* getMappedIndices() const			{ return mMappedIndexData; }
 	S32 getOffset(S32 type) const			{ return mOffsets[type]; }
 	S32 getUsage() const					{ return mUsage; }
 	bool isWriteable() const				{ return (mMappable || mUsage == GL_STREAM_DRAW_ARB) ? true : false; }
@@ -318,8 +318,8 @@ protected:
 	U32		mGLIndices;		// GL IBO handle
 	U32		mGLArray;		// GL VAO handle
 	
-	volatile U8* mMappedData;	// pointer to currently mapped data (NULL if unmapped)
-	volatile U8* mMappedIndexData;	// pointer to currently mapped indices (NULL if unmapped)
+	U8* mMappedData;	// pointer to currently mapped data (NULL if unmapped)
+	U8* mMappedIndexData;	// pointer to currently mapped indices (NULL if unmapped)
 
 	U32		mMappedDataUsingVBOs : 1;
 	U32		mMappedIndexDataUsingVBOs : 1;
author	Dave Parks <davep@lindenlab.com>	2021-11-23 20:46:27 +0000
committer	Dave Parks <davep@lindenlab.com>	2021-11-23 20:46:27 +0000
commit	724193e5b01e5c1ae879a6364f54601cdb22c2c1 (patch)
tree	ceeeeae88309e1a22b17adafd5f156db40791271 /indra/llrender
parent	408ac8f18cfb8f1c29381b0285f9cae91f5b685a (diff)