author     Dave Parks <davep@lindenlab.com>  2023-01-10 17:36:05 -0600
committer  Dave Parks <davep@lindenlab.com>  2023-01-10 17:36:05 -0600
commit     fdc0ea64f050ad09a84442f40396bb9e6497ce52 (patch)
tree       306e79ea83514488f28402ee8a65f2dc603b4f4f /indra/llrender/llvertexbuffer.cpp
parent     4abf39c968c31a9da943a53434388102b99d487f (diff)
SL-18869 Optimizations -- overhaul LLVertexBuffer, and shuffle shadow map rendering to a point in the frame where the main camera has already taken a stab at this frame's object updates before shadow map rendering gets at them.
Diffstat (limited to 'indra/llrender/llvertexbuffer.cpp')
-rw-r--r--  indra/llrender/llvertexbuffer.cpp  825
1 file changed, 423 insertions, 402 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 7b8f85acba..20261dcb8a 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -62,6 +62,14 @@ U32 wpo2(U32 i)
     return r;
 }
 
+struct CompareMappedRegion
+{
+    bool operator()(const LLVertexBuffer::MappedRegion& lhs, const LLVertexBuffer::MappedRegion& rhs)
+    {
+        return lhs.mStart < rhs.mStart;
+    }
+};
+
 const U32 LL_VBO_BLOCK_SIZE = 2048;
 const U32 LL_VBO_POOL_MAX_SEED_SIZE = 256*1024;
 
@@ -81,266 +89,217 @@ U32 vbo_block_index(U32 size)
 const U32 LL_VBO_POOL_SEED_COUNT = vbo_block_index(LL_VBO_POOL_MAX_SEED_SIZE) + 1;
 
+#define ENABLE_GL_WORK_QUEUE 0
+
+#if ENABLE_GL_WORK_QUEUE
+
+#define THREAD_COUNT 1
+
 //============================================================================
 
-//static
-LLVBOPool LLVertexBuffer::sStreamVBOPool(GL_STREAM_DRAW, GL_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sDynamicVBOPool(GL_DYNAMIC_DRAW, GL_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sDynamicCopyVBOPool(GL_DYNAMIC_COPY, GL_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sStreamIBOPool(GL_STREAM_DRAW, GL_ELEMENT_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sDynamicIBOPool(GL_DYNAMIC_DRAW, GL_ELEMENT_ARRAY_BUFFER);
+// High performance WorkQueue for usage in real-time rendering work
+class GLWorkQueue
+{
+public:
+    using Work = std::function<void()>;
 
-U32 LLVBOPool::sBytesPooled = 0;
-U32 LLVBOPool::sIndexBytesPooled = 0;
-U32 LLVBOPool::sNameIdx = 0;
-U32 LLVBOPool::sNamePool[1024];
+    GLWorkQueue();
 
-std::list<U32> LLVertexBuffer::sAvailableVAOName;
-U32 LLVertexBuffer::sCurVAOName = 1;
+    void post(const Work& value);
 
-U32 LLVertexBuffer::sAllocatedIndexBytes = 0;
-U32 LLVertexBuffer::sIndexCount = 0;
+    size_t size();
 
-U32 LLVertexBuffer::sBindCount = 0;
-U32 LLVertexBuffer::sSetCount = 0;
-S32 LLVertexBuffer::sCount = 0;
-S32 LLVertexBuffer::sGLCount = 0;
-S32 LLVertexBuffer::sMappedCount = 0;
-bool LLVertexBuffer::sDisableVBOMapping = false;
-bool LLVertexBuffer::sEnableVBOs = true;
-U32 LLVertexBuffer::sGLRenderBuffer = 0;
-U32 LLVertexBuffer::sGLRenderArray = 0;
-U32 LLVertexBuffer::sGLRenderIndices = 0;
-U32 LLVertexBuffer::sLastMask = 0;
-bool LLVertexBuffer::sVBOActive = false;
-bool LLVertexBuffer::sIBOActive = false;
-U32 LLVertexBuffer::sAllocatedBytes = 0;
-U32 LLVertexBuffer::sVertexCount = 0;
-bool LLVertexBuffer::sMapped = false;
-bool LLVertexBuffer::sUseStreamDraw = true;
-bool LLVertexBuffer::sUseVAO = false;
-bool LLVertexBuffer::sPreferStreamDraw = false;
+    bool done();
 
-U32 LLVBOPool::genBuffer()
-{
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
+    // Get the next element from the queue
+    Work pop();
 
-    if (sNameIdx == 0)
-    {
-        glGenBuffers(1024, sNamePool);
-        sNameIdx = 1024;
-    }
+    void runOne();
 
-    return sNamePool[--sNameIdx];
-}
+    bool runPending();
 
-void LLVBOPool::deleteBuffer(U32 name)
-{
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
-    if (gGLManager.mInited)
-    {
-        LLVertexBuffer::unbind();
+    void runUntilClose();
 
-        glBindBuffer(mType, name);
-        glBufferData(mType, 0, NULL, mUsage);
-        glBindBuffer(mType, 0);
+    void close();
 
-        glDeleteBuffers(1, &name);
-    }
-}
+    bool isClosed();
 
+    void syncGL();
 
-LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType)
-: mUsage(vboUsage), mType(vboType), mMissCountDirty(true)
-{
-    mFreeList.resize(LL_VBO_POOL_SEED_COUNT);
-    mMissCount.resize(LL_VBO_POOL_SEED_COUNT);
-    std::fill(mMissCount.begin(), mMissCount.end(), 0);
-}
+private:
+    std::mutex mMutex;
+    std::condition_variable mCondition;
+    std::queue<Work> mQueue;
+    bool mClosed = false;
+};
 
-U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
+GLWorkQueue::GLWorkQueue()
 {
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
-    llassert(vbo_block_size(size) == size);
-
-    U8* ret = NULL;
-    U32 i = vbo_block_index(size);
+}
 
-    if (mFreeList.size() <= i)
-    {
-        mFreeList.resize(i+1);
-    }
+void GLWorkQueue::syncGL()
+{
+    /*if (mSync)
+    {
+        std::lock_guard<std::mutex> lock(mMutex);
+        glWaitSync(mSync, 0, GL_TIMEOUT_IGNORED);
+        mSync = 0;
+    }*/
+}
 
-    if (mFreeList[i].empty() || for_seed)
-    {
-        //make a new buffer
-        name = genBuffer();
+size_t GLWorkQueue::size()
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    std::lock_guard<std::mutex> lock(mMutex);
+    return mQueue.size();
+}
 
-        glBindBuffer(mType, name);
+bool GLWorkQueue::done()
+{
+    return size() == 0 && isClosed();
+}
 
-        if (!for_seed && i < LL_VBO_POOL_SEED_COUNT)
-        { //record this miss
-            mMissCount[i]++;
-            mMissCountDirty = true; // signal to ::seedPool()
-        }
+void GLWorkQueue::post(const GLWorkQueue::Work& value)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    {
+        std::lock_guard<std::mutex> lock(mMutex);
+        mQueue.push(std::move(value));
+    }
 
-        if (mType == GL_ARRAY_BUFFER)
-        {
-            LLVertexBuffer::sAllocatedBytes += size;
-        }
-        else
-        {
-            LLVertexBuffer::sAllocatedIndexBytes += size;
-        }
+    mCondition.notify_one();
+}
 
-        if (LLVertexBuffer::sDisableVBOMapping || mUsage != GL_DYNAMIC_DRAW)
-        {
-            glBufferData(mType, size, 0, mUsage);
-            if (mUsage != GL_DYNAMIC_COPY)
-            { //data will be provided by application
-                ret = (U8*) ll_aligned_malloc<64>(size);
-                if (!ret)
-                {
-                    LL_ERRS()
-                        << "Failed to allocate " << size << " bytes for LLVBOPool buffer " << name << "." << LL_NEWLINE
-                        << "Free list size: "
-                        << mFreeList.size() // this happens if we are out of memory so a solution might be to clear some from freelist
-                        << " Allocated Bytes: " << LLVertexBuffer::sAllocatedBytes
-                        << " Allocated Index Bytes: " << LLVertexBuffer::sAllocatedIndexBytes << " Pooled Bytes: " << sBytesPooled
-                        << " Pooled Index Bytes: " << sIndexBytesPooled << LL_ENDL;
-                }
-            }
-        }
-        else
-        { //always use a true hint of static draw when allocating non-client-backed buffers
-            glBufferData(mType, size, 0, GL_STATIC_DRAW);
-        }
+// Get the next element from the queue
+GLWorkQueue::Work GLWorkQueue::pop()
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    // Lock the mutex
+    {
+        std::unique_lock<std::mutex> lock(mMutex);
 
-        glBindBuffer(mType, 0);
+        // Wait for a new element to become available or for the queue to close
+        {
+            mCondition.wait(lock, [=] { return !mQueue.empty() || mClosed; });
+        }
+    }
 
-        if (for_seed)
-        { //put into pool for future use
-            llassert(mFreeList.size() > i);
+    Work ret;
 
-            Record rec;
-            rec.mGLName = name;
-            rec.mClientData = ret;
-
-            if (mType == GL_ARRAY_BUFFER)
-            {
-                sBytesPooled += size;
-            }
-            else
-            {
-                sIndexBytesPooled += size;
-            }
-            mFreeList[i].push_back(rec);
-            mMissCountDirty = true; // signal to ::seedPool()
-        }
-    }
-    else
-    {
-        name = mFreeList[i].front().mGLName;
-        ret = mFreeList[i].front().mClientData;
+    {
+        std::lock_guard<std::mutex> lock(mMutex);
 
-        if (mType == GL_ARRAY_BUFFER)
-        {
-            sBytesPooled -= size;
-        }
-        else
-        {
-            sIndexBytesPooled -= size;
-        }
+        // Get the next element from the queue
+        if (mQueue.size() > 0)
+        {
+            ret = mQueue.front();
+            mQueue.pop();
+        }
+        else
+        {
+            ret = []() {};
+        }
+    }
 
-        mFreeList[i].pop_front();
-        mMissCountDirty = true; // signal to ::seedPool()
-    }
+    return ret;
+}
 
-    return ret;
+void GLWorkQueue::runOne()
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    Work w = pop();
+    w();
+    //mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 }
 
-void LLVBOPool::release(U32 name, U8* buffer, U32 size)
+void GLWorkQueue::runUntilClose()
 {
-    llassert(vbo_block_size(size) == size);
+    while (!isClosed())
+    {
+        runOne();
+    }
+}
 
-    deleteBuffer(name);
-    ll_aligned_free_fallback((U8*) buffer);
+void GLWorkQueue::close()
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    {
+        std::lock_guard<std::mutex> lock(mMutex);
+        mClosed = true;
+    }
 
-    if (mType == GL_ARRAY_BUFFER)
-    {
-        LLVertexBuffer::sAllocatedBytes -= size;
-    }
-    else
-    {
-        LLVertexBuffer::sAllocatedIndexBytes -= size;
-    }
+    mCondition.notify_all();
 }
 
-void LLVBOPool::seedPool()
+bool GLWorkQueue::isClosed()
 {
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
-    if (mMissCountDirty)
-    {
-        U32 dummy_name = 0;
-        U32 size = LL_VBO_BLOCK_SIZE;
-
-        for (U32 i = 0; i < LL_VBO_POOL_SEED_COUNT; i++)
-        {
-            if (mMissCount[i] > mFreeList[i].size())
-            {
-                S32 count = mMissCount[i] - mFreeList[i].size();
-                for (U32 j = 0; j < count; ++j)
-                {
-                    allocate(dummy_name, size, true);
-                }
-            }
-            size += LL_VBO_BLOCK_SIZE;
-        }
-        mMissCountDirty = false;
-    }
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    std::lock_guard<std::mutex> lock(mMutex);
+    return mClosed;
 }
 
-void LLVBOPool::cleanup()
+#include "llwindow.h"
+
+class LLGLWorkerThread : public LLThread
 {
-    U32 size = LL_VBO_BLOCK_SIZE;
+public:
+    LLGLWorkerThread(const std::string& name, GLWorkQueue* queue, LLWindow* window)
+        : LLThread(name)
+    {
+        mWindow = window;
+        mContext = mWindow->createSharedContext();
+        mQueue = queue;
+    }
 
-    for (U32 i = 0; i < mFreeList.size(); ++i)
-    {
-        record_list_t& l = mFreeList[i];
+    void run() override
+    {
+        mWindow->makeContextCurrent(mContext);
+        gGL.init(false);
+        mQueue->runUntilClose();
+        gGL.shutdown();
+        mWindow->destroySharedContext(mContext);
+    }
 
-        while (!l.empty())
-        {
-            Record& r = l.front();
+    GLWorkQueue* mQueue;
+    LLWindow* mWindow;
+    void* mContext = nullptr;
+};
 
-            deleteBuffer(r.mGLName);
-
-            if (r.mClientData)
-            {
-                ll_aligned_free<64>((void*) r.mClientData);
-            }
-            l.pop_front();
+static LLGLWorkerThread* sVBOThread[THREAD_COUNT];
+static GLWorkQueue* sQueue = nullptr;
 
-            if (mType == GL_ARRAY_BUFFER)
-            {
-                sBytesPooled -= size;
-                LLVertexBuffer::sAllocatedBytes -= size;
-            }
-            else
-            {
-                sIndexBytesPooled -= size;
-                LLVertexBuffer::sAllocatedIndexBytes -= size;
-            }
-        }
+#endif
 
-        size += LL_VBO_BLOCK_SIZE;
-    }
+//============================================================================
 
-    //reset miss counts
-    std::fill(mMissCount.begin(), mMissCount.end(), 0);
-}
+//static
+std::list<U32> LLVertexBuffer::sAvailableVAOName;
+U32 LLVertexBuffer::sCurVAOName = 1;
+
+U32 LLVertexBuffer::sAllocatedIndexBytes = 0;
+U32 LLVertexBuffer::sIndexCount = 0;
+
+U32 LLVertexBuffer::sBindCount = 0;
+U32 LLVertexBuffer::sSetCount = 0;
+S32 LLVertexBuffer::sCount = 0;
+S32 LLVertexBuffer::sGLCount = 0;
+S32 LLVertexBuffer::sMappedCount = 0;
+bool LLVertexBuffer::sDisableVBOMapping = false;
+bool LLVertexBuffer::sEnableVBOs = true;
+U32 LLVertexBuffer::sGLRenderBuffer = 0;
+U32 LLVertexBuffer::sGLRenderArray = 0;
+U32 LLVertexBuffer::sGLRenderIndices = 0;
+U32 LLVertexBuffer::sLastMask = 0;
+bool LLVertexBuffer::sVBOActive = false;
+bool LLVertexBuffer::sIBOActive = false;
+U32 LLVertexBuffer::sAllocatedBytes = 0;
+U32 LLVertexBuffer::sVertexCount = 0;
+bool LLVertexBuffer::sMapped = false;
+bool LLVertexBuffer::sUseStreamDraw = true;
+bool LLVertexBuffer::sUseVAO = false;
+bool LLVertexBuffer::sPreferStreamDraw = false;
 
 //NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware
@@ -420,17 +379,6 @@ void LLVertexBuffer::releaseVAOName(U32 name)
 
 //static
-void LLVertexBuffer::seedPools()
-{
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
-    sStreamVBOPool.seedPool();
-    sDynamicVBOPool.seedPool();
-    sDynamicCopyVBOPool.seedPool();
-    sStreamIBOPool.seedPool();
-    sDynamicIBOPool.seedPool();
-}
-
-//static
 void LLVertexBuffer::setupClientArrays(U32 data_mask)
 {
     if (sLastMask != data_mask)
@@ -473,7 +421,7 @@ void LLVertexBuffer::drawArrays(U32 mode, const std::vector<LLVector3>& pos)
     }
     gGL.end();
     gGL.flush();
-    }
+}
 
 //static
 void LLVertexBuffer::drawElements(U32 mode, const LLVector4a* pos, const LLVector2* tc, S32 num_indices, const U16* indicesp)
@@ -704,10 +652,20 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
 }
 
 //static
-void LLVertexBuffer::initClass(bool use_vbo, bool no_vbo_mapping)
+void LLVertexBuffer::initClass(LLWindow* window)
 {
-    sEnableVBOs = use_vbo;
-    sDisableVBOMapping = sEnableVBOs && no_vbo_mapping;
+    sEnableVBOs = true;
+    sDisableVBOMapping = true;
+
+#if ENABLE_GL_WORK_QUEUE
+    sQueue = new GLWorkQueue();
+
+    for (int i = 0; i < THREAD_COUNT; ++i)
+    {
+        sVBOThread[i] = new LLGLWorkerThread("VBO Worker", sQueue, window);
+        sVBOThread[i]->start();
+    }
+#endif
 }
 
 //static
@@ -743,14 +701,19 @@ void LLVertexBuffer::cleanupClass()
 {
     unbind();
 
-    sStreamIBOPool.cleanup();
-    sDynamicIBOPool.cleanup();
-    sStreamVBOPool.cleanup();
-    sDynamicVBOPool.cleanup();
-    sDynamicCopyVBOPool.cleanup();
-
-    llassert(0 == LLVBOPool::sBytesPooled);
-    llassert(0 == LLVBOPool::sIndexBytesPooled);
+#if ENABLE_GL_WORK_QUEUE
+    sQueue->close();
+    for (int i = 0; i < THREAD_COUNT; ++i)
+    {
+        sVBOThread[i]->shutdown();
+        delete sVBOThread[i];
+        sVBOThread[i] = nullptr;
+    }
+
+    delete sQueue;
+    sQueue = nullptr;
+#endif
+
+    //llassert(0 == sAllocatedBytes);
+    //llassert(0 == sAllocatedIndexBytes);
 }
 
@@ -781,21 +744,6 @@ S32 LLVertexBuffer::determineUsage(S32 usage)
         ret_usage = GL_STREAM_DRAW;
     }
 
-    if (ret_usage && ret_usage != GL_STREAM_DRAW)
-    { //only stream_draw and dynamic_draw are supported when using VBOs, dynamic draw is the default
-        if (ret_usage != GL_DYNAMIC_COPY)
-        {
-            if (sDisableVBOMapping)
-            { //always use stream draw if VBO mapping is disabled
-                ret_usage = GL_STREAM_DRAW;
-            }
-            else
-            {
-                ret_usage = GL_DYNAMIC_DRAW;
-            }
-        }
-    }
-
     return ret_usage;
 }
 
@@ -848,7 +796,7 @@ S32 LLVertexBuffer::calcOffsets(const U32& typemask, S32* offsets, S32 num_verti
 
     offsets[TYPE_TEXTURE_INDEX] = offsets[TYPE_VERTEX] + 12;
 
-    return offset+16;
+    return offset;
 }
 
 //static
@@ -896,74 +844,101 @@ LLVertexBuffer::~LLVertexBuffer()
 
 //----------------------------------------------------------------------------
 
+// batch glGenBuffers
+static GLuint gen_buffer()
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    constexpr U32 pool_size = 4096;
+
+    thread_local static GLuint sNamePool[pool_size];
+    thread_local static U32 sIndex = 0;
+
+    if (sIndex == 0)
+    {
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo");
+        sIndex = pool_size;
+        glGenBuffers(pool_size, sNamePool);
+    }
+
+    return sNamePool[--sIndex];
+}
+
+// batch glDeleteBuffers
+static void release_buffer(U32 buff)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+#if 0
+
+    constexpr U32 pool_size = 4096;
+
+    thread_local static GLuint sNamePool[pool_size];
+    thread_local static U32 sIndex = 0;
+
+    if (sIndex == pool_size)
+    {
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo");
+        sIndex = 0;
+        glDeleteBuffers(pool_size, sNamePool);
+    }
+
+    sNamePool[sIndex++] = buff;
+#else
+    glDeleteBuffers(1, &buff);
+#endif
+}
 
 void LLVertexBuffer::genBuffer(U32 size)
 {
-    mSize = vbo_block_size(size);
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
 
-    if (mUsage == GL_STREAM_DRAW)
-    {
-        mMappedData = sStreamVBOPool.allocate(mGLBuffer, mSize);
-    }
-    else if (mUsage == GL_DYNAMIC_DRAW)
-    {
-        mMappedData = sDynamicVBOPool.allocate(mGLBuffer, mSize);
-    }
-    else
-    {
-        mMappedData = sDynamicCopyVBOPool.allocate(mGLBuffer, mSize);
-    }
-
-
-    sGLCount++;
+    mSize = size;
+    mMappedData = (U8*) ll_aligned_malloc_16(size);
+    mGLBuffer = gen_buffer();
+
+    glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+    glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage);
+    glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+    sGLCount++;
 }
 
 void LLVertexBuffer::genIndices(U32 size)
 {
-    mIndicesSize = vbo_block_size(size);
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+
+    mIndicesSize = size;
+    mMappedIndexData = (U8*) ll_aligned_malloc_16(size);
+
+    mGLIndices = gen_buffer();
+
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+    glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, nullptr, mUsage);
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
 
-    if (mUsage == GL_STREAM_DRAW)
-    {
-        mMappedIndexData = sStreamIBOPool.allocate(mGLIndices, mIndicesSize);
-    }
-    else
-    {
-        mMappedIndexData = sDynamicIBOPool.allocate(mGLIndices, mIndicesSize);
-    }
-
     sGLCount++;
 }
 
 void LLVertexBuffer::releaseBuffer()
 {
-    if (mUsage == GL_STREAM_DRAW)
-    {
-        sStreamVBOPool.release(mGLBuffer, mMappedData, mSize);
-    }
-    else
-    {
-        sDynamicVBOPool.release(mGLBuffer, mMappedData, mSize);
-    }
-
-    mGLBuffer = 0;
-    mMappedData = NULL;
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    release_buffer(mGLBuffer);
+    mGLBuffer = 0;
+    ll_aligned_free_16(mMappedData);
+    mMappedData = nullptr;
+
    sGLCount--;
 }
 
 void LLVertexBuffer::releaseIndices()
 {
-    if (mUsage == GL_STREAM_DRAW)
-    {
-        sStreamIBOPool.release(mGLIndices, mMappedIndexData, mIndicesSize);
-    }
-    else
-    {
-        sDynamicIBOPool.release(mGLIndices, mMappedIndexData, mIndicesSize);
-    }
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    release_buffer(mGLIndices);
+    mGLIndices = 0;
+
+    ll_aligned_free_16(mMappedIndexData);
+    mMappedIndexData = nullptr;
 
-    mGLIndices = 0;
-    mMappedIndexData = NULL;
-
     sGLCount--;
 }
 
@@ -1183,21 +1158,20 @@ bool LLVertexBuffer::useVBOs() const
 
 //----------------------------------------------------------------------------
 
-bool expand_region(LLVertexBuffer::MappedRegion& region, S32 index, S32 count)
+// if no gap between region and given range exists, expand region to cover given range and return true
+// otherwise return false
+bool expand_region(LLVertexBuffer::MappedRegion& region, S32 start, S32 end)
 {
-    S32 end = index+count;
-    S32 region_end = region.mIndex+region.mCount;
-
-    if (end < region.mIndex ||
-        index > region_end)
+    if (end < region.mStart ||
+        start > region.mEnd)
     { //gap exists, do not merge
         return false;
     }
 
-    S32 new_end = llmax(end, region_end);
-    S32 new_index = llmin(index, region.mIndex);
-    region.mIndex = new_index;
-    region.mCount = new_end-new_index;
+    region.mStart = llmin(region.mStart, start);
+    region.mEnd = llmax(region.mEnd, end);
+
     return true;
 }
 
@@ -1215,34 +1189,34 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran
     {
         LL_ERRS() << "LLVertexBuffer::mapVertexBuffer() called on unallocated buffer." << LL_ENDL;
     }
-
-    if (useVBOs())
-    {
-        if (count == -1)
-        {
-            count = mNumVerts-index;
-        }
 
-        bool mapped = false;
-        //see if range is already mapped
+
+    if (useVBOs())
+    {
+        if (count == -1)
+        {
+            count = mNumVerts - index;
+        }
+
+        S32 start = mOffsets[type] + sTypeSize[type] * index;
+        S32 end = start + sTypeSize[type] * count;
+
+        bool flagged = false;
+        // flag region as mapped
         for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
         {
             MappedRegion& region = mMappedVertexRegions[i];
-            if (region.mType == type)
-            {
-                if (expand_region(region, index, count))
-                {
-                    mapped = true;
-                    break;
-                }
-            }
+            if (expand_region(region, start, end))
+            {
+                flagged = true;
+                break;
+            }
         }
 
-        if (!mapped)
+        if (!flagged)
         {
-            //not already mapped, map new region
-            MappedRegion region(type, index, count);
-            mMappedVertexRegions.push_back(region);
+            //didn't expand an existing region, make a new one
+            mMappedVertexRegions.push_back({ start, end });
         }
 
         if (mVertexLocked && map_range)
@@ -1299,25 +1273,26 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
             count = mNumIndices-index;
         }
 
-        bool mapped = false;
-        //see if range is already mapped
-        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
-        {
-            MappedRegion& region = mMappedIndexRegions[i];
-            if (expand_region(region, index, count))
-            {
-                mapped = true;
-                break;
-            }
-        }
+        S32 start = sizeof(U16) * index;
+        S32 end = start + sizeof(U16) * count;
 
-        if (!mapped)
-        {
-            //not already mapped, map new region
-            MappedRegion region(TYPE_INDEX, index, count);
-            mMappedIndexRegions.push_back(region);
-        }
-
+        bool flagged = false;
+        // flag region as mapped
+        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+        {
+            MappedRegion& region = mMappedIndexRegions[i];
+            if (expand_region(region, start, end))
+            {
+                flagged = true;
+                break;
+            }
+        }
+
+        if (!flagged)
+        {
+            //didn't expand an existing region, make a new one
+            mMappedIndexRegions.push_back({ start, end });
+        }
 
         if (mIndexLocked && map_range)
         {
@@ -1360,6 +1335,27 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
     return mMappedIndexData + sizeof(U16)*index;
 }
 
+static void flush_vbo(GLenum target, S32 start, S32 end, void* data)
+{
+    if (end != 0)
+    {
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
+        LL_PROFILE_ZONE_NUM(start);
+        LL_PROFILE_ZONE_NUM(end);
+        LL_PROFILE_ZONE_NUM(end-start);
+
+        constexpr S32 block_size = 65536;
+
+        for (S32 i = start; i < end; i += block_size)
+        {
+            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+            LL_PROFILE_GPU_ZONE("glBufferSubData");
+            S32 tend = llmin(i + block_size, end);
+            glBufferSubData(target, i, tend - i, (U8*) data + (i-start));
+        }
+    }
+}
+
 void LLVertexBuffer::unmapBuffer()
 {
     if (!useVBOs())
@@ -1377,37 +1373,31 @@ void LLVertexBuffer::unmapBuffer()
 
     if (!mMappedVertexRegions.empty())
     {
-        stop_glerror();
+        S32 start = 0;
+        S32 end = 0;
+
         for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
         {
             const MappedRegion& region = mMappedVertexRegions[i];
-            S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0;
-            S32 length = sTypeSize[region.mType]*region.mCount;
-            if (mSize >= length + offset)
-            {
-                glBufferSubData(GL_ARRAY_BUFFER, offset, length, (U8*)mMappedData + offset);
-            }
-            else
-            {
-                GLint size = 0;
-                glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &size);
-                LL_WARNS() << "Attempted to map regions to a buffer that is too small, "
-                    << "mapped size: " << mSize
-                    << ", gl buffer size: " << size
-                    << ", length: " << length
-                    << ", offset: " << offset
-                    << LL_ENDL;
-            }
-            stop_glerror();
+            if (region.mStart == end + 1)
+            {
+                end = region.mEnd;
+            }
+            else
+            {
+                flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
+                start = region.mStart;
+                end = region.mEnd;
+            }
         }
 
+        flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
+
         mMappedVertexRegions.clear();
     }
     else
     {
-        stop_glerror();
-        glBufferSubData(GL_ARRAY_BUFFER, 0, getSize(), (U8*) mMappedData);
-        stop_glerror();
+        llassert(false); // this shouldn't happen -- a buffer must always be explicitly mapped
    }
 
     mVertexLocked = false;
@@ -1421,36 +1411,31 @@ void LLVertexBuffer::unmapBuffer()
 
     if (!mMappedIndexRegions.empty())
     {
-        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
-        {
-            const MappedRegion& region = mMappedIndexRegions[i];
-            S32 offset = region.mIndex >= 0 ? sizeof(U16)*region.mIndex : 0;
-            S32 length = sizeof(U16)*region.mCount;
-            if (mIndicesSize >= length + offset)
-            {
-                glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, offset, length, (U8*) mMappedIndexData+offset);
-            }
-            else
-            {
-                GLint size = 0;
-                glGetBufferParameteriv(GL_ELEMENT_ARRAY_BUFFER, GL_BUFFER_SIZE, &size);
-                LL_WARNS() << "Attempted to map regions to a buffer that is too small, "
-                    << "mapped size: " << mIndicesSize
-                    << ", gl buffer size: " << size
-                    << ", length: " << length
-                    << ", offset: " << offset
-                    << LL_ENDL;
-            }
-            stop_glerror();
-        }
+        S32 start = 0;
+        S32 end = 0;
+
+        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+        {
+            const MappedRegion& region = mMappedIndexRegions[i];
+            if (region.mStart == end + 1)
+            {
+                end = region.mEnd;
+            }
+            else
+            {
+                flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
+                start = region.mStart;
+                end = region.mEnd;
+            }
+        }
+
+        flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
 
         mMappedIndexRegions.clear();
     }
     else
     {
-        stop_glerror();
-        glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, getIndicesSize(), (U8*) mMappedIndexData);
-        stop_glerror();
+        llassert(false); // this shouldn't happen -- a buffer must always be explicitly mapped
    }
 
     mIndexLocked = false;
 
@@ -1640,11 +1625,53 @@ bool LLVertexBuffer::bindGLIndicesFast()
     return false;
 }
 
-void LLVertexBuffer::flush()
+void LLVertexBuffer::flush(bool discard)
 {
     if (useVBOs())
     {
-        unmapBuffer();
+        if (discard)
+        { // discard existing VBO data if the buffer must be updated
+
+            if (!mMappedVertexRegions.empty())
+            {
+                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard vbo");
+                LL_PROFILE_ZONE_NUM(mSize);
+                release_buffer(mGLBuffer);
+                mGLBuffer = gen_buffer();
+                bindGLBuffer();
+                {
+                    LL_PROFILE_GPU_ZONE("glBufferData");
+                    glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage);
+
+                    for (int i = 0; i < mSize; i += 65536)
+                    {
+                        LL_PROFILE_GPU_ZONE("glBufferSubData");
+                        S32 end = llmin(i + 65536, mSize);
+                        S32 count = end - i;
+                        glBufferSubData(GL_ARRAY_BUFFER, i, count, mMappedData + i);
+                    }
+                }
+                mMappedVertexRegions.clear();
+            }
+
+            if (!mMappedIndexRegions.empty())
+            {
+                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard ibo");
+                LL_PROFILE_ZONE_NUM(mIndicesSize);
+                release_buffer(mGLIndices);
+                mGLIndices = gen_buffer();
+                bindGLIndices();
+                {
+                    LL_PROFILE_GPU_ZONE("glBufferData (ibo)");
+                    glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, mUsage);
+                }
+                mMappedIndexRegions.clear();
+            }
+        }
+        else
+        {
+            unmapBuffer();
+        }
    }
 }
 
@@ -2045,10 +2072,4 @@ void LLVertexBuffer::setupVertexBufferFast(U32 data_mask)
     }
 }
 
-LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count)
-: mType(type), mIndex(index), mCount(count)
-{
-    mEnd = mIndex+mCount;
-}
-
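
The heart of this change is how dirty ranges are tracked and flushed: mapVertexBuffer()/mapIndexBuffer() now record plain byte ranges (mStart/mEnd) instead of typed index ranges, expand_region() grows an existing range when the new one touches it, and unmapBuffer() walks the regions and uploads contiguous runs through flush_vbo(). A minimal standalone sketch of that coalescing walk follows; the Region type and the upload hook are stand-ins, not viewer API:

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <vector>

    // Stand-in for LLVertexBuffer::MappedRegion: a dirty byte range in a buffer.
    struct Region { int mStart; int mEnd; };

    // Sort regions by start (the CompareMappedRegion ordering), merge runs that
    // touch or overlap, and hand each merged run to one upload call.
    void flush_regions(std::vector<Region>& regions,
                       const std::function<void(int start, int end)>& upload)
    {
        if (regions.empty()) return;

        std::sort(regions.begin(), regions.end(),
                  [](const Region& a, const Region& b) { return a.mStart < b.mStart; });

        int start = regions[0].mStart;
        int end = regions[0].mEnd;
        for (std::size_t i = 1; i < regions.size(); ++i)
        {
            if (regions[i].mStart <= end) // touches or overlaps the current run
            {
                end = std::max(end, regions[i].mEnd);
            }
            else
            {
                upload(start, end); // flush the finished run
                start = regions[i].mStart;
                end = regions[i].mEnd;
            }
        }
        upload(start, end); // flush the final run
        regions.clear();
    }

Note the patch itself only extends the current run when region.mStart == end + 1, and CompareMappedRegion is defined without a call site visible in these hunks; the sketch merges any touching or overlapping ranges after sorting, which is the more general form of the same idea.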
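The new gen_buffer() amortizes driver calls by generating 4096 buffer names at a time into a thread_local pool; release_buffer() has the mirror-image batching written but compiled out (#if 0), so deletes still go one at a time. A sketch of the same pattern, assuming only core OpenGL entry points (the loader header and function name are illustrative):

    #include <epoxy/gl.h> // hypothetical loader; use whatever GL binding the project links

    static GLuint gen_buffer_batched()
    {
        constexpr GLsizei pool_size = 4096;
        thread_local static GLuint name_pool[pool_size];
        thread_local static GLsizei index = 0;

        if (index == 0)
        {
            glGenBuffers(pool_size, name_pool); // one driver call refills the pool
            index = pool_size;
        }
        return name_pool[--index]; // hand out names from the back of the pool
    }

A thread_local pool keeps the fast path lock-free, at the cost of up to 4096 unused names per thread that touches the allocator.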
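The GLWorkQueue added here (currently disabled via ENABLE_GL_WORK_QUEUE 0) is a conventional mutex/condition-variable job queue drained by worker threads that own shared GL contexts. A minimal self-contained equivalent is sketched below; unlike the patch's pop(), which releases the lock after waiting and compensates by returning a no-op lambda when another consumer wins the race, this sketch holds the lock across the wait-and-pop:

    #include <condition_variable>
    #include <functional>
    #include <mutex>
    #include <queue>

    class WorkQueue
    {
    public:
        using Work = std::function<void()>;

        void post(Work w)
        {
            {
                std::lock_guard<std::mutex> lock(mMutex);
                mQueue.push(std::move(w));
            }
            mCondition.notify_one();
        }

        void close()
        {
            {
                std::lock_guard<std::mutex> lock(mMutex);
                mClosed = true;
            }
            mCondition.notify_all();
        }

        // Runs until close(); drains whatever is still queued, then exits.
        void runUntilClose()
        {
            std::unique_lock<std::mutex> lock(mMutex);
            for (;;)
            {
                mCondition.wait(lock, [&] { return !mQueue.empty() || mClosed; });
                if (mQueue.empty()) return; // closed and drained
                Work w = std::move(mQueue.front());
                mQueue.pop();
                lock.unlock();
                w(); // run the job outside the lock
                lock.lock();
            }
        }

    private:
        std::mutex mMutex;
        std::condition_variable mCondition;
        std::queue<Work> mQueue;
        bool mClosed = false;
    };

Usage mirrors LLGLWorkerThread::run(): a worker makes its shared context current and calls runUntilClose(), while the main thread post()s upload jobs and close()s the queue at shutdown.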
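Finally, flush(discard = true) avoids overwriting storage the GPU may still be reading: it releases the old buffer name, allocates fresh storage with glBufferData, and re-uploads the client copy in 64KB glBufferSubData chunks (the index path uploads in one glBufferData call). A condensed sketch of that recreate-and-refill idea for the vertex path; the function name and loader header are illustrative, and sizes/usage come from the owning buffer:

    #include <algorithm>
    #include <epoxy/gl.h> // hypothetical loader, as above

    // Drop the old buffer name and refill fresh storage; the driver keeps the
    // orphaned storage alive until the GPU finishes reading it.
    static GLuint refill_discarded(GLuint old_name, GLsizeiptr size,
                                   const void* data, GLenum usage)
    {
        glDeleteBuffers(1, &old_name);
        GLuint fresh = 0;
        glGenBuffers(1, &fresh);
        glBindBuffer(GL_ARRAY_BUFFER, fresh);
        glBufferData(GL_ARRAY_BUFFER, size, nullptr, usage); // allocate storage only

        constexpr GLsizeiptr block = 65536; // upload in 64KB chunks, as the patch does
        for (GLsizeiptr i = 0; i < size; i += block)
        {
            GLsizeiptr count = std::min(block, size - i);
            glBufferSubData(GL_ARRAY_BUFFER, i, count, (const char*)data + i);
        }
        return fresh;
    }

Chunking the upload bounds the size of any single driver copy, which tends to behave better under profiling than one monolithic glBufferSubData on large buffers.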