summary refs log tree commit diff
diff options
context:
space:
mode:
author Dave Parks <davep@lindenlab.com> 2023-01-10 17:36:05 -0600
committer Dave Parks <davep@lindenlab.com> 2023-01-10 17:36:05 -0600
commit fdc0ea64f050ad09a84442f40396bb9e6497ce52 (patch)
tree 306e79ea83514488f28402ee8a65f2dc603b4f4f
parent 4abf39c968c31a9da943a53434388102b99d487f (diff)
SL-18869 Optimizations -- LLVertexBuffer overhaul and shuffle of shadow map rendering to a place where the main camera has taken a stab at object updates for this frame before shadow map rendering has at them.
-rw-r--r-- indra/llrender/llvertexbuffer.cpp 825
-rw-r--r-- indra/llrender/llvertexbuffer.h 69
-rw-r--r-- indra/newview/llviewerdisplay.cpp 17
-rw-r--r-- indra/newview/llviewerwindow.cpp 8
-rw-r--r-- indra/newview/pipeline.cpp 17
5 files changed, 446 insertions, 490 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 7b8f85acba..20261dcb8a 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -62,6 +62,14 @@ U32 wpo2(U32 i)
return r;
}
+struct CompareMappedRegion
+{
+ bool operator()(const LLVertexBuffer::MappedRegion& lhs, const LLVertexBuffer::MappedRegion& rhs)
+ {
+ return lhs.mStart < rhs.mStart;
+ }
+};
+
const U32 LL_VBO_BLOCK_SIZE = 2048;
const U32 LL_VBO_POOL_MAX_SEED_SIZE = 256*1024;
@@ -81,266 +89,217 @@ U32 vbo_block_index(U32 size)
const U32 LL_VBO_POOL_SEED_COUNT = vbo_block_index(LL_VBO_POOL_MAX_SEED_SIZE) + 1;
+#define ENABLE_GL_WORK_QUEUE 0
+
+#if ENABLE_GL_WORK_QUEUE
+
+#define THREAD_COUNT 1
//============================================================================
-//static
-LLVBOPool LLVertexBuffer::sStreamVBOPool(GL_STREAM_DRAW, GL_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sDynamicVBOPool(GL_DYNAMIC_DRAW, GL_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sDynamicCopyVBOPool(GL_DYNAMIC_COPY, GL_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sStreamIBOPool(GL_STREAM_DRAW, GL_ELEMENT_ARRAY_BUFFER);
-LLVBOPool LLVertexBuffer::sDynamicIBOPool(GL_DYNAMIC_DRAW, GL_ELEMENT_ARRAY_BUFFER);
+// High performance WorkQueue for usage in real-time rendering work
+class GLWorkQueue
+{
+public:
+ using Work = std::function<void()>;
-U32 LLVBOPool::sBytesPooled = 0;
-U32 LLVBOPool::sIndexBytesPooled = 0;
-U32 LLVBOPool::sNameIdx = 0;
-U32 LLVBOPool::sNamePool[1024];
+ GLWorkQueue();
-std::list<U32> LLVertexBuffer::sAvailableVAOName;
-U32 LLVertexBuffer::sCurVAOName = 1;
+ void post(const Work& value);
-U32 LLVertexBuffer::sAllocatedIndexBytes = 0;
-U32 LLVertexBuffer::sIndexCount = 0;
+ size_t size();
-U32 LLVertexBuffer::sBindCount = 0;
-U32 LLVertexBuffer::sSetCount = 0;
-S32 LLVertexBuffer::sCount = 0;
-S32 LLVertexBuffer::sGLCount = 0;
-S32 LLVertexBuffer::sMappedCount = 0;
-bool LLVertexBuffer::sDisableVBOMapping = false;
-bool LLVertexBuffer::sEnableVBOs = true;
-U32 LLVertexBuffer::sGLRenderBuffer = 0;
-U32 LLVertexBuffer::sGLRenderArray = 0;
-U32 LLVertexBuffer::sGLRenderIndices = 0;
-U32 LLVertexBuffer::sLastMask = 0;
-bool LLVertexBuffer::sVBOActive = false;
-bool LLVertexBuffer::sIBOActive = false;
-U32 LLVertexBuffer::sAllocatedBytes = 0;
-U32 LLVertexBuffer::sVertexCount = 0;
-bool LLVertexBuffer::sMapped = false;
-bool LLVertexBuffer::sUseStreamDraw = true;
-bool LLVertexBuffer::sUseVAO = false;
-bool LLVertexBuffer::sPreferStreamDraw = false;
+ bool done();
-U32 LLVBOPool::genBuffer()
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
+ // Get the next element from the queue
+ Work pop();
- if (sNameIdx == 0)
- {
- glGenBuffers(1024, sNamePool);
- sNameIdx = 1024;
- }
+ void runOne();
- return sNamePool[--sNameIdx];
-}
+ bool runPending();
-void LLVBOPool::deleteBuffer(U32 name)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
- if (gGLManager.mInited)
- {
- LLVertexBuffer::unbind();
+ void runUntilClose();
- glBindBuffer(mType, name);
- glBufferData(mType, 0, NULL, mUsage);
- glBindBuffer(mType, 0);
+ void close();
- glDeleteBuffers(1, &name);
- }
-}
+ bool isClosed();
+ void syncGL();
-LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType)
-: mUsage(vboUsage), mType(vboType), mMissCountDirty(true)
-{
- mFreeList.resize(LL_VBO_POOL_SEED_COUNT);
- mMissCount.resize(LL_VBO_POOL_SEED_COUNT);
- std::fill(mMissCount.begin(), mMissCount.end(), 0);
-}
+private:
+ std::mutex mMutex;
+ std::condition_variable mCondition;
+ std::queue<Work> mQueue;
+ bool mClosed = false;
+};
-U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed)
+GLWorkQueue::GLWorkQueue()
{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
- llassert(vbo_block_size(size) == size);
-
- U8* ret = NULL;
- U32 i = vbo_block_index(size);
+}
- if (mFreeList.size() <= i)
- {
- mFreeList.resize(i+1);
- }
+void GLWorkQueue::syncGL()
+{
+ /*if (mSync)
+ {
+ std::lock_guard<std::mutex> lock(mMutex);
+ glWaitSync(mSync, 0, GL_TIMEOUT_IGNORED);
+ mSync = 0;
+ }*/
+}
- if (mFreeList[i].empty() || for_seed)
- {
- //make a new buffer
- name = genBuffer();
+size_t GLWorkQueue::size()
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+ std::lock_guard<std::mutex> lock(mMutex);
+ return mQueue.size();
+}
- glBindBuffer(mType, name);
+bool GLWorkQueue::done()
+{
+ return size() == 0 && isClosed();
+}
- if (!for_seed && i < LL_VBO_POOL_SEED_COUNT)
- { //record this miss
- mMissCount[i]++;
- mMissCountDirty = true; // signal to ::seedPool()
- }
+void GLWorkQueue::post(const GLWorkQueue::Work& value)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+ {
+ std::lock_guard<std::mutex> lock(mMutex);
+ mQueue.push(std::move(value));
+ }
- if (mType == GL_ARRAY_BUFFER)
- {
- LLVertexBuffer::sAllocatedBytes += size;
- }
- else
- {
- LLVertexBuffer::sAllocatedIndexBytes += size;
- }
+ mCondition.notify_one();
+}
- if (LLVertexBuffer::sDisableVBOMapping || mUsage != GL_DYNAMIC_DRAW)
- {
- glBufferData(mType, size, 0, mUsage);
- if (mUsage != GL_DYNAMIC_COPY)
- { //data will be provided by application
- ret = (U8*) ll_aligned_malloc<64>(size);
- if (!ret)
- {
- LL_ERRS()
- << "Failed to allocate " << size << " bytes for LLVBOPool buffer " << name << "." << LL_NEWLINE
- << "Free list size: "
- << mFreeList.size() // this happens if we are out of memory so a solution might be to clear some from freelist
- << " Allocated Bytes: " << LLVertexBuffer::sAllocatedBytes
- << " Allocated Index Bytes: " << LLVertexBuffer::sAllocatedIndexBytes << " Pooled Bytes: " << sBytesPooled
- << " Pooled Index Bytes: " << sIndexBytesPooled << LL_ENDL;
- }
- }
- }
- else
- { //always use a true hint of static draw when allocating non-client-backed buffers
- glBufferData(mType, size, 0, GL_STATIC_DRAW);
- }
+// Get the next element from the queue
+GLWorkQueue::Work GLWorkQueue::pop()
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+ // Lock the mutex
+ {
+ std::unique_lock<std::mutex> lock(mMutex);
- glBindBuffer(mType, 0);
+ // Wait for a new element to become available or for the queue to close
+ {
+ mCondition.wait(lock, [=] { return !mQueue.empty() || mClosed; });
+ }
+ }
- if (for_seed)
- { //put into pool for future use
- llassert(mFreeList.size() > i);
+ Work ret;
- Record rec;
- rec.mGLName = name;
- rec.mClientData = ret;
-
- if (mType == GL_ARRAY_BUFFER)
- {
- sBytesPooled += size;
- }
- else
- {
- sIndexBytesPooled += size;
- }
- mFreeList[i].push_back(rec);
- mMissCountDirty = true; // signal to ::seedPool()
- }
- }
- else
- {
- name = mFreeList[i].front().mGLName;
- ret = mFreeList[i].front().mClientData;
+ {
+ std::lock_guard<std::mutex> lock(mMutex);
- if (mType == GL_ARRAY_BUFFER)
- {
- sBytesPooled -= size;
- }
- else
- {
- sIndexBytesPooled -= size;
- }
+ // Get the next element from the queue
+ if (mQueue.size() > 0)
+ {
+ ret = mQueue.front();
+ mQueue.pop();
+ }
+ else
+ {
+ ret = []() {};
+ }
+ }
- mFreeList[i].pop_front();
- mMissCountDirty = true; // signal to ::seedPool()
- }
+ return ret;
+}
- return ret;
+void GLWorkQueue::runOne()
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+ Work w = pop();
+ w();
+ //mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
-void LLVBOPool::release(U32 name, U8* buffer, U32 size)
+void GLWorkQueue::runUntilClose()
{
- llassert(vbo_block_size(size) == size);
+ while (!isClosed())
+ {
+ runOne();
+ }
+}
- deleteBuffer(name);
- ll_aligned_free_fallback((U8*) buffer);
+void GLWorkQueue::close()
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+ {
+ std::lock_guard<std::mutex> lock(mMutex);
+ mClosed = true;
+ }
- if (mType == GL_ARRAY_BUFFER)
- {
- LLVertexBuffer::sAllocatedBytes -= size;
- }
- else
- {
- LLVertexBuffer::sAllocatedIndexBytes -= size;
- }
+ mCondition.notify_all();
}
-void LLVBOPool::seedPool()
+bool GLWorkQueue::isClosed()
{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
- if (mMissCountDirty)
- {
- U32 dummy_name = 0;
- U32 size = LL_VBO_BLOCK_SIZE;
-
- for (U32 i = 0; i < LL_VBO_POOL_SEED_COUNT; i++)
- {
- if (mMissCount[i] > mFreeList[i].size())
- {
- S32 count = mMissCount[i] - mFreeList[i].size();
- for (U32 j = 0; j < count; ++j)
- {
- allocate(dummy_name, size, true);
- }
- }
- size += LL_VBO_BLOCK_SIZE;
- }
- mMissCountDirty = false;
- }
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+ std::lock_guard<std::mutex> lock(mMutex);
+ return mClosed;
}
-void LLVBOPool::cleanup()
+#include "llwindow.h"
+
+class LLGLWorkerThread : public LLThread
{
- U32 size = LL_VBO_BLOCK_SIZE;
+public:
+ LLGLWorkerThread(const std::string& name, GLWorkQueue* queue, LLWindow* window)
+ : LLThread(name)
+ {
+ mWindow = window;
+ mContext = mWindow->createSharedContext();
+ mQueue = queue;
+ }
- for (U32 i = 0; i < mFreeList.size(); ++i)
- {
- record_list_t& l = mFreeList[i];
+ void run() override
+ {
+ mWindow->makeContextCurrent(mContext);
+ gGL.init(false);
+ mQueue->runUntilClose();
+ gGL.shutdown();
+ mWindow->destroySharedContext(mContext);
+ }
- while (!l.empty())
- {
- Record& r = l.front();
+ GLWorkQueue* mQueue;
+ LLWindow* mWindow;
+ void* mContext = nullptr;
+};
- deleteBuffer(r.mGLName);
-
- if (r.mClientData)
- {
- ll_aligned_free<64>((void*) r.mClientData);
- }
- l.pop_front();
+static LLGLWorkerThread* sVBOThread[THREAD_COUNT];
+static GLWorkQueue* sQueue = nullptr;
- if (mType == GL_ARRAY_BUFFER)
- {
- sBytesPooled -= size;
- LLVertexBuffer::sAllocatedBytes -= size;
- }
- else
- {
- sIndexBytesPooled -= size;
- LLVertexBuffer::sAllocatedIndexBytes -= size;
- }
- }
+#endif
- size += LL_VBO_BLOCK_SIZE;
- }
+//============================================================================
- //reset miss counts
- std::fill(mMissCount.begin(), mMissCount.end(), 0);
-}
+//static
+std::list<U32> LLVertexBuffer::sAvailableVAOName;
+U32 LLVertexBuffer::sCurVAOName = 1;
+
+U32 LLVertexBuffer::sAllocatedIndexBytes = 0;
+U32 LLVertexBuffer::sIndexCount = 0;
+
+U32 LLVertexBuffer::sBindCount = 0;
+U32 LLVertexBuffer::sSetCount = 0;
+S32 LLVertexBuffer::sCount = 0;
+S32 LLVertexBuffer::sGLCount = 0;
+S32 LLVertexBuffer::sMappedCount = 0;
+bool LLVertexBuffer::sDisableVBOMapping = false;
+bool LLVertexBuffer::sEnableVBOs = true;
+U32 LLVertexBuffer::sGLRenderBuffer = 0;
+U32 LLVertexBuffer::sGLRenderArray = 0;
+U32 LLVertexBuffer::sGLRenderIndices = 0;
+U32 LLVertexBuffer::sLastMask = 0;
+bool LLVertexBuffer::sVBOActive = false;
+bool LLVertexBuffer::sIBOActive = false;
+U32 LLVertexBuffer::sAllocatedBytes = 0;
+U32 LLVertexBuffer::sVertexCount = 0;
+bool LLVertexBuffer::sMapped = false;
+bool LLVertexBuffer::sUseStreamDraw = true;
+bool LLVertexBuffer::sUseVAO = false;
+bool LLVertexBuffer::sPreferStreamDraw = false;
//NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware
@@ -420,17 +379,6 @@ void LLVertexBuffer::releaseVAOName(U32 name)
//static
-void LLVertexBuffer::seedPools()
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
- sStreamVBOPool.seedPool();
- sDynamicVBOPool.seedPool();
- sDynamicCopyVBOPool.seedPool();
- sStreamIBOPool.seedPool();
- sDynamicIBOPool.seedPool();
-}
-
-//static
void LLVertexBuffer::setupClientArrays(U32 data_mask)
{
if (sLastMask != data_mask)
@@ -473,7 +421,7 @@ void LLVertexBuffer::drawArrays(U32 mode, const std::vector<LLVector3>& pos)
}
gGL.end();
gGL.flush();
- }
+}
//static
void LLVertexBuffer::drawElements(U32 mode, const LLVector4a* pos, const LLVector2* tc, S32 num_indices, const U16* indicesp)
@@ -704,10 +652,20 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
}
//static
-void LLVertexBuffer::initClass(bool use_vbo, bool no_vbo_mapping)
+void LLVertexBuffer::initClass(LLWindow* window)
{
- sEnableVBOs = use_vbo;
- sDisableVBOMapping = sEnableVBOs && no_vbo_mapping;
+ sEnableVBOs = true;
+ sDisableVBOMapping = true;
+
+#if ENABLE_GL_WORK_QUEUE
+ sQueue = new GLWorkQueue();
+
+ for (int i = 0; i < THREAD_COUNT; ++i)
+ {
+ sVBOThread[i] = new LLGLWorkerThread("VBO Worker", sQueue, window);
+ sVBOThread[i]->start();
+ }
+#endif
}
//static
@@ -743,14 +701,19 @@ void LLVertexBuffer::cleanupClass()
{
unbind();
- sStreamIBOPool.cleanup();
- sDynamicIBOPool.cleanup();
- sStreamVBOPool.cleanup();
- sDynamicVBOPool.cleanup();
- sDynamicCopyVBOPool.cleanup();
-
- llassert(0 == LLVBOPool::sBytesPooled);
- llassert(0 == LLVBOPool::sIndexBytesPooled);
+#if ENABLE_GL_WORK_QUEUE
+ sQueue->close();
+ for (int i = 0; i < THREAD_COUNT; ++i)
+ {
+ sVBOThread[i]->shutdown();
+ delete sVBOThread[i];
+ sVBOThread[i] = nullptr;
+ }
+
+ delete sQueue;
+ sQueue = nullptr;
+#endif
+
//llassert(0 == sAllocatedBytes);
//llassert(0 == sAllocatedIndexBytes);
}
@@ -781,21 +744,6 @@ S32 LLVertexBuffer::determineUsage(S32 usage)
ret_usage = GL_STREAM_DRAW;
}
- if (ret_usage && ret_usage != GL_STREAM_DRAW)
- { //only stream_draw and dynamic_draw are supported when using VBOs, dynamic draw is the default
- if (ret_usage != GL_DYNAMIC_COPY)
- {
- if (sDisableVBOMapping)
- { //always use stream draw if VBO mapping is disabled
- ret_usage = GL_STREAM_DRAW;
- }
- else
- {
- ret_usage = GL_DYNAMIC_DRAW;
- }
- }
- }
-
return ret_usage;
}
@@ -848,7 +796,7 @@ S32 LLVertexBuffer::calcOffsets(const U32& typemask, S32* offsets, S32 num_verti
offsets[TYPE_TEXTURE_INDEX] = offsets[TYPE_VERTEX] + 12;
- return offset+16;
+ return offset;
}
//static
@@ -896,74 +844,101 @@ LLVertexBuffer::~LLVertexBuffer()
//----------------------------------------------------------------------------
+// batch glGenBuffers
+static GLuint gen_buffer()
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ constexpr U32 pool_size = 4096;
+
+ thread_local static GLuint sNamePool[pool_size];
+ thread_local static U32 sIndex = 0;
+
+ if (sIndex == 0)
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo");
+ sIndex = pool_size;
+ glGenBuffers(pool_size, sNamePool);
+ }
+
+ return sNamePool[--sIndex];
+}
+
+// batch glDeleteBuffers
+static void release_buffer(U32 buff)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+#if 0
+
+ constexpr U32 pool_size = 4096;
+
+ thread_local static GLuint sNamePool[pool_size];
+ thread_local static U32 sIndex = 0;
+
+ if (sIndex == pool_size)
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo");
+ sIndex = 0;
+ glDeleteBuffers(pool_size, sNamePool);
+ }
+
+ sNamePool[sIndex++] = buff;
+#else
+ glDeleteBuffers(1, &buff);
+#endif
+}
+
void LLVertexBuffer::genBuffer(U32 size)
{
- mSize = vbo_block_size(size);
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
- if (mUsage == GL_STREAM_DRAW)
- {
- mMappedData = sStreamVBOPool.allocate(mGLBuffer, mSize);
- }
- else if (mUsage == GL_DYNAMIC_DRAW)
- {
- mMappedData = sDynamicVBOPool.allocate(mGLBuffer, mSize);
- }
- else
- {
- mMappedData = sDynamicCopyVBOPool.allocate(mGLBuffer, mSize);
- }
-
-
- sGLCount++;
+ mSize = size;
+ mMappedData = (U8*) ll_aligned_malloc_16(size);
+ mGLBuffer = gen_buffer();
+
+ glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+ glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage);
+ glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+ sGLCount++;
}
void LLVertexBuffer::genIndices(U32 size)
{
- mIndicesSize = vbo_block_size(size);
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+
+ mIndicesSize = size;
+ mMappedIndexData = (U8*) ll_aligned_malloc_16(size);
+
+ mGLIndices = gen_buffer();
+
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, nullptr, mUsage);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
- if (mUsage == GL_STREAM_DRAW)
- {
- mMappedIndexData = sStreamIBOPool.allocate(mGLIndices, mIndicesSize);
- }
- else
- {
- mMappedIndexData = sDynamicIBOPool.allocate(mGLIndices, mIndicesSize);
- }
-
sGLCount++;
}
void LLVertexBuffer::releaseBuffer()
{
- if (mUsage == GL_STREAM_DRAW)
- {
- sStreamVBOPool.release(mGLBuffer, mMappedData, mSize);
- }
- else
- {
- sDynamicVBOPool.release(mGLBuffer, mMappedData, mSize);
- }
-
- mGLBuffer = 0;
- mMappedData = NULL;
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ release_buffer(mGLBuffer);
+ mGLBuffer = 0;
+ ll_aligned_free_16(mMappedData);
+ mMappedData = nullptr;
+
sGLCount--;
}
void LLVertexBuffer::releaseIndices()
{
- if (mUsage == GL_STREAM_DRAW)
- {
- sStreamIBOPool.release(mGLIndices, mMappedIndexData, mIndicesSize);
- }
- else
- {
- sDynamicIBOPool.release(mGLIndices, mMappedIndexData, mIndicesSize);
- }
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ release_buffer(mGLIndices);
+ mGLIndices = 0;
+
+ ll_aligned_free_16(mMappedIndexData);
+ mMappedIndexData = nullptr;
- mGLIndices = 0;
- mMappedIndexData = NULL;
-
sGLCount--;
}
@@ -1183,21 +1158,20 @@ bool LLVertexBuffer::useVBOs() const
//----------------------------------------------------------------------------
-bool expand_region(LLVertexBuffer::MappedRegion& region, S32 index, S32 count)
+// if no gap between region and given range exists, expand region to cover given range and return true
+// otherwise return false
+bool expand_region(LLVertexBuffer::MappedRegion& region, S32 start, S32 end)
{
- S32 end = index+count;
- S32 region_end = region.mIndex+region.mCount;
- if (end < region.mIndex ||
- index > region_end)
+ if (end < region.mStart ||
+ start > region.mEnd)
{ //gap exists, do not merge
return false;
}
- S32 new_end = llmax(end, region_end);
- S32 new_index = llmin(index, region.mIndex);
- region.mIndex = new_index;
- region.mCount = new_end-new_index;
+ region.mStart = llmin(region.mStart, start);
+ region.mEnd = llmax(region.mEnd, end);
+
return true;
}
@@ -1215,34 +1189,34 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran
{
LL_ERRS() << "LLVertexBuffer::mapVertexBuffer() called on unallocated buffer." << LL_ENDL;
}
-
- if (useVBOs())
- {
- if (count == -1)
- {
- count = mNumVerts-index;
- }
- bool mapped = false;
- //see if range is already mapped
+
+ if (useVBOs())
+ {
+ if (count == -1)
+ {
+ count = mNumVerts - index;
+ }
+
+ S32 start = mOffsets[type] + sTypeSize[type] * index;
+ S32 end = start + sTypeSize[type] * count;
+
+ bool flagged = false;
+ // flag region as mapped
for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
{
MappedRegion& region = mMappedVertexRegions[i];
- if (region.mType == type)
- {
- if (expand_region(region, index, count))
- {
- mapped = true;
- break;
- }
- }
+ if (expand_region(region, start, end))
+ {
+ flagged = true;
+ break;
+ }
}
- if (!mapped)
+ if (!flagged)
{
- //not already mapped, map new region
- MappedRegion region(type, index, count);
- mMappedVertexRegions.push_back(region);
+ //didn't expand an existing region, make a new one
+ mMappedVertexRegions.push_back({ start, end });
}
if (mVertexLocked && map_range)
@@ -1299,25 +1273,26 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
count = mNumIndices-index;
}
- bool mapped = false;
- //see if range is already mapped
- for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
- {
- MappedRegion& region = mMappedIndexRegions[i];
- if (expand_region(region, index, count))
- {
- mapped = true;
- break;
- }
- }
+ S32 start = sizeof(U16) * index;
+ S32 end = start + sizeof(U16) * count;
- if (!mapped)
- {
- //not already mapped, map new region
- MappedRegion region(TYPE_INDEX, index, count);
- mMappedIndexRegions.push_back(region);
- }
-
+ bool flagged = false;
+ // flag region as mapped
+ for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+ {
+ MappedRegion& region = mMappedIndexRegions[i];
+ if (expand_region(region, start, end))
+ {
+ flagged = true;
+ break;
+ }
+ }
+
+ if (!flagged)
+ {
+ //didn't expand an existing region, make a new one
+ mMappedIndexRegions.push_back({ start, end });
+ }
if (mIndexLocked && map_range)
{
@@ -1360,6 +1335,27 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)
return mMappedIndexData + sizeof(U16)*index;
}
+static void flush_vbo(GLenum target, S32 start, S32 end, void* data)
+{
+ if (end != 0)
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
+ LL_PROFILE_ZONE_NUM(start);
+ LL_PROFILE_ZONE_NUM(end);
+ LL_PROFILE_ZONE_NUM(end-start);
+
+ constexpr S32 block_size = 65536;
+
+ for (S32 i = start; i < end; i += block_size)
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+ LL_PROFILE_GPU_ZONE("glBufferSubData");
+ S32 tend = llmin(i + block_size, end);
+ glBufferSubData(target, i, tend - i, (U8*) data + (i-start));
+ }
+ }
+}
+
void LLVertexBuffer::unmapBuffer()
{
if (!useVBOs())
@@ -1377,37 +1373,31 @@ void LLVertexBuffer::unmapBuffer()
if (!mMappedVertexRegions.empty())
{
- stop_glerror();
+ S32 start = 0;
+ S32 end = 0;
+
for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
{
const MappedRegion& region = mMappedVertexRegions[i];
- S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0;
- S32 length = sTypeSize[region.mType]*region.mCount;
- if (mSize >= length + offset)
- {
- glBufferSubData(GL_ARRAY_BUFFER, offset, length, (U8*)mMappedData + offset);
- }
- else
- {
- GLint size = 0;
- glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &size);
- LL_WARNS() << "Attempted to map regions to a buffer that is too small, "
- << "mapped size: " << mSize
- << ", gl buffer size: " << size
- << ", length: " << length
- << ", offset: " << offset
- << LL_ENDL;
- }
- stop_glerror();
+ if (region.mStart == end + 1)
+ {
+ end = region.mEnd;
+ }
+ else
+ {
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
+ start = region.mStart;
+ end = region.mEnd;
+ }
}
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
+
mMappedVertexRegions.clear();
}
else
{
- stop_glerror();
- glBufferSubData(GL_ARRAY_BUFFER, 0, getSize(), (U8*) mMappedData);
- stop_glerror();
+ llassert(false); // this shouldn't happen -- a buffer must always be explicitly mapped
}
mVertexLocked = false;
@@ -1421,36 +1411,31 @@ void LLVertexBuffer::unmapBuffer()
if (!mMappedIndexRegions.empty())
{
- for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
- {
- const MappedRegion& region = mMappedIndexRegions[i];
- S32 offset = region.mIndex >= 0 ? sizeof(U16)*region.mIndex : 0;
- S32 length = sizeof(U16)*region.mCount;
- if (mIndicesSize >= length + offset)
- {
- glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, offset, length, (U8*) mMappedIndexData+offset);
- }
- else
- {
- GLint size = 0;
- glGetBufferParameteriv(GL_ELEMENT_ARRAY_BUFFER, GL_BUFFER_SIZE, &size);
- LL_WARNS() << "Attempted to map regions to a buffer that is too small, "
- << "mapped size: " << mIndicesSize
- << ", gl buffer size: " << size
- << ", length: " << length
- << ", offset: " << offset
- << LL_ENDL;
- }
- stop_glerror();
- }
+ S32 start = 0;
+ S32 end = 0;
+
+ for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+ {
+ const MappedRegion& region = mMappedIndexRegions[i];
+ if (region.mStart == end + 1)
+ {
+ end = region.mEnd;
+ }
+ else
+ {
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
+ start = region.mStart;
+ end = region.mEnd;
+ }
+ }
+
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
mMappedIndexRegions.clear();
}
else
{
- stop_glerror();
- glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, getIndicesSize(), (U8*) mMappedIndexData);
- stop_glerror();
+ llassert(false); // this shouldn't happen -- a buffer must always be explicitly mapped
}
mIndexLocked = false;
@@ -1640,11 +1625,53 @@ bool LLVertexBuffer::bindGLIndicesFast()
return false;
}
-void LLVertexBuffer::flush()
+void LLVertexBuffer::flush(bool discard)
{
if (useVBOs())
{
- unmapBuffer();
+ if (discard)
+ { // discard existing VBO data if the buffer must be updated
+
+ if (!mMappedVertexRegions.empty())
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard vbo");
+ LL_PROFILE_ZONE_NUM(mSize);
+ release_buffer(mGLBuffer);
+ mGLBuffer = gen_buffer();
+ bindGLBuffer();
+ {
+ LL_PROFILE_GPU_ZONE("glBufferData");
+ glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage);
+
+ for (int i = 0; i < mSize; i += 65536)
+ {
+ LL_PROFILE_GPU_ZONE("glBufferSubData");
+ S32 end = llmin(i + 65536, mSize);
+ S32 count = end - i;
+ glBufferSubData(GL_ARRAY_BUFFER, i, count, mMappedData + i);
+ }
+ }
+ mMappedVertexRegions.clear();
+ }
+ if (!mMappedIndexRegions.empty())
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard ibo");
+ LL_PROFILE_ZONE_NUM(mIndicesSize);
+ release_buffer(mGLIndices);
+ mGLIndices = gen_buffer();
+ bindGLIndices();
+ {
+ LL_PROFILE_GPU_ZONE("glBufferData (ibo)");
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, mUsage);
+ }
+ mMappedIndexRegions.clear();
+ }
+ }
+ else
+ {
+ unmapBuffer();
+ }
+
}
}
@@ -2045,10 +2072,4 @@ void LLVertexBuffer::setupVertexBufferFast(U32 data_mask)
}
}
-LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count)
-: mType(type), mIndex(index), mCount(count)
-{
- mEnd = mIndex+mCount;
-}
-
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index bb7460fb2a..74b951884d 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -51,66 +51,15 @@
//============================================================================
-// gl name pools for dynamic and streaming buffers
-class LLVBOPool
-{
-public:
- static U32 sBytesPooled;
- static U32 sIndexBytesPooled;
-
- LLVBOPool(U32 vboUsage, U32 vboType);
-
- const U32 mUsage;
- const U32 mType;
-
- //size MUST be a power of 2
- U8* allocate(U32& name, U32 size, bool for_seed = false);
-
- //size MUST be the size provided to allocate that returned the given name
- void release(U32 name, U8* buffer, U32 size);
-
- //batch allocate buffers to be provided to the application on demand
- void seedPool();
-
- //destroy all records in mFreeList
- void cleanup();
-
- U32 genBuffer();
- void deleteBuffer(U32 name);
-
- class Record
- {
- public:
- U32 mGLName;
- U8* mClientData;
- };
-
- typedef std::list<Record> record_list_t;
- std::vector<record_list_t> mFreeList;
- std::vector<U32> mMissCount;
- bool mMissCountDirty; // flag any changes to mFreeList or mMissCount
-
- //used to avoid calling glGenBuffers for every VBO creation
- static U32 sNamePool[1024];
- static U32 sNameIdx;
-};
-
-
-//============================================================================
// base class
class LLPrivateMemoryPool;
class LLVertexBuffer : public LLRefCount
{
public:
- class MappedRegion
+ struct MappedRegion
{
- public:
- S32 mType;
- S32 mIndex;
- S32 mCount;
- S32 mEnd;
-
- MappedRegion(S32 type, S32 index, S32 count);
+ S32 mStart;
+ S32 mEnd;
};
LLVertexBuffer(const LLVertexBuffer& rhs)
@@ -125,12 +74,6 @@ public:
return *this;
}
- static LLVBOPool sStreamVBOPool;
- static LLVBOPool sDynamicVBOPool;
- static LLVBOPool sDynamicCopyVBOPool;
- static LLVBOPool sStreamIBOPool;
- static LLVBOPool sDynamicIBOPool;
-
static std::list<U32> sAvailableVAOName;
static U32 sCurVAOName;
@@ -138,12 +81,10 @@ public:
static bool sUseVAO;
static bool sPreferStreamDraw;
- static void seedPools();
-
static U32 getVAOName();
static void releaseVAOName(U32 name);
- static void initClass(bool use_vbo, bool no_vbo_mapping);
+ static void initClass(LLWindow* window);
static void cleanupClass();
static void setupClientArrays(U32 data_mask);
static void drawArrays(U32 mode, const std::vector<LLVector3>& pos);
@@ -240,7 +181,7 @@ public:
virtual void setBuffer(U32 data_mask); // calls setupVertexBuffer() if data_mask is not 0
void setBufferFast(U32 data_mask); // calls setupVertexBufferFast(), assumes data_mask is not 0 among other assumptions
- void flush(); //flush pending data to GL memory
+ void flush(bool discard = false); //flush pending data to GL memory, if discard is true, discard previous VBO
// allocate buffer
bool allocateBuffer(S32 nverts, S32 nindices, bool create);
virtual bool resizeBuffer(S32 newnverts, S32 newnindices);
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 01fca47184..c6d2b476db 100644
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -710,12 +710,6 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
if (!for_snapshot)
{
- if (gFrameCount > 1)
- { //for some reason, ATI 4800 series will error out if you
- //try to generate a shadow before the first frame is through
- gPipeline.generateSunShadow(*LLViewerCamera::getInstance());
- }
-
LLVertexBuffer::unbind();
LLGLState::checkStates();
@@ -936,8 +930,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
else
{
gPipeline.renderGeom(*LLViewerCamera::getInstance(), TRUE);
- }
-
+ }
gGL.setColorMask(true, true);
//store this frame's modelview matrix for use
@@ -967,6 +960,14 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
LLRenderTarget &rt = (gPipeline.sRenderDeferred ? gPipeline.mRT->deferredScreen : gPipeline.mRT->screen);
rt.flush();
+
+ if (gFrameCount > 1 && !for_snapshot)
+ { //for some reason, ATI 4800 series will error out if you
+ //try to generate a shadow before the first frame is through
+ gPipeline.generateSunShadow(*LLViewerCamera::getInstance());
+ }
+
+
if (LLPipeline::sRenderDeferred)
{
gPipeline.renderDeferredLighting();
diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp
index bc4f00bd3f..5848cbfd9d 100644
--- a/indra/newview/llviewerwindow.cpp
+++ b/indra/newview/llviewerwindow.cpp
@@ -658,12 +658,6 @@ public:
}
- addText(xpos, ypos, llformat("%d MB Index Data (%d MB Pooled, %d KIndices)", LLVertexBuffer::sAllocatedIndexBytes/(1024*1024), LLVBOPool::sIndexBytesPooled/(1024*1024), LLVertexBuffer::sIndexCount/1024));
- ypos += y_inc;
-
- addText(xpos, ypos, llformat("%d MB Vertex Data (%d MB Pooled, %d KVerts)", LLVertexBuffer::sAllocatedBytes/(1024*1024), LLVBOPool::sBytesPooled/(1024*1024), LLVertexBuffer::sVertexCount/1024));
- ypos += y_inc;
-
addText(xpos, ypos, llformat("%d Vertex Buffers", LLVertexBuffer::sGLCount));
ypos += y_inc;
@@ -1974,7 +1968,7 @@ LLViewerWindow::LLViewerWindow(const Params& p)
LL_DEBUGS("Window") << "Loading feature tables." << LL_ENDL;
// Initialize OpenGL Renderer
- LLVertexBuffer::initClass(gSavedSettings.getBOOL("RenderVBOEnable"), gSavedSettings.getBOOL("RenderVBOMappingDisable"));
+ LLVertexBuffer::initClass(mWindow);
LL_INFOS("RenderInit") << "LLVertexBuffer initialization done." << LL_ENDL ;
gGL.init(true);
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 5e585852f4..d56b31a372 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -2338,6 +2338,7 @@ static LLTrace::BlockTimerStatHandle FTM_CULL("Object Culling");
void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE; //LL_RECORD_BLOCK_TIME(FTM_CULL);
+ LL_PROFILE_GPU_ZONE("updateCull"); // should always be zero GPU time, but drop a timer to flush stuff out
bool water_clip = !sRenderTransparentWater;
@@ -2649,10 +2650,6 @@ void LLPipeline::updateGL()
LLGLUpdate::sGLQ.pop_front();
}
}
-
- { //seed VBO Pools
- LLVertexBuffer::seedPools();
- }
}
void LLPipeline::clearRebuildGroups()
@@ -3229,6 +3226,7 @@ void LLPipeline::markRebuild(LLDrawable *drawablep, LLDrawable::EDrawableFlags f
void LLPipeline::stateSort(LLCamera& camera, LLCullResult &result)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
+ LL_PROFILE_GPU_ZONE("stateSort");
if (hasAnyRenderType(LLPipeline::RENDER_TYPE_AVATAR,
LLPipeline::RENDER_TYPE_CONTROL_AV,
@@ -3837,6 +3835,7 @@ void LLPipeline::postSort(LLCamera &camera)
// flush particle VB
if (LLVOPartGroup::sVB)
{
+ LL_PROFILE_GPU_ZONE("flush particle vb");
LLVOPartGroup::sVB->flush();
}
else
@@ -3860,9 +3859,12 @@ void LLPipeline::postSort(LLCamera &camera)
}*/
// pack vertex buffers for groups that chose to delay their updates
- for (LLSpatialGroup::sg_vector_t::iterator iter = mMeshDirtyGroup.begin(); iter != mMeshDirtyGroup.end(); ++iter)
{
- (*iter)->rebuildMesh();
+ LL_PROFILE_GPU_ZONE("rebuildMesh");
+ for (LLSpatialGroup::sg_vector_t::iterator iter = mMeshDirtyGroup.begin(); iter != mMeshDirtyGroup.end(); ++iter)
+ {
+ (*iter)->rebuildMesh();
+ }
}
/*if (use_transform_feedback)
@@ -7259,8 +7261,6 @@ void LLPipeline::doResetVertexBuffers(bool forced)
LLVOPartGroup::destroyGL();
gGL.resetVertexBuffer();
- SUBSYSTEM_CLEANUP(LLVertexBuffer);
-
if (LLVertexBuffer::sGLCount != 0)
{
LL_WARNS() << "VBO wipe failed -- " << LLVertexBuffer::sGLCount << " buffers remaining." << LL_ENDL;
@@ -7280,7 +7280,6 @@ void LLPipeline::doResetVertexBuffers(bool forced)
sNoAlpha = gSavedSettings.getBOOL("RenderNoAlpha");
LLPipeline::sTextureBindTest = gSavedSettings.getBOOL("RenderDebugTextureBind");
- LLVertexBuffer::initClass(LLVertexBuffer::sEnableVBOs, LLVertexBuffer::sDisableVBOMapping);
gGL.initVertexBuffer();
mDeferredVB = new LLVertexBuffer(DEFERRED_VB_MASK, 0);