diff options
author | Erik Kundiman <erik@megapahit.org> | 2024-07-01 13:31:14 +0800 |
---|---|---|
committer | Erik Kundiman <erik@megapahit.org> | 2024-07-01 13:31:14 +0800 |
commit | 29d68ecb229ecb19e6d0da4cf5fbd8e3bbdde2cf (patch) | |
tree | e60f85379679aa2b07c8a17a1fcd4954572149c2 /indra/llrender/llvertexbuffer.cpp | |
parent | a38b53cc5f456593e8ca21c5d5e20542c1723525 (diff) | |
parent | 1478ec8e9c50eba8b3a64fb9c2fc54a3a328510b (diff) |
Merge branch 'main' into maint-b
Diffstat (limited to 'indra/llrender/llvertexbuffer.cpp')
-rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 78 |
1 files changed, 73 insertions, 5 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 7caf20f40b..52fb58187a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -359,7 +359,8 @@ public: mMisses++; name = gen_buffer(); glBindBuffer(type, name); - glBufferData(type, size, nullptr, GL_DYNAMIC_DRAW); + //glBufferData(type, size, nullptr, GL_DYNAMIC_DRAW); + glBufferData(type, size, nullptr, GL_STREAM_DRAW); if (type == GL_ELEMENT_ARRAY_BUFFER) { LLVertexBuffer::sGLRenderIndices = name; @@ -1147,20 +1148,87 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) { if (end != 0) { + //Note (observeur): I maintained the profile "glBufferSubData" names because i'm not sure if it would impact any statistics part somewhere in the code. LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); LL_PROFILE_ZONE_NUM(start); LL_PROFILE_ZONE_NUM(end); LL_PROFILE_ZONE_NUM(end-start); - constexpr U32 block_size = 8192; + U32 size = end-start+1; + U32 block_size = 65536; + + //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus). + if(!gGLManager.mIsApple) + { + for (U32 i = start; i <= end; i += block_size) + { + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); + U32 tend = llmin(i + block_size, end); + U32 size = tend - i + 1; + glBufferSubData(target, i, size, (U8*) data + (i-start)); + } + + return; + } + + //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously. + + //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way. + U32 MapBits = GL_MAP_WRITE_BIT; + + //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes. + block_size = 524288; + + //Note (observeur): This is called in case block_size is set to 0 (All in one mapping). + if(block_size == 0) + { + U8 * mptr = NULL; + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); + + mptr = (U8*) glMapBufferRange( target, start, size, MapBits); + + if(mptr) + { + std::memcpy(mptr, (U8*) data, size); + glUnmapBuffer(target); + } + else + { + LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; + } + return; + } + + //Note (observeur): The following code is executed in case of block_size is superior to 0 + + //Note (observeur): This is for analysis purpose only + //if(size > block_size) + //{ + // LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL; + //} + + U8 * mptr = NULL; for (U32 i = start; i <= end; i += block_size) { LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); - //LL_PROFILE_GPU_ZONE("glBufferSubData"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); U32 tend = llmin(i + block_size, end); - U32 size = tend - i + 1; - glBufferSubData(target, i, size, (U8*) data + (i-start)); + size = tend - i + 1; + + mptr = (U8*) glMapBufferRange( target, i, size, MapBits ); + + if(mptr) + { + std::memcpy(mptr, (U8*) data + (i-start), size); + glUnmapBuffer(target); + } + else + { + LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; + } } } } |