summaryrefslogtreecommitdiff
path: root/indra/llrender/llvertexbuffer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llrender/llvertexbuffer.cpp')
-rw-r--r--indra/llrender/llvertexbuffer.cpp132
1 files changed, 52 insertions, 80 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 52fb58187a..30a7ed796a 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -37,6 +37,10 @@
#include "llglslshader.h"
#include "llmemory.h"
+#include "llcontrol.h"
+
+extern LLControlGroup gSavedSettings;
+
//Next Highest Power Of Two
//helper function, returns first number > v that is a power of 2, or v if v is already a power of 2
U32 nhpo2(U32 v)
@@ -530,6 +534,7 @@ U32 LLVertexBuffer::sGLRenderIndices = 0;
U32 LLVertexBuffer::sLastMask = 0;
U32 LLVertexBuffer::sVertexCount = 0;
+U32 LLVertexBuffer::sMappingMode = gSavedSettings.getU32("MPVBufferOptiMode");
//NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware
const U32 LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] =
@@ -1144,93 +1149,58 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
// start -- first byte to copy
// end -- last byte to copy (NOT last byte + 1)
// data -- mMappedData or mMappedIndexData
-static void flush_vbo(GLenum target, U32 start, U32 end, void* data)
+static void flush_vbo(GLenum target, U32 start, U32 end, void* data, S16 mode)
{
- if (end != 0)
+ if (end == 0) return;
+
+ if (mode == 0)
+ {
+ if(gGLManager.mIsApple) mode = 2;
+ else mode = 1;
+ }
+
+ if (mode == 1)
{
- //Note (observeur): I maintained the profile "glBufferSubData" names because i'm not sure if it would impact any statistics part somewhere in the code.
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
LL_PROFILE_ZONE_NUM(start);
LL_PROFILE_ZONE_NUM(end);
LL_PROFILE_ZONE_NUM(end-start);
- U32 size = end-start+1;
- U32 block_size = 65536;
+ const U32 block_size = 65536;
- //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus).
- if(!gGLManager.mIsApple)
- {
- for (U32 i = start; i <= end; i += block_size)
- {
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
- LL_PROFILE_GPU_ZONE("glBufferSubData");
- U32 tend = llmin(i + block_size, end);
- U32 size = tend - i + 1;
- glBufferSubData(target, i, size, (U8*) data + (i-start));
- }
-
- return;
- }
-
- //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously.
-
- //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way.
- U32 MapBits = GL_MAP_WRITE_BIT;
-
- //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes.
- block_size = 524288;
-
- //Note (observeur): This is called in case block_size is set to 0 (All in one mapping).
- if(block_size == 0)
+ for (U32 i = start; i <= end; i += block_size)
{
- U8 * mptr = NULL;
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
LL_PROFILE_GPU_ZONE("glBufferSubData");
-
- mptr = (U8*) glMapBufferRange( target, start, size, MapBits);
-
- if(mptr)
- {
- std::memcpy(mptr, (U8*) data, size);
- glUnmapBuffer(target);
- }
- else
- {
- LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL;
- }
- return;
+ U32 tend = llmin(i + block_size, end);
+ //U32 size = tend - i + 1;
+ glBufferSubData(target, i, tend - i +1, (U8*) data + (i-start));
}
- //Note (observeur): The following code is executed in case of block_size is superior to 0
-
- //Note (observeur): This is for analysis purpose only
- //if(size > block_size)
- //{
- // LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL;
- //}
+ return;
+ }
- U8 * mptr = NULL;
+ U32 MapBits = GL_MAP_WRITE_BIT;
+ if (mode>2) MapBits = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
- for (U32 i = start; i <= end; i += block_size)
- {
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
- LL_PROFILE_GPU_ZONE("glBufferSubData");
- U32 tend = llmin(i + block_size, end);
- size = tend - i + 1;
+ U32 buffer_size = end-start+1;
- mptr = (U8*) glMapBufferRange( target, i, size, MapBits );
+ U8 * mptr = NULL;
+ mptr = (U8*) glMapBufferRange( target, start, end-start+1, MapBits);
- if(mptr)
- {
- std::memcpy(mptr, (U8*) data + (i-start), size);
- glUnmapBuffer(target);
- }
- else
- {
- LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL;
- }
+ if (mptr)
+ {
+ std::memcpy(mptr, (U8*) data, buffer_size);
+ if(!glUnmapBuffer(target))
+ {
+ LL_WARNS() << "glUnmapBuffer() failed" << LL_ENDL;
}
}
+ else
+ {
+ LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL;
+ }
+
}
void LLVertexBuffer::unmapBuffer()
@@ -1266,13 +1236,13 @@ void LLVertexBuffer::unmapBuffer()
}
else
{
- flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, sMappingMode);
start = region.mStart;
end = region.mEnd;
}
}
- flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, sMappingMode);
mMappedVertexRegions.clear();
}
@@ -1300,13 +1270,14 @@ void LLVertexBuffer::unmapBuffer()
}
else
{
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, sMappingMode);
+
start = region.mStart;
end = region.mEnd;
}
}
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, sMappingMode);
mMappedIndexRegions.clear();
}
@@ -1562,43 +1533,43 @@ void LLVertexBuffer::setupVertexBuffer()
void LLVertexBuffer::setPositionData(const LLVector4a* data)
{
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data);
+ flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data, sMappingMode);
}
void LLVertexBuffer::setTexCoordData(const LLVector2* data)
{
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*)data);
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*) data, sMappingMode);
}
void LLVertexBuffer::setColorData(const LLColor4U* data)
{
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data);
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data, sMappingMode);
}
void LLVertexBuffer::setNormalData(const LLVector4a* data)
{
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data);
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data, sMappingMode);
}
void LLVertexBuffer::setTangentData(const LLVector4a* data)
{
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data);
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data, sMappingMode);
}
void LLVertexBuffer::setWeight4Data(const LLVector4a* data)
{
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data);
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data, sMappingMode);
}
void LLVertexBuffer::setIndexData(const U16* data)
{
llassert(sGLRenderIndices == mGLIndices);
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data);
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data, sMappingMode);
}
void LLVertexBuffer::setIndexData(const U32* data)
@@ -1610,6 +1581,7 @@ void LLVertexBuffer::setIndexData(const U32* data)
mIndicesStride = 4;
mNumIndices /= 2;
}
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*)data);
+
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*) data, sMappingMode);
}