summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dave Parks <davep@lindenlab.com> 2024-06-05 15:14:13 -0500
committer: GitHub <noreply@github.com> 2024-06-05 15:14:13 -0500
commit: 24586f810eb7ef8048a55687333d51c53aa2bed8 (patch)
tree: c1213aae9efb9a5b6afd89ab6ce6ffe3dd02c277
parent: f568e6036bfa133ee8496a751f3269ec772fe5d3 (diff)
#1527 Improve performance on Apple silicon (#1632)
-rw-r--r-- indra/llcommon/llprofiler.h 2
-rw-r--r-- indra/llrender/llgl.h 15
-rw-r--r-- indra/llrender/llrender.cpp 8
-rw-r--r-- indra/llrender/llvertexbuffer.cpp 187
-rw-r--r-- indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl 2
-rw-r--r-- indra/newview/llface.cpp 7
6 files changed, 183 insertions, 38 deletions
diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h
index af5e5777bf..722d9afca2 100644
--- a/indra/llcommon/llprofiler.h
+++ b/indra/llcommon/llprofiler.h
@@ -162,7 +162,7 @@ extern thread_local bool gProfilerEnabled;
#define LL_LABEL_OBJECT_GL(type, name, length, label)
-#if LL_PROFILER_CONFIGURATION > 1
+#if !LL_DARWIN && LL_PROFILER_CONFIGURATION > 1
#define LL_PROFILE_ALLOC(ptr, size) TracyAlloc(ptr, size)
#define LL_PROFILE_FREE(ptr) TracyFree(ptr)
#else
diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h
index 2f538d0844..254c983110 100644
--- a/indra/llrender/llgl.h
+++ b/indra/llrender/llgl.h
@@ -156,13 +156,18 @@ void assert_glerror();
void clear_glerror();
-//#if LL_DEBUG
+
# define stop_glerror() assert_glerror()
# define llglassertok() assert_glerror()
-//#else
-//# define stop_glerror()
-//# define llglassertok()
-//#endif
+
+// stop_glerror is still needed on OS X but has performance implications;
+// use the STOP_GLERROR macro below to add stop_glerror only to non-release
+// builds on OS X (it expands to nothing everywhere else)
+#if LL_DARWIN && !LL_RELEASE_FOR_DOWNLOAD
+#define STOP_GLERROR stop_glerror()
+#else
+#define STOP_GLERROR
+#endif
#define llglassertok_always() assert_glerror()
diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp
index 399281be84..51028e5667 100644
--- a/indra/llrender/llrender.cpp
+++ b/indra/llrender/llrender.cpp
@@ -990,6 +990,7 @@ void LLRender::syncLightState()
void LLRender::syncMatrices()
{
+ STOP_GLERROR;
static const U32 name[] =
{
LLShaderMgr::MODELVIEW_MATRIX,
@@ -1012,8 +1013,6 @@ void LLRender::syncMatrices()
if (shader)
{
- //llassert(shader);
-
bool mvp_done = false;
U32 i = MM_MODELVIEW;
@@ -1134,6 +1133,7 @@ void LLRender::syncMatrices()
syncLightState();
}
}
+ STOP_GLERROR;
}
void LLRender::translatef(const GLfloat& x, const GLfloat& y, const GLfloat& z)
@@ -1585,6 +1585,7 @@ void LLRender::end()
}
void LLRender::flush()
{
+ STOP_GLERROR;
if (mCount > 0)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
@@ -1693,6 +1694,9 @@ void LLRender::flush()
vb->setColorData(mColorsp.get());
}
+#if LL_DARWIN
+ vb->unmapBuffer();
+#endif
vb->unbind();
sVBCache[vhash] = { vb , std::chrono::steady_clock::now() };
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 8cb124d406..fa3b2df6e0 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -290,6 +290,62 @@ static GLuint gen_buffer()
#define ANALYZE_VBO_POOL 0
+#if LL_DARWIN
+
+// experimental -- disable VBO pooling on OS X; data is staged in client memory and uploaded with glBufferData in unmapBuffer
+class LLVBOPool
+{
+public:
+ U64 mAllocated = 0;
+
+ U64 getVramBytesUsed()
+ {
+ return mAllocated;
+ }
+
+ void allocate(GLenum type, U32 size, GLuint& name, U8*& data)
+ {
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ STOP_GLERROR;
+ llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
+ llassert(name == 0); // non zero name indicates a gl name that wasn't freed
+ llassert(data == nullptr); // non null data indicates a buffer that wasn't freed
+ llassert(size >= 2); // any buffer size smaller than a single index is nonsensical
+
+ mAllocated += size;
+
+ { //allocate a new buffer
+ LL_PROFILE_GPU_ZONE("vbo alloc");
+ // On OS X, we don't allocate a VBO until the last possible moment
+ // in unmapBuffer
+ data = (U8*) ll_aligned_malloc_16(size);
+ STOP_GLERROR;
+ }
+ }
+
+ void free(GLenum type, U32 size, GLuint name, U8* data)
+ {
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
+ llassert(size >= 2);
+
+ if (data)
+ {
+ ll_aligned_free_16(data);
+ }
+
+ mAllocated -= size;
+ STOP_GLERROR;
+ if (name)
+ {
+ glDeleteBuffers(1, &name);
+ }
+ STOP_GLERROR;
+ }
+};
+
+#else
+
class LLVBOPool
{
public:
@@ -509,9 +565,8 @@ public:
mIBOPool.clear();
mVBOPool.clear();
}
-
-
};
+#endif
static LLVBOPool* sVBOPool = nullptr;
@@ -629,6 +684,8 @@ void LLVertexBuffer::drawElements(U32 mode, const LLVector4a* pos, const LLVecto
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
llassert(LLGLSLShader::sCurBoundShaderPtr != NULL);
+ STOP_GLERROR;
+
gGL.syncMatrices();
U32 mask = LLVertexBuffer::MAP_VERTEX;
@@ -743,8 +800,10 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi
llassert(mGLBuffer == sGLRenderBuffer);
llassert(mGLIndices == sGLRenderIndices);
gGL.syncMatrices();
+ STOP_GLERROR;
glDrawRangeElements(sGLMode[mode], start, end, count, mIndicesType,
(GLvoid*) (indices_offset * (size_t) mIndicesStride));
+ STOP_GLERROR;
}
void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const
@@ -760,7 +819,9 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
llassert(mGLIndices == sGLRenderIndices);
gGL.syncMatrices();
+ STOP_GLERROR;
glDrawArrays(sGLMode[mode], first, count);
+ STOP_GLERROR;
}
//static
@@ -783,9 +844,10 @@ void LLVertexBuffer::initClass(LLWindow* window)
//static
void LLVertexBuffer::unbind()
{
+ STOP_GLERROR;
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
-
+ STOP_GLERROR;
sGLRenderBuffer = 0;
sGLRenderIndices = 0;
}
@@ -1081,6 +1143,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
count = mNumVerts - index;
}
+#if !LL_DARWIN
U32 start = mOffsets[type] + sTypeSize[type] * index;
U32 end = start + sTypeSize[type] * count-1;
@@ -1101,7 +1164,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
//didn't expand an existing region, make a new one
mMappedVertexRegions.push_back({ start, end });
}
-
+#endif
return mMappedData+mOffsets[type]+sTypeSize[type]*index;
}
@@ -1115,6 +1178,7 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
count = mNumIndices-index;
}
+#if !LL_DARWIN
U32 start = sizeof(U16) * index;
U32 end = start + sizeof(U16) * count-1;
@@ -1135,6 +1199,7 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
//didn't expand an existing region, make a new one
mMappedIndexRegions.push_back({ start, end });
}
+#endif
return mMappedIndexData + sizeof(U16)*index;
}
@@ -1143,9 +1208,17 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
// target -- "target" parameter for glBufferSubData
// start -- first byte to copy
// end -- last byte to copy (NOT last byte + 1)
-// data -- mMappedData or mMappedIndexData
-static void flush_vbo(GLenum target, U32 start, U32 end, void* data)
-{
+// data -- data to be flushed
+// dst -- mMappedData or mMappedIndexData
+static void flush_vbo(GLenum target, U32 start, U32 end, void* data, U8* dst)
+{
+#if LL_DARWIN
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy");
+ STOP_GLERROR;
+ // copy into mapped buffer
+ memcpy(dst+start, data, end-start+1);
+#else
+ // skip mapped data and stream to GPU via glBufferSubData
if (end != 0)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
@@ -1164,10 +1237,12 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)
glBufferSubData(target, i, size, (U8*) data + (i-start));
}
}
+#endif
}
void LLVertexBuffer::unmapBuffer()
{
+ STOP_GLERROR;
struct SortMappedRegion
{
bool operator()(const MappedRegion& lhs, const MappedRegion& rhs)
@@ -1176,9 +1251,51 @@ void LLVertexBuffer::unmapBuffer()
}
};
+#if LL_DARWIN
+ STOP_GLERROR;
+ if (mMappedData)
+ {
+ if (mGLBuffer)
+ {
+ glDeleteBuffers(1, &mGLBuffer);
+ }
+ mGLBuffer = gen_buffer();
+ glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+ sGLRenderBuffer = mGLBuffer;
+ glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW);
+ }
+ else if (mGLBuffer != sGLRenderBuffer)
+ {
+ glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+ sGLRenderBuffer = mGLBuffer;
+ }
+ STOP_GLERROR;
+
+ if (mMappedIndexData)
+ {
+ if (mGLIndices)
+ {
+ glDeleteBuffers(1, &mGLIndices);
+ }
+
+ mGLIndices = gen_buffer();
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+ sGLRenderIndices = mGLIndices;
+
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW);
+ }
+ else if (mGLIndices != sGLRenderIndices)
+ {
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+ sGLRenderIndices = mGLIndices;
+ }
+ STOP_GLERROR;
+#else
+
if (!mMappedVertexRegions.empty())
{
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex");
+
if (sGLRenderBuffer != mGLBuffer)
{
glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
@@ -1199,14 +1316,13 @@ void LLVertexBuffer::unmapBuffer()
}
else
{
- flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
start = region.mStart;
end = region.mEnd;
}
}
- flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start);
-
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
mMappedVertexRegions.clear();
}
@@ -1233,16 +1349,16 @@ void LLVertexBuffer::unmapBuffer()
}
else
{
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
start = region.mStart;
end = region.mEnd;
}
}
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);
-
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
mMappedIndexRegions.clear();
}
+#endif
}
//----------------------------------------------------------------------------
@@ -1363,10 +1479,17 @@ bool LLVertexBuffer::getClothWeightStrider(LLStrider<LLVector4>& strider, U32 in
// Set for rendering
void LLVertexBuffer::setBuffer()
{
+ STOP_GLERROR;
+#if LL_DARWIN
+ if (!mGLBuffer)
+ { // OS X doesn't allocate a buffer until we call unmapBuffer
+ return;
+ }
+#endif
// no data may be pending
llassert(mMappedVertexRegions.empty());
llassert(mMappedIndexRegions.empty());
-
+
// a shader must be bound
llassert(LLGLSLShader::sCurBoundShaderPtr);
@@ -1395,12 +1518,15 @@ void LLVertexBuffer::setBuffer()
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
sGLRenderIndices = mGLIndices;
}
+
+ STOP_GLERROR;
}
// virtual (default)
void LLVertexBuffer::setupVertexBuffer()
{
+ STOP_GLERROR;
U8* base = nullptr;
U32 data_mask = LLGLSLShader::sCurBoundShaderPtr->mAttributeMask;
@@ -1490,59 +1616,76 @@ void LLVertexBuffer::setupVertexBuffer()
void* ptr = (void*)(base + mOffsets[TYPE_VERTEX]);
glVertexAttribPointer(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);
}
+ STOP_GLERROR;
}
void LLVertexBuffer::setPositionData(const LLVector4a* data)
{
+#if !LL_DARWIN
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data);
+#endif
+ flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data, mMappedData);
}
void LLVertexBuffer::setTexCoordData(const LLVector2* data)
{
+#if !LL_DARWIN
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*)data);
+#endif
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*)data, mMappedData);
}
void LLVertexBuffer::setColorData(const LLColor4U* data)
{
+#if !LL_DARWIN
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data);
+#endif
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data, mMappedData);
}
void LLVertexBuffer::setNormalData(const LLVector4a* data)
{
+#if !LL_DARWIN
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data);
+#endif
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data, mMappedData);
}
void LLVertexBuffer::setTangentData(const LLVector4a* data)
{
+#if !LL_DARWIN
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data);
+#endif
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data, mMappedData);
}
void LLVertexBuffer::setWeight4Data(const LLVector4a* data)
{
+#if !LL_DARWIN
llassert(sGLRenderBuffer == mGLBuffer);
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data);
+#endif
+ flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data, mMappedData);
}
void LLVertexBuffer::setIndexData(const U16* data)
{
+#if !LL_DARWIN
llassert(sGLRenderIndices == mGLIndices);
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data);
+#endif
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data, mMappedIndexData);
}
void LLVertexBuffer::setIndexData(const U32* data)
{
+#if !LL_DARWIN
llassert(sGLRenderIndices == mGLIndices);
+#endif
if (mIndicesType != GL_UNSIGNED_INT)
{ // HACK -- vertex buffers are initialized as 16-bit indices, but can be switched to 32-bit indices
mIndicesType = GL_UNSIGNED_INT;
mIndicesStride = 4;
mNumIndices /= 2;
}
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*)data);
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*)data, mMappedIndexData);
}
diff --git a/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl b/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl
index 5f598f84a7..5c79fd7315 100644
--- a/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl
@@ -57,7 +57,7 @@ void main()
outColor.a = 0.0; // yes, downstream atmospherics
- frag_data[0] = outColor;
+ frag_data[0] = max(outColor, vec4(0));
frag_data[1] = vec4(0.0,0.0,0.0,-1.0);
vec3 nvn = normalize(vary_normal);
frag_data[2] = vec4(nvn.xyz, GBUFFER_FLAG_HAS_ATMOS);
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 0e2f19b3d9..cda73f59ed 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -628,13 +628,6 @@ void LLFace::renderOneWireframe(const LLColor4 &color, F32 fogCfx, bool wirefram
{
LLGLDisable depth(wireframe_selection ? 0 : GL_BLEND);
- //LLGLEnable stencil(wireframe_selection ? 0 : GL_STENCIL_TEST);
-
- if (!wireframe_selection)
- { //modify wireframe into outline selection mode
- glStencilFunc(GL_NOTEQUAL, 2, 0xffff);
- glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
- }
LLGLEnable offset(GL_POLYGON_OFFSET_LINE);
glPolygonOffset(3.f, 3.f);