summaryrefslogtreecommitdiff
path: root/indra/llrender
diff options
context:
space:
mode:
authorDave Parks <davep@lindenlab.com>2024-09-17 18:14:22 -0500
committerGitHub <noreply@github.com>2024-09-17 18:14:22 -0500
commit0a617904f98ab5960379099822e4891a08137e68 (patch)
tree6b3737e63160e739d38674192f02767ed3d69490 /indra/llrender
parentfd843d514a4e28f8e4a5d5595bba21ccad195e72 (diff)
#2590 Fix for horrible FPS on Intel Mac (#2591)
* Work around for GHA mac runners not playing nice with Tracy * Delay VBO deletion for a few frames * Enable multithreaded GL driver and multithreaded media textures on Apple silicon
Diffstat (limited to 'indra/llrender')
-rw-r--r--indra/llrender/llgl.cpp5
-rw-r--r--indra/llrender/llgl.h1
-rw-r--r--indra/llrender/llvertexbuffer.cpp374
3 files changed, 217 insertions, 163 deletions
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp
index c62cacdce6..c5c9d50dee 100644
--- a/indra/llrender/llgl.cpp
+++ b/indra/llrender/llgl.cpp
@@ -1170,6 +1170,11 @@ bool LLGLManager::initGL()
mGLVendorShort = "INTEL";
mIsIntel = true;
}
+ else if (mGLVendor.find("APPLE") != std::string::npos)
+ {
+ mGLVendorShort = "APPLE";
+ mIsApple = true;
+ }
else
{
mGLVendorShort = "MISC";
diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h
index 17f825bd71..f5b1e8d786 100644
--- a/indra/llrender/llgl.h
+++ b/indra/llrender/llgl.h
@@ -102,6 +102,7 @@ public:
bool mIsAMD;
bool mIsNVIDIA;
bool mIsIntel;
+ bool mIsApple = false;
// hints to the render pipe
U32 mDownScaleMethod = 0; // see settings.xml RenderDownScaleMethod
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index e9fa369b0c..0be799db9d 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -289,22 +289,58 @@ static GLuint gen_buffer()
return ret;
}
-#define ANALYZE_VBO_POOL 0
+static void delete_buffers(S32 count, GLuint* buffers)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ // wait a few frames before actually deleting the buffers to avoid
+ // synchronization issues with the GPU
+ static std::vector<GLuint> sFreeList[4];
+
+ if (gGLManager.mInited)
+ {
+ U32 idx = LLImageGL::sFrameCount % 4;
+
+ for (S32 i = 0; i < count; ++i)
+ {
+ sFreeList[idx].push_back(buffers[i]);
+ }
+
+ idx = (LLImageGL::sFrameCount + 3) % 4;
+
+ if (!sFreeList[idx].empty())
+ {
+ glDeleteBuffers((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
+ sFreeList[idx].resize(0);
+ }
+ }
+}
-#if LL_DARWIN
-// experimental -- disable VBO pooling on OS X and use glMapBuffer
+#define ANALYZE_VBO_POOL 0
+
+// VBO Pool interface
class LLVBOPool
{
+ public:
+ virtual ~LLVBOPool() = default;
+ virtual void allocate(GLenum type, U32 size, GLuint& name, U8*& data) = 0;
+ virtual void free(GLenum type, U32 size, GLuint name, U8* data) = 0;
+ virtual U64 getVramBytesUsed() = 0;
+};
+
+// VBO Pool for Apple GPUs (as in M1/M2 etc, not Intel macs)
+// Effectively disables VBO pooling
+class LLAppleVBOPool final: public LLVBOPool
+{
public:
U64 mAllocated = 0;
- U64 getVramBytesUsed()
+ U64 getVramBytesUsed() override
{
return mAllocated;
}
- void allocate(GLenum type, U32 size, GLuint& name, U8*& data)
+ void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
STOP_GLERROR;
@@ -324,7 +360,7 @@ public:
}
}
- void free(GLenum type, U32 size, GLuint name, U8* data)
+ void free(GLenum type, U32 size, GLuint name, U8* data) override
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -339,19 +375,17 @@ public:
STOP_GLERROR;
if (name)
{
- glDeleteBuffers(1, &name);
+ delete_buffers(1, &name);
}
STOP_GLERROR;
}
};
-#else
-
-class LLVBOPool
+// VBO Pool for GPUs that benefit from VBO pooling
+class LLDefaultVBOPool final : public LLVBOPool
{
public:
typedef std::chrono::steady_clock::time_point Time;
-
struct Entry
{
U8* mData;
@@ -359,7 +393,7 @@ public:
Time mAge;
};
- ~LLVBOPool()
+ ~LLDefaultVBOPool() override
{
clear();
}
@@ -377,7 +411,7 @@ public:
U32 mMisses = 0;
U32 mHits = 0;
- U64 getVramBytesUsed()
+ U64 getVramBytesUsed() override
{
return mAllocated + mReserved;
}
@@ -393,7 +427,7 @@ public:
size += block_size - (size % block_size);
}
- void allocate(GLenum type, U32 size, GLuint& name, U8*& data)
+ void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -449,7 +483,7 @@ public:
clean();
}
- void free(GLenum type, U32 size, GLuint name, U8* data)
+ void free(GLenum type, U32 size, GLuint name, U8* data) override
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -512,7 +546,7 @@ public:
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vbo cache timeout");
auto& entry = entries.back();
ll_aligned_free_16(entry.mData);
- glDeleteBuffers(1, &entry.mGLName);
+ delete_buffers(1, &entry.mGLName);
llassert(mReserved >= iter->first);
mReserved -= iter->first;
entries.pop_back();
@@ -548,7 +582,7 @@ public:
for (auto& entry : entries.second)
{
ll_aligned_free_16(entry.mData);
- glDeleteBuffers(1, &entry.mGLName);
+ delete_buffers(1, &entry.mGLName);
}
}
@@ -557,7 +591,7 @@ public:
for (auto& entry : entries.second)
{
ll_aligned_free_16(entry.mData);
- glDeleteBuffers(1, &entry.mGLName);
+ delete_buffers(1, &entry.mGLName);
}
}
@@ -567,7 +601,6 @@ public:
mVBOPool.clear();
}
};
-#endif
static LLVBOPool* sVBOPool = nullptr;
@@ -896,7 +929,16 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
void LLVertexBuffer::initClass(LLWindow* window)
{
llassert(sVBOPool == nullptr);
- sVBOPool = new LLVBOPool();
+ if (gGLManager.mIsApple)
+ {
+ LL_INFOS() << "VBO Pooling Disabled" << LL_ENDL;
+ sVBOPool = new LLAppleVBOPool();
+ }
+ else
+ {
+ LL_INFOS() << "VBO Pooling Enabled" << LL_ENDL;
+ sVBOPool = new LLDefaultVBOPool();
+ }
#if ENABLE_GL_WORK_QUEUE
sQueue = new GLWorkQueue();
@@ -964,7 +1006,6 @@ void LLVertexBuffer::flushBuffers()
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
// must only be called from main thread
- llassert(LLCoros::on_main_thread_main_coro());
for (auto& buffer : sMappedBuffers)
{
buffer->_unmapBuffer();
@@ -1231,28 +1272,29 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
count = mNumVerts - index;
}
-#if !LL_DARWIN
- U32 start = mOffsets[type] + sTypeSize[type] * index;
- U32 end = start + sTypeSize[type] * count-1;
-
- bool flagged = false;
- // flag region as mapped
- for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
+ if (!gGLManager.mIsApple)
{
- MappedRegion& region = mMappedVertexRegions[i];
- if (expand_region(region, start, end))
+ U32 start = mOffsets[type] + sTypeSize[type] * index;
+ U32 end = start + sTypeSize[type] * count-1;
+
+ bool flagged = false;
+ // flag region as mapped
+ for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
{
- flagged = true;
- break;
+ MappedRegion& region = mMappedVertexRegions[i];
+ if (expand_region(region, start, end))
+ {
+ flagged = true;
+ break;
+ }
}
- }
- if (!flagged)
- {
- //didn't expand an existing region, make a new one
- mMappedVertexRegions.push_back({ start, end });
+ if (!flagged)
+ {
+ //didn't expand an existing region, make a new one
+ mMappedVertexRegions.push_back({ start, end });
+ }
}
-#endif
return mMappedData+mOffsets[type]+sTypeSize[type]*index;
}
@@ -1267,28 +1309,29 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
count = mNumIndices-index;
}
-#if !LL_DARWIN
- U32 start = sizeof(U16) * index;
- U32 end = start + sizeof(U16) * count-1;
-
- bool flagged = false;
- // flag region as mapped
- for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+ if (!gGLManager.mIsApple)
{
- MappedRegion& region = mMappedIndexRegions[i];
- if (expand_region(region, start, end))
+ U32 start = sizeof(U16) * index;
+ U32 end = start + sizeof(U16) * count-1;
+
+ bool flagged = false;
+ // flag region as mapped
+ for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
{
- flagged = true;
- break;
+ MappedRegion& region = mMappedIndexRegions[i];
+ if (expand_region(region, start, end))
+ {
+ flagged = true;
+ break;
+ }
}
- }
- if (!flagged)
- {
- //didn't expand an existing region, make a new one
- mMappedIndexRegions.push_back({ start, end });
+ if (!flagged)
+ {
+ //didn't expand an existing region, make a new one
+ mMappedIndexRegions.push_back({ start, end });
+ }
}
-#endif
return mMappedIndexData + sizeof(U16)*index;
}
@@ -1301,37 +1344,40 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
// dst -- mMappedData or mMappedIndexData
void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8* dst)
{
-#if LL_DARWIN
- // on OS X, flush_vbo doesn't actually write to the GL buffer, so be sure to call
- // _mapBuffer to tag the buffer for flushing to GL
- _mapBuffer();
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy");
- STOP_GLERROR;
- // copy into mapped buffer
- memcpy(dst+start, data, end-start+1);
-#else
- llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices);
-
- // skip mapped data and stream to GPU via glBufferSubData
- if (end != 0)
+ if (gGLManager.mIsApple)
{
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
- LL_PROFILE_ZONE_NUM(start);
- LL_PROFILE_ZONE_NUM(end);
- LL_PROFILE_ZONE_NUM(end-start);
-
- constexpr U32 block_size = 65536;
+ // on OS X, flush_vbo doesn't actually write to the GL buffer, so be sure to call
+ // _mapBuffer to tag the buffer for flushing to GL
+ _mapBuffer();
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy");
+ STOP_GLERROR;
+ // copy into mapped buffer
+ memcpy(dst+start, data, end-start+1);
+ }
+ else
+ {
+ llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices);
- for (U32 i = start; i <= end; i += block_size)
+ // skip mapped data and stream to GPU via glBufferSubData
+ if (end != 0)
{
- //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
- //LL_PROFILE_GPU_ZONE("glBufferSubData");
- U32 tend = llmin(i + block_size, end);
- U32 size = tend - i + 1;
- glBufferSubData(target, i, size, (U8*) data + (i-start));
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
+ LL_PROFILE_ZONE_NUM(start);
+ LL_PROFILE_ZONE_NUM(end);
+ LL_PROFILE_ZONE_NUM(end-start);
+
+ constexpr U32 block_size = 65536;
+
+ for (U32 i = start; i <= end; i += block_size)
+ {
+ //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+ //LL_PROFILE_GPU_ZONE("glBufferSubData");
+ U32 tend = llmin(i + block_size, end);
+ U32 size = tend - i + 1;
+ glBufferSubData(target, i, size, (U8*) data + (i-start));
+ }
}
}
-#endif
}
void LLVertexBuffer::unmapBuffer()
@@ -1364,114 +1410,116 @@ void LLVertexBuffer::_unmapBuffer()
}
};
-#if LL_DARWIN
- STOP_GLERROR;
- if (mMappedData)
- {
- if (mGLBuffer)
- {
- glDeleteBuffers(1, &mGLBuffer);
- }
- mGLBuffer = gen_buffer();
- glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
- sGLRenderBuffer = mGLBuffer;
- glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW);
- }
- else if (mGLBuffer != sGLRenderBuffer)
- {
- glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
- sGLRenderBuffer = mGLBuffer;
- }
- STOP_GLERROR;
-
- if (mMappedIndexData)
+ if (gGLManager.mIsApple)
{
- if (mGLIndices)
+ STOP_GLERROR;
+ if (mMappedData)
{
- glDeleteBuffers(1, &mGLIndices);
+ if (mGLBuffer)
+ {
+ delete_buffers(1, &mGLBuffer);
+ }
+ mGLBuffer = gen_buffer();
+ glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+ sGLRenderBuffer = mGLBuffer;
+ glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW);
}
-
- mGLIndices = gen_buffer();
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
- sGLRenderIndices = mGLIndices;
-
- glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW);
- }
- else if (mGLIndices != sGLRenderIndices)
- {
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
- sGLRenderIndices = mGLIndices;
- }
- STOP_GLERROR;
-#else
-
- if (!mMappedVertexRegions.empty())
- {
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex");
-
- if (sGLRenderBuffer != mGLBuffer)
+ else if (mGLBuffer != sGLRenderBuffer)
{
glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
sGLRenderBuffer = mGLBuffer;
}
+ STOP_GLERROR;
- U32 start = 0;
- U32 end = 0;
-
- std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion());
-
- for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
+ if (mMappedIndexData)
{
- const MappedRegion& region = mMappedVertexRegions[i];
- if (region.mStart == end + 1)
- {
- end = region.mEnd;
- }
- else
+ if (mGLIndices)
{
- flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
- start = region.mStart;
- end = region.mEnd;
+ delete_buffers(1, &mGLIndices);
}
- }
-
- flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
- mMappedVertexRegions.clear();
- }
- if (!mMappedIndexRegions.empty())
- {
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index");
+ mGLIndices = gen_buffer();
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+ sGLRenderIndices = mGLIndices;
- if (mGLIndices != sGLRenderIndices)
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW);
+ }
+ else if (mGLIndices != sGLRenderIndices)
{
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
sGLRenderIndices = mGLIndices;
}
- U32 start = 0;
- U32 end = 0;
+ STOP_GLERROR;
+ }
+ else
+ {
+ if (!mMappedVertexRegions.empty())
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex");
+
+ if (sGLRenderBuffer != mGLBuffer)
+ {
+ glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+ sGLRenderBuffer = mGLBuffer;
+ }
- std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion());
+ U32 start = 0;
+ U32 end = 0;
- for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+ std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion());
+
+ for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
+ {
+ const MappedRegion& region = mMappedVertexRegions[i];
+ if (region.mStart == end + 1)
+ {
+ end = region.mEnd;
+ }
+ else
+ {
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
+ start = region.mStart;
+ end = region.mEnd;
+ }
+ }
+
+ flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
+ mMappedVertexRegions.clear();
+ }
+
+ if (!mMappedIndexRegions.empty())
{
- const MappedRegion& region = mMappedIndexRegions[i];
- if (region.mStart == end + 1)
+ LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index");
+
+ if (mGLIndices != sGLRenderIndices)
{
- end = region.mEnd;
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+ sGLRenderIndices = mGLIndices;
}
- else
+ U32 start = 0;
+ U32 end = 0;
+
+ std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion());
+
+ for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
{
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
- start = region.mStart;
- end = region.mEnd;
+ const MappedRegion& region = mMappedIndexRegions[i];
+ if (region.mStart == end + 1)
+ {
+ end = region.mEnd;
+ }
+ else
+ {
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
+ start = region.mStart;
+ end = region.mEnd;
+ }
}
- }
- flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
- mMappedIndexRegions.clear();
+ flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
+ mMappedIndexRegions.clear();
+ }
}
-#endif
}
//----------------------------------------------------------------------------