From 0a617904f98ab5960379099822e4891a08137e68 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Tue, 17 Sep 2024 18:14:22 -0500
Subject: #2590 Fix for horrible FPS on Intel Mac (#2591)

* Workaround for GHA Mac runners not playing nicely with Tracy
* Delay VBO deletion for a few frames
* Enable multithreaded GL driver and multithreaded media textures on Apple silicon
---
 indra/cmake/Tracy.cmake            |   5 +
 indra/llrender/llgl.cpp            |   5 +
 indra/llrender/llgl.h              |   1 +
 indra/llrender/llvertexbuffer.cpp  | 374 +++++++++++++++++++++----------------
 indra/newview/featuretable_mac.txt |  14 +-
 indra/newview/llfeaturemanager.cpp |   8 +
 6 files changed, 242 insertions(+), 165 deletions(-)

diff --git a/indra/cmake/Tracy.cmake b/indra/cmake/Tracy.cmake
index ec7178c5a0..a7eac2711f 100644
--- a/indra/cmake/Tracy.cmake
+++ b/indra/cmake/Tracy.cmake
@@ -31,6 +31,11 @@ if (USE_TRACY)
     target_compile_definitions(ll::tracy INTERFACE -DTRACY_NO_BROADCAST=1 -DTRACY_ONLY_LOCALHOST=1)
   endif ()
 
+  # GHA runners don't always provide invariant TSC support; they always build with LL_TESTS enabled, so use that to enable Tracy's timer fallback
+  if (DARWIN AND LL_TESTS)
+    target_compile_definitions(ll::tracy INTERFACE -DTRACY_TIMER_FALLBACK=1)
+  endif ()
+
   # See: indra/llcommon/llprofiler.h
   add_compile_definitions(LL_PROFILER_CONFIGURATION=3)
 endif (USE_TRACY)
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp
index c62cacdce6..c5c9d50dee 100644
--- a/indra/llrender/llgl.cpp
+++ b/indra/llrender/llgl.cpp
@@ -1170,6 +1170,11 @@ bool LLGLManager::initGL()
         mGLVendorShort = "INTEL";
         mIsIntel = true;
     }
+    else if (mGLVendor.find("APPLE") != std::string::npos)
+    {
+        mGLVendorShort = "APPLE";
+        mIsApple = true;
+    }
     else
     {
         mGLVendorShort = "MISC";
diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h
index 17f825bd71..f5b1e8d786 100644
--- a/indra/llrender/llgl.h
+++ b/indra/llrender/llgl.h
@@ -102,6 +102,7 @@ public:
     bool mIsAMD;
     bool mIsNVIDIA;
     bool mIsIntel;
+    bool mIsApple = false;
 
     // hints to the render pipe
     U32 mDownScaleMethod = 0; // see settings.xml RenderDownScaleMethod
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index e9fa369b0c..0be799db9d 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -289,22 +289,58 @@ static GLuint gen_buffer()
     return ret;
 }
 
-#define ANALYZE_VBO_POOL 0
+static void delete_buffers(S32 count, GLuint* buffers) // deferred stand-in for glDeleteBuffers(count, buffers)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    // wait a few frames before actually deleting the buffers to avoid
+    // synchronization issues with the GPU
+    static std::vector<GLuint> sFreeList[4]; // pending buffer names, bucketed by LLImageGL::sFrameCount % 4
+
+    if (gGLManager.mInited) // when GL is not initialized the names are neither queued nor deleted here
+    {
+        U32 idx = LLImageGL::sFrameCount % 4; // bucket for the current frame
+
+        for (S32 i = 0; i < count; ++i)
+        {
+            sFreeList[idx].push_back(buffers[i]);
+        }
+
+        idx = (LLImageGL::sFrameCount + 3) % 4; // NOTE(review): assuming sFrameCount advances by one per frame, this is the bucket filled on the previous frame, so the delay can be as short as one frame -- confirm that matches the intended "few frames"
+
+        if (!sFreeList[idx].empty()) // buckets are only flushed from inside this function, i.e. when some later delete_buffers call lands on the matching frame
+        {
+            glDeleteBuffers((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
+            sFreeList[idx].resize(0);
+        }
+    }
+}
 
-#if LL_DARWIN
 
-// experimental -- disable VBO pooling on OS X and use glMapBuffer
+#define ANALYZE_VBO_POOL 0
+
+// VBO Pool interface -- abstract base implemented by LLAppleVBOPool and LLDefaultVBOPool
 class LLVBOPool
+{
+    public:
+    virtual ~LLVBOPool() = default; // virtual so a concrete pool can be destroyed through an LLVBOPool*
+    virtual void allocate(GLenum type, U32 size, GLuint& name, U8*& data) = 0; // name and data are passed by reference; presumably filled in by the implementation -- confirm
+    virtual void free(GLenum type, U32 size, GLuint name, U8* data) = 0; // presumably releases a name/data pair obtained from allocate() -- confirm
+    virtual U64 getVramBytesUsed() = 0; // byte count as tracked by the concrete pool
+};
+
+// VBO Pool for Apple GPUs (i.e. Apple silicon such as M1/M2, not Intel Macs)
+// Effectively disables VBO pooling
+class LLAppleVBOPool final: public LLVBOPool
 {
 public:
     U64 mAllocated = 0;
 
-    U64 getVramBytesUsed()
+    U64 getVramBytesUsed() override
     {
         return mAllocated;
     }
 
-    void allocate(GLenum type, U32 size, GLuint& name, U8*& data)
+    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         STOP_GLERROR;
@@ -324,7 +360,7 @@ public:
         }
     }
 
-    void free(GLenum type, U32 size, GLuint name, U8* data)
+    void free(GLenum type, U32 size, GLuint name, U8* data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -339,19 +375,17 @@ public:
         STOP_GLERROR;
         if (name)
         {
-            glDeleteBuffers(1, &name);
+            delete_buffers(1, &name);
         }
         STOP_GLERROR;
     }
 };
 
-#else
-
-class LLVBOPool
+// VBO Pool for GPUs that benefit from VBO pooling
+class LLDefaultVBOPool final : public LLVBOPool
 {
 public:
     typedef std::chrono::steady_clock::time_point Time;
-
     struct Entry
     {
         U8* mData;
@@ -359,7 +393,7 @@ public:
         Time mAge;
     };
 
-    ~LLVBOPool()
+    ~LLDefaultVBOPool() override
     {
         clear();
     }
@@ -377,7 +411,7 @@ public:
     U32 mMisses = 0;
     U32 mHits = 0;
 
-    U64 getVramBytesUsed()
+    U64 getVramBytesUsed() override
     {
         return mAllocated + mReserved;
     }
@@ -393,7 +427,7 @@ public:
         size += block_size - (size % block_size);
     }
 
-    void allocate(GLenum type, U32 size, GLuint& name, U8*& data)
+    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -449,7 +483,7 @@ public:
         clean();
     }
 
-    void free(GLenum type, U32 size, GLuint name, U8* data)
+    void free(GLenum type, U32 size, GLuint name, U8* data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -512,7 +546,7 @@ public:
                     LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vbo cache timeout");
                     auto& entry = entries.back();
                     ll_aligned_free_16(entry.mData);
-                    glDeleteBuffers(1, &entry.mGLName);
+                    delete_buffers(1, &entry.mGLName);
                     llassert(mReserved >= iter->first);
                     mReserved -= iter->first;
                     entries.pop_back();
@@ -548,7 +582,7 @@ public:
             for (auto& entry : entries.second)
             {
                 ll_aligned_free_16(entry.mData);
-                glDeleteBuffers(1, &entry.mGLName);
+                delete_buffers(1, &entry.mGLName);
             }
         }
 
@@ -557,7 +591,7 @@ public:
             for (auto& entry : entries.second)
             {
                 ll_aligned_free_16(entry.mData);
-                glDeleteBuffers(1, &entry.mGLName);
+                delete_buffers(1, &entry.mGLName);
             }
         }
 
@@ -567,7 +601,6 @@ public:
         mVBOPool.clear();
     }
 };
-#endif
 
 static LLVBOPool* sVBOPool = nullptr;
 
@@ -896,7 +929,16 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
 void LLVertexBuffer::initClass(LLWindow* window)
 {
     llassert(sVBOPool == nullptr);
-    sVBOPool = new LLVBOPool();
+    if (gGLManager.mIsApple)
+    {
+        LL_INFOS() << "VBO Pooling Disabled" << LL_ENDL;
+        sVBOPool = new LLAppleVBOPool();
+    }
+    else
+    {
+        LL_INFOS() << "VBO Pooling Enabled" << LL_ENDL;
+        sVBOPool = new LLDefaultVBOPool();
+    }
 
 #if ENABLE_GL_WORK_QUEUE
     sQueue = new GLWorkQueue();
@@ -964,7 +1006,6 @@ void LLVertexBuffer::flushBuffers()
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
     // must only be called from main thread
-    llassert(LLCoros::on_main_thread_main_coro());
     for (auto& buffer : sMappedBuffers)
     {
         buffer->_unmapBuffer();
@@ -1231,28 +1272,29 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
         count = mNumVerts - index;
     }
 
-#if !LL_DARWIN
-    U32 start = mOffsets[type] + sTypeSize[type] * index;
-    U32 end = start + sTypeSize[type] * count-1;
-
-    bool flagged = false;
-    // flag region as mapped
-    for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
+    if (!gGLManager.mIsApple)
     {
-        MappedRegion& region = mMappedVertexRegions[i];
-        if (expand_region(region, start, end))
+        U32 start = mOffsets[type] + sTypeSize[type] * index;
+        U32 end = start + sTypeSize[type] * count-1;
+
+        bool flagged = false;
+        // flag region as mapped
+        for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
         {
-            flagged = true;
-            break;
+            MappedRegion& region = mMappedVertexRegions[i];
+            if (expand_region(region, start, end))
+            {
+                flagged = true;
+                break;
+            }
         }
-    }
 
-    if (!flagged)
-    {
-        //didn't expand an existing region, make a new one
-        mMappedVertexRegions.push_back({ start, end });
+        if (!flagged)
+        {
+            //didn't expand an existing region, make a new one
+            mMappedVertexRegions.push_back({ start, end });
+        }
     }
-#endif
     return mMappedData+mOffsets[type]+sTypeSize[type]*index;
 }
 
@@ -1267,28 +1309,29 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
         count = mNumIndices-index;
     }
 
-#if !LL_DARWIN
-    U32 start = sizeof(U16) * index;
-    U32 end = start + sizeof(U16) * count-1;
-
-    bool flagged = false;
-    // flag region as mapped
-    for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+    if (!gGLManager.mIsApple)
     {
-        MappedRegion& region = mMappedIndexRegions[i];
-        if (expand_region(region, start, end))
+        U32 start = sizeof(U16) * index;
+        U32 end = start + sizeof(U16) * count-1;
+
+        bool flagged = false;
+        // flag region as mapped
+        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
         {
-            flagged = true;
-            break;
+            MappedRegion& region = mMappedIndexRegions[i];
+            if (expand_region(region, start, end))
+            {
+                flagged = true;
+                break;
+            }
         }
-    }
 
-    if (!flagged)
-    {
-        //didn't expand an existing region, make a new one
-        mMappedIndexRegions.push_back({ start, end });
+        if (!flagged)
+        {
+            //didn't expand an existing region, make a new one
+            mMappedIndexRegions.push_back({ start, end });
+        }
     }
-#endif
 
     return mMappedIndexData + sizeof(U16)*index;
 }
@@ -1301,37 +1344,40 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
 //  dst -- mMappedData or mMappedIndexData
 void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8* dst)
 {
-#if LL_DARWIN
-    // on OS X, flush_vbo doesn't actually write to the GL buffer, so be sure to call
-    // _mapBuffer to tag the buffer for flushing to GL
-    _mapBuffer();
-    LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy");
-    STOP_GLERROR;
-    // copy into mapped buffer
-    memcpy(dst+start, data, end-start+1);
-#else
-    llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices);
-
-    // skip mapped data and stream to GPU via glBufferSubData
-    if (end != 0)
+    if (gGLManager.mIsApple)
     {
-        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
-        LL_PROFILE_ZONE_NUM(start);
-        LL_PROFILE_ZONE_NUM(end);
-        LL_PROFILE_ZONE_NUM(end-start);
-
-        constexpr U32 block_size = 65536;
+        // on Apple GPUs, flush_vbo doesn't actually write to the GL buffer, so be sure to call
+        // _mapBuffer to tag the buffer for flushing to GL
+        _mapBuffer();
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy");
+        STOP_GLERROR;
+        // copy into mapped buffer
+        memcpy(dst+start, data, end-start+1); // end is an inclusive offset, hence the +1
+    }
+    else
+    {
+        llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices);
 
-        for (U32 i = start; i <= end; i += block_size)
+        // skip mapped data and stream to GPU via glBufferSubData
+        if (end != 0) // nothing is uploaded when end == 0
         {
-            //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
-            //LL_PROFILE_GPU_ZONE("glBufferSubData");
-            U32 tend = llmin(i + block_size, end);
-            U32 size = tend - i + 1;
-            glBufferSubData(target, i, size, (U8*) data + (i-start));
+            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
+            LL_PROFILE_ZONE_NUM(start);
+            LL_PROFILE_ZONE_NUM(end);
+            LL_PROFILE_ZONE_NUM(end-start);
+
+            constexpr U32 block_size = 65536; // upload in chunks of at most block_size + 1 bytes
+
+            for (U32 i = start; i <= end; i += block_size)
+            {
+                //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+                //LL_PROFILE_GPU_ZONE("glBufferSubData");
+                U32 tend = llmin(i + block_size, end);
+                U32 size = tend - i + 1;
+                glBufferSubData(target, i, size, (U8*) data + (i-start));
+            }
         }
     }
-#endif
 }
 
 void LLVertexBuffer::unmapBuffer()
@@ -1364,114 +1410,116 @@ void LLVertexBuffer::_unmapBuffer()
         }
     };
 
-#if LL_DARWIN
-    STOP_GLERROR;
-    if (mMappedData)
-    {
-        if (mGLBuffer)
-        {
-            glDeleteBuffers(1, &mGLBuffer);
-        }
-        mGLBuffer = gen_buffer();
-        glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
-        sGLRenderBuffer = mGLBuffer;
-        glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW);
-    }
-    else if (mGLBuffer != sGLRenderBuffer)
-    {
-        glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
-        sGLRenderBuffer = mGLBuffer;
-    }
-    STOP_GLERROR;
-
-    if (mMappedIndexData)
+    if (gGLManager.mIsApple)
     {
-        if (mGLIndices)
+        STOP_GLERROR;
+        if (mMappedData)
         {
-            glDeleteBuffers(1, &mGLIndices);
+            if (mGLBuffer)
+            {
+                delete_buffers(1, &mGLBuffer);
+            }
+            mGLBuffer = gen_buffer();
+            glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+            sGLRenderBuffer = mGLBuffer;
+            glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW);
         }
-
-        mGLIndices = gen_buffer();
-        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
-        sGLRenderIndices = mGLIndices;
-
-        glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW);
-    }
-    else if (mGLIndices != sGLRenderIndices)
-    {
-        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
-        sGLRenderIndices = mGLIndices;
-    }
-    STOP_GLERROR;
-#else
-
-    if (!mMappedVertexRegions.empty())
-    {
-        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex");
-
-        if (sGLRenderBuffer != mGLBuffer)
+        else if (mGLBuffer != sGLRenderBuffer)
         {
             glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
             sGLRenderBuffer = mGLBuffer;
         }
+        STOP_GLERROR;
 
-        U32 start = 0;
-        U32 end = 0;
-
-        std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion());
-
-        for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
+        if (mMappedIndexData)
         {
-            const MappedRegion& region = mMappedVertexRegions[i];
-            if (region.mStart == end + 1)
-            {
-                end = region.mEnd;
-            }
-            else
+            if (mGLIndices)
             {
-                flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
-                start = region.mStart;
-                end = region.mEnd;
+                delete_buffers(1, &mGLIndices);
             }
-        }
-
-        flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
-        mMappedVertexRegions.clear();
-    }
 
-    if (!mMappedIndexRegions.empty())
-    {
-        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index");
+            mGLIndices = gen_buffer();
+            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+            sGLRenderIndices = mGLIndices;
 
-        if (mGLIndices != sGLRenderIndices)
+            glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW);
+        }
+        else if (mGLIndices != sGLRenderIndices)
         {
             glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
             sGLRenderIndices = mGLIndices;
         }
-        U32 start = 0;
-        U32 end = 0;
+        STOP_GLERROR;
+    }
+    else
+    {
+        if (!mMappedVertexRegions.empty())
+        {
+            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex");
+
+            if (sGLRenderBuffer != mGLBuffer)
+            {
+                glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+                sGLRenderBuffer = mGLBuffer;
+            }
 
-        std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion());
+            U32 start = 0;
+            U32 end = 0;
 
-        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+            std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion());
+
+            for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
+            {
+                const MappedRegion& region = mMappedVertexRegions[i];
+                if (region.mStart == end + 1)
+                {
+                    end = region.mEnd;
+                }
+                else
+                {
+                    flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
+                    start = region.mStart;
+                    end = region.mEnd;
+                }
+            }
+
+            flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
+            mMappedVertexRegions.clear();
+        }
+
+        if (!mMappedIndexRegions.empty())
         {
-            const MappedRegion& region = mMappedIndexRegions[i];
-            if (region.mStart == end + 1)
+            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index");
+
+            if (mGLIndices != sGLRenderIndices)
             {
-                end = region.mEnd;
+                glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+                sGLRenderIndices = mGLIndices;
             }
-            else
+            U32 start = 0;
+            U32 end = 0;
+
+            std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion());
+
+            for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
             {
-                flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
-                start = region.mStart;
-                end = region.mEnd;
+                const MappedRegion& region = mMappedIndexRegions[i];
+                if (region.mStart == end + 1)
+                {
+                    end = region.mEnd;
+                }
+                else
+                {
+                    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
+                    start = region.mStart;
+                    end = region.mEnd;
+                }
             }
-        }
 
-        flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
-        mMappedIndexRegions.clear();
+            flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
+            mMappedIndexRegions.clear();
+        }
     }
-#endif
 }
 
 //----------------------------------------------------------------------------
diff --git a/indra/newview/featuretable_mac.txt b/indra/newview/featuretable_mac.txt
index 2e220d2b1c..06ad730a40 100644
--- a/indra/newview/featuretable_mac.txt
+++ b/indra/newview/featuretable_mac.txt
@@ -1,4 +1,4 @@
-version 60
+version 61
 // The version number above should be incremented IF AND ONLY IF some
 // change has been made that is sufficiently important to justify
 // resetting the graphics preferences of all users to the recommended
@@ -68,7 +68,8 @@ RenderFSAASamples			1	3
 RenderMaxTextureIndex		1	16
 RenderGLContextCoreProfile         1   1
 RenderGLMultiThreadedTextures      1   0
-RenderGLMultiThreadedMedia         1   0
+RenderGLMultiThreadedMedia         1   1
+RenderAppleUseMultGL        1   1
 RenderReflectionsEnabled    1   1
 RenderReflectionProbeDetail	1	2
 RenderScreenSpaceReflections 1  1
@@ -381,6 +382,15 @@ list Intel
 RenderAnisotropic			1	0
 RenderFSAASamples			1	0
 
+// AppleGPU and NonAppleGPU can be thought of as Apple silicon vs Intel Mac
+list AppleGPU
+RenderGLMultiThreadedMedia  1   1
+RenderAppleUseMultGL        1   1
+
+list NonAppleGPU
+RenderGLMultiThreadedMedia  1   0
+RenderAppleUseMultGL        1   0
+
 list GL3
 RenderFSAASamples           0   0
 RenderReflectionProbeDetail	0	0
diff --git a/indra/newview/llfeaturemanager.cpp b/indra/newview/llfeaturemanager.cpp
index aa04221f4b..3259ea249b 100644
--- a/indra/newview/llfeaturemanager.cpp
+++ b/indra/newview/llfeaturemanager.cpp
@@ -656,6 +656,14 @@ void LLFeatureManager::applyBaseMasks()
     {
         maskFeatures("Intel");
     }
+    if (gGLManager.mIsApple)
+    {
+        maskFeatures("AppleGPU");
+    }
+    else
+    {
+        maskFeatures("NonAppleGPU");
+    }
     if (gGLManager.mGLVersion < 3.f)
     {
         maskFeatures("OpenGLPre30");
-- 
cgit v1.2.3