diff options
| -rw-r--r-- | indra/cmake/Tracy.cmake | 5 | ||||
| -rw-r--r-- | indra/llrender/llgl.cpp | 5 | ||||
| -rw-r--r-- | indra/llrender/llgl.h | 1 | ||||
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 374 | ||||
| -rw-r--r-- | indra/newview/featuretable_mac.txt | 14 | ||||
| -rw-r--r-- | indra/newview/llfeaturemanager.cpp | 8 | 
6 files changed, 242 insertions, 165 deletions
| diff --git a/indra/cmake/Tracy.cmake b/indra/cmake/Tracy.cmake index ec7178c5a0..a7eac2711f 100644 --- a/indra/cmake/Tracy.cmake +++ b/indra/cmake/Tracy.cmake @@ -31,6 +31,11 @@ if (USE_TRACY)      target_compile_definitions(ll::tracy INTERFACE -DTRACY_NO_BROADCAST=1 -DTRACY_ONLY_LOCALHOST=1)    endif () +  # GHA runners don't always provide invariant TSC support, but always build with LL_TESTS enabled +  if (DARWIN AND LL_TESTS) +    target_compile_definitions(ll::tracy INTERFACE -DTRACY_TIMER_FALLBACK=1) +  endif () +    # See: indra/llcommon/llprofiler.h    add_compile_definitions(LL_PROFILER_CONFIGURATION=3)  endif (USE_TRACY) diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index c62cacdce6..c5c9d50dee 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -1170,6 +1170,11 @@ bool LLGLManager::initGL()          mGLVendorShort = "INTEL";          mIsIntel = true;      } +    else if (mGLVendor.find("APPLE") != std::string::npos) +    { +        mGLVendorShort = "APPLE"; +        mIsApple = true; +    }      else      {          mGLVendorShort = "MISC"; diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 17f825bd71..f5b1e8d786 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -102,6 +102,7 @@ public:      bool mIsAMD;      bool mIsNVIDIA;      bool mIsIntel; +    bool mIsApple = false;      // hints to the render pipe      U32 mDownScaleMethod = 0; // see settings.xml RenderDownScaleMethod diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index e9fa369b0c..0be799db9d 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -289,22 +289,58 @@ static GLuint gen_buffer()      return ret;  } -#define ANALYZE_VBO_POOL 0 +static void delete_buffers(S32 count, GLuint* buffers) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +    // wait a few frames before actually deleting the buffers to avoid +    // synchronization issues with the GPU +    static std::vector<GLuint> sFreeList[4]; + +    if (gGLManager.mInited) +    { +        U32 idx = LLImageGL::sFrameCount % 4; + +        for (S32 i = 0; i < count; ++i) +        { +            sFreeList[idx].push_back(buffers[i]); +        } + +        idx = (LLImageGL::sFrameCount + 3) % 4; + +        if (!sFreeList[idx].empty()) +        { +            glDeleteBuffers((GLsizei)sFreeList[idx].size(), sFreeList[idx].data()); +            sFreeList[idx].resize(0); +        } +    } +} -#if LL_DARWIN -// experimental -- disable VBO pooling on OS X and use glMapBuffer +#define ANALYZE_VBO_POOL 0 + +// VBO Pool interface  class LLVBOPool  { +    public: +    virtual ~LLVBOPool() = default; +    virtual void allocate(GLenum type, U32 size, GLuint& name, U8*& data) = 0; +    virtual void free(GLenum type, U32 size, GLuint name, U8* data) = 0; +    virtual U64 getVramBytesUsed() = 0; +}; + +// VBO Pool for Apple GPUs (as in M1/M2 etc, not Intel macs) +// Effectively disables VBO pooling +class LLAppleVBOPool final: public LLVBOPool +{  public:      U64 mAllocated = 0; -    U64 getVramBytesUsed() +    U64 getVramBytesUsed() override      {          return mAllocated;      } -    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) +    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override      {          LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;          STOP_GLERROR; @@ -324,7 +360,7 @@ public:          }      } -    void free(GLenum type, U32 size, GLuint name, U8* data) +    void free(GLenum type, U32 size, GLuint name, U8* data) override      {          LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;          llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); @@ -339,19 +375,17 @@ public:          STOP_GLERROR;          if (name)          { -            glDeleteBuffers(1, &name); +            delete_buffers(1, &name);          }          STOP_GLERROR;      }  }; -#else - -class LLVBOPool +// VBO Pool for GPUs that benefit from VBO pooling +class LLDefaultVBOPool final : public LLVBOPool  {  public:      typedef std::chrono::steady_clock::time_point Time; -      struct Entry      {          U8* mData; @@ -359,7 +393,7 @@ public:          Time mAge;      }; -    ~LLVBOPool() +    ~LLDefaultVBOPool() override      {          clear();      } @@ -377,7 +411,7 @@ public:      U32 mMisses = 0;      U32 mHits = 0; -    U64 getVramBytesUsed() +    U64 getVramBytesUsed() override      {          return mAllocated + mReserved;      } @@ -393,7 +427,7 @@ public:          size += block_size - (size % block_size);      } -    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) +    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override      {          LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;          llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); @@ -449,7 +483,7 @@ public:          clean();      } -    void free(GLenum type, U32 size, GLuint name, U8* data) +    void free(GLenum type, U32 size, GLuint name, U8* data) override      {          LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;          llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); @@ -512,7 +546,7 @@ public:                      LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vbo cache timeout");                      auto& entry = entries.back();                      ll_aligned_free_16(entry.mData); -                    glDeleteBuffers(1, &entry.mGLName); +                    delete_buffers(1, &entry.mGLName);                      llassert(mReserved >= iter->first);                      mReserved -= iter->first;                      entries.pop_back(); @@ -548,7 +582,7 @@ public:              for (auto& entry : entries.second)              {                  ll_aligned_free_16(entry.mData); -                glDeleteBuffers(1, &entry.mGLName); +                delete_buffers(1, &entry.mGLName);              }          } @@ -557,7 +591,7 @@ public:              for (auto& entry : entries.second)              {                  ll_aligned_free_16(entry.mData); -                glDeleteBuffers(1, &entry.mGLName); +                delete_buffers(1, &entry.mGLName);              }          } @@ -567,7 +601,6 @@ public:          mVBOPool.clear();      }  }; -#endif  static LLVBOPool* sVBOPool = nullptr; @@ -896,7 +929,16 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const  void LLVertexBuffer::initClass(LLWindow* window)  {      llassert(sVBOPool == nullptr); -    sVBOPool = new LLVBOPool(); +    if (gGLManager.mIsApple) +    { +        LL_INFOS() << "VBO Pooling Disabled" << LL_ENDL; +        sVBOPool = new LLAppleVBOPool(); +    } +    else +    { +        LL_INFOS() << "VBO Pooling Enabled" << LL_ENDL; +        sVBOPool = new LLDefaultVBOPool(); +    }  #if ENABLE_GL_WORK_QUEUE      sQueue = new GLWorkQueue(); @@ -964,7 +1006,6 @@ void LLVertexBuffer::flushBuffers()  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;      // must only be called from main thread -    llassert(LLCoros::on_main_thread_main_coro());      for (auto& buffer : sMappedBuffers)      {          buffer->_unmapBuffer(); @@ -1231,28 +1272,29 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde          count = mNumVerts - index;      } -#if !LL_DARWIN -    U32 start = mOffsets[type] + sTypeSize[type] * index; -    U32 end = start + sTypeSize[type] * count-1; - -    bool flagged = false; -    // flag region as mapped -    for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) +    if (!gGLManager.mIsApple)      { -        MappedRegion& region = mMappedVertexRegions[i]; -        if (expand_region(region, start, end)) +        U32 start = mOffsets[type] + sTypeSize[type] * index; +        U32 end = start + sTypeSize[type] * count-1; + +        bool flagged = false; +        // flag region as mapped +        for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)          { -            flagged = true; -            break; +            MappedRegion& region = mMappedVertexRegions[i]; +            if (expand_region(region, start, end)) +            { +                flagged = true; +                break; +            }          } -    } -    if (!flagged) -    { -        //didn't expand an existing region, make a new one -        mMappedVertexRegions.push_back({ start, end }); +        if (!flagged) +        { +            //didn't expand an existing region, make a new one +            mMappedVertexRegions.push_back({ start, end }); +        }      } -#endif      return mMappedData+mOffsets[type]+sTypeSize[type]*index;  } @@ -1267,28 +1309,29 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)          count = mNumIndices-index;      } -#if !LL_DARWIN -    U32 start = sizeof(U16) * index; -    U32 end = start + sizeof(U16) * count-1; - -    bool flagged = false; -    // flag region as mapped -    for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) +    if (!gGLManager.mIsApple)      { -        MappedRegion& region = mMappedIndexRegions[i]; -        if (expand_region(region, start, end)) +        U32 start = sizeof(U16) * index; +        U32 end = start + sizeof(U16) * count-1; + +        bool flagged = false; +        // flag region as mapped +        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)          { -            flagged = true; -            break; +            MappedRegion& region = mMappedIndexRegions[i]; +            if (expand_region(region, start, end)) +            { +                flagged = true; +                break; +            }          } -    } -    if (!flagged) -    { -        //didn't expand an existing region, make a new one -        mMappedIndexRegions.push_back({ start, end }); +        if (!flagged) +        { +            //didn't expand an existing region, make a new one +            mMappedIndexRegions.push_back({ start, end }); +        }      } -#endif      return mMappedIndexData + sizeof(U16)*index;  } @@ -1301,37 +1344,40 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)  //  dst -- mMappedData or mMappedIndexData  void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8* dst)  { -#if LL_DARWIN -    // on OS X, flush_vbo doesn't actually write to the GL buffer, so be sure to call -    // _mapBuffer to tag the buffer for flushing to GL -    _mapBuffer(); -    LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy"); -    STOP_GLERROR; -    // copy into mapped buffer -    memcpy(dst+start, data, end-start+1); -#else -    llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices); - -    // skip mapped data and stream to GPU via glBufferSubData -    if (end != 0) +    if (gGLManager.mIsApple)      { -        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); -        LL_PROFILE_ZONE_NUM(start); -        LL_PROFILE_ZONE_NUM(end); -        LL_PROFILE_ZONE_NUM(end-start); - -        constexpr U32 block_size = 65536; +        // on OS X, flush_vbo doesn't actually write to the GL buffer, so be sure to call +        // _mapBuffer to tag the buffer for flushing to GL +        _mapBuffer(); +        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy"); +        STOP_GLERROR; +        // copy into mapped buffer +        memcpy(dst+start, data, end-start+1); +    } +    else +    { +        llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices); -        for (U32 i = start; i <= end; i += block_size) +        // skip mapped data and stream to GPU via glBufferSubData +        if (end != 0)          { -            //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); -            //LL_PROFILE_GPU_ZONE("glBufferSubData"); -            U32 tend = llmin(i + block_size, end); -            U32 size = tend - i + 1; -            glBufferSubData(target, i, size, (U8*) data + (i-start)); +            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); +            LL_PROFILE_ZONE_NUM(start); +            LL_PROFILE_ZONE_NUM(end); +            LL_PROFILE_ZONE_NUM(end-start); + +            constexpr U32 block_size = 65536; + +            for (U32 i = start; i <= end; i += block_size) +            { +                //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); +                //LL_PROFILE_GPU_ZONE("glBufferSubData"); +                U32 tend = llmin(i + block_size, end); +                U32 size = tend - i + 1; +                glBufferSubData(target, i, size, (U8*) data + (i-start)); +            }          }      } -#endif  }  void LLVertexBuffer::unmapBuffer() @@ -1364,114 +1410,116 @@ void LLVertexBuffer::_unmapBuffer()          }      }; -#if LL_DARWIN -    STOP_GLERROR; -    if (mMappedData) -    { -        if (mGLBuffer) -        { -            glDeleteBuffers(1, &mGLBuffer); -        } -        mGLBuffer = gen_buffer(); -        glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); -        sGLRenderBuffer = mGLBuffer; -        glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW); -    } -    else if (mGLBuffer != sGLRenderBuffer) -    { -        glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); -        sGLRenderBuffer = mGLBuffer; -    } -    STOP_GLERROR; - -    if (mMappedIndexData) +    if (gGLManager.mIsApple)      { -        if (mGLIndices) +        STOP_GLERROR; +        if (mMappedData)          { -            glDeleteBuffers(1, &mGLIndices); +            if (mGLBuffer) +            { +                delete_buffers(1, &mGLBuffer); +            } +            mGLBuffer = gen_buffer(); +            glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); +            sGLRenderBuffer = mGLBuffer; +            glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW);          } - -        mGLIndices = gen_buffer(); -        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); -        sGLRenderIndices = mGLIndices; - -        glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW); -    } -    else if (mGLIndices != sGLRenderIndices) -    { -        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); -        sGLRenderIndices = mGLIndices; -    } -    STOP_GLERROR; -#else - -    if (!mMappedVertexRegions.empty()) -    { -        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex"); - -        if (sGLRenderBuffer != mGLBuffer) +        else if (mGLBuffer != sGLRenderBuffer)          {              glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);              sGLRenderBuffer = mGLBuffer;          } +        STOP_GLERROR; -        U32 start = 0; -        U32 end = 0; - -        std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion()); - -        for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) +        if (mMappedIndexData)          { -            const MappedRegion& region = mMappedVertexRegions[i]; -            if (region.mStart == end + 1) -            { -                end = region.mEnd; -            } -            else +            if (mGLIndices)              { -                flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); -                start = region.mStart; -                end = region.mEnd; +                delete_buffers(1, &mGLIndices);              } -        } - -        flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); -        mMappedVertexRegions.clear(); -    } -    if (!mMappedIndexRegions.empty()) -    { -        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index"); +            mGLIndices = gen_buffer(); +            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); +            sGLRenderIndices = mGLIndices; -        if (mGLIndices != sGLRenderIndices) +            glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW); +        } +        else if (mGLIndices != sGLRenderIndices)          {              glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);              sGLRenderIndices = mGLIndices;          } -        U32 start = 0; -        U32 end = 0; +        STOP_GLERROR; +    } +    else +    { +        if (!mMappedVertexRegions.empty()) +        { +            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex"); + +            if (sGLRenderBuffer != mGLBuffer) +            { +                glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); +                sGLRenderBuffer = mGLBuffer; +            } -        std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion()); +            U32 start = 0; +            U32 end = 0; -        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) +            std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion()); + +            for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) +            { +                const MappedRegion& region = mMappedVertexRegions[i]; +                if (region.mStart == end + 1) +                { +                    end = region.mEnd; +                } +                else +                { +                    flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); +                    start = region.mStart; +                    end = region.mEnd; +                } +            } + +            flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); +            mMappedVertexRegions.clear(); +        } + +        if (!mMappedIndexRegions.empty())          { -            const MappedRegion& region = mMappedIndexRegions[i]; -            if (region.mStart == end + 1) +            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index"); + +            if (mGLIndices != sGLRenderIndices)              { -                end = region.mEnd; +                glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); +                sGLRenderIndices = mGLIndices;              } -            else +            U32 start = 0; +            U32 end = 0; + +            std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion()); + +            for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)              { -                flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); -                start = region.mStart; -                end = region.mEnd; +                const MappedRegion& region = mMappedIndexRegions[i]; +                if (region.mStart == end + 1) +                { +                    end = region.mEnd; +                } +                else +                { +                    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); +                    start = region.mStart; +                    end = region.mEnd; +                }              } -        } -        flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); -        mMappedIndexRegions.clear(); +            flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); +            mMappedIndexRegions.clear(); +        }      } -#endif  }  //---------------------------------------------------------------------------- diff --git a/indra/newview/featuretable_mac.txt b/indra/newview/featuretable_mac.txt index 2e220d2b1c..06ad730a40 100644 --- a/indra/newview/featuretable_mac.txt +++ b/indra/newview/featuretable_mac.txt @@ -1,4 +1,4 @@ -version 60 +version 61  // The version number above should be incremented IF AND ONLY IF some  // change has been made that is sufficiently important to justify  // resetting the graphics preferences of all users to the recommended @@ -68,7 +68,8 @@ RenderFSAASamples			1	3  RenderMaxTextureIndex		1	16  RenderGLContextCoreProfile         1   1  RenderGLMultiThreadedTextures      1   0 -RenderGLMultiThreadedMedia         1   0 +RenderGLMultiThreadedMedia         1   1 +RenderAppleUseMultGL        1   1  RenderReflectionsEnabled    1   1  RenderReflectionProbeDetail	1	2  RenderScreenSpaceReflections 1  1 @@ -381,6 +382,15 @@ list Intel  RenderAnisotropic			1	0  RenderFSAASamples			1	0 +// AppleGPU and NonAppleGPU can be thought of as Apple silicon vs Intel Mac +list AppleGPU +RenderGLMultiThreadedMedia  1   1 +RenderAppleUseMultGL        1   1 + +list NonAppleGPU +RenderGLMultiThreadedMedia  1   0 +RenderAppleUseMultGL        1   0 +  list GL3  RenderFSAASamples           0   0  RenderReflectionProbeDetail	0	0 diff --git a/indra/newview/llfeaturemanager.cpp b/indra/newview/llfeaturemanager.cpp index aa04221f4b..3259ea249b 100644 --- a/indra/newview/llfeaturemanager.cpp +++ b/indra/newview/llfeaturemanager.cpp @@ -656,6 +656,14 @@ void LLFeatureManager::applyBaseMasks()      {          maskFeatures("Intel");      } +    if (gGLManager.mIsApple) +    { +        maskFeatures("AppleGPU"); +    } +    else +    { +        maskFeatures("NonAppleGPU"); +    }      if (gGLManager.mGLVersion < 3.f)      {          maskFeatures("OpenGLPre30"); | 
