diff options
| author | Dave Parks <davep@lindenlab.com> | 2024-06-05 15:14:13 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-05 15:14:13 -0500 | 
| commit | 24586f810eb7ef8048a55687333d51c53aa2bed8 (patch) | |
| tree | c1213aae9efb9a5b6afd89ab6ce6ffe3dd02c277 | |
| parent | f568e6036bfa133ee8496a751f3269ec772fe5d3 (diff) | |
#1527 Improve performance on Apple silicon (#1632)

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | indra/llcommon/llprofiler.h | 2 |
| -rw-r--r-- | indra/llrender/llgl.h | 15 |
| -rw-r--r-- | indra/llrender/llrender.cpp | 8 |
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 187 |
| -rw-r--r-- | indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl | 2 |
| -rw-r--r-- | indra/newview/llface.cpp | 7 |
6 files changed, 183 insertions, 38 deletions
diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h index af5e5777bf..722d9afca2 100644 --- a/indra/llcommon/llprofiler.h +++ b/indra/llcommon/llprofiler.h @@ -162,7 +162,7 @@ extern thread_local bool gProfilerEnabled;  #define LL_LABEL_OBJECT_GL(type, name, length, label) -#if LL_PROFILER_CONFIGURATION > 1 +#if !LL_DARWIN && LL_PROFILER_CONFIGURATION > 1  #define LL_PROFILE_ALLOC(ptr, size)             TracyAlloc(ptr, size)  #define LL_PROFILE_FREE(ptr)                    TracyFree(ptr)  #else diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 2f538d0844..254c983110 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -156,13 +156,18 @@ void assert_glerror();  void clear_glerror(); -//#if LL_DEBUG +  # define stop_glerror() assert_glerror()  # define llglassertok() assert_glerror() -//#else -//# define stop_glerror() -//# define llglassertok() -//#endif + +// stop_glerror is still needed on OS X but has performance implications +// use macro below to conditionally add stop_glerror to non-release builds  +// on OS X +#if LL_DARWIN && !LL_RELEASE_FOR_DOWNLOAD +#define STOP_GLERROR stop_glerror() +#else  +#define STOP_GLERROR +#endif  #define llglassertok_always() assert_glerror() diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index 399281be84..51028e5667 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -990,6 +990,7 @@ void LLRender::syncLightState()  void LLRender::syncMatrices()  { +    STOP_GLERROR;      static const U32 name[] =      {          LLShaderMgr::MODELVIEW_MATRIX, @@ -1012,8 +1013,6 @@ void LLRender::syncMatrices()      if (shader)      { -        //llassert(shader); -          bool mvp_done = false;          U32 i = MM_MODELVIEW; @@ -1134,6 +1133,7 @@ void LLRender::syncMatrices()              syncLightState();          }      } +    STOP_GLERROR;  }  void LLRender::translatef(const GLfloat& x, const GLfloat& y, const GLfloat& z) @@ -1585,6 +1585,7 @@ 
void LLRender::end()  }  void LLRender::flush()  { +    STOP_GLERROR;      if (mCount > 0)      {          LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE; @@ -1693,6 +1694,9 @@ void LLRender::flush()                      vb->setColorData(mColorsp.get());                  } +#if LL_DARWIN +                vb->unmapBuffer(); +#endif                  vb->unbind();                  sVBCache[vhash] = { vb , std::chrono::steady_clock::now() }; diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 8cb124d406..fa3b2df6e0 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -290,6 +290,62 @@ static GLuint gen_buffer()  #define ANALYZE_VBO_POOL 0 +#if LL_DARWIN + +// experimental -- disable VBO pooling on OS X and use glMapBuffer +class LLVBOPool +{ +public: +    U64 mAllocated = 0; + +    U64 getVramBytesUsed() +    { +        return mAllocated; +    } + +    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) +    { +        LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +        STOP_GLERROR; +        llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); +        llassert(name == 0); // non zero name indicates a gl name that wasn't freed +        llassert(data == nullptr);  // non null data indicates a buffer that wasn't freed +        llassert(size >= 2);  // any buffer size smaller than a single index is nonsensical + +        mAllocated += size; + +        { //allocate a new buffer +            LL_PROFILE_GPU_ZONE("vbo alloc"); +            // ON OS X, we don't allocate a VBO until the last possible moment  +            // in unmapBuffer +            data = (U8*) ll_aligned_malloc_16(size); +            STOP_GLERROR; +        } +    } + +    void free(GLenum type, U32 size, GLuint name, U8* data) +    { +        LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +        llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); +        llassert(size >= 2); +         +        if 
(data) +        { +            ll_aligned_free_16(data); +        } +         +        mAllocated -= size; +        STOP_GLERROR; +        if (name) +        { +            glDeleteBuffers(1, &name); +        } +        STOP_GLERROR; +    } +}; + +#else +  class LLVBOPool  {  public: @@ -509,9 +565,8 @@ public:          mIBOPool.clear();          mVBOPool.clear();      } - -  }; +#endif  static LLVBOPool* sVBOPool = nullptr; @@ -629,6 +684,8 @@ void LLVertexBuffer::drawElements(U32 mode, const LLVector4a* pos, const LLVecto      LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;      llassert(LLGLSLShader::sCurBoundShaderPtr != NULL); +    STOP_GLERROR; +      gGL.syncMatrices();      U32 mask = LLVertexBuffer::MAP_VERTEX; @@ -743,8 +800,10 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi      llassert(mGLBuffer == sGLRenderBuffer);      llassert(mGLIndices == sGLRenderIndices);      gGL.syncMatrices(); +    STOP_GLERROR;      glDrawRangeElements(sGLMode[mode], start, end, count, mIndicesType,          (GLvoid*) (indices_offset * (size_t) mIndicesStride)); +    STOP_GLERROR;  }  void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const @@ -760,7 +819,9 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const      llassert(mGLIndices == sGLRenderIndices);      gGL.syncMatrices(); +    STOP_GLERROR;      glDrawArrays(sGLMode[mode], first, count); +    STOP_GLERROR;  }  //static @@ -783,9 +844,10 @@ void LLVertexBuffer::initClass(LLWindow* window)  //static  void LLVertexBuffer::unbind()  { +    STOP_GLERROR;      glBindBuffer(GL_ARRAY_BUFFER, 0);      glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); - +    STOP_GLERROR;      sGLRenderBuffer = 0;      sGLRenderIndices = 0;  } @@ -1081,6 +1143,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde          count = mNumVerts - index;      } +#if !LL_DARWIN      U32 start = mOffsets[type] + sTypeSize[type] * index;      U32 end = start + 
sTypeSize[type] * count-1; @@ -1101,7 +1164,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde          //didn't expand an existing region, make a new one          mMappedVertexRegions.push_back({ start, end });      } - +#endif      return mMappedData+mOffsets[type]+sTypeSize[type]*index;  } @@ -1115,6 +1178,7 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)          count = mNumIndices-index;      } +#if !LL_DARWIN      U32 start = sizeof(U16) * index;      U32 end = start + sizeof(U16) * count-1; @@ -1135,6 +1199,7 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)          //didn't expand an existing region, make a new one          mMappedIndexRegions.push_back({ start, end });      } +#endif      return mMappedIndexData + sizeof(U16)*index;  } @@ -1143,9 +1208,17 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)  //  target -- "target" parameter for glBufferSubData  //  start -- first byte to copy  //  end -- last byte to copy (NOT last byte + 1) -//  data -- mMappedData or mMappedIndexData -static void flush_vbo(GLenum target, U32 start, U32 end, void* data) -{ +//  data -- data to be flushed +//  dst -- mMappedData or mMappedIndexData +static void flush_vbo(GLenum target, U32 start, U32 end, void* data, U8* dst) +{ +#if LL_DARWIN +    LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy"); +    STOP_GLERROR; +    // copy into mapped buffer +    memcpy(dst+start, data, end-start+1); +#else +    // skip mapped data and stream to GPU via glBufferSubData      if (end != 0)      {          LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); @@ -1164,10 +1237,12 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)              glBufferSubData(target, i, size, (U8*) data + (i-start));          }      } +#endif  }  void LLVertexBuffer::unmapBuffer()  { +    STOP_GLERROR;      struct SortMappedRegion      {          bool operator()(const MappedRegion& lhs, const MappedRegion& rhs) 
@@ -1176,9 +1251,51 @@ void LLVertexBuffer::unmapBuffer()          }      }; +#if LL_DARWIN +    STOP_GLERROR; +    if (mMappedData) +    { +        if (mGLBuffer) +        { +            glDeleteBuffers(1, &mGLBuffer); +        } +        mGLBuffer = gen_buffer(); +        glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); +        sGLRenderBuffer = mGLBuffer; +        glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW); +    } +    else if (mGLBuffer != sGLRenderBuffer) +    { +        glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); +        sGLRenderBuffer = mGLBuffer; +    } +    STOP_GLERROR; + +    if (mMappedIndexData) +    { +        if (mGLIndices) +        { +            glDeleteBuffers(1, &mGLIndices); +        } + +        mGLIndices = gen_buffer(); +        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); +        sGLRenderIndices = mGLIndices; + +        glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW); +    } +    else if (mGLIndices != sGLRenderIndices) +    { +        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); +        sGLRenderIndices = mGLIndices; +    } +    STOP_GLERROR; +#else +      if (!mMappedVertexRegions.empty())      {          LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex"); +          if (sGLRenderBuffer != mGLBuffer)          {              glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); @@ -1199,14 +1316,13 @@ void LLVertexBuffer::unmapBuffer()              }              else              { -                flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); +                flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);                  start = region.mStart;                  end = region.mEnd;              }          } -        flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); - +        flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);          mMappedVertexRegions.clear();      } @@ 
-1233,16 +1349,16 @@ void LLVertexBuffer::unmapBuffer()              }              else              { -                flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start); +                flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);                  start = region.mStart;                  end = region.mEnd;              }          } -        flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start); - +        flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);          mMappedIndexRegions.clear();      } +#endif  }  //---------------------------------------------------------------------------- @@ -1363,10 +1479,17 @@ bool LLVertexBuffer::getClothWeightStrider(LLStrider<LLVector4>& strider, U32 in  // Set for rendering  void LLVertexBuffer::setBuffer()  { +    STOP_GLERROR; +#if LL_DARWIN +    if (!mGLBuffer) +    { // OS X doesn't allocate a buffer until we call unmapBuffer +        return; +    } +#endif      // no data may be pending      llassert(mMappedVertexRegions.empty());      llassert(mMappedIndexRegions.empty()); - +          // a shader must be bound      llassert(LLGLSLShader::sCurBoundShaderPtr); @@ -1395,12 +1518,15 @@ void LLVertexBuffer::setBuffer()          glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);          sGLRenderIndices = mGLIndices;      } + +    STOP_GLERROR;  }  // virtual (default)  void LLVertexBuffer::setupVertexBuffer()  { +    STOP_GLERROR;      U8* base = nullptr;      U32 data_mask = LLGLSLShader::sCurBoundShaderPtr->mAttributeMask; @@ -1490,59 +1616,76 @@ void LLVertexBuffer::setupVertexBuffer()          void* ptr = (void*)(base + mOffsets[TYPE_VERTEX]);          glVertexAttribPointer(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);      } +    STOP_GLERROR;  }  void LLVertexBuffer::setPositionData(const LLVector4a* data)  { +#if !LL_DARWIN      
llassert(sGLRenderBuffer == mGLBuffer); -    flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data); +#endif +    flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data, mMappedData);  }  void LLVertexBuffer::setTexCoordData(const LLVector2* data)  { +#if !LL_DARWIN      llassert(sGLRenderBuffer == mGLBuffer); -    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*)data); +#endif +    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*)data, mMappedData);  }  void LLVertexBuffer::setColorData(const LLColor4U* data)  { +#if !LL_DARWIN      llassert(sGLRenderBuffer == mGLBuffer); -    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data); +#endif +    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data, mMappedData);  }  void LLVertexBuffer::setNormalData(const LLVector4a* data)  { +#if !LL_DARWIN      llassert(sGLRenderBuffer == mGLBuffer); -    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data); +#endif +    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data, mMappedData);  }  void LLVertexBuffer::setTangentData(const LLVector4a* data)  { +#if !LL_DARWIN      llassert(sGLRenderBuffer == mGLBuffer); -    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data); +#endif +    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data, mMappedData);  }  void LLVertexBuffer::setWeight4Data(const LLVector4a* data)  { 
+#if !LL_DARWIN      llassert(sGLRenderBuffer == mGLBuffer); -    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data); +#endif +    flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data, mMappedData);  }  void LLVertexBuffer::setIndexData(const U16* data)  { +#if !LL_DARWIN      llassert(sGLRenderIndices == mGLIndices); -    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data); +#endif +    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data, mMappedIndexData);  }  void LLVertexBuffer::setIndexData(const U32* data)  { +#if !LL_DARWIN      llassert(sGLRenderIndices == mGLIndices); +#endif      if (mIndicesType != GL_UNSIGNED_INT)      { // HACK -- vertex buffers are initialized as 16-bit indices, but can be switched to 32-bit indices          mIndicesType = GL_UNSIGNED_INT;          mIndicesStride = 4;          mNumIndices /= 2;      } -    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*)data); +    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*)data, mMappedIndexData);  } diff --git a/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl b/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl index 5f598f84a7..5c79fd7315 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/terrainF.glsl @@ -57,7 +57,7 @@ void main()      outColor.a = 0.0; // yes, downstream atmospherics  -    frag_data[0] = outColor; +    frag_data[0] = max(outColor, vec4(0));      frag_data[1] = vec4(0.0,0.0,0.0,-1.0);      vec3 nvn = normalize(vary_normal);      frag_data[2] = vec4(nvn.xyz, GBUFFER_FLAG_HAS_ATMOS); diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 0e2f19b3d9..cda73f59ed 100644 
--- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -628,13 +628,6 @@ void LLFace::renderOneWireframe(const LLColor4 &color, F32 fogCfx, bool wirefram      {          LLGLDisable depth(wireframe_selection ? 0 : GL_BLEND); -        //LLGLEnable stencil(wireframe_selection ? 0 : GL_STENCIL_TEST); - -        if (!wireframe_selection) -        { //modify wireframe into outline selection mode -            glStencilFunc(GL_NOTEQUAL, 2, 0xffff); -            glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); -        }          LLGLEnable offset(GL_POLYGON_OFFSET_LINE);          glPolygonOffset(3.f, 3.f);  | 
