diff options
Diffstat (limited to 'indra/llrender/llvertexbuffer.cpp')
-rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 444 |
1 files changed, 277 insertions, 167 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index ee491b79e3..cc0329c6f8 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -36,10 +36,7 @@ #include "llshadermgr.h" #include "llglslshader.h" #include "llmemory.h" - -#include "llcontrol.h" - -extern LLControlGroup gSavedSettings; +#include <glm/gtc/type_ptr.hpp> //Next Highest Power Of Two //helper function, returns first number > v that is a power of 2, or v if v is already a power of 2 @@ -275,11 +272,13 @@ static GLuint gen_buffer() { LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen buffer"); sIndex = pool_size; +#if !LL_DARWIN if (!gGLManager.mIsAMD) { glGenBuffers(pool_size, sNamePool); } else +#endif { // work around for AMD driver bug for (U32 i = 0; i < pool_size; ++i) { @@ -292,22 +291,58 @@ static GLuint gen_buffer() return ret; } -#define ANALYZE_VBO_POOL 0 +static void delete_buffers(S32 count, GLuint* buffers) +{ + LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + // wait a few frames before actually deleting the buffers to avoid + // synchronization issues with the GPU + static std::vector<GLuint> sFreeList[4]; + + if (gGLManager.mInited) + { + U32 idx = LLImageGL::sFrameCount % 4; + + for (S32 i = 0; i < count; ++i) + { + sFreeList[idx].push_back(buffers[i]); + } -#if 0 // LL_DARWIN + idx = (LLImageGL::sFrameCount + 3) % 4; -// experimental -- disable VBO pooling on OS X and use glMapBuffer + if (!sFreeList[idx].empty()) + { + glDeleteBuffers((GLsizei)sFreeList[idx].size(), sFreeList[idx].data()); + sFreeList[idx].resize(0); + } + } +} + + +#define ANALYZE_VBO_POOL 0 + +// VBO Pool interface class LLVBOPool { + public: + virtual ~LLVBOPool() = default; + virtual void allocate(GLenum type, U32 size, GLuint& name, U8*& data) = 0; + virtual void free(GLenum type, U32 size, GLuint name, U8* data) = 0; + virtual U64 getVramBytesUsed() = 0; +}; + +// VBO Pool for Apple GPUs (as in M1/M2 etc, not Intel macs) +// Effectively disables VBO pooling +class LLAppleVBOPool final: public LLVBOPool +{ public: U64 mAllocated = 0; - U64 getVramBytesUsed() + U64 getVramBytesUsed() override { return mAllocated; } - void allocate(GLenum type, U32 size, GLuint& name, U8*& data) + void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; STOP_GLERROR; @@ -327,7 +362,7 @@ public: } } - void free(GLenum type, U32 size, GLuint name, U8* data) + void free(GLenum type, U32 size, GLuint name, U8* data) override { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); @@ -342,21 +377,17 @@ public: STOP_GLERROR; if (name) { - glDeleteBuffers(1, &name); + delete_buffers(1, &name); } STOP_GLERROR; } }; -#else - -class LLVBOPool +// VBO Pool for GPUs that benefit from VBO pooling +class LLDefaultVBOPool final : public LLVBOPool { public: typedef std::chrono::steady_clock::time_point Time; - - U32 mMappingMode; - struct Entry { U8* mData; @@ -364,16 +395,8 @@ public: Time mAge; }; - /* - LLVBOPool() + ~LLDefaultVBOPool() override { - - } - */ - - ~LLVBOPool() - { - if(mMappingMode == 3) return; clear(); } @@ -390,10 +413,9 @@ public: U32 mMisses = 0; U32 mHits = 0; - U64 getVramBytesUsed() + U64 getVramBytesUsed() override { - if(mMappingMode == 3) return mAllocated; - else return mAllocated + mReserved; + return mAllocated + mReserved; } // increase the size to some common value (e.g. a power of two) to increase hit rate @@ -407,7 +429,7 @@ public: size += block_size - (size % block_size); } - void allocate(GLenum type, U32 size, GLuint& name, U8*& data) + void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); @@ -415,20 +437,6 @@ public: llassert(data == nullptr); // non null data indicates a buffer that wasn't freed llassert(size >= 2); // any buffer size smaller than a single index is nonsensical - if(mMappingMode == 3) - { - mAllocated += size; - - { //allocate a new buffer - LL_PROFILE_GPU_ZONE("vbo alloc"); - // ON OS X, we don't allocate a VBO until the last possible moment - // in unmapBuffer - data = (U8*) ll_aligned_malloc_16(size); - //STOP_GLERROR; - } - return; - } - mDistributed += size; adjustSize(size); mAllocated += size; @@ -477,30 +485,11 @@ public: clean(); } - void free(GLenum type, U32 size, GLuint name, U8* data) + void free(GLenum type, U32 size, GLuint name, U8* data) override { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER); llassert(size >= 2); - - if(mMappingMode == 3) - { - if (data) - { - ll_aligned_free_16(data); - } - - mAllocated -= size; - //STOP_GLERROR; - if (name) - { - glDeleteBuffers(1, &name); - } - //STOP_GLERROR; - - return; - } - llassert(name != 0); llassert(data != nullptr); @@ -559,7 +548,7 @@ public: LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vbo cache timeout"); auto& entry = entries.back(); ll_aligned_free_16(entry.mData); - glDeleteBuffers(1, &entry.mGLName); + delete_buffers(1, &entry.mGLName); llassert(mReserved >= iter->first); mReserved -= iter->first; entries.pop_back(); @@ -595,7 +584,7 @@ public: for (auto& entry : entries.second) { ll_aligned_free_16(entry.mData); - glDeleteBuffers(1, &entry.mGLName); + delete_buffers(1, &entry.mGLName); } } @@ -604,7 +593,7 @@ public: for (auto& entry : entries.second) { ll_aligned_free_16(entry.mData); - glDeleteBuffers(1, &entry.mGLName); + delete_buffers(1, &entry.mGLName); } } @@ -614,10 +603,71 @@ public: mVBOPool.clear(); } }; -#endif static LLVBOPool* sVBOPool = nullptr; +void LLVertexBufferData::drawWithMatrix() +{ + if (!mVB) + { + llassert(false); + // Not supposed to happen, check buffer generation + return; + } + + if (mTexName) + { + gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTexName); + } + else + { + gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE); + } + + gGL.matrixMode(LLRender::MM_MODELVIEW); + gGL.pushMatrix(); + gGL.loadMatrix(glm::value_ptr(mModelView)); + gGL.matrixMode(LLRender::MM_PROJECTION); + gGL.pushMatrix(); + gGL.loadMatrix(glm::value_ptr(mProjection)); + gGL.matrixMode(LLRender::MM_TEXTURE0); + gGL.pushMatrix(); + gGL.loadMatrix(glm::value_ptr(mTexture0)); + + mVB->setBuffer(); + mVB->drawArrays(mMode, 0, mCount); + + gGL.popMatrix(); + gGL.matrixMode(LLRender::MM_PROJECTION); + gGL.popMatrix(); + gGL.matrixMode(LLRender::MM_MODELVIEW); + gGL.popMatrix(); +} + +void LLVertexBufferData::draw() +{ + if (!mVB) + { + llassert(false); + // Not supposed to happen, check buffer generation + return; + } + + if (mTexName) + { + gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTexName); + } + else + { + gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE); + } + + mVB->setBuffer(); + mVB->drawArrays(mMode, 0, mCount); +} + +//============================================================================ + //static U64 LLVertexBuffer::getBytesAllocated() { @@ -632,7 +682,6 @@ U32 LLVertexBuffer::sGLRenderIndices = 0; U32 LLVertexBuffer::sLastMask = 0; U32 LLVertexBuffer::sVertexCount = 0; -U32 LLVertexBuffer::sMappingMode = 0; //NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware const U32 LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] = @@ -681,7 +730,6 @@ const U32 LLVertexBuffer::sGLMode[LLRender::NUM_MODES] = GL_POINTS, GL_LINES, GL_LINE_STRIP, - GL_QUADS, GL_LINE_LOOP, }; @@ -896,10 +944,21 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const void LLVertexBuffer::initClass(LLWindow* window) { llassert(sVBOPool == nullptr); - sVBOPool = new LLVBOPool(); - sVBOPool->mMappingMode = sMappingMode; - //LL_INFOS() << "sVBOPool intialized with mapping mode: " << sMappingMode << LL_ENDL; + LL_INFOS() << "VBO Pooling Disabled" << LL_ENDL; + sVBOPool = new LLAppleVBOPool(); + + //if (gGLManager.mIsApple) + if(0) + { + LL_INFOS() << "VBO Pooling Disabled" << LL_ENDL; + sVBOPool = new LLAppleVBOPool(); + } + else + { + LL_INFOS() << "VBO Pooling Enabled" << LL_ENDL; + sVBOPool = new LLDefaultVBOPool(); + } #if ENABLE_GL_WORK_QUEUE sQueue = new GLWorkQueue(); @@ -958,6 +1017,24 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask) } } +// list of mapped buffers +// NOTE: must not be LLPointer<LLVertexBuffer> to avoid breaking non-ref-counted LLVertexBuffer instances +static std::vector<LLVertexBuffer*> sMappedBuffers; + +//static +void LLVertexBuffer::flushBuffers() +{ + LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + // must only be called from main thread + for (auto& buffer : sMappedBuffers) + { + buffer->_unmapBuffer(); + buffer->mMapped = false; + } + + sMappedBuffers.resize(0); +} + //static U32 LLVertexBuffer::calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices) { @@ -1001,6 +1078,12 @@ U32 LLVertexBuffer::calcVertexSize(const U32& typemask) //virtual LLVertexBuffer::~LLVertexBuffer() { + if (mMapped) + { // is on the mapped buffer list but doesn't need to be flushed + mMapped = false; + unmapBuffer(); + } + destroyGLBuffer(); destroyGLIndices(); @@ -1202,13 +1285,15 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, U32 start, U32 end) U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 index, S32 count) { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + _mapBuffer(); if (count == -1) { count = mNumVerts - index; } - if(sMappingMode != 3) + //if (!gGLManager.mIsApple) + if (1) { U32 start = mOffsets[type] + sTypeSize[type] * index; U32 end = start + sTypeSize[type] * count-1; @@ -1231,7 +1316,6 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde mMappedVertexRegions.push_back({ start, end }); } } - return mMappedData+mOffsets[type]+sTypeSize[type]*index; } @@ -1239,13 +1323,15 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count) { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + _mapBuffer(); if (count == -1) { count = mNumIndices-index; } - if(sMappingMode != 3) + //if (!gGLManager.mIsApple) + if(1) { U32 start = sizeof(U16) * index; U32 end = start + sizeof(U16) * count-1; @@ -1280,12 +1366,13 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count) // dst -- mMappedData or mMappedIndexData void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8* dst) { - if(sMappingMode == 2) + if (gGLManager.mIsApple) { - //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb glMapBufferRange"); - if (end == 0) return; + U32 MapBits = GL_MAP_WRITE_BIT; U32 buffer_size = end-start+1; - U8 * mptr = (U8*) glMapBufferRange( target, start, end-start+1, GL_MAP_WRITE_BIT); + + U8 * mptr = NULL; + mptr = (U8*) glMapBufferRange( target, start, end-start+1, MapBits); if (mptr) { @@ -1293,44 +1380,65 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8 if(!glUnmapBuffer(target)) LL_WARNS() << "glUnmapBuffer() failed" << LL_ENDL; } else LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; - return; - } - if(sMappingMode == 3) - { + /* + // on OS X, flush_vbo doesn't actually write to the GL buffer, so be sure to call + // _mapBuffer to tag the buffer for flushing to GL + _mapBuffer(); LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy"); - //STOP_GLERROR; + STOP_GLERROR; // copy into mapped buffer memcpy(dst+start, data, end-start+1); - return; + */ } - - llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices); - - // skip mapped data and stream to GPU via glBufferSubData - if (end != 0) + else { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); - LL_PROFILE_ZONE_NUM(start); - LL_PROFILE_ZONE_NUM(end); - LL_PROFILE_ZONE_NUM(end-start); - - constexpr U32 block_size = 8192; + llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices); - for (U32 i = start; i <= end; i += block_size) + // skip mapped data and stream to GPU via glBufferSubData + if (end != 0) { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); - //LL_PROFILE_GPU_ZONE("glBufferSubData"); - U32 tend = llmin(i + block_size, end); - U32 size = tend - i + 1; - glBufferSubData(target, i, size, (U8*) data + (i-start)); + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); + LL_PROFILE_ZONE_NUM(start); + LL_PROFILE_ZONE_NUM(end); + LL_PROFILE_ZONE_NUM(end-start); + + constexpr U32 block_size = 65536; + + for (U32 i = start; i <= end; i += block_size) + { + //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + //LL_PROFILE_GPU_ZONE("glBufferSubData"); + U32 tend = llmin(i + block_size, end); + U32 size = tend - i + 1; + glBufferSubData(target, i, size, (U8*) data + (i-start)); + } } } } void LLVertexBuffer::unmapBuffer() { + flushBuffers(); +} + +void LLVertexBuffer::_mapBuffer() +{ + if (!mMapped) + { + mMapped = true; + sMappedBuffers.push_back(this); + } +} + +void LLVertexBuffer::_unmapBuffer() +{ STOP_GLERROR; + if (!mMapped) + { + return; + } + struct SortMappedRegion { bool operator()(const MappedRegion& lhs, const MappedRegion& rhs) @@ -1339,114 +1447,116 @@ void LLVertexBuffer::unmapBuffer() } }; - if(sMappingMode == 3) + //if (gGLManager.mIsApple) + if (0) { - //STOP_GLERROR; + STOP_GLERROR; if (mMappedData) { if (mGLBuffer) { - glDeleteBuffers(1, &mGLBuffer); + delete_buffers(1, &mGLBuffer); } mGLBuffer = gen_buffer(); glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); sGLRenderBuffer = mGLBuffer; - glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW); } else if (mGLBuffer != sGLRenderBuffer) { glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); sGLRenderBuffer = mGLBuffer; } - //STOP_GLERROR; + STOP_GLERROR; if (mMappedIndexData) { if (mGLIndices) { - glDeleteBuffers(1, &mGLIndices); + delete_buffers(1, &mGLIndices); } mGLIndices = gen_buffer(); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); sGLRenderIndices = mGLIndices; - glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_DYNAMIC_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW); } else if (mGLIndices != sGLRenderIndices) { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); sGLRenderIndices = mGLIndices; } - //STOP_GLERROR; - return; + STOP_GLERROR; } - - if (!mMappedVertexRegions.empty()) + else { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex"); - - if (sGLRenderBuffer != mGLBuffer) + if (!mMappedVertexRegions.empty()) { - glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); - sGLRenderBuffer = mGLBuffer; - } + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex"); - U32 start = 0; - U32 end = 0; - - std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion()); - - for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) - { - const MappedRegion& region = mMappedVertexRegions[i]; - if (region.mStart == end + 1) + if (sGLRenderBuffer != mGLBuffer) { - end = region.mEnd; + glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); + sGLRenderBuffer = mGLBuffer; } - else - { - flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); - start = region.mStart; - end = region.mEnd; - } - } - flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); - mMappedVertexRegions.clear(); - } + U32 start = 0; + U32 end = 0; - if (!mMappedIndexRegions.empty()) - { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index"); + std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion()); - if (mGLIndices != sGLRenderIndices) - { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); - sGLRenderIndices = mGLIndices; - } - U32 start = 0; - U32 end = 0; + for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) + { + const MappedRegion& region = mMappedVertexRegions[i]; + if (region.mStart == end + 1) + { + end = region.mEnd; + } + else + { + flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); + start = region.mStart; + end = region.mEnd; + } + } - std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion()); + flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData); + mMappedVertexRegions.clear(); + } - for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) + if (!mMappedIndexRegions.empty()) { - const MappedRegion& region = mMappedIndexRegions[i]; - if (region.mStart == end + 1) + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index"); + + if (mGLIndices != sGLRenderIndices) { - end = region.mEnd; + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); + sGLRenderIndices = mGLIndices; } - else + U32 start = 0; + U32 end = 0; + + std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion()); + + for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) { - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); - start = region.mStart; - end = region.mEnd; + const MappedRegion& region = mMappedIndexRegions[i]; + if (region.mStart == end + 1) + { + end = region.mEnd; + } + else + { + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); + start = region.mStart; + end = region.mEnd; + } } - } - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); - mMappedIndexRegions.clear(); + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData); + mMappedIndexRegions.clear(); + } } } @@ -1568,12 +1678,12 @@ bool LLVertexBuffer::getClothWeightStrider(LLStrider<LLVector4>& strider, U32 in // Set for rendering void LLVertexBuffer::setBuffer() { - if(sMappingMode == 3) + STOP_GLERROR; + + if (mMapped) { - if (!mGLBuffer) - { - return; - } + LL_WARNS_ONCE() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL; + _unmapBuffer(); } // no data may be pending |