1 files changed, 277 insertions, 167 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index ee491b79e3..cc0329c6f8 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -36,10 +36,7 @@
 #include "llshadermgr.h"
 #include "llglslshader.h"
 #include "llmemory.h"
-
-#include "llcontrol.h"
-
-extern LLControlGroup gSavedSettings;
+#include <glm/gtc/type_ptr.hpp>
 
 //Next Highest Power Of Two
 //helper function, returns first number > v that is a power of 2, or v if v is already a power of 2
@@ -275,11 +272,13 @@ static GLuint gen_buffer()
     {
         LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen buffer");
         sIndex = pool_size;
+#if !LL_DARWIN
         if (!gGLManager.mIsAMD)
         {
             glGenBuffers(pool_size, sNamePool);
         }
         else
+#endif
         { // work around for AMD driver bug
             for (U32 i = 0; i < pool_size; ++i)
             {
@@ -292,22 +291,58 @@ static GLuint gen_buffer()
     return ret;
 }
 
-#define ANALYZE_VBO_POOL 0
+static void delete_buffers(S32 count, GLuint* buffers) // queue `count` GL buffer names from `buffers` for a deferred glDeleteBuffers
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    // Deferred delete: names are parked in a 4-slot ring indexed by LLImageGL::sFrameCount % 4
+    // instead of being deleted immediately, to avoid synchronization issues with the GPU
+    static std::vector<GLuint> sFreeList[4]; // pending buffer names, one list per ring slot
+
+    if (gGLManager.mInited) // when GL is not initialized the names are neither queued nor deleted
+    {
+        U32 idx = LLImageGL::sFrameCount % 4; // slot for the current frame count
+
+        for (S32 i = 0; i < count; ++i)
+        {
+            sFreeList[idx].push_back(buffers[i]);
+        }
 
-#if 0 // LL_DARWIN
+        idx = (LLImageGL::sFrameCount + 3) % 4; // slot that was current when the frame count was one lower (presumably the previous frame -- confirm sFrameCount advances once per frame)
 
-// experimental -- disable VBO pooling on OS X and use glMapBuffer
+        if (!sFreeList[idx].empty())
+        {
+            glDeleteBuffers((GLsizei)sFreeList[idx].size(), sFreeList[idx].data()); // release everything parked in that slot with one GL call
+            sFreeList[idx].resize(0); // empty the slot so it can be refilled when its index comes around again
+        }
+    }
+}
+
+
+#define ANALYZE_VBO_POOL 0
+
+// VBO Pool interface: abstract base implemented by LLAppleVBOPool and LLDefaultVBOPool
 class LLVBOPool
 {
+    public:
+    virtual ~LLVBOPool() = default; // virtual: the pool is held and used through an LLVBOPool* (sVBOPool)
+    virtual void allocate(GLenum type, U32 size, GLuint& name, U8*& data) = 0; // hand out a buffer of `size` bytes; the GL name and data pointer come back through the references
+    virtual void free(GLenum type, U32 size, GLuint name, U8* data) = 0; // release a buffer (presumably one handed out by allocate() with the same type/size -- confirm at call sites)
+    virtual U64 getVramBytesUsed() = 0; // byte total tracked by the concrete pool
+};
+
+// VBO Pool for Apple GPUs (as in M1/M2 etc, not Intel macs)
+// Effectively disables VBO pooling
+class LLAppleVBOPool final: public LLVBOPool
+{
 public:
     U64 mAllocated = 0;
 
-    U64 getVramBytesUsed()
+    U64 getVramBytesUsed() override
     {
         return mAllocated;
     }
 
-    void allocate(GLenum type, U32 size, GLuint& name, U8*& data)
+    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         STOP_GLERROR;
@@ -327,7 +362,7 @@ public:
         }
     }
 
-    void free(GLenum type, U32 size, GLuint name, U8* data)
+    void free(GLenum type, U32 size, GLuint name, U8* data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -342,21 +377,17 @@ public:
         STOP_GLERROR;
         if (name)
         {
-            glDeleteBuffers(1, &name);
+            delete_buffers(1, &name);
         }
         STOP_GLERROR;
     }
 };
 
-#else
-
-class LLVBOPool
+// VBO Pool for GPUs that benefit from VBO pooling
+class LLDefaultVBOPool final : public LLVBOPool
 {
 public:
     typedef std::chrono::steady_clock::time_point Time;
-
-    U32 mMappingMode;
-
     struct Entry
     {
         U8* mData;
@@ -364,16 +395,8 @@ public:
         Time mAge;
     };
 
-    /*
-    LLVBOPool()
+    ~LLDefaultVBOPool() override
     {
-
-    }
-    */
-
-    ~LLVBOPool()
-    {
-        if(mMappingMode == 3) return;
         clear();
     }
 
@@ -390,10 +413,9 @@ public:
     U32 mMisses = 0;
     U32 mHits = 0;
 
-    U64 getVramBytesUsed()
+    U64 getVramBytesUsed() override
     {
-        if(mMappingMode == 3) return mAllocated;
-        else return mAllocated + mReserved;
+        return mAllocated + mReserved;
     }
 
     // increase the size to some common value (e.g. a power of two) to increase hit rate
@@ -407,7 +429,7 @@ public:
         size += block_size - (size % block_size);
     }
 
-    void allocate(GLenum type, U32 size, GLuint& name, U8*& data)
+    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
@@ -415,20 +437,6 @@ public:
         llassert(data == nullptr);  // non null data indicates a buffer that wasn't freed
         llassert(size >= 2);  // any buffer size smaller than a single index is nonsensical
 
-        if(mMappingMode == 3)
-        {
-            mAllocated += size;
-
-            { //allocate a new buffer
-                LL_PROFILE_GPU_ZONE("vbo alloc");
-                // ON OS X, we don't allocate a VBO until the last possible moment
-                // in unmapBuffer
-                data = (U8*) ll_aligned_malloc_16(size);
-                //STOP_GLERROR;
-            }
-            return;
-        }
-
         mDistributed += size;
         adjustSize(size);
         mAllocated += size;
@@ -477,30 +485,11 @@ public:
         clean();
     }
 
-    void free(GLenum type, U32 size, GLuint name, U8* data)
+    void free(GLenum type, U32 size, GLuint name, U8* data) override
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
         llassert(type == GL_ARRAY_BUFFER || type == GL_ELEMENT_ARRAY_BUFFER);
         llassert(size >= 2);
-
-        if(mMappingMode == 3)
-        {
-            if (data)
-            {
-                ll_aligned_free_16(data);
-            }
-
-            mAllocated -= size;
-            //STOP_GLERROR;
-            if (name)
-            {
-                glDeleteBuffers(1, &name);
-            }
-            //STOP_GLERROR;
-
-            return;
-        }
-
         llassert(name != 0);
         llassert(data != nullptr);
 
@@ -559,7 +548,7 @@ public:
                     LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vbo cache timeout");
                     auto& entry = entries.back();
                     ll_aligned_free_16(entry.mData);
-                    glDeleteBuffers(1, &entry.mGLName);
+                    delete_buffers(1, &entry.mGLName);
                     llassert(mReserved >= iter->first);
                     mReserved -= iter->first;
                     entries.pop_back();
@@ -595,7 +584,7 @@ public:
             for (auto& entry : entries.second)
             {
                 ll_aligned_free_16(entry.mData);
-                glDeleteBuffers(1, &entry.mGLName);
+                delete_buffers(1, &entry.mGLName);
             }
         }
 
@@ -604,7 +593,7 @@ public:
             for (auto& entry : entries.second)
             {
                 ll_aligned_free_16(entry.mData);
-                glDeleteBuffers(1, &entry.mGLName);
+                delete_buffers(1, &entry.mGLName);
             }
         }
 
@@ -614,10 +603,71 @@ public:
         mVBOPool.clear();
     }
 };
-#endif
 
 static LLVBOPool* sVBOPool = nullptr;
 
+void LLVertexBufferData::drawWithMatrix() // draw mVB using this object's stored modelview / projection / texture0 matrices
+{
+    if (!mVB)
+    {
+        llassert(false);
+        // No vertex buffer to draw: not supposed to happen, check buffer generation
+        return;
+    }
+
+    if (mTexName) // a texture name is set: bind it on texture unit 0
+    {
+        gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTexName);
+    }
+    else // no texture name: unbind TT_TEXTURE on unit 0
+    {
+        gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
+    }
+
+    gGL.matrixMode(LLRender::MM_MODELVIEW); // for each of the three stacks: select it, push, then load the stored matrix
+    gGL.pushMatrix();
+    gGL.loadMatrix(glm::value_ptr(mModelView));
+    gGL.matrixMode(LLRender::MM_PROJECTION);
+    gGL.pushMatrix();
+    gGL.loadMatrix(glm::value_ptr(mProjection));
+    gGL.matrixMode(LLRender::MM_TEXTURE0);
+    gGL.pushMatrix();
+    gGL.loadMatrix(glm::value_ptr(mTexture0));
+
+    mVB->setBuffer();
+    mVB->drawArrays(mMode, 0, mCount); // draw mCount vertices starting at vertex 0
+
+    gGL.popMatrix(); // matrix mode is still MM_TEXTURE0 here, so this pops the texture stack
+    gGL.matrixMode(LLRender::MM_PROJECTION);
+    gGL.popMatrix();
+    gGL.matrixMode(LLRender::MM_MODELVIEW);
+    gGL.popMatrix(); // MM_MODELVIEW is left as the current matrix mode
+}
+
+void LLVertexBufferData::draw() // draw mVB with whatever matrices are currently loaded (no matrix push/load/pop, unlike drawWithMatrix)
+{
+    if (!mVB)
+    {
+        llassert(false);
+        // No vertex buffer to draw: not supposed to happen, check buffer generation
+        return;
+    }
+
+    if (mTexName) // a texture name is set: bind it on texture unit 0
+    {
+        gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTexName);
+    }
+    else // no texture name: unbind TT_TEXTURE on unit 0
+    {
+        gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
+    }
+
+    mVB->setBuffer();
+    mVB->drawArrays(mMode, 0, mCount); // draw mCount vertices starting at vertex 0
+}
+
+//============================================================================
+
 //static
 U64 LLVertexBuffer::getBytesAllocated()
 {
@@ -632,7 +682,6 @@ U32 LLVertexBuffer::sGLRenderIndices = 0;
 U32 LLVertexBuffer::sLastMask = 0;
 U32 LLVertexBuffer::sVertexCount = 0;
 
-U32 LLVertexBuffer::sMappingMode = 0;
 
 //NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware
 const U32 LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] =
@@ -681,7 +730,6 @@ const U32 LLVertexBuffer::sGLMode[LLRender::NUM_MODES] =
     GL_POINTS,
     GL_LINES,
     GL_LINE_STRIP,
-    GL_QUADS,
     GL_LINE_LOOP,
 };
 
@@ -896,10 +944,21 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const
 void LLVertexBuffer::initClass(LLWindow* window)
 {
     llassert(sVBOPool == nullptr);
-    sVBOPool = new LLVBOPool();
-    sVBOPool->mMappingMode = sMappingMode;
 
-    //LL_INFOS() << "sVBOPool intialized with mapping mode: " << sMappingMode << LL_ENDL;
+    LL_INFOS() << "VBO Pooling Disabled" << LL_ENDL;
+    sVBOPool = new LLAppleVBOPool();
+
+    //if (gGLManager.mIsApple)
+    if(0)
+    {
+        LL_INFOS() << "VBO Pooling Disabled" << LL_ENDL;
+        sVBOPool = new LLAppleVBOPool();
+    }
+    else
+    {
+        LL_INFOS() << "VBO Pooling Enabled" << LL_ENDL;
+        sVBOPool = new LLDefaultVBOPool();
+    }
 
 #if ENABLE_GL_WORK_QUEUE
     sQueue = new GLWorkQueue();
@@ -958,6 +1017,24 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask)
     }
 }
 
+// list of buffers tagged as mapped by _mapBuffer() and not yet flushed by flushBuffers()
+// NOTE: must not be LLPointer<LLVertexBuffer> to avoid breaking non-ref-counted LLVertexBuffer instances
+static std::vector<LLVertexBuffer*> sMappedBuffers;
+
+//static
+void LLVertexBuffer::flushBuffers() // unmap every buffer on sMappedBuffers, then empty the list
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    // must only be called from main thread
+    for (auto& buffer : sMappedBuffers)
+    {
+        buffer->_unmapBuffer(); // _unmapBuffer() returns early when mMapped has already been cleared
+        buffer->mMapped = false; // allow a later _mapBuffer() to register the buffer again
+    }
+
+    sMappedBuffers.resize(0);
+}
+
 //static
 U32 LLVertexBuffer::calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices)
 {
@@ -1001,6 +1078,12 @@ U32 LLVertexBuffer::calcVertexSize(const U32& typemask)
 //virtual
 LLVertexBuffer::~LLVertexBuffer()
 {
+    if (mMapped)
+    { // is on the mapped buffer list but doesn't need to be flushed
+        mMapped = false;
+        unmapBuffer();
+    }
+
     destroyGLBuffer();
     destroyGLIndices();
 
@@ -1202,13 +1285,15 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, U32 start, U32 end)
 U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 index, S32 count)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    _mapBuffer();
 
     if (count == -1)
     {
         count = mNumVerts - index;
     }
 
-    if(sMappingMode != 3)
+    //if (!gGLManager.mIsApple)
+    if (1)
     {
         U32 start = mOffsets[type] + sTypeSize[type] * index;
         U32 end = start + sTypeSize[type] * count-1;
@@ -1231,7 +1316,6 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
             mMappedVertexRegions.push_back({ start, end });
         }
     }
-
     return mMappedData+mOffsets[type]+sTypeSize[type]*index;
 }
 
@@ -1239,13 +1323,15 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
 U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+    _mapBuffer();
 
     if (count == -1)
     {
         count = mNumIndices-index;
     }
 
-    if(sMappingMode != 3)
+    //if (!gGLManager.mIsApple)
+    if(1)
     {
         U32 start = sizeof(U16) * index;
         U32 end = start + sizeof(U16) * count-1;
@@ -1280,12 +1366,13 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
 //  dst -- mMappedData or mMappedIndexData
 void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8* dst)
 {
-    if(sMappingMode == 2)
+    if (gGLManager.mIsApple)
     {
-        //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb glMapBufferRange");
-        if (end == 0) return;
+        U32 MapBits = GL_MAP_WRITE_BIT;
         U32 buffer_size = end-start+1;
-        U8 * mptr = (U8*) glMapBufferRange( target, start, end-start+1, GL_MAP_WRITE_BIT);
+
+        U8 * mptr = NULL;
+        mptr = (U8*) glMapBufferRange( target, start, end-start+1, MapBits);
 
         if (mptr)
         {
@@ -1293,44 +1380,65 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8
             if(!glUnmapBuffer(target)) LL_WARNS() << "glUnmapBuffer() failed" << LL_ENDL;
         }
         else LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL;
-        return;
-    }
 
-    if(sMappingMode == 3)
-    {
+        /*
+        // on OS X, flush_vbo doesn't actually write to the GL buffer, so be sure to call
+        // _mapBuffer to tag the buffer for flushing to GL
+        _mapBuffer();
         LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb memcpy");
-        //STOP_GLERROR;
+        STOP_GLERROR;
         // copy into mapped buffer
         memcpy(dst+start, data, end-start+1);
-        return;
+        */
     }
-
-    llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices);
-
-    // skip mapped data and stream to GPU via glBufferSubData
-    if (end != 0)
+    else
     {
-        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
-        LL_PROFILE_ZONE_NUM(start);
-        LL_PROFILE_ZONE_NUM(end);
-        LL_PROFILE_ZONE_NUM(end-start);
-
-        constexpr U32 block_size = 8192;
+        llassert(target == GL_ARRAY_BUFFER ? sGLRenderBuffer == mGLBuffer : sGLRenderIndices == mGLIndices);
 
-        for (U32 i = start; i <= end; i += block_size)
+        // skip mapped data and stream to GPU via glBufferSubData
+        if (end != 0)
         {
-            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
-            //LL_PROFILE_GPU_ZONE("glBufferSubData");
-            U32 tend = llmin(i + block_size, end);
-            U32 size = tend - i + 1;
-            glBufferSubData(target, i, size, (U8*) data + (i-start));
+            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData");
+            LL_PROFILE_ZONE_NUM(start);
+            LL_PROFILE_ZONE_NUM(end);
+            LL_PROFILE_ZONE_NUM(end-start);
+
+            constexpr U32 block_size = 65536;
+
+            for (U32 i = start; i <= end; i += block_size)
+            {
+                //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+                //LL_PROFILE_GPU_ZONE("glBufferSubData");
+                U32 tend = llmin(i + block_size, end);
+                U32 size = tend - i + 1;
+                glBufferSubData(target, i, size, (U8*) data + (i-start));
+            }
         }
     }
 }
 
 void LLVertexBuffer::unmapBuffer()
 {
+    flushBuffers(); // static flush: unmaps every buffer on the mapped list, not only this one
+}
+
+void LLVertexBuffer::_mapBuffer() // tag this buffer as mapped and register it for the next flushBuffers()
+{
+    if (!mMapped) // guard: a buffer is pushed onto sMappedBuffers at most once until it is flushed
+    {
+        mMapped = true;
+        sMappedBuffers.push_back(this); // raw, non-owning pointer; the destructor's mMapped branch flushes the list before the object goes away
+    }
+}
+
+void LLVertexBuffer::_unmapBuffer()
+{
     STOP_GLERROR;
+    if (!mMapped)
+    {
+        return;
+    }
+
     struct SortMappedRegion
     {
         bool operator()(const MappedRegion& lhs, const MappedRegion& rhs)
@@ -1339,114 +1447,116 @@ void LLVertexBuffer::unmapBuffer()
         }
     };
 
-    if(sMappingMode == 3)
+    //if (gGLManager.mIsApple)
+    if (0)
     {
-        //STOP_GLERROR;
+        STOP_GLERROR;
         if (mMappedData)
         {
             if (mGLBuffer)
             {
-                glDeleteBuffers(1, &mGLBuffer);
+                delete_buffers(1, &mGLBuffer);
             }
             mGLBuffer = gen_buffer();
             glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
             sGLRenderBuffer = mGLBuffer;
-            glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_DYNAMIC_DRAW);
+            glBufferData(GL_ARRAY_BUFFER, mSize, mMappedData, GL_STATIC_DRAW);
         }
         else if (mGLBuffer != sGLRenderBuffer)
         {
             glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
             sGLRenderBuffer = mGLBuffer;
         }
-        //STOP_GLERROR;
+        STOP_GLERROR;
 
         if (mMappedIndexData)
         {
             if (mGLIndices)
             {
-                glDeleteBuffers(1, &mGLIndices);
+                delete_buffers(1, &mGLIndices);
             }
 
             mGLIndices = gen_buffer();
             glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
             sGLRenderIndices = mGLIndices;
 
-            glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_DYNAMIC_DRAW);
+            glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, GL_STATIC_DRAW);
         }
         else if (mGLIndices != sGLRenderIndices)
         {
             glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
             sGLRenderIndices = mGLIndices;
         }
-        //STOP_GLERROR;
-        return;
+        STOP_GLERROR;
     }
-
-    if (!mMappedVertexRegions.empty())
+    else
     {
-        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex");
-
-        if (sGLRenderBuffer != mGLBuffer)
+        if (!mMappedVertexRegions.empty())
         {
-            glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
-            sGLRenderBuffer = mGLBuffer;
-        }
+            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - vertex");
 
-        U32 start = 0;
-        U32 end = 0;
-
-        std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion());
-
-        for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
-        {
-            const MappedRegion& region = mMappedVertexRegions[i];
-            if (region.mStart == end + 1)
+            if (sGLRenderBuffer != mGLBuffer)
             {
-                end = region.mEnd;
+                glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer);
+                sGLRenderBuffer = mGLBuffer;
             }
-            else
-            {
-                flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
-                start = region.mStart;
-                end = region.mEnd;
-            }
-        }
 
-        flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
-        mMappedVertexRegions.clear();
-    }
+            U32 start = 0;
+            U32 end = 0;
 
-    if (!mMappedIndexRegions.empty())
-    {
-        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index");
+            std::sort(mMappedVertexRegions.begin(), mMappedVertexRegions.end(), SortMappedRegion());
 
-        if (mGLIndices != sGLRenderIndices)
-        {
-            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
-            sGLRenderIndices = mGLIndices;
-        }
-        U32 start = 0;
-        U32 end = 0;
+            for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)
+            {
+                const MappedRegion& region = mMappedVertexRegions[i];
+                if (region.mStart == end + 1)
+                {
+                    end = region.mEnd;
+                }
+                else
+                {
+                    flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
+                    start = region.mStart;
+                    end = region.mEnd;
+                }
+            }
 
-        std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion());
+            flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, mMappedData);
+            mMappedVertexRegions.clear();
+        }
 
-        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
+        if (!mMappedIndexRegions.empty())
         {
-            const MappedRegion& region = mMappedIndexRegions[i];
-            if (region.mStart == end + 1)
+            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("unmapBuffer - index");
+
+            if (mGLIndices != sGLRenderIndices)
             {
-                end = region.mEnd;
+                glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices);
+                sGLRenderIndices = mGLIndices;
             }
-            else
+            U32 start = 0;
+            U32 end = 0;
+
+            std::sort(mMappedIndexRegions.begin(), mMappedIndexRegions.end(), SortMappedRegion());
+
+            for (U32 i = 0; i < mMappedIndexRegions.size(); ++i)
             {
-                flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
-                start = region.mStart;
-                end = region.mEnd;
+                const MappedRegion& region = mMappedIndexRegions[i];
+                if (region.mStart == end + 1)
+                {
+                    end = region.mEnd;
+                }
+                else
+                {
+                    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
+                    start = region.mStart;
+                    end = region.mEnd;
+                }
             }
-        }
 
-        flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
-        mMappedIndexRegions.clear();
+            flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, mMappedIndexData);
+            mMappedIndexRegions.clear();
+        }
     }
 }
 
@@ -1568,12 +1678,12 @@ bool LLVertexBuffer::getClothWeightStrider(LLStrider<LLVector4>& strider, U32 in
 // Set for rendering
 void LLVertexBuffer::setBuffer()
 {
-    if(sMappingMode == 3)
+    STOP_GLERROR;
+
+    if (mMapped)
     {
-        if (!mGLBuffer)
-        {
-            return;
-        }
+        LL_WARNS_ONCE() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL;
+        _unmapBuffer();
     }
 
     // no data may be pending