diff options
| author | Dave Parks <davep@lindenlab.com> | 2023-01-11 16:20:24 -0600 | 
|---|---|---|
| committer | Dave Parks <davep@lindenlab.com> | 2023-01-11 16:20:24 -0600 | 
| commit | b9a4d81d5140b34199a6582b1189473b6a2e72fb (patch) | |
| tree | 8a65c4be790f3060fa1443aeb9080509f1f6745c | |
| parent | b4f53334141b179bdb1762636ce313f14d9e2b10 (diff) | |
SL-18869 Optimizations -- Revive LLVBOPool and fix silly typo in renderShadowSimple
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 326 | ||||
| -rw-r--r-- | indra/newview/pipeline.cpp | 7 | 
2 files changed, 211 insertions, 122 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index e1352691d4..c1ffe6957a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -273,7 +273,189 @@ static GLWorkQueue* sQueue = nullptr;  #endif  //============================================================================ +// Pool of reusable VertexBuffer state +// batch calls to glGenBuffers +static GLuint gen_buffer() +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +    constexpr U32 pool_size = 4096; + +    thread_local static GLuint sNamePool[pool_size]; +    thread_local static U32 sIndex = 0; + +    if (sIndex == 0) +    { +        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen buffer"); +        sIndex = pool_size; +        glGenBuffers(pool_size, sNamePool); +    } + +    return sNamePool[--sIndex]; +} + +class LLVBOPool +{ +public: +    typedef std::chrono::steady_clock::time_point Time; + +    struct Entry +    { +        U8* mData; +        GLuint mGLName; +        Time mAge; +    }; + +    ~LLVBOPool() +    { +        clear(); +    } + +    typedef std::unordered_map<U32, std::list<Entry>> Pool; + +    Pool mVBOPool; +    Pool mIBOPool; + +    U32 mMissCount = 0; + +    void allocate(GLenum type, U32 size, GLuint& name, U8*& data) +    { +        LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + +        size = nhpo2(size); + +        auto& pool = type == GL_ELEMENT_ARRAY_BUFFER ? mIBOPool : mVBOPool; + +        auto& iter = pool.find(size); +        if (iter == pool.end()) +        { // cache miss, allocate a new buffer +            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vbo pool miss"); +            LL_PROFILE_GPU_ZONE("vbo alloc"); + +            ++mMissCount; +            if (mMissCount > 1024) +            { //clean cache on every 1024 misses +                mMissCount = 0; +                clean(); +            } + +            name = gen_buffer(); +            glBindBuffer(type, name); +            glBufferData(type, size, nullptr, GL_DYNAMIC_DRAW); +            if (type == GL_ELEMENT_ARRAY_BUFFER) +            { +                LLVertexBuffer::sGLRenderIndices = name; +            } +            else +            { +                LLVertexBuffer::sGLRenderBuffer = name; +            } + +            data = (U8*)ll_aligned_malloc_16(size); +        } +        else +        { +            std::list<Entry>& entries = iter->second; +            Entry& entry = entries.back(); +            name = entry.mGLName; +            data = entry.mData; + +            entries.pop_back(); +            if (entries.empty()) +            { +                pool.erase(iter); +            } +        } +    } + +    void free(GLenum type, U32 size, GLuint name, U8* data) +    { +        size = nhpo2(size); + +        auto& pool = type == GL_ELEMENT_ARRAY_BUFFER ? mIBOPool : mVBOPool; + +        auto& iter = pool.find(size); + +        if (iter == pool.end()) +        { +            std::list<Entry> newlist; +            newlist.push_front({ data, name, std::chrono::steady_clock::now() }); +            pool[size] = newlist; +        } +        else +        { +            iter->second.push_front({ data, name, std::chrono::steady_clock::now() }); +        } +    } + +    void clean() +    { +        LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + +        std::unordered_map<U32, std::list<Entry>>* pools[] = { &mVBOPool, &mIBOPool }; + +        using namespace std::chrono_literals; + +        Time cutoff = std::chrono::steady_clock::now() - 5s; + +        for (auto* pool : pools) +        { +            for (Pool::iterator iter = pool->begin(); iter != pool->end(); ) +            { +                auto& entries = iter->second; + +                while (!entries.empty() && entries.back().mAge < cutoff) +                { +                    LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vbo cache timeout"); +                    auto& entry = entries.back(); +                    ll_aligned_free_16(entry.mData); +                    glDeleteBuffers(1, &entry.mGLName); +                    entries.pop_back(); +                } + +                if (entries.empty()) +                { +                    iter = pool->erase(iter); +                } +                else +                { +                    ++iter; +                } +            } +        } +    } + +    void clear() +    { +        for (auto& entries : mIBOPool) +        { +            for (auto& entry : entries.second) +            { +                ll_aligned_free_16(entry.mData); +                glDeleteBuffers(1, &entry.mGLName); +            } +        } + +        for (auto& entries : mVBOPool) +        { +            for (auto& entry : entries.second) +            { +                ll_aligned_free_16(entry.mData); +                glDeleteBuffers(1, &entry.mGLName); +            } +        } + +        mIBOPool.clear(); +        mVBOPool.clear(); +    } + + +}; + +static LLVBOPool* sVBOPool = nullptr; + +//============================================================================ +//   //static  std::list<U32> LLVertexBuffer::sAvailableVAOName;  U32 LLVertexBuffer::sCurVAOName = 1; @@ -643,6 +825,8 @@ void LLVertexBuffer::initClass(LLWindow* window)      sEnableVBOs = true;      sDisableVBOMapping = true; +    sVBOPool = new LLVBOPool(); +  #if ENABLE_GL_WORK_QUEUE      sQueue = new GLWorkQueue(); @@ -687,6 +871,9 @@ void LLVertexBuffer::cleanupClass()  {  	unbind(); +    delete sVBOPool; +    sVBOPool = nullptr; +  #if ENABLE_GL_WORK_QUEUE      sQueue->close();      for (int i = 0; i < THREAD_COUNT; ++i) @@ -720,15 +907,8 @@ S32 LLVertexBuffer::determineUsage(S32 usage)  		ret_usage = 0;  	} -	if (ret_usage == GL_DYNAMIC_DRAW && sPreferStreamDraw) -	{ -		ret_usage = GL_STREAM_DRAW; -	} -	 -	if (ret_usage == 0 && LLRender::sGLCoreProfile) -	{ //MUST use VBOs for all rendering -		ret_usage = GL_STREAM_DRAW; -	} +    // dynamic draw or nothing +    ret_usage = GL_DYNAMIC_DRAW;  	return ret_usage;  } @@ -830,62 +1010,17 @@ LLVertexBuffer::~LLVertexBuffer()  //---------------------------------------------------------------------------- -// batch glGenBuffers -static GLuint gen_buffer() -{ -    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; -    constexpr U32 pool_size = 4096; - -    thread_local static GLuint sNamePool[pool_size]; -    thread_local static U32 sIndex = 0; - -    if (sIndex == 0) -    { -        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo"); -        sIndex = pool_size; -        glGenBuffers(pool_size, sNamePool); -    } - -    return sNamePool[--sIndex]; -} - -// batch glDeleteBuffers -static void release_buffer(U32 buff) -{ -    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; -#if 0 - -    constexpr U32 pool_size = 4096; - -    thread_local static GLuint sNamePool[pool_size]; -    thread_local static U32 sIndex = 0; - -    if (sIndex == pool_size) -    { -        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo"); -        sIndex = 0; -        glDeleteBuffers(pool_size, sNamePool); -    } - -    sNamePool[sIndex++] = buff; -#else -    glDeleteBuffers(1, &buff); -#endif -} -  void LLVertexBuffer::genBuffer(U32 size)  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;      mSize = size; -    mMappedData = (U8*) ll_aligned_malloc_16(size); -    mGLBuffer = gen_buffer(); - -    glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); -    glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage); -    glBindBuffer(GL_ARRAY_BUFFER, 0); -    sGLRenderBuffer = 0; +    if (sVBOPool) +    { +        sVBOPool->allocate(GL_ARRAY_BUFFER, size, mGLBuffer, mMappedData); +    } +          sGLCount++;  } @@ -894,25 +1029,24 @@ void LLVertexBuffer::genIndices(U32 size)      LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;      mIndicesSize = size; -    mMappedIndexData = (U8*) ll_aligned_malloc_16(size); - -    mGLIndices = gen_buffer(); - -    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); -    glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, nullptr, mUsage); -    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); -    sGLRenderIndices = 0; +    if (sVBOPool) +    { +        sVBOPool->allocate(GL_ELEMENT_ARRAY_BUFFER, size, mGLIndices, mMappedIndexData); +    }  	sGLCount++;  }  void LLVertexBuffer::releaseBuffer()  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; -    release_buffer(mGLBuffer); -    mGLBuffer = 0; -    ll_aligned_free_16(mMappedData); +    if (sVBOPool) +    { +        sVBOPool->free(GL_ARRAY_BUFFER, mSize, mGLBuffer, mMappedData); +    } + +    mGLBuffer = 0;      mMappedData = nullptr;  	sGLCount--; @@ -921,10 +1055,12 @@ void LLVertexBuffer::releaseBuffer()  void LLVertexBuffer::releaseIndices()  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; -    release_buffer(mGLIndices); -    mGLIndices = 0; +     +    if (sVBOPool) +    { +        sVBOPool->free(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mGLIndices, mMappedIndexData); +    } -    ll_aligned_free_16(mMappedIndexData);      mMappedIndexData = nullptr;  	sGLCount--; @@ -1604,49 +1740,7 @@ void LLVertexBuffer::flush(bool discard)  {  	if (useVBOs())  	{ -        if (discard) -        { // discard existing VBO data if the buffer must be updated -             -            if (!mMappedVertexRegions.empty()) -            { -                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard vbo"); -                LL_PROFILE_ZONE_NUM(mSize); -                release_buffer(mGLBuffer); -                mGLBuffer = gen_buffer(); -                bindGLBuffer(); -                { -                    LL_PROFILE_GPU_ZONE("glBufferData"); -                    glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage); - -                    for (int i = 0; i < mSize; i += 65536) -                    { -                        LL_PROFILE_GPU_ZONE("glBufferSubData"); -                        S32 end = llmin(i + 65536, mSize); -                        S32 count = end - i; -                        glBufferSubData(GL_ARRAY_BUFFER, i, count, mMappedData + i); -                    } -                } -                mMappedVertexRegions.clear(); -            } -            if (!mMappedIndexRegions.empty()) -            { -                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard ibo"); -                LL_PROFILE_ZONE_NUM(mIndicesSize); -                release_buffer(mGLIndices); -                mGLIndices = gen_buffer(); -                bindGLIndices(); -                { -                    LL_PROFILE_GPU_ZONE("glBufferData (ibo)"); -                    glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, mUsage); -                } -                mMappedIndexRegions.clear(); -            } -        } -        else -        { -            unmapBuffer(); -        } - +        unmapBuffer();  	}  } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 66083470e2..cedbe4d117 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7328,7 +7328,7 @@ void LLPipeline::renderShadowSimple(U32 type)              mSimplePool->applyModelMatrix(params);              vb->setBufferFast(LLVertexBuffer::MAP_VERTEX);              vb->drawRangeFast(LLRender::TRIANGLES, 0, vb->getNumVerts()-1, vb->getNumIndices(), 0); -            vb = last_vb; +            last_vb = vb;          }      }      gGL.loadMatrix(gGLModelView); @@ -9538,11 +9538,6 @@ void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera              }              else              { -                //{  sort should not be necessary because each entry in sCull should already  -                // be sorted by vertex buffer -                //    LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("sort shadow simple"); -                //    std::sort(sCull->beginRenderMap(type), sCull->endRenderMap(type), CompareVertexBuffer()); -                //}                  renderShadowSimple(type);              }          }  | 
