diff options
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 825 | ||||
| -rw-r--r-- | indra/llrender/llvertexbuffer.h | 69 | ||||
| -rw-r--r-- | indra/newview/llviewerdisplay.cpp | 17 | ||||
| -rw-r--r-- | indra/newview/llviewerwindow.cpp | 8 | ||||
| -rw-r--r-- | indra/newview/pipeline.cpp | 17 | 
5 files changed, 446 insertions, 490 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 7b8f85acba..20261dcb8a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -62,6 +62,14 @@ U32 wpo2(U32 i)  	return r;  } +struct CompareMappedRegion +{ +    bool operator()(const LLVertexBuffer::MappedRegion& lhs, const LLVertexBuffer::MappedRegion& rhs) +    { +        return lhs.mStart < rhs.mStart; +    } +}; +  const U32 LL_VBO_BLOCK_SIZE = 2048;  const U32 LL_VBO_POOL_MAX_SEED_SIZE = 256*1024; @@ -81,266 +89,217 @@ U32 vbo_block_index(U32 size)  const U32 LL_VBO_POOL_SEED_COUNT = vbo_block_index(LL_VBO_POOL_MAX_SEED_SIZE) + 1; +#define ENABLE_GL_WORK_QUEUE 0 + +#if ENABLE_GL_WORK_QUEUE + +#define THREAD_COUNT 1  //============================================================================ -//static -LLVBOPool LLVertexBuffer::sStreamVBOPool(GL_STREAM_DRAW, GL_ARRAY_BUFFER); -LLVBOPool LLVertexBuffer::sDynamicVBOPool(GL_DYNAMIC_DRAW, GL_ARRAY_BUFFER); -LLVBOPool LLVertexBuffer::sDynamicCopyVBOPool(GL_DYNAMIC_COPY, GL_ARRAY_BUFFER); -LLVBOPool LLVertexBuffer::sStreamIBOPool(GL_STREAM_DRAW, GL_ELEMENT_ARRAY_BUFFER); -LLVBOPool LLVertexBuffer::sDynamicIBOPool(GL_DYNAMIC_DRAW, GL_ELEMENT_ARRAY_BUFFER); +// High performance WorkQueue for usage in real-time rendering work +class GLWorkQueue +{ +public: +    using Work = std::function<void()>; -U32 LLVBOPool::sBytesPooled = 0; -U32 LLVBOPool::sIndexBytesPooled = 0; -U32 LLVBOPool::sNameIdx = 0; -U32 LLVBOPool::sNamePool[1024]; +    GLWorkQueue(); -std::list<U32> LLVertexBuffer::sAvailableVAOName; -U32 LLVertexBuffer::sCurVAOName = 1; +    void post(const Work& value); -U32 LLVertexBuffer::sAllocatedIndexBytes = 0; -U32 LLVertexBuffer::sIndexCount = 0; +    size_t size(); -U32 LLVertexBuffer::sBindCount = 0; -U32 LLVertexBuffer::sSetCount = 0; -S32 LLVertexBuffer::sCount = 0; -S32 LLVertexBuffer::sGLCount = 0; -S32 LLVertexBuffer::sMappedCount = 0; -bool LLVertexBuffer::sDisableVBOMapping = false; -bool LLVertexBuffer::sEnableVBOs = true; -U32 LLVertexBuffer::sGLRenderBuffer = 0; -U32 LLVertexBuffer::sGLRenderArray = 0; -U32 LLVertexBuffer::sGLRenderIndices = 0; -U32 LLVertexBuffer::sLastMask = 0; -bool LLVertexBuffer::sVBOActive = false; -bool LLVertexBuffer::sIBOActive = false; -U32 LLVertexBuffer::sAllocatedBytes = 0; -U32 LLVertexBuffer::sVertexCount = 0; -bool LLVertexBuffer::sMapped = false; -bool LLVertexBuffer::sUseStreamDraw = true; -bool LLVertexBuffer::sUseVAO = false; -bool LLVertexBuffer::sPreferStreamDraw = false; +    bool done(); -U32 LLVBOPool::genBuffer() -{ -	LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX +    // Get the next element from the queue +    Work pop(); -	if (sNameIdx == 0) -	{ -		glGenBuffers(1024, sNamePool); -		sNameIdx = 1024; -	} +    void runOne(); -	return sNamePool[--sNameIdx]; -} +    bool runPending(); -void LLVBOPool::deleteBuffer(U32 name) -{ -	LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX -	if (gGLManager.mInited) -	{ -		LLVertexBuffer::unbind(); +    void runUntilClose(); -		glBindBuffer(mType, name); -		glBufferData(mType, 0, NULL, mUsage); -		glBindBuffer(mType, 0); +    void close(); -		glDeleteBuffers(1, &name); -	} -} +    bool isClosed(); +    void syncGL(); -LLVBOPool::LLVBOPool(U32 vboUsage, U32 vboType) -: mUsage(vboUsage), mType(vboType), mMissCountDirty(true) -{ -    mFreeList.resize(LL_VBO_POOL_SEED_COUNT); -	mMissCount.resize(LL_VBO_POOL_SEED_COUNT); -	std::fill(mMissCount.begin(), mMissCount.end(), 0); -} +private: +    std::mutex mMutex; +    std::condition_variable mCondition; +    std::queue<Work> mQueue; +    bool mClosed = false; +}; -U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed) +GLWorkQueue::GLWorkQueue()  { -	LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX -	llassert(vbo_block_size(size) == size); -	 -	U8* ret = NULL; -	U32 i = vbo_block_index(size); +} -	if (mFreeList.size() <= i) -	{ -		mFreeList.resize(i+1); -	} +void GLWorkQueue::syncGL() +{ +    /*if (mSync) +    { +        std::lock_guard<std::mutex> lock(mMutex); +        glWaitSync(mSync, 0, GL_TIMEOUT_IGNORED); +        mSync = 0; +    }*/ +} -	if (mFreeList[i].empty() || for_seed) -	{ -		//make a new buffer -		name = genBuffer(); +size_t GLWorkQueue::size() +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    std::lock_guard<std::mutex> lock(mMutex); +    return mQueue.size(); +} -		glBindBuffer(mType, name); +bool GLWorkQueue::done() +{ +    return size() == 0 && isClosed(); +} -		if (!for_seed && i < LL_VBO_POOL_SEED_COUNT) -		{ //record this miss -			mMissCount[i]++;	 -            mMissCountDirty = true;  // signal to ::seedPool() -		} +void GLWorkQueue::post(const GLWorkQueue::Work& value) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    { +        std::lock_guard<std::mutex> lock(mMutex); +        mQueue.push(std::move(value)); +    } -		if (mType == GL_ARRAY_BUFFER) -		{ -			LLVertexBuffer::sAllocatedBytes += size; -		} -		else -		{ -			LLVertexBuffer::sAllocatedIndexBytes += size; -		} +    mCondition.notify_one(); +} -		if (LLVertexBuffer::sDisableVBOMapping || mUsage != GL_DYNAMIC_DRAW) -		{ -			glBufferData(mType, size, 0, mUsage); -			if (mUsage != GL_DYNAMIC_COPY) -			{ //data will be provided by application -				ret = (U8*) ll_aligned_malloc<64>(size); -				if (!ret) -				{ -                    LL_ERRS() -                        << "Failed to allocate " << size << " bytes for LLVBOPool buffer " << name << "." << LL_NEWLINE -                        << "Free list size: " -                        << mFreeList.size()  // this happens if we are out of memory so a solution might be to clear some from freelist -							  << " Allocated Bytes: " << LLVertexBuffer::sAllocatedBytes -                        << " Allocated Index Bytes: " << LLVertexBuffer::sAllocatedIndexBytes << " Pooled Bytes: " << sBytesPooled -                        << " Pooled Index Bytes: " << sIndexBytesPooled << LL_ENDL; -				} -			} -		} -		else -		{ //always use a true hint of static draw when allocating non-client-backed buffers -			glBufferData(mType, size, 0, GL_STATIC_DRAW); -		} +// Get the next element from the queue +GLWorkQueue::Work GLWorkQueue::pop() +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    // Lock the mutex +    { +        std::unique_lock<std::mutex> lock(mMutex); -		glBindBuffer(mType, 0); +        // Wait for a new element to become available or for the queue to close +        { +            mCondition.wait(lock, [=] { return !mQueue.empty() || mClosed; }); +        } +    } -		if (for_seed) -		{ //put into pool for future use -			llassert(mFreeList.size() > i); +    Work ret; -			Record rec; -			rec.mGLName = name; -			rec.mClientData = ret; -	 -			if (mType == GL_ARRAY_BUFFER) -			{ -				sBytesPooled += size; -			} -			else -			{ -				sIndexBytesPooled += size; -			} -			mFreeList[i].push_back(rec); -            mMissCountDirty = true;  // signal to ::seedPool() -		} -	} -	else -	{ -		name = mFreeList[i].front().mGLName; -		ret = mFreeList[i].front().mClientData; +    { +        std::lock_guard<std::mutex> lock(mMutex); -		if (mType == GL_ARRAY_BUFFER) -		{ -			sBytesPooled -= size; -		} -		else -		{ -			sIndexBytesPooled -= size; -		} +        // Get the next element from the queue +        if (mQueue.size() > 0) +        { +            ret = mQueue.front(); +            mQueue.pop(); +        } +        else +        { +            ret = []() {}; +        } +    } -		mFreeList[i].pop_front(); -        mMissCountDirty = true;  // signal to ::seedPool() -	} +    return ret; +} -	return ret; +void GLWorkQueue::runOne() +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    Work w = pop(); +    w(); +    //mSync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);  } -void LLVBOPool::release(U32 name, U8* buffer, U32 size) +void GLWorkQueue::runUntilClose()  { -	llassert(vbo_block_size(size) == size); +    while (!isClosed()) +    { +        runOne(); +    } +} -	deleteBuffer(name); -	ll_aligned_free_fallback((U8*) buffer); +void GLWorkQueue::close() +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    { +        std::lock_guard<std::mutex> lock(mMutex); +        mClosed = true; +    } -	if (mType == GL_ARRAY_BUFFER) -	{ -		LLVertexBuffer::sAllocatedBytes -= size; -	} -	else -	{ -		LLVertexBuffer::sAllocatedIndexBytes -= size; -	} +    mCondition.notify_all();  } -void LLVBOPool::seedPool() +bool GLWorkQueue::isClosed()  { -	LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX -    if (mMissCountDirty) -	{ -	U32 dummy_name = 0; -        U32 size       = LL_VBO_BLOCK_SIZE; - -	for (U32 i = 0; i < LL_VBO_POOL_SEED_COUNT; i++) -	{ -		if (mMissCount[i] > mFreeList[i].size()) -		{  -			S32 count = mMissCount[i] - mFreeList[i].size(); -			for (U32 j = 0; j < count; ++j) -			{ -				allocate(dummy_name, size, true); -			} -		} -            size += LL_VBO_BLOCK_SIZE; -        } -        mMissCountDirty = false; -	} +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    std::lock_guard<std::mutex> lock(mMutex); +    return mClosed;  } -void LLVBOPool::cleanup() +#include "llwindow.h" + +class LLGLWorkerThread : public LLThread  { -	U32 size = LL_VBO_BLOCK_SIZE; +public: +    LLGLWorkerThread(const std::string& name, GLWorkQueue* queue, LLWindow* window) +        : LLThread(name) +    { +        mWindow = window; +        mContext = mWindow->createSharedContext(); +        mQueue = queue; +    } -	for (U32 i = 0; i < mFreeList.size(); ++i) -	{ -		record_list_t& l = mFreeList[i]; +    void run() override +    { +        mWindow->makeContextCurrent(mContext); +        gGL.init(false); +        mQueue->runUntilClose(); +        gGL.shutdown(); +        mWindow->destroySharedContext(mContext); +    } -		while (!l.empty()) -		{ -			Record& r = l.front(); +    GLWorkQueue* mQueue; +    LLWindow* mWindow; +    void* mContext = nullptr; +}; -			deleteBuffer(r.mGLName); -			 -			if (r.mClientData) -			{ -				ll_aligned_free<64>((void*) r.mClientData); -			} -			l.pop_front(); +static LLGLWorkerThread* sVBOThread[THREAD_COUNT]; +static GLWorkQueue* sQueue = nullptr; -			if (mType == GL_ARRAY_BUFFER) -			{ -				sBytesPooled -= size; -				LLVertexBuffer::sAllocatedBytes -= size; -			} -			else -			{ -				sIndexBytesPooled -= size; -				LLVertexBuffer::sAllocatedIndexBytes -= size; -			} -		} +#endif -		size += LL_VBO_BLOCK_SIZE; -	} +//============================================================================ -	//reset miss counts -	std::fill(mMissCount.begin(), mMissCount.end(), 0); -} +//static +std::list<U32> LLVertexBuffer::sAvailableVAOName; +U32 LLVertexBuffer::sCurVAOName = 1; + +U32 LLVertexBuffer::sAllocatedIndexBytes = 0; +U32 LLVertexBuffer::sIndexCount = 0; + +U32 LLVertexBuffer::sBindCount = 0; +U32 LLVertexBuffer::sSetCount = 0; +S32 LLVertexBuffer::sCount = 0; +S32 LLVertexBuffer::sGLCount = 0; +S32 LLVertexBuffer::sMappedCount = 0; +bool LLVertexBuffer::sDisableVBOMapping = false; +bool LLVertexBuffer::sEnableVBOs = true; +U32 LLVertexBuffer::sGLRenderBuffer = 0; +U32 LLVertexBuffer::sGLRenderArray = 0; +U32 LLVertexBuffer::sGLRenderIndices = 0; +U32 LLVertexBuffer::sLastMask = 0; +bool LLVertexBuffer::sVBOActive = false; +bool LLVertexBuffer::sIBOActive = false; +U32 LLVertexBuffer::sAllocatedBytes = 0; +U32 LLVertexBuffer::sVertexCount = 0; +bool LLVertexBuffer::sMapped = false; +bool LLVertexBuffer::sUseStreamDraw = true; +bool LLVertexBuffer::sUseVAO = false; +bool LLVertexBuffer::sPreferStreamDraw = false;  //NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware @@ -420,17 +379,6 @@ void LLVertexBuffer::releaseVAOName(U32 name)  //static -void LLVertexBuffer::seedPools() -{ -	LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX -	sStreamVBOPool.seedPool(); -	sDynamicVBOPool.seedPool(); -	sDynamicCopyVBOPool.seedPool(); -	sStreamIBOPool.seedPool(); -	sDynamicIBOPool.seedPool(); -} - -//static  void LLVertexBuffer::setupClientArrays(U32 data_mask)  {  	if (sLastMask != data_mask) @@ -473,7 +421,7 @@ void LLVertexBuffer::drawArrays(U32 mode, const std::vector<LLVector3>& pos)      }      gGL.end();      gGL.flush(); -		} +}  //static  void LLVertexBuffer::drawElements(U32 mode, const LLVector4a* pos, const LLVector2* tc, S32 num_indices, const U16* indicesp) @@ -704,10 +652,20 @@ void LLVertexBuffer::drawArrays(U32 mode, U32 first, U32 count) const  }  //static -void LLVertexBuffer::initClass(bool use_vbo, bool no_vbo_mapping) +void LLVertexBuffer::initClass(LLWindow* window)  { -    sEnableVBOs = use_vbo; -	sDisableVBOMapping = sEnableVBOs && no_vbo_mapping; +    sEnableVBOs = true; +    sDisableVBOMapping = true; + +#if ENABLE_GL_WORK_QUEUE +    sQueue = new GLWorkQueue(); + +    for (int i = 0; i < THREAD_COUNT; ++i) +    { +        sVBOThread[i] = new LLGLWorkerThread("VBO Worker", sQueue, window); +        sVBOThread[i]->start(); +    } +#endif  }  //static  @@ -743,14 +701,19 @@ void LLVertexBuffer::cleanupClass()  {  	unbind(); -	sStreamIBOPool.cleanup(); -	sDynamicIBOPool.cleanup(); -	sStreamVBOPool.cleanup(); -	sDynamicVBOPool.cleanup(); -	sDynamicCopyVBOPool.cleanup(); - -    llassert(0 == LLVBOPool::sBytesPooled); -    llassert(0 == LLVBOPool::sIndexBytesPooled); +#if ENABLE_GL_WORK_QUEUE +    sQueue->close(); +    for (int i = 0; i < THREAD_COUNT; ++i) +    { +        sVBOThread[i]->shutdown(); +        delete sVBOThread[i]; +        sVBOThread[i] = nullptr; +    } + +    delete sQueue; +    sQueue = nullptr; +#endif +      //llassert(0 == sAllocatedBytes);      //llassert(0 == sAllocatedIndexBytes);  } @@ -781,21 +744,6 @@ S32 LLVertexBuffer::determineUsage(S32 usage)  		ret_usage = GL_STREAM_DRAW;  	} -	if (ret_usage && ret_usage != GL_STREAM_DRAW) -	{ //only stream_draw and dynamic_draw are supported when using VBOs, dynamic draw is the default -		if (ret_usage != GL_DYNAMIC_COPY) -		{ -		    if (sDisableVBOMapping) -		    { //always use stream draw if VBO mapping is disabled -			    ret_usage = GL_STREAM_DRAW; -		    } -		    else -		    { -			    ret_usage = GL_DYNAMIC_DRAW; -		    } -	    } -	} -	  	return ret_usage;  } @@ -848,7 +796,7 @@ S32 LLVertexBuffer::calcOffsets(const U32& typemask, S32* offsets, S32 num_verti  	offsets[TYPE_TEXTURE_INDEX] = offsets[TYPE_VERTEX] + 12; -	return offset+16; +	return offset;  }  //static  @@ -896,74 +844,101 @@ LLVertexBuffer::~LLVertexBuffer()  //---------------------------------------------------------------------------- +// batch glGenBuffers +static GLuint gen_buffer() +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +    constexpr U32 pool_size = 4096; + +    thread_local static GLuint sNamePool[pool_size]; +    thread_local static U32 sIndex = 0; + +    if (sIndex == 0) +    { +        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo"); +        sIndex = pool_size; +        glGenBuffers(pool_size, sNamePool); +    } + +    return sNamePool[--sIndex]; +} + +// batch glDeleteBuffers +static void release_buffer(U32 buff) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +#if 0 + +    constexpr U32 pool_size = 4096; + +    thread_local static GLuint sNamePool[pool_size]; +    thread_local static U32 sIndex = 0; + +    if (sIndex == pool_size) +    { +        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("gen ibo"); +        sIndex = 0; +        glDeleteBuffers(pool_size, sNamePool); +    } + +    sNamePool[sIndex++] = buff; +#else +    glDeleteBuffers(1, &buff); +#endif +} +  void LLVertexBuffer::genBuffer(U32 size)  { -	mSize = vbo_block_size(size); +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; -	if (mUsage == GL_STREAM_DRAW) -	{ -		mMappedData = sStreamVBOPool.allocate(mGLBuffer, mSize); -	} -	else if (mUsage == GL_DYNAMIC_DRAW) -	{ -		mMappedData = sDynamicVBOPool.allocate(mGLBuffer, mSize); -	} -	else -	{ -		mMappedData = sDynamicCopyVBOPool.allocate(mGLBuffer, mSize); -	} -	 -	 -	sGLCount++; +    mSize = size; +    mMappedData = (U8*) ll_aligned_malloc_16(size); +    mGLBuffer = gen_buffer(); + +    glBindBuffer(GL_ARRAY_BUFFER, mGLBuffer); +    glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage); +    glBindBuffer(GL_ARRAY_BUFFER, 0); + +    sGLCount++;  }  void LLVertexBuffer::genIndices(U32 size)  { -	mIndicesSize = vbo_block_size(size); +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + +    mIndicesSize = size; +    mMappedIndexData = (U8*) ll_aligned_malloc_16(size); + +    mGLIndices = gen_buffer(); + +    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mGLIndices); +    glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, nullptr, mUsage); +    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); -	if (mUsage == GL_STREAM_DRAW) -	{ -		mMappedIndexData = sStreamIBOPool.allocate(mGLIndices, mIndicesSize); -	} -	else -	{ -		mMappedIndexData = sDynamicIBOPool.allocate(mGLIndices, mIndicesSize); -	} -	  	sGLCount++;  }  void LLVertexBuffer::releaseBuffer()  { -	if (mUsage == GL_STREAM_DRAW) -	{ -		sStreamVBOPool.release(mGLBuffer, mMappedData, mSize); -	} -	else -	{ -		sDynamicVBOPool.release(mGLBuffer, mMappedData, mSize); -	} -	 -	mGLBuffer = 0; -	mMappedData = NULL; +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +    release_buffer(mGLBuffer); +    mGLBuffer = 0; +    ll_aligned_free_16(mMappedData); +    mMappedData = nullptr; +	  	sGLCount--;  }  void LLVertexBuffer::releaseIndices()  { -	if (mUsage == GL_STREAM_DRAW) -	{ -		sStreamIBOPool.release(mGLIndices, mMappedIndexData, mIndicesSize); -	} -	else -	{ -		sDynamicIBOPool.release(mGLIndices, mMappedIndexData, mIndicesSize); -	} +    LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; +    release_buffer(mGLIndices); +    mGLIndices = 0; + +    ll_aligned_free_16(mMappedIndexData); +    mMappedIndexData = nullptr; -	mGLIndices = 0; -	mMappedIndexData = NULL; -	  	sGLCount--;  } @@ -1183,21 +1158,20 @@ bool LLVertexBuffer::useVBOs() const  //---------------------------------------------------------------------------- -bool expand_region(LLVertexBuffer::MappedRegion& region, S32 index, S32 count) +// if no gap between region and given range exists, expand region to cover given range and return true +// otherwise return false +bool expand_region(LLVertexBuffer::MappedRegion& region, S32 start, S32 end)  { -	S32 end = index+count; -	S32 region_end = region.mIndex+region.mCount; -	if (end < region.mIndex || -		index > region_end) +	if (end < region.mStart || +		start > region.mEnd)  	{ //gap exists, do not merge  		return false;  	} -	S32 new_end = llmax(end, region_end); -	S32 new_index = llmin(index, region.mIndex); -	region.mIndex = new_index; -	region.mCount = new_end-new_index; +    region.mStart = llmin(region.mStart, start); +    region.mEnd = llmax(region.mEnd, end); +  	return true;  } @@ -1215,34 +1189,34 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran  	{  		LL_ERRS() << "LLVertexBuffer::mapVertexBuffer() called on unallocated buffer." << LL_ENDL;  	} -		 -	if (useVBOs()) -	{ -		if (count == -1) -		{ -			count = mNumVerts-index; -		} -		bool mapped = false; -		//see if range is already mapped + +    if (useVBOs()) +    { +        if (count == -1) +        { +            count = mNumVerts - index; +        } + +        S32 start = mOffsets[type] + sTypeSize[type] * index; +        S32 end = start + sTypeSize[type] * count; + +		bool flagged = false; +		// flag region as mapped  		for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)  		{  			MappedRegion& region = mMappedVertexRegions[i]; -			if (region.mType == type) -			{ -				if (expand_region(region, index, count)) -				{ -					mapped = true; -					break; -				} -			} +            if (expand_region(region, start, end)) +            { +                flagged = true; +                break; +            }  		} -		if (!mapped) +		if (!flagged)  		{ -			//not already mapped, map new region -			MappedRegion region(type, index, count); -			mMappedVertexRegions.push_back(region); +			//didn't expand an existing region, make a new one +            mMappedVertexRegions.push_back({ start, end });  		}  		if (mVertexLocked && map_range) @@ -1299,25 +1273,26 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)  			count = mNumIndices-index;  		} -		bool mapped = false; -		//see if range is already mapped -		for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) -		{ -			MappedRegion& region = mMappedIndexRegions[i]; -			if (expand_region(region, index, count)) -			{ -				mapped = true; -				break; -			} -		} +        S32 start = sizeof(U16) * index; +        S32 end = start + sizeof(U16) * count; -		if (!mapped) -		{ -			//not already mapped, map new region -			MappedRegion region(TYPE_INDEX, index, count); -			mMappedIndexRegions.push_back(region); -		} -		 +        bool flagged = false; +        // flag region as mapped +        for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) +        { +            MappedRegion& region = mMappedIndexRegions[i]; +            if (expand_region(region, start, end)) +            { +                flagged = true; +                break; +            } +        } + +        if (!flagged) +        { +            //didn't expand an existing region, make a new one +            mMappedIndexRegions.push_back({ start, end }); +        }  		if (mIndexLocked && map_range)  		{ @@ -1360,6 +1335,27 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)      return mMappedIndexData + sizeof(U16)*index;  } +static void flush_vbo(GLenum target, S32 start, S32 end, void* data) +{ +    if (end != 0) +    { +        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); +        LL_PROFILE_ZONE_NUM(start); +        LL_PROFILE_ZONE_NUM(end); +        LL_PROFILE_ZONE_NUM(end-start); + +        constexpr S32 block_size = 65536; + +        for (S32 i = start; i < end; i += block_size) +        { +            LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); +            LL_PROFILE_GPU_ZONE("glBufferSubData"); +            S32 tend = llmin(i + block_size, end); +            glBufferSubData(target, i, tend - i, (U8*) data + (i-start)); +        } +    } +} +  void LLVertexBuffer::unmapBuffer()  {  	if (!useVBOs()) @@ -1377,37 +1373,31 @@ void LLVertexBuffer::unmapBuffer()  		if (!mMappedVertexRegions.empty())  		{ -			stop_glerror(); +            S32 start = 0; +            S32 end = 0; +  			for (U32 i = 0; i < mMappedVertexRegions.size(); ++i)  			{  				const MappedRegion& region = mMappedVertexRegions[i]; -				S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0; -				S32 length = sTypeSize[region.mType]*region.mCount; -				if (mSize >= length + offset) -				{ -					glBufferSubData(GL_ARRAY_BUFFER, offset, length, (U8*)mMappedData + offset); -				} -				else -				{ -					GLint size = 0; -					glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &size); -					LL_WARNS() << "Attempted to map regions to a buffer that is too small, "  -						<< "mapped size: " << mSize -						<< ", gl buffer size: " << size -						<< ", length: " << length -						<< ", offset: " << offset -						<< LL_ENDL; -				} -				stop_glerror(); +                if (region.mStart == end + 1) +                { +                    end = region.mEnd; +                } +                else +                { +                    flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); +                    start = region.mStart; +                    end = region.mEnd; +                }  			} +            flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); +  			mMappedVertexRegions.clear();  		}  		else  		{ -			stop_glerror(); -			glBufferSubData(GL_ARRAY_BUFFER, 0, getSize(), (U8*) mMappedData); -			stop_glerror(); +            llassert(false); // this shouldn't happen -- a buffer must always be explicitly mapped  		}  		mVertexLocked = false; @@ -1421,36 +1411,31 @@ void LLVertexBuffer::unmapBuffer()  		if (!mMappedIndexRegions.empty())  		{ -			for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) -			{ -				const MappedRegion& region = mMappedIndexRegions[i]; -				S32 offset = region.mIndex >= 0 ? sizeof(U16)*region.mIndex : 0; -				S32 length = sizeof(U16)*region.mCount; -				if (mIndicesSize >= length + offset) -				{ -					glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, offset, length, (U8*) mMappedIndexData+offset); -				} -				else -				{ -					GLint size = 0; -					glGetBufferParameteriv(GL_ELEMENT_ARRAY_BUFFER, GL_BUFFER_SIZE, &size); -					LL_WARNS() << "Attempted to map regions to a buffer that is too small, "  -						<< "mapped size: " << mIndicesSize -						<< ", gl buffer size: " << size -						<< ", length: " << length -						<< ", offset: " << offset -						<< LL_ENDL; -				} -				stop_glerror(); -			} +            S32 start = 0; +            S32 end = 0; + +            for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) +            { +                const MappedRegion& region = mMappedIndexRegions[i]; +                if (region.mStart == end + 1) +                { +                    end = region.mEnd; +                } +                else +                { +                    flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start); +                    start = region.mStart; +                    end = region.mEnd; +                } +            } + +            flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start);  			mMappedIndexRegions.clear();  		}  		else  		{ -			stop_glerror(); -			glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, getIndicesSize(), (U8*) mMappedIndexData); -			stop_glerror(); +            llassert(false); // this shouldn't happen -- a buffer must always be explicitly mapped  		}  		mIndexLocked = false; @@ -1640,11 +1625,53 @@ bool LLVertexBuffer::bindGLIndicesFast()      return false;  } -void LLVertexBuffer::flush() +void LLVertexBuffer::flush(bool discard)  {  	if (useVBOs())  	{ -		unmapBuffer(); +        if (discard) +        { // discard existing VBO data if the buffer must be updated +             +            if (!mMappedVertexRegions.empty()) +            { +                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard vbo"); +                LL_PROFILE_ZONE_NUM(mSize); +                release_buffer(mGLBuffer); +                mGLBuffer = gen_buffer(); +                bindGLBuffer(); +                { +                    LL_PROFILE_GPU_ZONE("glBufferData"); +                    glBufferData(GL_ARRAY_BUFFER, mSize, nullptr, mUsage); + +                    for (int i = 0; i < mSize; i += 65536) +                    { +                        LL_PROFILE_GPU_ZONE("glBufferSubData"); +                        S32 end = llmin(i + 65536, mSize); +                        S32 count = end - i; +                        glBufferSubData(GL_ARRAY_BUFFER, i, count, mMappedData + i); +                    } +                } +                mMappedVertexRegions.clear(); +            } +            if (!mMappedIndexRegions.empty()) +            { +                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("flush discard ibo"); +                LL_PROFILE_ZONE_NUM(mIndicesSize); +                release_buffer(mGLIndices); +                mGLIndices = gen_buffer(); +                bindGLIndices(); +                { +                    LL_PROFILE_GPU_ZONE("glBufferData (ibo)"); +                    glBufferData(GL_ELEMENT_ARRAY_BUFFER, mIndicesSize, mMappedIndexData, mUsage); +                } +                mMappedIndexRegions.clear(); +            } +        } +        else +        { +            unmapBuffer(); +        } +  	}  } @@ -2045,10 +2072,4 @@ void LLVertexBuffer::setupVertexBufferFast(U32 data_mask)      }  	} -LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count) -: mType(type), mIndex(index), mCount(count) -{  -	mEnd = mIndex+mCount;	 -}	 - diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index bb7460fb2a..74b951884d 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -51,66 +51,15 @@  //============================================================================ -// gl name pools for dynamic and streaming buffers -class LLVBOPool -{ -public: -	static U32 sBytesPooled; -	static U32 sIndexBytesPooled; -	 -	LLVBOPool(U32 vboUsage, U32 vboType); -		 -	const U32 mUsage; -	const U32 mType; - -	//size MUST be a power of 2 -	U8* allocate(U32& name, U32 size, bool for_seed = false); -	 -	//size MUST be the size provided to allocate that returned the given name -	void release(U32 name, U8* buffer, U32 size); -	 -	//batch allocate buffers to be provided to the application on demand -	void seedPool(); - -	//destroy all records in mFreeList -	void cleanup(); - -	U32 genBuffer(); -	void deleteBuffer(U32 name); - -	class Record -	{ -	public: -		U32 mGLName; -		U8* mClientData; -	}; - -	typedef std::list<Record> record_list_t; -	std::vector<record_list_t> mFreeList; -	std::vector<U32> mMissCount; -    bool mMissCountDirty;   // flag any changes to mFreeList or mMissCount - -	//used to avoid calling glGenBuffers for every VBO creation -	static U32 sNamePool[1024]; -	static U32 sNameIdx; -}; - - -//============================================================================  // base class   class LLPrivateMemoryPool;  class LLVertexBuffer : public LLRefCount  {  public: -	class MappedRegion +	struct MappedRegion  	{ -	public: -		S32 mType; -		S32 mIndex; -		S32 mCount; -		S32 mEnd; -		 -		MappedRegion(S32 type, S32 index, S32 count); +        S32 mStart; +        S32 mEnd;  	};  	LLVertexBuffer(const LLVertexBuffer& rhs) @@ -125,12 +74,6 @@ public:  		return *this;  	} -	static LLVBOPool sStreamVBOPool; -	static LLVBOPool sDynamicVBOPool; -	static LLVBOPool sDynamicCopyVBOPool; -	static LLVBOPool sStreamIBOPool; -	static LLVBOPool sDynamicIBOPool; -  	static std::list<U32> sAvailableVAOName;  	static U32 sCurVAOName; @@ -138,12 +81,10 @@ public:  	static bool sUseVAO;  	static bool	sPreferStreamDraw; -	static void seedPools(); -  	static U32 getVAOName();  	static void releaseVAOName(U32 name); -	static void initClass(bool use_vbo, bool no_vbo_mapping); +	static void initClass(LLWindow* window);  	static void cleanupClass();  	static void setupClientArrays(U32 data_mask);  	static void drawArrays(U32 mode, const std::vector<LLVector3>& pos); @@ -240,7 +181,7 @@ public:  	virtual void	setBuffer(U32 data_mask); 	// calls  setupVertexBuffer() if data_mask is not 0      void	setBufferFast(U32 data_mask); 	// calls setupVertexBufferFast(), assumes data_mask is not 0 among other assumptions -	void flush(); //flush pending data to GL memory +    void flush(bool discard = false); //flush pending data to GL memory, if discard is true, discard previous VBO  	// allocate buffer  	bool	allocateBuffer(S32 nverts, S32 nindices, bool create);  	virtual bool resizeBuffer(S32 newnverts, S32 newnindices); diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index 01fca47184..c6d2b476db 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -710,12 +710,6 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)  			if (!for_snapshot)  			{ -				if (gFrameCount > 1) -				{ //for some reason, ATI 4800 series will error out if you  -				  //try to generate a shadow before the first frame is through -					gPipeline.generateSunShadow(*LLViewerCamera::getInstance()); -				} -  				LLVertexBuffer::unbind();  				LLGLState::checkStates(); @@ -936,8 +930,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)  			else  			{  				gPipeline.renderGeom(*LLViewerCamera::getInstance(), TRUE); -			} -			 +			}			  			gGL.setColorMask(true, true);  			//store this frame's modelview matrix for use @@ -967,6 +960,14 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)          LLRenderTarget &rt = (gPipeline.sRenderDeferred ? gPipeline.mRT->deferredScreen : gPipeline.mRT->screen);          rt.flush(); + +        if (gFrameCount > 1 && !for_snapshot) +        { //for some reason, ATI 4800 series will error out if you  +          //try to generate a shadow before the first frame is through +            gPipeline.generateSunShadow(*LLViewerCamera::getInstance()); +        } + +          if (LLPipeline::sRenderDeferred)          {  			gPipeline.renderDeferredLighting(); diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp index bc4f00bd3f..5848cbfd9d 100644 --- a/indra/newview/llviewerwindow.cpp +++ b/indra/newview/llviewerwindow.cpp @@ -658,12 +658,6 @@ public:  			} -			addText(xpos, ypos, llformat("%d MB Index Data (%d MB Pooled, %d KIndices)", LLVertexBuffer::sAllocatedIndexBytes/(1024*1024), LLVBOPool::sIndexBytesPooled/(1024*1024), LLVertexBuffer::sIndexCount/1024)); -			ypos += y_inc; - -			addText(xpos, ypos, llformat("%d MB Vertex Data (%d MB Pooled, %d KVerts)", LLVertexBuffer::sAllocatedBytes/(1024*1024), LLVBOPool::sBytesPooled/(1024*1024), LLVertexBuffer::sVertexCount/1024)); -			ypos += y_inc; -  			addText(xpos, ypos, llformat("%d Vertex Buffers", LLVertexBuffer::sGLCount));  			ypos += y_inc; @@ -1974,7 +1968,7 @@ LLViewerWindow::LLViewerWindow(const Params& p)  	LL_DEBUGS("Window") << "Loading feature tables." << LL_ENDL;  	// Initialize OpenGL Renderer -	LLVertexBuffer::initClass(gSavedSettings.getBOOL("RenderVBOEnable"), gSavedSettings.getBOOL("RenderVBOMappingDisable")); +	LLVertexBuffer::initClass(mWindow);  	LL_INFOS("RenderInit") << "LLVertexBuffer initialization done." << LL_ENDL ;  	gGL.init(true); diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 5e585852f4..d56b31a372 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -2338,6 +2338,7 @@ static LLTrace::BlockTimerStatHandle FTM_CULL("Object Culling");  void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result)  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE; //LL_RECORD_BLOCK_TIME(FTM_CULL); +    LL_PROFILE_GPU_ZONE("updateCull"); // should always be zero GPU time, but drop a timer to flush stuff out      bool water_clip = !sRenderTransparentWater; @@ -2649,10 +2650,6 @@ void LLPipeline::updateGL()  			LLGLUpdate::sGLQ.pop_front();  		}  	} - -	{ //seed VBO Pools -		LLVertexBuffer::seedPools(); -	}  }  void LLPipeline::clearRebuildGroups() @@ -3229,6 +3226,7 @@ void LLPipeline::markRebuild(LLDrawable *drawablep, LLDrawable::EDrawableFlags f  void LLPipeline::stateSort(LLCamera& camera, LLCullResult &result)  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE; +    LL_PROFILE_GPU_ZONE("stateSort");  	if (hasAnyRenderType(LLPipeline::RENDER_TYPE_AVATAR,  					  LLPipeline::RENDER_TYPE_CONTROL_AV, @@ -3837,6 +3835,7 @@ void LLPipeline::postSort(LLCamera &camera)      // flush particle VB      if (LLVOPartGroup::sVB)      { +        LL_PROFILE_GPU_ZONE("flush particle vb");          LLVOPartGroup::sVB->flush();      }      else @@ -3860,9 +3859,12 @@ void LLPipeline::postSort(LLCamera &camera)      }*/      // pack vertex buffers for groups that chose to delay their updates -    for (LLSpatialGroup::sg_vector_t::iterator iter = mMeshDirtyGroup.begin(); iter != mMeshDirtyGroup.end(); ++iter)      { -        (*iter)->rebuildMesh(); +        LL_PROFILE_GPU_ZONE("rebuildMesh"); +        for (LLSpatialGroup::sg_vector_t::iterator iter = mMeshDirtyGroup.begin(); iter != mMeshDirtyGroup.end(); ++iter) +        { +            (*iter)->rebuildMesh(); +        }      }      /*if (use_transform_feedback) @@ -7259,8 +7261,6 @@ void LLPipeline::doResetVertexBuffers(bool forced)  	LLVOPartGroup::destroyGL();      gGL.resetVertexBuffer(); -	SUBSYSTEM_CLEANUP(LLVertexBuffer); -	  	if (LLVertexBuffer::sGLCount != 0)  	{  		LL_WARNS() << "VBO wipe failed -- " << LLVertexBuffer::sGLCount << " buffers remaining." << LL_ENDL; @@ -7280,7 +7280,6 @@ void LLPipeline::doResetVertexBuffers(bool forced)  	sNoAlpha = gSavedSettings.getBOOL("RenderNoAlpha");  	LLPipeline::sTextureBindTest = gSavedSettings.getBOOL("RenderDebugTextureBind"); -	LLVertexBuffer::initClass(LLVertexBuffer::sEnableVBOs, LLVertexBuffer::sDisableVBOMapping);      gGL.initVertexBuffer();      mDeferredVB = new LLVertexBuffer(DEFERRED_VB_MASK, 0);  | 
