From cde5d29faf84c5cb7fc1b0d0ff6d03f3b7354c8f Mon Sep 17 00:00:00 2001 From: RunitaiLinden Date: Tue, 10 Sep 2024 18:27:45 -0500 Subject: Profile guided optimizations --- indra/llcommon/llstrider.h | 7 ++++ indra/llrender/llimagegl.cpp | 52 +++++++++++++++++++++----- indra/llrender/llvertexbuffer.cpp | 59 +++++++++++++++++++++++++++++- indra/llrender/llvertexbuffer.h | 12 ++++++ indra/newview/llface.cpp | 77 +++++++++++++++++++++++++-------------- indra/newview/llskinningutil.cpp | 4 +- indra/newview/llviewertexture.cpp | 45 ----------------------- indra/newview/llvoavatar.cpp | 16 +++++--- indra/newview/llvovolume.cpp | 23 ++---------- 9 files changed, 182 insertions(+), 113 deletions(-) (limited to 'indra') diff --git a/indra/llcommon/llstrider.h b/indra/llcommon/llstrider.h index 06cf8d3480..d756aca62b 100644 --- a/indra/llcommon/llstrider.h +++ b/indra/llcommon/llstrider.h @@ -41,6 +41,13 @@ public: LLStrider(Object* first) { mObjectp = first; mSkip = sizeof(Object); } ~LLStrider() { } + const LLStrider& operator=(const LLStrider& rhs) + { + mBytep = rhs.mBytep; + mSkip = rhs.mSkip; + return *this; + } + const LLStrider& operator = (Object *first) { mObjectp = first; return *this;} void setStride (S32 skipBytes) { mSkip = (skipBytes ? skipBytes : sizeof(Object));} diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp index 68c20048ec..67b4ada62f 100644 --- a/indra/llrender/llimagegl.cpp +++ b/indra/llrender/llimagegl.cpp @@ -1045,15 +1045,47 @@ void sub_image_lines(U32 target, S32 miplevel, S32 x_offset, S32 y_offset, S32 w { LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE; + LL_PROFILE_ZONE_NUM(width); + LL_PROFILE_ZONE_NUM(height); + U32 components = LLImageGL::dataFormatComponents(pixformat); U32 type_width = type_width_from_pixtype(pixtype); const U32 line_width = data_width * components * type_width; const U32 y_offset_end = y_offset + height; - for (U32 y_pos = y_offset; y_pos < y_offset_end; ++y_pos) + + if (width == data_width && height % 32 == 0) + { + LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE("subimage - batched lines"); + + // full width, batch multiple lines at a time + // set batch size based on width + U32 batch_size = 32; + + if (width > 1024) + { + batch_size = 8; + } + else if (width > 512) + { + batch_size = 16; + } + + // full width texture, do 32 lines at a time + for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += batch_size) + { + glTexSubImage2D(target, miplevel, x_offset, y_pos, width, batch_size, pixformat, pixtype, src); + src += line_width * batch_size; + } + } + else { - glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src); - src += line_width; + // partial width or strange height + for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += 1) + { + glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src); + src += line_width; + } } } @@ -2139,6 +2171,8 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h) return ; } + LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE; + U32 length = w * h; U32 alphatotal = 0; @@ -2150,15 +2184,15 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h) // this will mid-skew the data (and thus increase the chances of not // being used as a mask) from high-frequency alpha maps which // suffer the worst from aliasing when used as alpha masks. - if (w >= 2 && h >= 2) + if (w >= 4 && h >= 4) { - llassert(w%2 == 0); - llassert(h%2 == 0); + llassert(w%4 == 0); + llassert(h%4 == 0); const GLubyte* rowstart = ((const GLubyte*) data_in) + mAlphaOffset; - for (U32 y = 0; y < h; y+=2) + for (U32 y = 0; y < h; y+=4) { const GLubyte* current = rowstart; - for (U32 x = 0; x < w; x+=2) + for (U32 x = 0; x < w; x+=4) { const U32 s1 = current[0]; alphatotal += s1; @@ -2182,7 +2216,7 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h) } - rowstart += 2 * w * mAlphaStride; + rowstart += 4 * w * mAlphaStride; } length *= 2; // we sampled everything twice, essentially } diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 156e300853..c1f239fc43 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -954,6 +954,25 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask) } } +// list of mapped buffers +// NOTE: must not be LLPointer to avoid breaking non-ref-counted LLVertexBuffer instances +static std::vector sMappedBuffers; + +//static +void LLVertexBuffer::flushBuffers() +{ + LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + // must only be called from main thread + llassert(LLCoros::on_main_thread_main_coro()); + for (auto& buffer : sMappedBuffers) + { + buffer->_unmapBuffer(); + buffer->mMapped = false; + } + + sMappedBuffers.resize(0); +} + //static U32 LLVertexBuffer::calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices) { @@ -997,6 +1016,12 @@ U32 LLVertexBuffer::calcVertexSize(const U32& typemask) //virtual LLVertexBuffer::~LLVertexBuffer() { + if (mMapped) + { // is on the mapped buffer list but doesn't need to be flushed + mMapped = false; + unmapBuffer(); + } + destroyGLBuffer(); destroyGLIndices(); @@ -1198,6 +1223,7 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, U32 start, U32 end) U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 index, S32 count) { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + _mapBuffer(); if (count == -1) { @@ -1233,6 +1259,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count) { LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX; + _mapBuffer(); if (count == -1) { @@ -1289,11 +1316,11 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8 LL_PROFILE_ZONE_NUM(end); LL_PROFILE_ZONE_NUM(end-start); - constexpr U32 block_size = 8192; + constexpr U32 block_size = 65536; for (U32 i = start; i <= end; i += block_size) { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); //LL_PROFILE_GPU_ZONE("glBufferSubData"); U32 tend = llmin(i + block_size, end); U32 size = tend - i + 1; @@ -1304,8 +1331,29 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8 } void LLVertexBuffer::unmapBuffer() +{ + flushBuffers(); +} + +void LLVertexBuffer::_mapBuffer() +{ + // must only be called from main thread + llassert(LLCoros::on_main_thread_main_coro()); + if (!mMapped) + { + mMapped = true; + sMappedBuffers.push_back(this); + } +} + +void LLVertexBuffer::_unmapBuffer() { STOP_GLERROR; + if (!mMapped) + { + return; + } + struct SortMappedRegion { bool operator()(const MappedRegion& lhs, const MappedRegion& rhs) @@ -1549,6 +1597,13 @@ void LLVertexBuffer::setBuffer() return; } #endif + + if (mMapped) + { + LL_WARNS() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL; + _unmapBuffer(); + } + // no data may be pending llassert(mMappedVertexRegions.empty()); llassert(mMappedIndexRegions.empty()); diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index 2a4affdc60..9fe468f89e 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -120,6 +120,9 @@ public: // indexed by the following enum static U32 calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices); + // flush any pending mapped buffers + static void flushBuffers(); + //WARNING -- when updating these enums you MUST // 1 - update LLVertexBuffer::sTypeSize // 2 - update LLVertexBuffer::vb_type_name @@ -190,6 +193,8 @@ public: // map for data access (see also getFooStrider below) U8* mapVertexBuffer(AttributeType type, U32 index, S32 count = -1); U8* mapIndexBuffer(U32 index, S32 count = -1); + + // synonym for flushBuffers void unmapBuffer(); // set for rendering @@ -312,6 +317,13 @@ private: bool allocateBuffer(S32 nverts, S32 nindices, bool create) { return allocateBuffer(nverts, nindices); } + // actually unmap buffer + void _unmapBuffer(); + + // add to set of mapped buffers + void _mapBuffer(); + bool mMapped = false; + public: static U64 getBytesAllocated(); diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index ccfef09b09..a1ec75e34b 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -1741,7 +1741,7 @@ bool LLFace::getGeometryVolume(const LLVolume& volume, { //bump mapped or has material, just do the whole expensive loop LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("getGeometryVolume - texgen default"); - std::vector bump_tc; + LLStrider bump_tc; if (mat && !mat->getNormalID().isNull()) { //writing out normal and specular texture coordinates, not bump offsets @@ -1803,49 +1803,70 @@ bool LLFace::getGeometryVolume(const LLVolume& volume, } const bool do_xform = (xforms & xform_channel) != XFORM_NONE; + // hold onto strider to front of TC array for use later + bump_tc = dst; - for (S32 i = 0; i < num_vertices; i++) { - LLVector2 tc(vf.mTexCoords[i]); - - LLVector4a& norm = vf.mNormals[i]; - - LLVector4a& center = *(vf.mCenter); - - if (texgen != LLTextureEntry::TEX_GEN_DEFAULT) + // NOTE: split TEX_GEN_PLANAR implementation to reduce branchiness of inner loop + // These are per-vertex operations and every little bit counts + if (texgen == LLTextureEntry::TEX_GEN_PLANAR) { - LLVector4a vec = vf.mPositions[i]; + LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - planar"); + for (S32 i = 0; i < num_vertices; i++) + { + LLVector2 tc(vf.mTexCoords[i]); + LLVector4a& norm = vf.mNormals[i]; + LLVector4a& center = *(vf.mCenter); + LLVector4a vec = vf.mPositions[i]; - vec.mul(scalea); + vec.mul(scalea); - if (texgen == LLTextureEntry::TEX_GEN_PLANAR) - { planarProjection(tc, norm, center, vec); - } - } - if (tex_mode && mTextureMatrix) - { - LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); - tmp = tmp * *mTextureMatrix; - tc.mV[0] = tmp.mV[0]; - tc.mV[1] = tmp.mV[1]; + if (tex_mode && mTextureMatrix) + { + LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); + tmp = tmp * *mTextureMatrix; + tc.mV[0] = tmp.mV[0]; + tc.mV[1] = tmp.mV[1]; + } + else if (do_xform) + { + xform(tc, cos_ang, sin_ang, os, ot, ms, mt); + } + + *dst++ = tc; + } } - else if (do_xform) + else { - xform(tc, cos_ang, sin_ang, os, ot, ms, mt); - } + LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - transform"); - *dst++ = tc; - if (do_bump) - { - bump_tc.push_back(tc); + for (S32 i = 0; i < num_vertices; i++) + { + LLVector2 tc(vf.mTexCoords[i]); + + if (tex_mode && mTextureMatrix) + { + LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); + tmp = tmp * *mTextureMatrix; + tc.mV[0] = tmp.mV[0]; + tc.mV[1] = tmp.mV[1]; + } + else if (do_xform) + { + xform(tc, cos_ang, sin_ang, os, ot, ms, mt); + } + + *dst++ = tc; + } } } } if ((!mat && !gltf_mat) && do_bump) { + LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - do bump"); mVertexBuffer->getTexCoord1Strider(tex_coords1, mGeomIndex, mGeomCount); mVObjp->getVolume()->genTangents(face_index); diff --git a/indra/newview/llskinningutil.cpp b/indra/newview/llskinningutil.cpp index 9b4ed4c946..1c92e06700 100644 --- a/indra/newview/llskinningutil.cpp +++ b/indra/newview/llskinningutil.cpp @@ -315,11 +315,9 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar) } } -static LLTrace::BlockTimerStatHandle FTM_FACE_RIGGING_INFO("Face Rigging Info"); - void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *avatar, LLVolumeFace& vol_face) { - LL_RECORD_BLOCK_TIME(FTM_FACE_RIGGING_INFO); + LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR; if (vol_face.mJointRiggingInfoTab.needsUpdate()) { diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp index 681d91c945..9e1cb84bd1 100644 --- a/indra/newview/llviewertexture.cpp +++ b/indra/newview/llviewertexture.cpp @@ -1361,51 +1361,6 @@ void LLViewerFetchedTexture::addToCreateTexture() } else { - LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE; -#if 1 - // - //if mRequestedDiscardLevel > mDesiredDiscardLevel, we assume the required image res keep going up, - //so do not scale down the over qualified image. - //Note: scaling down image is expensensive. Do it only when very necessary. - // - if(mRequestedDiscardLevel <= mDesiredDiscardLevel && !mForceToSaveRawImage) - { - U32 w = mFullWidth >> mRawDiscardLevel; - U32 h = mFullHeight >> mRawDiscardLevel; - - //if big image, do not load extra data - //scale it down to size >= LLViewerTexture::sMinLargeImageSize - if(w * h > LLViewerTexture::sMinLargeImageSize) - { - S32 d_level = llmin(mRequestedDiscardLevel, (S32)mDesiredDiscardLevel) - mRawDiscardLevel; - - if(d_level > 0) - { - S32 i = 0; - while((d_level > 0) && ((w >> i) * (h >> i) > LLViewerTexture::sMinLargeImageSize)) - { - i++; - d_level--; - } - if(i > 0) - { - mRawDiscardLevel += i; - if(mRawDiscardLevel >= getDiscardLevel() && getDiscardLevel() > 0) - { - mNeedsCreateTexture = false; - destroyRawImage(); - return; - } - - { - //make a duplicate in case somebody else is using this raw image - mRawImage = mRawImage->scaled(w >> i, h >> i); - } - } - } - } - } -#endif scheduleCreateTexture(); } return; diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index b04bb99b97..26cb2a573a 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -10684,14 +10684,18 @@ void LLVOAvatar::updateRiggingInfo() std::map curr_rigging_info_key; - // Get current rigging info key - for (LLVOVolume* vol : volumes) { - if (vol->isMesh() && vol->getVolume()) + LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR("update rig info - get key") + + // Get current rigging info key + for (LLVOVolume* vol : volumes) { - const LLUUID& mesh_id = vol->getVolume()->getParams().getSculptID(); - S32 max_lod = llmax(vol->getLOD(), vol->mLastRiggingInfoLOD); - curr_rigging_info_key[mesh_id] = max_lod; + if (vol->isMesh() && vol->getVolume()) + { + const LLUUID& mesh_id = vol->getVolume()->getParams().getSculptID(); + S32 max_lod = llmax(vol->getLOD(), vol->mLastRiggingInfoLOD); + curr_rigging_info_key[mesh_id] = max_lod; + } } } diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 4b63354893..4f48a070e3 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -6028,8 +6028,8 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) group->mBuilt = 1.f; - const U32 MAX_BUFFER_COUNT = 4096; - LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT]; + static std::vector locked_buffer; + locked_buffer.resize(0); U32 buffer_count = 0; @@ -6074,8 +6074,6 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) group->dirtyGeom(); gPipeline.markRebuild(group); } - - buff->unmapBuffer(); } } } @@ -6091,17 +6089,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) { LL_PROFILE_ZONE_NAMED("rebuildMesh - flush"); - for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter) - { - (*iter)->unmapBuffer(); - } - - // don't forget alpha - if(group != NULL && - !group->mVertexBuffer.isNull()) - { - group->mVertexBuffer->unmapBuffer(); - } + LLVertexBuffer::flushBuffers(); } group->clearState(LLSpatialGroup::MESH_DIRTY | LLSpatialGroup::NEW_DRAWINFO); @@ -6783,11 +6771,6 @@ U32 LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace ++face_iter; } - - if (buffer) - { - buffer->unmapBuffer(); - } } group->mBufferMap[mask].clear(); -- cgit v1.2.3