author    cosmic-linden <111533034+cosmic-linden@users.noreply.github.com>  2024-09-10 17:38:32 -0700
committer GitHub <noreply@github.com>  2024-09-10 17:38:32 -0700
commit    f90712b9837957f2a0d11386c6bf48e9a48ff745 (patch)
tree      df1dbaf2d2024bed5cd4dd0cd8a80d1ce3f71e2c
parent    13e74a32871c7c8f7c7556c6fc08150fc6f27876 (diff)
parent    cde5d29faf84c5cb7fc1b0d0ff6d03f3b7354c8f (diff)
Merge pull request #2544 from secondlife/davep-profile-session-9/10
Profile guided optimizations
 indra/llcommon/llstrider.h        |  7
 indra/llrender/llimagegl.cpp      | 52
 indra/llrender/llvertexbuffer.cpp | 59
 indra/llrender/llvertexbuffer.h   | 12
 indra/newview/llface.cpp          | 77
 indra/newview/llskinningutil.cpp  |  4
 indra/newview/llviewertexture.cpp | 45
 indra/newview/llvoavatar.cpp      | 16
 indra/newview/llvovolume.cpp      | 23
 9 files changed, 182 insertions(+), 113 deletions(-)
diff --git a/indra/llcommon/llstrider.h b/indra/llcommon/llstrider.h
index 06cf8d3480..d756aca62b 100644
--- a/indra/llcommon/llstrider.h
+++ b/indra/llcommon/llstrider.h
@@ -41,6 +41,13 @@ public:
LLStrider(Object* first) { mObjectp = first; mSkip = sizeof(Object); }
~LLStrider() { }
+ const LLStrider<Object>& operator=(const LLStrider<Object>& rhs)
+ {
+ mBytep = rhs.mBytep;
+ mSkip = rhs.mSkip;
+ return *this;
+ }
+
const LLStrider<Object>& operator = (Object *first) { mObjectp = first; return *this;}
void setStride (S32 skipBytes) { mSkip = (skipBytes ? skipBytes : sizeof(Object));}
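
The new copy-assignment operator copies both the byte cursor and the stride, whereas the existing pointer assignment only moves the cursor. That distinction matters for the llface.cpp change below, where `bump_tc = dst` snapshots a strider mid-iteration. A minimal sketch (a simplified stand-in, not the real LLStrider) of the difference:

    // Simplified stand-in for LLStrider, showing why strider-to-strider
    // assignment must copy the stride while raw-pointer assignment leaves
    // it alone.
    #include <cstdio>

    template <class Object>
    class Strider
    {
        union
        {
            Object* mObjectp;
            unsigned char* mBytep;
        };
        unsigned int mSkip = sizeof(Object);

    public:
        Strider() : mObjectp(nullptr) {}

        // mirrors the new operator=: copy cursor and stride together
        const Strider& operator=(const Strider& rhs)
        {
            mBytep = rhs.mBytep;
            mSkip = rhs.mSkip;
            return *this;
        }

        // mirrors the existing operator=: move the cursor only
        const Strider& operator=(Object* first) { mObjectp = first; return *this; }

        void setStride(unsigned int bytes) { mSkip = bytes ? bytes : sizeof(Object); }
        Object& operator*() { return *mObjectp; }
        Strider& operator++() { mBytep += mSkip; return *this; }
    };

    int main()
    {
        float interleaved[8] = { 1, 9, 2, 9, 3, 9, 4, 9 }; // value, padding, ...
        Strider<float> src;
        src = interleaved;
        src.setStride(2 * sizeof(float));

        Strider<float> snapshot;
        snapshot = src;                 // keeps the 8-byte stride
        ++snapshot;
        std::printf("%f\n", *snapshot); // 2.000000, not the padding value
        return 0;
    }
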
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 68c20048ec..67b4ada62f 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -1045,15 +1045,47 @@ void sub_image_lines(U32 target, S32 miplevel, S32 x_offset, S32 y_offset, S32 w
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
+ LL_PROFILE_ZONE_NUM(width);
+ LL_PROFILE_ZONE_NUM(height);
+
U32 components = LLImageGL::dataFormatComponents(pixformat);
U32 type_width = type_width_from_pixtype(pixtype);
const U32 line_width = data_width * components * type_width;
const U32 y_offset_end = y_offset + height;
- for (U32 y_pos = y_offset; y_pos < y_offset_end; ++y_pos)
+
+ if (width == data_width && height % 32 == 0)
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE("subimage - batched lines");
+
+ // full width, batch multiple lines at a time
+ // set batch size based on width
+ U32 batch_size = 32;
+
+ if (width > 1024)
+ {
+ batch_size = 8;
+ }
+ else if (width > 512)
+ {
+ batch_size = 16;
+ }
+
+ // full-width texture: upload batch_size lines per call
+ for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += batch_size)
+ {
+ glTexSubImage2D(target, miplevel, x_offset, y_pos, width, batch_size, pixformat, pixtype, src);
+ src += line_width * batch_size;
+ }
+ }
+ else
{
- glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src);
- src += line_width;
+ // partial width or strange height
+ for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += 1)
+ {
+ glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src);
+ src += line_width;
+ }
}
}
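
The hunk above amortizes driver-call overhead: when the upload spans the full source row width, rows are contiguous in memory, so 8–32 lines can be handed to GL per call instead of one call per line. A self-contained sketch of the same strategy (illustrative names, not the viewer's; requires a current GL context):

    #include <GL/gl.h>

    void upload_rows(GLenum target, GLint mip, GLint x, GLint y,
                     GLsizei width, GLsizei height, GLsizei data_width,
                     GLenum format, GLenum type,
                     const GLubyte* src, GLsizei bytes_per_row)
    {
        // height % 32 == 0 guarantees the 8/16/32 batches divide evenly
        if (width == data_width && height % 32 == 0)
        {
            // wider rows get smaller batches to bound per-call transfer size
            GLsizei batch = width > 1024 ? 8 : (width > 512 ? 16 : 32);
            for (GLsizei row = 0; row < height; row += batch)
            {
                glTexSubImage2D(target, mip, x, y + row, width, batch,
                                format, type, src);
                src += bytes_per_row * batch;
            }
        }
        else
        {
            // partial width: rows are not contiguous in src
            for (GLsizei row = 0; row < height; ++row)
            {
                glTexSubImage2D(target, mip, x, y + row, width, 1,
                                format, type, src);
                src += bytes_per_row;
            }
        }
    }
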
@@ -2139,6 +2171,8 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
return ;
}
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
+
U32 length = w * h;
U32 alphatotal = 0;
@@ -2150,15 +2184,15 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
// this will mid-skew the data (and thus increase the chances of not
// being used as a mask) from high-frequency alpha maps which
// suffer the worst from aliasing when used as alpha masks.
- if (w >= 2 && h >= 2)
+ if (w >= 4 && h >= 4)
{
- llassert(w%2 == 0);
- llassert(h%2 == 0);
+ llassert(w%4 == 0);
+ llassert(h%4 == 0);
const GLubyte* rowstart = ((const GLubyte*) data_in) + mAlphaOffset;
- for (U32 y = 0; y < h; y+=2)
+ for (U32 y = 0; y < h; y+=4)
{
const GLubyte* current = rowstart;
- for (U32 x = 0; x < w; x+=2)
+ for (U32 x = 0; x < w; x+=4)
{
const U32 s1 = current[0];
alphatotal += s1;
@@ -2182,7 +2216,7 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
}
- rowstart += 2 * w * mAlphaStride;
+ rowstart += 4 * w * mAlphaStride;
}
length *= 2; // we sampled everything twice, essentially
}
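
analyzeAlpha decides whether a texture's alpha channel can be treated as a binary mask, so the statistic only needs to be approximately right; stepping every fourth row and column instead of every second cuts the work to roughly a quarter. The coarser sampling in isolation (simplified; the real loop accumulates several samples and a histogram per step):

    #include <cstdint>

    uint32_t sample_alpha_total(const uint8_t* alpha,
                                uint32_t w, uint32_t h, uint32_t stride)
    {
        uint32_t total = 0;
        if (w >= 4 && h >= 4) // was w >= 2 && h >= 2
        {
            const uint8_t* rowstart = alpha;
            for (uint32_t y = 0; y < h; y += 4)     // was y += 2
            {
                const uint8_t* current = rowstart;
                for (uint32_t x = 0; x < w; x += 4) // was x += 2
                {
                    total += current[0];
                    current += 4 * stride;
                }
                rowstart += 4 * w * stride;
            }
        }
        return total;
    }
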
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 156e300853..c1f239fc43 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -954,6 +954,25 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask)
}
}
+// list of mapped buffers
+// NOTE: must not be LLPointer<LLVertexBuffer> to avoid breaking non-ref-counted LLVertexBuffer instances
+static std::vector<LLVertexBuffer*> sMappedBuffers;
+
+//static
+void LLVertexBuffer::flushBuffers()
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ // must only be called from main thread
+ llassert(LLCoros::on_main_thread_main_coro());
+ for (auto& buffer : sMappedBuffers)
+ {
+ buffer->_unmapBuffer();
+ buffer->mMapped = false;
+ }
+
+ sMappedBuffers.resize(0);
+}
+
//static
U32 LLVertexBuffer::calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices)
{
@@ -997,6 +1016,12 @@ U32 LLVertexBuffer::calcVertexSize(const U32& typemask)
//virtual
LLVertexBuffer::~LLVertexBuffer()
{
+ if (mMapped)
+ { // is on the mapped buffer list but doesn't need to be flushed
+ mMapped = false;
+ unmapBuffer();
+ }
+
destroyGLBuffer();
destroyGLIndices();
@@ -1198,6 +1223,7 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, U32 start, U32 end)
U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 index, S32 count)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ _mapBuffer();
if (count == -1)
{
@@ -1233,6 +1259,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ _mapBuffer();
if (count == -1)
{
@@ -1289,11 +1316,11 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8
LL_PROFILE_ZONE_NUM(end);
LL_PROFILE_ZONE_NUM(end-start);
- constexpr U32 block_size = 8192;
+ constexpr U32 block_size = 65536;
for (U32 i = start; i <= end; i += block_size)
{
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+ //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
//LL_PROFILE_GPU_ZONE("glBufferSubData");
U32 tend = llmin(i + block_size, end);
U32 size = tend - i + 1;
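
Raising block_size from 8 KB to 64 KB means an eighth as many glBufferSubData calls for large uploads, while still chunking so a single huge transfer cannot monopolize the driver. The chunking logic in isolation (the driver entry point is stubbed so the sketch stands alone; in real code glBufferSubData comes from the GL loader):

    #include <algorithm>

    using GLenum = unsigned int;

    static void glBufferSubData(GLenum /*target*/, unsigned /*offset*/,
                                unsigned /*size*/, const void* /*data*/)
    {
        // stand-in for the real driver call
    }

    void flush_range(GLenum target, unsigned start, unsigned end,
                     const unsigned char* data)
    {
        constexpr unsigned block_size = 65536; // raised from 8192
        for (unsigned i = start; i <= end; i += block_size)
        {
            unsigned tend = std::min(i + block_size, end);
            unsigned size = tend - i + 1;
            glBufferSubData(target, i, size, data + (i - start));
        }
    }
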
@@ -1305,7 +1332,28 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8
void LLVertexBuffer::unmapBuffer()
{
+ flushBuffers();
+}
+
+void LLVertexBuffer::_mapBuffer()
+{
+ // must only be called from main thread
+ llassert(LLCoros::on_main_thread_main_coro());
+ if (!mMapped)
+ {
+ mMapped = true;
+ sMappedBuffers.push_back(this);
+ }
+}
+
+void LLVertexBuffer::_unmapBuffer()
+{
STOP_GLERROR;
+ if (!mMapped)
+ {
+ return;
+ }
+
struct SortMappedRegion
{
bool operator()(const MappedRegion& lhs, const MappedRegion& rhs)
@@ -1549,6 +1597,13 @@ void LLVertexBuffer::setBuffer()
return;
}
#endif
+
+ if (mMapped)
+ {
+ LL_WARNS() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL;
+ _unmapBuffer();
+ }
+
// no data may be pending
llassert(mMappedVertexRegions.empty());
llassert(mMappedIndexRegions.empty());
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index 2a4affdc60..9fe468f89e 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -120,6 +120,9 @@ public:
// indexed by the following enum
static U32 calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices);
+ // flush any pending mapped buffers
+ static void flushBuffers();
+
//WARNING -- when updating these enums you MUST
// 1 - update LLVertexBuffer::sTypeSize
// 2 - update LLVertexBuffer::vb_type_name
@@ -190,6 +193,8 @@ public:
// map for data access (see also getFooStrider below)
U8* mapVertexBuffer(AttributeType type, U32 index, S32 count = -1);
U8* mapIndexBuffer(U32 index, S32 count = -1);
+
+ // synonym for flushBuffers
void unmapBuffer();
// set for rendering
@@ -312,6 +317,13 @@ private:
bool allocateBuffer(S32 nverts, S32 nindices, bool create) { return allocateBuffer(nverts, nindices); }
+ // actually unmap buffer
+ void _unmapBuffer();
+
+ // add to set of mapped buffers
+ void _mapBuffer();
+ bool mMapped = false;
+
public:
static U64 getBytesAllocated();
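
Taken together, the header changes define the intended call-site flow: map and write as before, then issue a single flush for everything touched. A hypothetical call site (assumes viewer types from llvertexbuffer.h; the dirty list and write loop are placeholders):

    #include <vector>
    #include "llvertexbuffer.h"

    void update_dirty_meshes(std::vector<LLVertexBuffer*>& dirty)
    {
        for (LLVertexBuffer* vb : dirty)
        {
            // mapVertexBuffer() now calls _mapBuffer() internally, which
            // adds vb to the static mapped-buffer list on first use
            U8* dst = vb->mapVertexBuffer(LLVertexBuffer::TYPE_VERTEX, 0);
            // ... write vertex data through dst ...
            (void)dst;
        }

        // one call flushes every buffer mapped above; unmapBuffer() is
        // now a synonym for this
        LLVertexBuffer::flushBuffers();
    }
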
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index ccfef09b09..a1ec75e34b 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -1741,7 +1741,7 @@ bool LLFace::getGeometryVolume(const LLVolume& volume,
{ //bump mapped or has material, just do the whole expensive loop
LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("getGeometryVolume - texgen default");
- std::vector<LLVector2> bump_tc;
+ LLStrider<LLVector2> bump_tc;
if (mat && !mat->getNormalID().isNull())
{ //writing out normal and specular texture coordinates, not bump offsets
@@ -1803,49 +1803,70 @@ bool LLFace::getGeometryVolume(const LLVolume& volume,
}
const bool do_xform = (xforms & xform_channel) != XFORM_NONE;
+ // hold onto strider to front of TC array for use later
+ bump_tc = dst;
- for (S32 i = 0; i < num_vertices; i++)
{
- LLVector2 tc(vf.mTexCoords[i]);
-
- LLVector4a& norm = vf.mNormals[i];
-
- LLVector4a& center = *(vf.mCenter);
-
- if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
+ // NOTE: split TEX_GEN_PLANAR implementation to reduce branchiness of inner loop
+ // These are per-vertex operations and every little bit counts
+ if (texgen == LLTextureEntry::TEX_GEN_PLANAR)
{
- LLVector4a vec = vf.mPositions[i];
+ LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - planar");
+ for (S32 i = 0; i < num_vertices; i++)
+ {
+ LLVector2 tc(vf.mTexCoords[i]);
+ LLVector4a& norm = vf.mNormals[i];
+ LLVector4a& center = *(vf.mCenter);
+ LLVector4a vec = vf.mPositions[i];
- vec.mul(scalea);
+ vec.mul(scalea);
- if (texgen == LLTextureEntry::TEX_GEN_PLANAR)
- {
planarProjection(tc, norm, center, vec);
- }
- }
- if (tex_mode && mTextureMatrix)
- {
- LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
- tmp = tmp * *mTextureMatrix;
- tc.mV[0] = tmp.mV[0];
- tc.mV[1] = tmp.mV[1];
+ if (tex_mode && mTextureMatrix)
+ {
+ LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
+ tmp = tmp * *mTextureMatrix;
+ tc.mV[0] = tmp.mV[0];
+ tc.mV[1] = tmp.mV[1];
+ }
+ else if (do_xform)
+ {
+ xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
+ }
+
+ *dst++ = tc;
+ }
}
- else if (do_xform)
+ else
{
- xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
- }
+ LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - transform");
- *dst++ = tc;
- if (do_bump)
- {
- bump_tc.push_back(tc);
+ for (S32 i = 0; i < num_vertices; i++)
+ {
+ LLVector2 tc(vf.mTexCoords[i]);
+
+ if (tex_mode && mTextureMatrix)
+ {
+ LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
+ tmp = tmp * *mTextureMatrix;
+ tc.mV[0] = tmp.mV[0];
+ tc.mV[1] = tmp.mV[1];
+ }
+ else if (do_xform)
+ {
+ xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
+ }
+
+ *dst++ = tc;
+ }
}
}
}
if ((!mat && !gltf_mat) && do_bump)
{
+ LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - do bump");
mVertexBuffer->getTexCoord1Strider(tex_coords1, mGeomIndex, mGeomCount);
mVObjp->getVolume()->genTangents(face_index);
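
The texgen rewrite hoists a loop-invariant branch: `texgen == TEX_GEN_PLANAR` cannot change mid-face, so testing it once outside the loop yields two tight per-vertex bodies instead of one branchy one. It also swaps the `std::vector<LLVector2> bump_tc` for an LLStrider into texcoords already written, dropping a heap allocation and a per-vertex push_back. A generic sketch of the loop split (stand-in math, not the viewer's projection):

    static float project_planar(float u) { return u * 0.5f + 0.5f; } // stand-in
    static float transform_uv(float u)   { return u * 2.0f; }        // stand-in

    void write_texcoords(bool planar, int n, const float* in, float* out)
    {
        if (planar)
        {
            for (int i = 0; i < n; ++i)
            {
                out[i] = project_planar(in[i]); // planar-only body
            }
        }
        else
        {
            for (int i = 0; i < n; ++i)
            {
                out[i] = transform_uv(in[i]);   // default-texgen body
            }
        }
    }
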
diff --git a/indra/newview/llskinningutil.cpp b/indra/newview/llskinningutil.cpp
index 9b4ed4c946..1c92e06700 100644
--- a/indra/newview/llskinningutil.cpp
+++ b/indra/newview/llskinningutil.cpp
@@ -315,11 +315,9 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
}
}
-static LLTrace::BlockTimerStatHandle FTM_FACE_RIGGING_INFO("Face Rigging Info");
-
void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *avatar, LLVolumeFace& vol_face)
{
- LL_RECORD_BLOCK_TIME(FTM_FACE_RIGGING_INFO);
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
if (vol_face.mJointRiggingInfoTab.needsUpdate())
{
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index 681d91c945..9e1cb84bd1 100644
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -1361,51 +1361,6 @@ void LLViewerFetchedTexture::addToCreateTexture()
}
else
{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
-#if 1
- //
- //if mRequestedDiscardLevel > mDesiredDiscardLevel, we assume the required image res keep going up,
- //so do not scale down the over qualified image.
- //Note: scaling down image is expensensive. Do it only when very necessary.
- //
- if(mRequestedDiscardLevel <= mDesiredDiscardLevel && !mForceToSaveRawImage)
- {
- U32 w = mFullWidth >> mRawDiscardLevel;
- U32 h = mFullHeight >> mRawDiscardLevel;
-
- //if big image, do not load extra data
- //scale it down to size >= LLViewerTexture::sMinLargeImageSize
- if(w * h > LLViewerTexture::sMinLargeImageSize)
- {
- S32 d_level = llmin(mRequestedDiscardLevel, (S32)mDesiredDiscardLevel) - mRawDiscardLevel;
-
- if(d_level > 0)
- {
- S32 i = 0;
- while((d_level > 0) && ((w >> i) * (h >> i) > LLViewerTexture::sMinLargeImageSize))
- {
- i++;
- d_level--;
- }
- if(i > 0)
- {
- mRawDiscardLevel += i;
- if(mRawDiscardLevel >= getDiscardLevel() && getDiscardLevel() > 0)
- {
- mNeedsCreateTexture = false;
- destroyRawImage();
- return;
- }
-
- {
- //make a duplicate in case somebody else is using this raw image
- mRawImage = mRawImage->scaled(w >> i, h >> i);
- }
- }
- }
- }
- }
-#endif
scheduleCreateTexture();
}
return;
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index 02e4e6bef2..5329b354d3 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -10689,14 +10689,18 @@ void LLVOAvatar::updateRiggingInfo()
std::map<LLUUID, S32> curr_rigging_info_key;
- // Get current rigging info key
- for (LLVOVolume* vol : volumes)
{
- if (vol->isMesh() && vol->getVolume())
+ LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR("update rig info - get key")
+
+ // Get current rigging info key
+ for (LLVOVolume* vol : volumes)
{
- const LLUUID& mesh_id = vol->getVolume()->getParams().getSculptID();
- S32 max_lod = llmax(vol->getLOD(), vol->mLastRiggingInfoLOD);
- curr_rigging_info_key[mesh_id] = max_lod;
+ if (vol->isMesh() && vol->getVolume())
+ {
+ const LLUUID& mesh_id = vol->getVolume()->getParams().getSculptID();
+ S32 max_lod = llmax(vol->getLOD(), vol->mLastRiggingInfoLOD);
+ curr_rigging_info_key[mesh_id] = max_lod;
+ }
}
}
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 4b63354893..4f48a070e3 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -6028,8 +6028,8 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
group->mBuilt = 1.f;
- const U32 MAX_BUFFER_COUNT = 4096;
- LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT];
+ static std::vector<LLVertexBuffer*> locked_buffer;
+ locked_buffer.resize(0);
U32 buffer_count = 0;
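
Replacing the fixed 4096-entry stack array with a function-static vector removes the hard cap and, because resize(0) keeps capacity, avoids reallocating once warmed up; this is safe only because rebuildMesh runs on the main thread. A minimal sketch of the scratch-storage pattern (illustrative types):

    #include <cstddef>
    #include <vector>

    struct Node { /* stand-in for LLVertexBuffer */ };

    void rebuild_pass(Node* const* dirty, std::size_t count)
    {
        // function-static scratch: allocated once, capacity retained
        // between calls; single-threaded use only
        static std::vector<Node*> locked;
        locked.resize(0); // clears contents, keeps capacity

        for (std::size_t i = 0; i < count; ++i)
        {
            locked.push_back(dirty[i]);
        }

        // ... process locked, then let it persist for the next call ...
    }
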
@@ -6074,8 +6074,6 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
group->dirtyGeom();
gPipeline.markRebuild(group);
}
-
- buff->unmapBuffer();
}
}
}
@@ -6091,17 +6089,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
{
LL_PROFILE_ZONE_NAMED("rebuildMesh - flush");
- for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter)
- {
- (*iter)->unmapBuffer();
- }
-
- // don't forget alpha
- if(group != NULL &&
- !group->mVertexBuffer.isNull())
- {
- group->mVertexBuffer->unmapBuffer();
- }
+ LLVertexBuffer::flushBuffers();
}
group->clearState(LLSpatialGroup::MESH_DIRTY | LLSpatialGroup::NEW_DRAWINFO);
@@ -6783,11 +6771,6 @@ U32 LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace
++face_iter;
}
-
- if (buffer)
- {
- buffer->unmapBuffer();
- }
}
group->mBufferMap[mask].clear();