summaryrefslogtreecommitdiff
path: root/indra/llrender
diff options
context:
space:
mode:
authorcosmic-linden <111533034+cosmic-linden@users.noreply.github.com>2024-09-10 17:38:32 -0700
committerGitHub <noreply@github.com>2024-09-10 17:38:32 -0700
commitf90712b9837957f2a0d11386c6bf48e9a48ff745 (patch)
treedf1dbaf2d2024bed5cd4dd0cd8a80d1ce3f71e2c /indra/llrender
parent13e74a32871c7c8f7c7556c6fc08150fc6f27876 (diff)
parentcde5d29faf84c5cb7fc1b0d0ff6d03f3b7354c8f (diff)
Merge pull request #2544 from secondlife/davep-profile-session-9/10
Profile guided optimizations
Diffstat (limited to 'indra/llrender')
-rw-r--r--indra/llrender/llimagegl.cpp52
-rw-r--r--indra/llrender/llvertexbuffer.cpp59
-rw-r--r--indra/llrender/llvertexbuffer.h12
3 files changed, 112 insertions, 11 deletions
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 68c20048ec..67b4ada62f 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -1045,15 +1045,47 @@ void sub_image_lines(U32 target, S32 miplevel, S32 x_offset, S32 y_offset, S32 w
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
+ LL_PROFILE_ZONE_NUM(width);
+ LL_PROFILE_ZONE_NUM(height);
+
U32 components = LLImageGL::dataFormatComponents(pixformat);
U32 type_width = type_width_from_pixtype(pixtype);
const U32 line_width = data_width * components * type_width;
const U32 y_offset_end = y_offset + height;
- for (U32 y_pos = y_offset; y_pos < y_offset_end; ++y_pos)
+
+ if (width == data_width && height % 32 == 0)
+ {
+ LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE("subimage - batched lines");
+
+ // full width, batch multiple lines at a time
+ // set batch size based on width
+ U32 batch_size = 32;
+
+ if (width > 1024)
+ {
+ batch_size = 8;
+ }
+ else if (width > 512)
+ {
+ batch_size = 16;
+ }
+
+ // full width texture, do 32 lines at a time
+ for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += batch_size)
+ {
+ glTexSubImage2D(target, miplevel, x_offset, y_pos, width, batch_size, pixformat, pixtype, src);
+ src += line_width * batch_size;
+ }
+ }
+ else
{
- glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src);
- src += line_width;
+ // partial width or strange height
+ for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += 1)
+ {
+ glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src);
+ src += line_width;
+ }
}
}
@@ -2139,6 +2171,8 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
return ;
}
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
+
U32 length = w * h;
U32 alphatotal = 0;
@@ -2150,15 +2184,15 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
// this will mid-skew the data (and thus increase the chances of not
// being used as a mask) from high-frequency alpha maps which
// suffer the worst from aliasing when used as alpha masks.
- if (w >= 2 && h >= 2)
+ if (w >= 4 && h >= 4)
{
- llassert(w%2 == 0);
- llassert(h%2 == 0);
+ llassert(w%4 == 0);
+ llassert(h%4 == 0);
const GLubyte* rowstart = ((const GLubyte*) data_in) + mAlphaOffset;
- for (U32 y = 0; y < h; y+=2)
+ for (U32 y = 0; y < h; y+=4)
{
const GLubyte* current = rowstart;
- for (U32 x = 0; x < w; x+=2)
+ for (U32 x = 0; x < w; x+=4)
{
const U32 s1 = current[0];
alphatotal += s1;
@@ -2182,7 +2216,7 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
}
- rowstart += 2 * w * mAlphaStride;
+ rowstart += 4 * w * mAlphaStride;
}
length *= 2; // we sampled everything twice, essentially
}
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 156e300853..c1f239fc43 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -954,6 +954,25 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask)
}
}
+// list of mapped buffers
+// NOTE: must not be LLPointer<LLVertexBuffer> to avoid breaking non-ref-counted LLVertexBuffer instances
+static std::vector<LLVertexBuffer*> sMappedBuffers;
+
+//static
+void LLVertexBuffer::flushBuffers()
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ // must only be called from main thread
+ llassert(LLCoros::on_main_thread_main_coro());
+ for (auto& buffer : sMappedBuffers)
+ {
+ buffer->_unmapBuffer();
+ buffer->mMapped = false;
+ }
+
+ sMappedBuffers.resize(0);
+}
+
//static
U32 LLVertexBuffer::calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices)
{
@@ -997,6 +1016,12 @@ U32 LLVertexBuffer::calcVertexSize(const U32& typemask)
//virtual
LLVertexBuffer::~LLVertexBuffer()
{
+ if (mMapped)
+ { // is on the mapped buffer list but doesn't need to be flushed
+ mMapped = false;
+ unmapBuffer();
+ }
+
destroyGLBuffer();
destroyGLIndices();
@@ -1198,6 +1223,7 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, U32 start, U32 end)
U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 index, S32 count)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ _mapBuffer();
if (count == -1)
{
@@ -1233,6 +1259,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
+ _mapBuffer();
if (count == -1)
{
@@ -1289,11 +1316,11 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8
LL_PROFILE_ZONE_NUM(end);
LL_PROFILE_ZONE_NUM(end-start);
- constexpr U32 block_size = 8192;
+ constexpr U32 block_size = 65536;
for (U32 i = start; i <= end; i += block_size)
{
- LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+ //LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
//LL_PROFILE_GPU_ZONE("glBufferSubData");
U32 tend = llmin(i + block_size, end);
U32 size = tend - i + 1;
@@ -1305,7 +1332,28 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8
void LLVertexBuffer::unmapBuffer()
{
+ flushBuffers();
+}
+
+void LLVertexBuffer::_mapBuffer()
+{
+ // must only be called from main thread
+ llassert(LLCoros::on_main_thread_main_coro());
+ if (!mMapped)
+ {
+ mMapped = true;
+ sMappedBuffers.push_back(this);
+ }
+}
+
+void LLVertexBuffer::_unmapBuffer()
+{
STOP_GLERROR;
+ if (!mMapped)
+ {
+ return;
+ }
+
struct SortMappedRegion
{
bool operator()(const MappedRegion& lhs, const MappedRegion& rhs)
@@ -1549,6 +1597,13 @@ void LLVertexBuffer::setBuffer()
return;
}
#endif
+
+ if (mMapped)
+ {
+ LL_WARNS() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL;
+ _unmapBuffer();
+ }
+
// no data may be pending
llassert(mMappedVertexRegions.empty());
llassert(mMappedIndexRegions.empty());
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index 2a4affdc60..9fe468f89e 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -120,6 +120,9 @@ public:
// indexed by the following enum
static U32 calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices);
+ // flush any pending mapped buffers
+ static void flushBuffers();
+
//WARNING -- when updating these enums you MUST
// 1 - update LLVertexBuffer::sTypeSize
// 2 - update LLVertexBuffer::vb_type_name
@@ -190,6 +193,8 @@ public:
// map for data access (see also getFooStrider below)
U8* mapVertexBuffer(AttributeType type, U32 index, S32 count = -1);
U8* mapIndexBuffer(U32 index, S32 count = -1);
+
+ // synonym for flushBuffers
void unmapBuffer();
// set for rendering
@@ -312,6 +317,13 @@ private:
bool allocateBuffer(S32 nverts, S32 nindices, bool create) { return allocateBuffer(nverts, nindices); }
+ // actually unmap buffer
+ void _unmapBuffer();
+
+ // add to set of mapped buffers
+ void _mapBuffer();
+ bool mMapped = false;
+
public:
static U64 getBytesAllocated();