summaryrefslogtreecommitdiff
path: root/indra/llrender
diff options
context:
space:
mode:
authorDave Parks <davep@lindenlab.com>2021-10-15 01:01:51 +0000
committerDave Parks <davep@lindenlab.com>2021-10-15 01:01:51 +0000
commit02f813d17bcf4592f6425e207e9fe487275b9354 (patch)
treef1098b331df2ec42db671b5491f54d7bcbd74059 /indra/llrender
parent851767b808c3cb05d718538389ccc1ed3c95d1a1 (diff)
parent08c767d231a9ed96fc8755a3426f992cb23a63b5 (diff)
SL-16166 Optimization pass on render pipe inner loops
Diffstat (limited to 'indra/llrender')
-rw-r--r--indra/llrender/llglslshader.cpp4
-rw-r--r--indra/llrender/llgltexture.h2
-rw-r--r--indra/llrender/llrender.cpp39
-rw-r--r--indra/llrender/llrender.h14
-rw-r--r--indra/llrender/lltexture.h4
-rw-r--r--indra/llrender/llvertexbuffer.cpp155
-rw-r--r--indra/llrender/llvertexbuffer.h11
7 files changed, 218 insertions, 11 deletions
diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index 394fcd2b2f..84eac00c65 100644
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -1017,7 +1017,7 @@ S32 LLGLSLShader::bindTexture(S32 uniform, LLTexture *texture, LLTexUnit::eTextu
if (uniform > -1)
{
- gGL.getTexUnit(uniform)->bind(texture, mode);
+ gGL.getTexUnit(uniform)->bindFast(texture);
gGL.getTexUnit(uniform)->setTextureColorSpace(colorspace);
}
@@ -1048,7 +1048,7 @@ S32 LLGLSLShader::unbindTexture(S32 uniform, LLTexUnit::eTextureType mode)
if (uniform > -1)
{
- gGL.getTexUnit(uniform)->unbind(mode);
+ gGL.getTexUnit(uniform)->unbindFast(mode);
}
return uniform;
diff --git a/indra/llrender/llgltexture.h b/indra/llrender/llgltexture.h
index 071912c2c2..028457c510 100644
--- a/indra/llrender/llgltexture.h
+++ b/indra/llrender/llgltexture.h
@@ -176,7 +176,7 @@ private:
protected:
void setTexelsPerImage();
- //note: do not make this function public.
+public:
/*virtual*/ LLImageGL* getGLTexture() const ;
protected:
diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp
index b6711e44e3..669a09d3ce 100644
--- a/indra/llrender/llrender.cpp
+++ b/indra/llrender/llrender.cpp
@@ -229,8 +229,20 @@ void LLTexUnit::disable(void)
}
}
+void LLTexUnit::bindFast(LLTexture* texture)
+{
+ LLImageGL* gl_tex = texture->getGLTexture();
+
+ glActiveTextureARB(GL_TEXTURE0_ARB + mIndex);
+ gGL.mCurrTextureUnitIndex = mIndex;
+ mCurrTexture = gl_tex->getTexName();
+ glBindTexture(sGLTextureType[gl_tex->getTarget()], mCurrTexture);
+ mHasMipMaps = gl_tex->mHasMipMaps;
+}
+
bool LLTexUnit::bind(LLTexture* texture, bool for_rendering, bool forceBind)
{
+ LL_PROFILE_ZONE_SCOPED;
stop_glerror();
if (mIndex >= 0)
{
@@ -459,6 +471,28 @@ void LLTexUnit::unbind(eTextureType type)
}
}
+void LLTexUnit::unbindFast(eTextureType type)
+{
+ activate();
+
+ // Disabled caching of binding state.
+ if (mCurrTexType == type)
+ {
+ mCurrTexture = 0;
+
+ // Always make sure our texture color space is reset to linear. SRGB sampling should be opt-in in the vast majority of cases. Also prevents color space "popping".
+ mTexColorSpace = TCS_LINEAR;
+ if (type == LLTexUnit::TT_TEXTURE)
+ {
+ glBindTexture(sGLTextureType[type], sWhiteTexture);
+ }
+ else
+ {
+ glBindTexture(sGLTextureType[type], 0);
+ }
+ }
+}
+
void LLTexUnit::setTextureAddressMode(eTextureAddressMode mode)
{
if (mIndex < 0 || mCurrTexture == 0) return;
@@ -1243,8 +1277,6 @@ void LLRender::syncLightState()
void LLRender::syncMatrices()
{
- stop_glerror();
-
static const U32 name[] =
{
LLShaderMgr::MODELVIEW_MATRIX,
@@ -1415,8 +1447,6 @@ void LLRender::syncMatrices()
}
}
}
-
- stop_glerror();
}
void LLRender::translatef(const GLfloat& x, const GLfloat& y, const GLfloat& z)
@@ -1927,6 +1957,7 @@ void LLRender::flush()
{
if (mCount > 0)
{
+ LL_PROFILE_ZONE_SCOPED;
if (!mUIOffset.empty())
{
sUICalls++;
diff --git a/indra/llrender/llrender.h b/indra/llrender/llrender.h
index c08c2d6881..6e2647a16b 100644
--- a/indra/llrender/llrender.h
+++ b/indra/llrender/llrender.h
@@ -161,6 +161,17 @@ public:
bool bind(LLImageGL* texture, bool for_rendering = false, bool forceBind = false);
bool bind(LLTexture* texture, bool for_rendering = false, bool forceBind = false);
+ // bind implementation for inner loops
+ // makes the following assumptions:
+ // - No need for gGL.flush()
+ // - texture is not null
+ // - gl_tex->getTexName() is not zero
+ // - This texture is not being bound redundantly
+ // - USE_SRGB_DECODE is disabled
+ // - mTexOptionsDirty is false
+ // -
+ void bindFast(LLTexture* texture);
+
// Binds a cubemap to this texture unit
// (automatically enables the texture unit for cubemaps)
bool bind(LLCubeMap* cubeMap);
@@ -177,6 +188,9 @@ public:
// (only if there's a texture of the given type currently bound)
void unbind(eTextureType type);
+ // Fast but unsafe version of unbind
+ void unbindFast(eTextureType type);
+
// Sets the addressing mode used to sample the texture
// Warning: this stays set for the bound texture forever,
// make sure you want to permanently change the address mode for the bound texture.
diff --git a/indra/llrender/lltexture.h b/indra/llrender/lltexture.h
index 41481fb8a7..256d85ce5a 100644
--- a/indra/llrender/lltexture.h
+++ b/indra/llrender/lltexture.h
@@ -67,11 +67,9 @@ public:
virtual S32 getWidth(S32 discard_level = -1) const;
virtual S32 getHeight(S32 discard_level = -1) const;
virtual bool isActiveFetching();
+ virtual LLImageGL* getGLTexture() const;
private:
- //note: do not make this function public.
- virtual LLImageGL* getGLTexture() const;
-
virtual void updateBindStatsForTester();
};
#endif
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 0449ac392c..103d5388d3 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -787,6 +787,18 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi
placeFence();
}
+void LLVertexBuffer::drawRangeFast(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const
+{
+ mMappable = false;
+ gGL.syncMatrices();
+
+ U16* idx = ((U16*)(U8*)mAlignedIndexOffset) + indices_offset;
+
+ LL_PROFILER_GPU_ZONEC("gl.DrawRangeElements", 0xFFFF00)
+ glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT,
+ idx);
+}
+
void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const
{
llassert(!LLGLSLShader::sNoFixedFunction || LLGLSLShader::sCurBoundShaderPtr != NULL);
@@ -2272,6 +2284,21 @@ bool LLVertexBuffer::bindGLBuffer(bool force_bind)
return ret;
}
+bool LLVertexBuffer::bindGLBufferFast()
+{
+ if (mGLBuffer != sGLRenderBuffer || !sVBOActive)
+ {
+ glBindBufferARB(GL_ARRAY_BUFFER_ARB, mGLBuffer);
+ sGLRenderBuffer = mGLBuffer;
+ sBindCount++;
+ sVBOActive = true;
+
+ return true;
+ }
+
+ return false;
+}
+
static LLTrace::BlockTimerStatHandle FTM_BIND_GL_INDICES("Bind Indices");
bool LLVertexBuffer::bindGLIndices(bool force_bind)
@@ -2297,6 +2324,21 @@ bool LLVertexBuffer::bindGLIndices(bool force_bind)
return ret;
}
+bool LLVertexBuffer::bindGLIndicesFast()
+{
+ if (mGLIndices != sGLRenderIndices || !sIBOActive)
+ {
+ glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mGLIndices);
+ sGLRenderIndices = mGLIndices;
+ sBindCount++;
+ sIBOActive = true;
+
+ return true;
+ }
+
+ return false;
+}
+
void LLVertexBuffer::flush()
{
if (useVBOs())
@@ -2487,6 +2529,26 @@ void LLVertexBuffer::setBuffer(U32 data_mask)
}
}
+void LLVertexBuffer::setBufferFast(U32 data_mask)
+{
+ //set up pointers if the data mask is different ...
+ bool setup = (sLastMask != data_mask);
+
+
+ const bool bindBuffer = bindGLBufferFast();
+ const bool bindIndices = bindGLIndicesFast();
+
+ setup = setup || bindBuffer || bindIndices;
+
+ setupClientArrays(data_mask);
+
+ if (data_mask && setup)
+ {
+ setupVertexBufferFast(data_mask);
+ sSetCount++;
+ }
+}
+
// virtual (default)
void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
{
@@ -2644,6 +2706,99 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
llglassertok();
}
+void LLVertexBuffer::setupVertexBufferFast(U32 data_mask)
+{
+ U8* base = (U8*)mAlignedOffset;
+
+ if (data_mask & MAP_NORMAL)
+ {
+ S32 loc = TYPE_NORMAL;
+ void* ptr = (void*)(base + mOffsets[TYPE_NORMAL]);
+ glVertexAttribPointerARB(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_NORMAL], ptr);
+ }
+ if (data_mask & MAP_TEXCOORD3)
+ {
+ S32 loc = TYPE_TEXCOORD3;
+ void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD3]);
+ glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD3], ptr);
+ }
+ if (data_mask & MAP_TEXCOORD2)
+ {
+ S32 loc = TYPE_TEXCOORD2;
+ void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD2]);
+ glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD2], ptr);
+ }
+ if (data_mask & MAP_TEXCOORD1)
+ {
+ S32 loc = TYPE_TEXCOORD1;
+ void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD1]);
+ glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD1], ptr);
+ }
+ if (data_mask & MAP_TANGENT)
+ {
+ S32 loc = TYPE_TANGENT;
+ void* ptr = (void*)(base + mOffsets[TYPE_TANGENT]);
+ glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TANGENT], ptr);
+ }
+ if (data_mask & MAP_TEXCOORD0)
+ {
+ S32 loc = TYPE_TEXCOORD0;
+ void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD0]);
+ glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD0], ptr);
+ }
+ if (data_mask & MAP_COLOR)
+ {
+ S32 loc = TYPE_COLOR;
+ //bind emissive instead of color pointer if emissive is present
+ void* ptr = (data_mask & MAP_EMISSIVE) ? (void*)(base + mOffsets[TYPE_EMISSIVE]) : (void*)(base + mOffsets[TYPE_COLOR]);
+ glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_COLOR], ptr);
+ }
+ if (data_mask & MAP_EMISSIVE)
+ {
+ S32 loc = TYPE_EMISSIVE;
+ void* ptr = (void*)(base + mOffsets[TYPE_EMISSIVE]);
+ glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr);
+
+ if (!(data_mask & MAP_COLOR))
+ { //map emissive to color channel when color is not also being bound to avoid unnecessary shader swaps
+ loc = TYPE_COLOR;
+ glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr);
+ }
+ }
+ if (data_mask & MAP_WEIGHT)
+ {
+ S32 loc = TYPE_WEIGHT;
+ void* ptr = (void*)(base + mOffsets[TYPE_WEIGHT]);
+ glVertexAttribPointerARB(loc, 1, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_WEIGHT], ptr);
+ }
+ if (data_mask & MAP_WEIGHT4)
+ {
+ S32 loc = TYPE_WEIGHT4;
+ void* ptr = (void*)(base + mOffsets[TYPE_WEIGHT4]);
+ glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_WEIGHT4], ptr);
+ }
+ if (data_mask & MAP_CLOTHWEIGHT)
+ {
+ S32 loc = TYPE_CLOTHWEIGHT;
+ void* ptr = (void*)(base + mOffsets[TYPE_CLOTHWEIGHT]);
+ glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_CLOTHWEIGHT], ptr);
+ }
+ if (data_mask & MAP_TEXTURE_INDEX)
+ {
+#if !LL_DARWIN
+ S32 loc = TYPE_TEXTURE_INDEX;
+ void* ptr = (void*)(base + mOffsets[TYPE_VERTEX] + 12);
+ glVertexAttribIPointer(loc, 1, GL_UNSIGNED_INT, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);
+#endif
+ }
+ if (data_mask & MAP_VERTEX)
+ {
+ S32 loc = TYPE_VERTEX;
+ void* ptr = (void*)(base + mOffsets[TYPE_VERTEX]);
+ glVertexAttribPointerARB(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);
+ }
+}
+
LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count)
: mType(type), mIndex(index), mCount(count)
{
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index 1d60970df4..51ed85510e 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -210,13 +210,17 @@ protected:
virtual ~LLVertexBuffer(); // use unref()
- virtual void setupVertexBuffer(U32 data_mask); // pure virtual, called from mapBuffer()
+ virtual void setupVertexBuffer(U32 data_mask);
+ void setupVertexBufferFast(U32 data_mask);
+
void setupVertexArray();
void genBuffer(U32 size);
void genIndices(U32 size);
bool bindGLBuffer(bool force_bind = false);
+ bool bindGLBufferFast();
bool bindGLIndices(bool force_bind = false);
+ bool bindGLIndicesFast();
bool bindGLArray();
void releaseBuffer();
void releaseIndices();
@@ -239,6 +243,8 @@ public:
// set for rendering
virtual void setBuffer(U32 data_mask); // calls setupVertexBuffer() if data_mask is not 0
+ void setBufferFast(U32 data_mask); // calls setupVertexBufferFast(), assumes data_mask is not 0 among other assumptions
+
void flush(); //flush pending data to GL memory
// allocate buffer
bool allocateBuffer(S32 nverts, S32 nindices, bool create);
@@ -290,6 +296,9 @@ public:
void drawArrays(U32 mode, U32 offset, U32 count) const;
void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;
+ //implementation for inner loops that does no safety checking
+ void drawRangeFast(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;
+
//for debugging, validate data in given range is valid
void validateRange(U32 start, U32 end, U32 count, U32 offset) const;