From 1852ebd65df1748f071947204e440f1dece30f31 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Fri, 28 Jun 2024 11:23:08 +0200 Subject: Apple GPU Optimisations This commit massively improves 2D UI, 3D UI and 3D world rendering performance and smoothness. --- indra/llrender/llfontgl.cpp | 56 +++-- indra/llrender/llfontgl.h | 4 +- indra/llrender/llrender.cpp | 98 ++++---- indra/llrender/llrender.h | 26 ++- indra/llrender/llrender2dutils.cpp | 459 ++++++++++++++++++++++++++++++------- indra/llrender/llvertexbuffer.cpp | 54 ++++- 6 files changed, 511 insertions(+), 186 deletions(-) (limited to 'indra/llrender') diff --git a/indra/llrender/llfontgl.cpp b/indra/llrender/llfontgl.cpp index 3714bb1883..2ac2615c0a 100644 --- a/indra/llrender/llfontgl.cpp +++ b/indra/llrender/llfontgl.cpp @@ -270,10 +270,10 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons const LLFontGlyphInfo* next_glyph = NULL; - const S32 GLYPH_BATCH_SIZE = 30; - LLVector3 vertices[GLYPH_BATCH_SIZE * 4]; - LLVector2 uvs[GLYPH_BATCH_SIZE * 4]; - LLColor4U colors[GLYPH_BATCH_SIZE * 4]; + const S32 GLYPH_BATCH_SIZE = 120; + LLVector4a vertices[GLYPH_BATCH_SIZE * 6]; + LLVector2 uvs[GLYPH_BATCH_SIZE * 6]; + LLColor4U colors[GLYPH_BATCH_SIZE * 6]; LLColor4U text_color(color); // Preserve the transparency to render fading emojis in fading text (e.g. @@ -305,9 +305,9 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons // otherwise the queued glyphs will be taken from wrong textures. if (glyph_count > 0) { - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { - gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4); + gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 6); } gGL.end(); glyph_count = 0; @@ -338,12 +338,11 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons if (glyph_count >= GLYPH_BATCH_SIZE) { - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { - gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4); + gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 6); } gGL.end(); - glyph_count = 0; } @@ -376,9 +375,9 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons cur_render_y = cur_y; } - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { - gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4); + gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 6); } gGL.end(); @@ -1226,31 +1225,42 @@ LLFontGL &LLFontGL::operator=(const LLFontGL &source) return *this; } -void LLFontGL::renderQuad(LLVector3* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, F32 slant_amt) const +void LLFontGL::renderTriangle(LLVector4a* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, F32 slant_amt) const { S32 index = 0; - vertex_out[index] = LLVector3(screen_rect.mRight, screen_rect.mTop, 0.f); + vertex_out[index] = LLVector4a(screen_rect.mRight, screen_rect.mTop, 0.f); uv_out[index] = LLVector2(uv_rect.mRight, uv_rect.mTop); colors_out[index] = color; index++; - vertex_out[index] = LLVector3(screen_rect.mLeft, screen_rect.mTop, 0.f); + vertex_out[index] = LLVector4a(screen_rect.mLeft, screen_rect.mTop, 0.f); uv_out[index] = LLVector2(uv_rect.mLeft, uv_rect.mTop); colors_out[index] = color; index++; - vertex_out[index] = LLVector3(screen_rect.mLeft, screen_rect.mBottom, 0.f); + vertex_out[index] = LLVector4a(screen_rect.mLeft, screen_rect.mBottom, 0.f); + uv_out[index] = LLVector2(uv_rect.mLeft, uv_rect.mBottom); + colors_out[index] = color; + index++; + + + vertex_out[index] = LLVector4a(screen_rect.mRight, screen_rect.mTop, 0.f); + uv_out[index] = LLVector2(uv_rect.mRight, uv_rect.mTop); + colors_out[index] = color; + index++; + + vertex_out[index] = LLVector4a(screen_rect.mLeft, screen_rect.mBottom, 0.f); uv_out[index] = LLVector2(uv_rect.mLeft, uv_rect.mBottom); colors_out[index] = color; index++; - vertex_out[index] = LLVector3(screen_rect.mRight, screen_rect.mBottom, 0.f); + vertex_out[index] = LLVector4a(screen_rect.mRight, screen_rect.mBottom, 0.f); uv_out[index] = LLVector2(uv_rect.mRight, uv_rect.mBottom); colors_out[index] = color; } -void LLFontGL::drawGlyph(S32& glyph_count, LLVector3* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, U8 style, ShadowType shadow, F32 drop_shadow_strength) const +void LLFontGL::drawGlyph(S32& glyph_count, LLVector4a* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, U8 style, ShadowType shadow, F32 drop_shadow_strength) const { F32 slant_offset; slant_offset = ((style & ITALIC) ? ( -mFontFreetype->getAscenderHeight() * 0.2f) : 0.f); @@ -1264,7 +1274,7 @@ void LLFontGL::drawGlyph(S32& glyph_count, LLVector3* vertex_out, LLVector2* uv_ LLRectf screen_rect_offset = screen_rect; screen_rect_offset.translate((F32)(pass * BOLD_OFFSET), 0.f); - renderQuad(&vertex_out[glyph_count * 4], &uv_out[glyph_count * 4], &colors_out[glyph_count * 4], screen_rect_offset, uv_rect, color, slant_offset); + renderTriangle(&vertex_out[glyph_count * 6], &uv_out[glyph_count * 6], &colors_out[glyph_count * 6], screen_rect_offset, uv_rect, color, slant_offset); glyph_count++; } } @@ -1295,10 +1305,10 @@ void LLFontGL::drawGlyph(S32& glyph_count, LLVector3* vertex_out, LLVector2* uv_ break; } - renderQuad(&vertex_out[glyph_count * 4], &uv_out[glyph_count * 4], &colors_out[glyph_count * 4], screen_rect_offset, uv_rect, shadow_color, slant_offset); + renderTriangle(&vertex_out[glyph_count * 6], &uv_out[glyph_count * 6], &colors_out[glyph_count * 6], screen_rect_offset, uv_rect, shadow_color, slant_offset); glyph_count++; } - renderQuad(&vertex_out[glyph_count * 4], &uv_out[glyph_count * 4], &colors_out[glyph_count * 4], screen_rect, uv_rect, color, slant_offset); + renderTriangle(&vertex_out[glyph_count * 6], &uv_out[glyph_count * 6], &colors_out[glyph_count * 6], screen_rect, uv_rect, color, slant_offset); glyph_count++; } else if (shadow == DROP_SHADOW) @@ -1307,14 +1317,14 @@ void LLFontGL::drawGlyph(S32& glyph_count, LLVector3* vertex_out, LLVector2* uv_ shadow_color.mV[VALPHA] = U8(color.mV[VALPHA] * drop_shadow_strength); LLRectf screen_rect_shadow = screen_rect; screen_rect_shadow.translate(1.f, -1.f); - renderQuad(&vertex_out[glyph_count * 4], &uv_out[glyph_count * 4], &colors_out[glyph_count * 4], screen_rect_shadow, uv_rect, shadow_color, slant_offset); + renderTriangle(&vertex_out[glyph_count * 6], &uv_out[glyph_count * 6], &colors_out[glyph_count * 6], screen_rect_shadow, uv_rect, shadow_color, slant_offset); glyph_count++; - renderQuad(&vertex_out[glyph_count * 4], &uv_out[glyph_count * 4], &colors_out[glyph_count * 4], screen_rect, uv_rect, color, slant_offset); + renderTriangle(&vertex_out[glyph_count * 6], &uv_out[glyph_count * 6], &colors_out[glyph_count * 6], screen_rect, uv_rect, color, slant_offset); glyph_count++; } else // normal rendering { - renderQuad(&vertex_out[glyph_count * 4], &uv_out[glyph_count * 4], &colors_out[glyph_count * 4], screen_rect, uv_rect, color, slant_offset); + renderTriangle(&vertex_out[glyph_count * 6], &uv_out[glyph_count * 6], &colors_out[glyph_count * 6], screen_rect, uv_rect, color, slant_offset); glyph_count++; } } diff --git a/indra/llrender/llfontgl.h b/indra/llrender/llfontgl.h index 65f0a8cbfd..81674f4a64 100644 --- a/indra/llrender/llfontgl.h +++ b/indra/llrender/llfontgl.h @@ -238,8 +238,8 @@ private: LLFontDescriptor mFontDescriptor; LLPointer mFontFreetype; - void renderQuad(LLVector3* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, F32 slant_amt) const; - void drawGlyph(S32& glyph_count, LLVector3* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, U8 style, ShadowType shadow, F32 drop_shadow_fade) const; + void renderTriangle(LLVector4a* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, F32 slant_amt) const; + void drawGlyph(S32& glyph_count, LLVector4a* vertex_out, LLVector2* uv_out, LLColor4U* colors_out, const LLRectf& screen_rect, const LLRectf& uv_rect, const LLColor4U& color, U8 style, ShadowType shadow, F32 drop_shadow_fade) const; // Registry holds all instantiated fonts. static LLFontRegistry* sFontRegistry; diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index f28eefcb63..7ca628bac5 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -50,6 +50,8 @@ extern void APIENTRY gl_debug_callback(GLenum source, thread_local LLRender gGL; +const U32 BATCH_SIZE = 16334; + // Handy copies of last good GL matrices F32 gGLModelView[16]; F32 gGLLastModelView[16]; @@ -66,7 +68,7 @@ S32 gGLViewport[4]; U32 LLRender::sUICalls = 0; U32 LLRender::sUIVerts = 0; U32 LLTexUnit::sWhiteTexture = 0; -bool LLRender::sGLCoreProfile = false; +bool LLRender::sGLCoreProfile = true; bool LLRender::sNsightDebugSupport = false; LLVector2 LLRender::sUIGLScaleFactor = LLVector2(1.f, 1.f); @@ -920,7 +922,7 @@ void LLRender::initVertexBuffer() llassert_always(mBuffer.isNull()); stop_glerror(); mBuffer = new LLVertexBuffer(immediate_mask); - mBuffer->allocateBuffer(4096, 0); + mBuffer->allocateBuffer(BATCH_SIZE, 0); mBuffer->getVertexStrider(mVerticesp); mBuffer->getTexCoord0Strider(mTexcoordsp); mBuffer->getColorStrider(mColorsp); @@ -1333,9 +1335,8 @@ void LLRender::translateUI(F32 x, F32 y, F32 z) LL_ERRS() << "Need to push a UI translation frame before offsetting" << LL_ENDL; } - mUIOffset.back().mV[0] += x; - mUIOffset.back().mV[1] += y; - mUIOffset.back().mV[2] += z; + LLVector4a add(x,y,z); + mUIOffset.back().add(add); } void LLRender::scaleUI(F32 x, F32 y, F32 z) @@ -1345,14 +1346,15 @@ void LLRender::scaleUI(F32 x, F32 y, F32 z) LL_ERRS() << "Need to push a UI transformation frame before scaling." << LL_ENDL; } - mUIScale.back().scaleVec(LLVector3(x,y,z)); + LLVector4a scale(x,y,z); + mUIScale.back().mul(scale); } void LLRender::pushUIMatrix() { if (mUIOffset.empty()) { - mUIOffset.push_back(LLVector3(0,0,0)); + mUIOffset.emplace_back(LLVector4a::getZero()); } else { @@ -1361,7 +1363,7 @@ void LLRender::pushUIMatrix() if (mUIScale.empty()) { - mUIScale.push_back(LLVector3(1,1,1)); + mUIScale.emplace_back(LLVector4a(1.f)); } else { @@ -1385,7 +1387,7 @@ LLVector3 LLRender::getUITranslation() { return LLVector3(0,0,0); } - return mUIOffset.back(); + return LLVector3(mUIOffset.back().getF32ptr()); } LLVector3 LLRender::getUIScale() @@ -1394,18 +1396,17 @@ LLVector3 LLRender::getUIScale() { return LLVector3(1,1,1); } - return mUIScale.back(); + return LLVector3(mUIScale.back().getF32ptr()); } - void LLRender::loadUIIdentity() { if (mUIOffset.empty()) { LL_ERRS() << "Need to push UI translation frame before clearing offset." << LL_ENDL; } - mUIOffset.back().setVec(0,0,0); - mUIScale.back().setVec(1,1,1); + mUIOffset.back().splat(0.f); + mUIScale.back().splat(1.f); } void LLRender::setColorMask(bool writeColor, bool writeAlpha) @@ -1594,7 +1595,7 @@ void LLRender::end() mMode != LLRender::LINES && mMode != LLRender::TRIANGLES && mMode != LLRender::POINTS) || - mCount > 2048) + mCount > (BATCH_SIZE / 2)) { flush(); } @@ -1614,28 +1615,28 @@ void LLRender::flush() //store mCount in a local variable to avoid re-entrance (drawArrays may call flush) U32 count = mCount; - if (mMode == LLRender::QUADS && !sGLCoreProfile) + if (mMode == LLRender::QUADS && !sGLCoreProfile) + { + if (mCount%4 != 0) { - if (mCount%4 != 0) - { count -= (mCount % 4); LL_WARNS() << "Incomplete quad requested." << LL_ENDL; - } } + } - if (mMode == LLRender::TRIANGLES) + if (mMode == LLRender::TRIANGLES) + { + if (mCount%3 != 0) { - if (mCount%3 != 0) - { count -= (mCount % 3); LL_WARNS() << "Incomplete triangle requested." << LL_ENDL; - } } + } - if (mMode == LLRender::LINES) + if (mMode == LLRender::LINES) + { + if (mCount%2 != 0) { - if (mCount%2 != 0) - { count -= (mCount % 2); LL_WARNS() << "Incomplete line requested." << LL_ENDL; } @@ -1697,7 +1698,7 @@ void LLRender::flush() vb->setBuffer(); - vb->setPositionData((LLVector4a*) mVerticesp.get()); + vb->setPositionData(mVerticesp.get()); if (attribute_mask & LLVertexBuffer::MAP_TEXCOORD0) { @@ -1764,10 +1765,10 @@ void LLRender::flush() } } -void LLRender::vertex3f(const GLfloat& x, const GLfloat& y, const GLfloat& z) +void LLRender::vertex4a(const LLVector4a& vertex) { //the range of mVerticesp, mColorsp and mTexcoordsp is [0, 4095] - if (mCount > 2048) + if (mCount > BATCH_SIZE / 2) { //break when buffer gets reasonably full to keep GL command buffers happy and avoid overflow below switch (mMode) { @@ -1778,20 +1779,20 @@ void LLRender::vertex3f(const GLfloat& x, const GLfloat& y, const GLfloat& z) } } - if (mCount > 4094) + if (mCount > BATCH_SIZE - 2) { - // LL_WARNS() << "GL immediate mode overflow. Some geometry not drawn." << LL_ENDL; + LL_WARNS() << "GL immediate mode overflow. Some geometry not drawn." << LL_ENDL; return; } if (mUIOffset.empty()) { - mVerticesp[mCount] = LLVector3(x,y,z); + mVerticesp[mCount] = vertex; } else { - LLVector3 vert = (LLVector3(x,y,z)+mUIOffset.back()).scaledVec(mUIScale.back()); - mVerticesp[mCount] = vert; + mVerticesp[mCount].setAdd(vertex, mUIOffset.back()); + mVerticesp[mCount].mul(mUIScale.back()); } if (mMode == LLRender::QUADS && LLRender::sGLCoreProfile) @@ -1819,9 +1820,9 @@ void LLRender::vertex3f(const GLfloat& x, const GLfloat& y, const GLfloat& z) mTexcoordsp[mCount] = mTexcoordsp[mCount-1]; } -void LLRender::vertexBatchPreTransformed(LLVector3* verts, S32 vert_count) +void LLRender::vertexBatchPreTransformed(LLVector4a* verts, S32 vert_count) { - if (mCount + vert_count > 4094) + if (mCount + vert_count > BATCH_SIZE - 2) { // LL_WARNS() << "GL immediate mode overflow. Some geometry not drawn." << LL_ENDL; return; @@ -1877,9 +1878,9 @@ void LLRender::vertexBatchPreTransformed(LLVector3* verts, S32 vert_count) mVerticesp[mCount] = mVerticesp[mCount-1]; } -void LLRender::vertexBatchPreTransformed(LLVector3* verts, LLVector2* uvs, S32 vert_count) +void LLRender::vertexBatchPreTransformed(LLVector4a* verts, LLVector2* uvs, S32 vert_count) { - if (mCount + vert_count > 4094) + if (mCount + vert_count > BATCH_SIZE - 2) { // LL_WARNS() << "GL immediate mode overflow. Some geometry not drawn." << LL_ENDL; return; @@ -1938,9 +1939,9 @@ void LLRender::vertexBatchPreTransformed(LLVector3* verts, LLVector2* uvs, S32 v } } -void LLRender::vertexBatchPreTransformed(LLVector3* verts, LLVector2* uvs, LLColor4U* colors, S32 vert_count) +void LLRender::vertexBatchPreTransformed(LLVector4a* verts, LLVector2* uvs, LLColor4U* colors, S32 vert_count) { - if (mCount + vert_count > 4094) + if (mCount + vert_count > BATCH_SIZE - 2) { // LL_WARNS() << "GL immediate mode overflow. Some geometry not drawn." << LL_ENDL; return; @@ -2001,25 +2002,6 @@ void LLRender::vertexBatchPreTransformed(LLVector3* verts, LLVector2* uvs, LLCol } } -void LLRender::vertex2i(const GLint& x, const GLint& y) -{ - vertex3f((GLfloat) x, (GLfloat) y, 0); -} - -void LLRender::vertex2f(const GLfloat& x, const GLfloat& y) -{ - vertex3f(x,y,0); -} - -void LLRender::vertex2fv(const GLfloat* v) -{ - vertex3f(v[0], v[1], 0); -} - -void LLRender::vertex3fv(const GLfloat* v) -{ - vertex3f(v[0], v[1], v[2]); -} void LLRender::texCoord2f(const GLfloat& x, const GLfloat& y) { diff --git a/indra/llrender/llrender.h b/indra/llrender/llrender.h index 020682f526..555234356f 100644 --- a/indra/llrender/llrender.h +++ b/indra/llrender/llrender.h @@ -416,11 +416,15 @@ public: void begin(const GLuint& mode); void end(); - void vertex2i(const GLint& x, const GLint& y); - void vertex2f(const GLfloat& x, const GLfloat& y); - void vertex3f(const GLfloat& x, const GLfloat& y, const GLfloat& z); - void vertex2fv(const GLfloat* v); - void vertex3fv(const GLfloat* v); + + LL_FORCE_INLINE void vertex2i(const GLint& x, const GLint& y) { vertex4a(LLVector4a((GLfloat)x,(GLfloat)y,0.f)); } + LL_FORCE_INLINE void vertex2f(const GLfloat& x, const GLfloat& y) { vertex4a(LLVector4a(x,y,0.f)); } + LL_FORCE_INLINE void vertex3f(const GLfloat& x, const GLfloat& y, const GLfloat& z) { vertex4a(LLVector4a(x,y,z)); } + LL_FORCE_INLINE void vertex2fv(const GLfloat* v) { vertex4a(LLVector4a(v[0],v[1],0.f)); } + LL_FORCE_INLINE void vertex3fv(const GLfloat* v) { vertex4a(LLVector4a(v[0],v[1],v[2])); } + + void vertex4a(const LLVector4a& v); + void texCoord2i(const GLint& x, const GLint& y); void texCoord2f(const GLfloat& x, const GLfloat& y); @@ -440,9 +444,9 @@ public: void diffuseColor4ubv(const U8* c); void diffuseColor4ub(U8 r, U8 g, U8 b, U8 a); - void vertexBatchPreTransformed(LLVector3* verts, S32 vert_count); - void vertexBatchPreTransformed(LLVector3* verts, LLVector2* uvs, S32 vert_count); - void vertexBatchPreTransformed(LLVector3* verts, LLVector2* uvs, LLColor4U*, S32 vert_count); + void vertexBatchPreTransformed(LLVector4a* verts, S32 vert_count); + void vertexBatchPreTransformed(LLVector4a* verts, LLVector2* uvs, S32 vert_count); + void vertexBatchPreTransformed(LLVector4a* verts, LLVector2* uvs, LLColor4U*, S32 vert_count); void setColorMask(bool writeColor, bool writeAlpha); void setColorMask(bool writeColorR, bool writeColorG, bool writeColorB, bool writeAlpha); @@ -500,7 +504,7 @@ private: bool mCurrColorMask[4]; LLPointer mBuffer; - LLStrider mVerticesp; + LLStrider mVerticesp; LLStrider mTexcoordsp; LLStrider mColorsp; std::array mTexUnits; @@ -512,8 +516,8 @@ private: eBlendFactor mCurrBlendAlphaSFactor; eBlendFactor mCurrBlendAlphaDFactor; - std::vector mUIOffset; - std::vector mUIScale; + std::vector mUIOffset; + std::vector mUIScale; }; diff --git a/indra/llrender/llrender2dutils.cpp b/indra/llrender/llrender2dutils.cpp index 5939fe68dc..c04b571798 100644 --- a/indra/llrender/llrender2dutils.cpp +++ b/indra/llrender/llrender2dutils.cpp @@ -119,14 +119,18 @@ void gl_rect_2d(S32 left, S32 top, S32 right, S32 bottom, BOOL filled ) { gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE); - // Counterclockwise quad will face the viewer if( filled ) { - gGL.begin( LLRender::QUADS ); + gGL.begin( LLRender::TRIANGLES ); + { gGL.vertex2i(left, top); gGL.vertex2i(left, bottom); gGL.vertex2i(right, bottom); + + gGL.vertex2i(left, top); + gGL.vertex2i(right, bottom); gGL.vertex2i(right, top); + } gGL.end(); } else @@ -171,51 +175,73 @@ void gl_drop_shadow(S32 left, S32 top, S32 right, S32 bottom, const LLColor4 &st LLColor4 end_color = start_color; end_color.mV[VALPHA] = 0.f; + gGL.begin(LLRender::TRIANGLES); - gGL.begin(LLRender::QUADS); - - // Right edge, CCW faces screen + // Right edge + gGL.color4fv(start_color.mV); + gGL.vertex2i(right, top-lines); + gGL.vertex2i(right, bottom); + gGL.color4fv(end_color.mV); + gGL.vertex2i(right+lines, bottom); gGL.color4fv(start_color.mV); - gGL.vertex2i(right, top-lines); - gGL.vertex2i(right, bottom); + gGL.vertex2i(right, top-lines); gGL.color4fv(end_color.mV); gGL.vertex2i(right+lines, bottom); gGL.vertex2i(right+lines, top-lines); - // Bottom edge, CCW faces screen + // Bottom edge gGL.color4fv(start_color.mV); - gGL.vertex2i(right, bottom); - gGL.vertex2i(left+lines, bottom); + gGL.vertex2i(right, bottom); + gGL.vertex2i(left+lines, bottom); + gGL.color4fv(end_color.mV); + gGL.vertex2i(left+lines, bottom-lines); + gGL.color4fv(start_color.mV); + gGL.vertex2i(right, bottom); gGL.color4fv(end_color.mV); - gGL.vertex2i(left+lines, bottom-lines); - gGL.vertex2i(right, bottom-lines); + gGL.vertex2i(left+lines, bottom-lines); + gGL.vertex2i(right, bottom-lines); // bottom left Corner gGL.color4fv(start_color.mV); - gGL.vertex2i(left+lines, bottom); + gGL.vertex2i(left+lines, bottom); + gGL.color4fv(end_color.mV); + gGL.vertex2i(left, bottom); + + // bottom left corner + gGL.vertex2i(left+1, bottom-lines+1); + gGL.color4fv(start_color.mV); + gGL.vertex2i(left+lines, bottom); gGL.color4fv(end_color.mV); - gGL.vertex2i(left, bottom); - // make the bottom left corner not sharp - gGL.vertex2i(left+1, bottom-lines+1); - gGL.vertex2i(left+lines, bottom-lines); + gGL.vertex2i(left+1, bottom-lines+1); + gGL.vertex2i(left+lines, bottom-lines); // bottom right corner gGL.color4fv(start_color.mV); - gGL.vertex2i(right, bottom); + gGL.vertex2i(right, bottom); gGL.color4fv(end_color.mV); - gGL.vertex2i(right, bottom-lines); + gGL.vertex2i(right, bottom-lines); + // make the rightmost corner not sharp gGL.vertex2i(right+lines-1, bottom-lines+1); - gGL.vertex2i(right+lines, bottom); + gGL.color4fv(start_color.mV); + gGL.vertex2i(right, bottom); + gGL.color4fv(end_color.mV); + gGL.vertex2i(right+lines-1, bottom-lines+1); + gGL.vertex2i(right+lines, bottom); // top right corner gGL.color4fv(start_color.mV); - gGL.vertex2i( right, top-lines ); + gGL.vertex2i( right, top-lines ); gGL.color4fv(end_color.mV); - gGL.vertex2i( right+lines, top-lines ); + gGL.vertex2i( right+lines, top-lines ); + // make the corner not sharp - gGL.vertex2i( right+lines-1, top-1 ); - gGL.vertex2i( right, top ); + gGL.vertex2i( right+lines-1, top-1 ); + gGL.color4fv(start_color.mV); + gGL.vertex2i( right, top-lines ); + gGL.color4fv(end_color.mV); + gGL.vertex2i( right+lines-1, top-1 ); + gGL.vertex2i( right, top ); gGL.end(); stop_glerror(); @@ -429,165 +455,237 @@ void gl_draw_scaled_image_with_border(S32 x, S32 y, S32 width, S32 height, LLTex gGL.color4fv(color.mV); - const S32 NUM_VERTICES = 9 * 4; // 9 quads + const S32 NUM_VERTICES = 9 * 6; LLVector2 uv[NUM_VERTICES]; - LLVector3 pos[NUM_VERTICES]; + LLVector4a pos[NUM_VERTICES]; S32 index = 0; - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { // draw bottom left uv[index] = LLVector2(uv_outer_rect.mLeft, uv_outer_rect.mBottom); - pos[index] = LLVector3(draw_outer_rect.mLeft, draw_outer_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_outer_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_outer_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_outer_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_outer_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mBottom); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_outer_rect.mLeft, uv_outer_rect.mBottom); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_outer_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mLeft, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_outer_rect.mLeft, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_center_rect.mBottom, 0.f); index++; // draw bottom middle uv[index] = LLVector2(uv_center_rect.mLeft, uv_outer_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_outer_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_outer_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_outer_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mRight, draw_outer_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_outer_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mLeft, uv_outer_rect.mBottom); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_outer_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mBottom); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); index++; // draw bottom right uv[index] = LLVector2(uv_center_rect.mRight, uv_outer_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mRight, draw_outer_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_outer_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mRight, uv_outer_rect.mBottom); - pos[index] = LLVector3(draw_outer_rect.mRight, draw_outer_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_outer_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mRight, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_outer_rect.mRight, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_center_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mRight, uv_outer_rect.mBottom); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_outer_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_outer_rect.mRight, uv_center_rect.mBottom); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); index++; // draw left uv[index] = LLVector2(uv_outer_rect.mLeft, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_outer_rect.mLeft, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mTop); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_outer_rect.mLeft, uv_center_rect.mBottom); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mLeft, uv_center_rect.mTop); - pos[index] = LLVector3(draw_outer_rect.mLeft, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_center_rect.mTop, 0.f); index++; // draw middle uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mBottom); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mTop); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); index++; // draw right uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mRight, uv_center_rect.mBottom); - pos[index] = LLVector3(draw_outer_rect.mRight, draw_center_rect.mBottom, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_center_rect.mBottom, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mRight, uv_center_rect.mTop); - pos[index] = LLVector3(draw_outer_rect.mRight, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_center_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mBottom); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mBottom, 0.f); + index++; + + uv[index] = LLVector2(uv_outer_rect.mRight, uv_center_rect.mTop); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); index++; // draw top left uv[index] = LLVector2(uv_outer_rect.mLeft, uv_center_rect.mTop); - pos[index] = LLVector3(draw_outer_rect.mLeft, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_outer_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_outer_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_outer_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_outer_rect.mLeft, uv_center_rect.mTop); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_center_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mLeft, uv_outer_rect.mTop); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_outer_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mLeft, uv_outer_rect.mTop); - pos[index] = LLVector3(draw_outer_rect.mLeft, draw_outer_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mLeft, draw_outer_rect.mTop, 0.f); index++; // draw top middle uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_outer_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mRight, draw_outer_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_outer_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mLeft, uv_center_rect.mTop); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_center_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mRight, uv_outer_rect.mTop); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_outer_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mLeft, uv_outer_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mLeft, draw_outer_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mLeft, draw_outer_rect.mTop, 0.f); index++; // draw top right uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mRight, uv_center_rect.mTop); - pos[index] = LLVector3(draw_outer_rect.mRight, draw_center_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_center_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_outer_rect.mRight, uv_outer_rect.mTop); - pos[index] = LLVector3(draw_outer_rect.mRight, draw_outer_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_outer_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_center_rect.mRight, uv_center_rect.mTop); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_center_rect.mTop, 0.f); + index++; + + uv[index] = LLVector2(uv_outer_rect.mRight, uv_outer_rect.mTop); + pos[index] = LLVector4a(draw_outer_rect.mRight, draw_outer_rect.mTop, 0.f); index++; uv[index] = LLVector2(uv_center_rect.mRight, uv_outer_rect.mTop); - pos[index] = LLVector3(draw_center_rect.mRight, draw_outer_rect.mTop, 0.f); + pos[index] = LLVector4a(draw_center_rect.mRight, draw_outer_rect.mTop, 0.f); index++; gGL.vertexBatchPreTransformed(pos, uv, NUM_VERTICES); @@ -629,11 +727,11 @@ void gl_draw_scaled_rotated_image(S32 x, S32 y, S32 width, S32 height, F32 degre if (degrees == 0.f) { - const S32 NUM_VERTICES = 4; // 9 quads + const S32 NUM_VERTICES = 6; LLVector2 uv[NUM_VERTICES]; - LLVector3 pos[NUM_VERTICES]; + LLVector4a pos[NUM_VERTICES]; - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { LLVector3 ui_scale = gGL.getUIScale(); LLVector3 ui_translation = gGL.getUITranslation(); @@ -645,19 +743,27 @@ void gl_draw_scaled_rotated_image(S32 x, S32 y, S32 width, S32 height, F32 degre S32 scaled_height = ll_round(height * ui_scale.mV[VY]); uv[index] = LLVector2(uv_rect.mRight, uv_rect.mTop); - pos[index] = LLVector3(ui_translation.mV[VX] + scaled_width, ui_translation.mV[VY] + scaled_height, 0.f); + pos[index] = LLVector4a(ui_translation.mV[VX] + scaled_width, ui_translation.mV[VY] + scaled_height, 0.f); index++; uv[index] = LLVector2(uv_rect.mLeft, uv_rect.mTop); - pos[index] = LLVector3(ui_translation.mV[VX], ui_translation.mV[VY] + scaled_height, 0.f); + pos[index] = LLVector4a(ui_translation.mV[VX], ui_translation.mV[VY] + scaled_height, 0.f); + index++; + + uv[index] = LLVector2(uv_rect.mLeft, uv_rect.mBottom); + pos[index] = LLVector4a(ui_translation.mV[VX], ui_translation.mV[VY], 0.f); + index++; + + uv[index] = LLVector2(uv_rect.mRight, uv_rect.mTop); + pos[index] = LLVector4a(ui_translation.mV[VX] + scaled_width, ui_translation.mV[VY] + scaled_height, 0.f); index++; uv[index] = LLVector2(uv_rect.mLeft, uv_rect.mBottom); - pos[index] = LLVector3(ui_translation.mV[VX], ui_translation.mV[VY], 0.f); + pos[index] = LLVector4a(ui_translation.mV[VX], ui_translation.mV[VY], 0.f); index++; uv[index] = LLVector2(uv_rect.mRight, uv_rect.mBottom); - pos[index] = LLVector3(ui_translation.mV[VX] + scaled_width, ui_translation.mV[VY], 0.f); + pos[index] = LLVector4a(ui_translation.mV[VX] + scaled_width, ui_translation.mV[VY], 0.f); index++; gGL.vertexBatchPreTransformed(pos, uv, NUM_VERTICES); @@ -687,26 +793,32 @@ void gl_draw_scaled_rotated_image(S32 x, S32 y, S32 width, S32 height, F32 degre gGL.color4fv(color.mV); - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { - LLVector3 v; + LLVector3 v1 = LLVector3(offset_x, offset_y, 0.f) * quat; + LLVector3 v2 = LLVector3(-offset_x, offset_y, 0.f) * quat; + LLVector3 v3 = LLVector3(-offset_x, -offset_y, 0.f) * quat; + LLVector3 v4 = LLVector3(offset_x, -offset_y, 0.f) * quat; - v = LLVector3(offset_x, offset_y, 0.f) * quat; gGL.texCoord2f(uv_rect.mRight, uv_rect.mTop); - gGL.vertex2f(v.mV[0], v.mV[1] ); + gGL.vertex2f(v1.mV[0], v1.mV[1] ); - v = LLVector3(-offset_x, offset_y, 0.f) * quat; gGL.texCoord2f(uv_rect.mLeft, uv_rect.mTop); - gGL.vertex2f(v.mV[0], v.mV[1] ); + gGL.vertex2f(v2.mV[0], v2.mV[1] ); + + gGL.texCoord2f(uv_rect.mLeft, uv_rect.mBottom); + gGL.vertex2f(v3.mV[0], v3.mV[1] ); + + gGL.texCoord2f(uv_rect.mRight, uv_rect.mTop); + gGL.vertex2f(v1.mV[0], v1.mV[1] ); - v = LLVector3(-offset_x, -offset_y, 0.f) * quat; gGL.texCoord2f(uv_rect.mLeft, uv_rect.mBottom); - gGL.vertex2f(v.mV[0], v.mV[1] ); + gGL.vertex2f(v3.mV[0], v3.mV[1] ); - v = LLVector3(offset_x, -offset_y, 0.f) * quat; gGL.texCoord2f(uv_rect.mRight, uv_rect.mBottom); - gGL.vertex2f(v.mV[0], v.mV[1] ); + gGL.vertex2f(v4.mV[0], v4.mV[1] ); } + gGL.end(); gGL.popUIMatrix(); } @@ -948,8 +1060,8 @@ void gl_washer_segment_2d(F32 outer_radius, F32 inner_radius, F32 start_radians, void gl_rect_2d_simple_tex( S32 width, S32 height ) { - gGL.begin( LLRender::QUADS ); - + gGL.begin( LLRender::TRIANGLES ); + { gGL.texCoord2f(1.f, 1.f); gGL.vertex2i(width, height); @@ -959,19 +1071,30 @@ void gl_rect_2d_simple_tex( S32 width, S32 height ) gGL.texCoord2f(0.f, 0.f); gGL.vertex2i(0, 0); + gGL.texCoord2f(1.f, 1.f); + gGL.vertex2i(width, height); + + gGL.texCoord2f(0.f, 0.f); + gGL.vertex2i(0, 0); + gGL.texCoord2f(1.f, 0.f); gGL.vertex2i(width, 0); - + } gGL.end(); } void gl_rect_2d_simple( S32 width, S32 height ) { - gGL.begin( LLRender::QUADS ); + gGL.begin( LLRender::TRIANGLES ); + { gGL.vertex2i(width, height); gGL.vertex2i(0, height); gGL.vertex2i(0, 0); + + gGL.vertex2i(width, height); + gGL.vertex2i(0, 0); gGL.vertex2i(width, 0); + } gGL.end(); } @@ -1011,7 +1134,7 @@ void gl_segmented_rect_2d_tex(const S32 left, LLVector2 width_vec((F32)width, 0.f); LLVector2 height_vec(0.f, (F32)height); - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { // draw bottom left gGL.texCoord2f(0.f, 0.f); @@ -1023,6 +1146,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(border_uv_scale.mV[VX], border_uv_scale.mV[VY]); gGL.vertex2fv((border_width_left + border_height_bottom).mV); + gGL.texCoord2f(0.f, 0.f); + gGL.vertex2f(0.f, 0.f); + + gGL.texCoord2f(border_uv_scale.mV[VX], border_uv_scale.mV[VY]); + gGL.vertex2fv((border_width_left + border_height_bottom).mV); + gGL.texCoord2f(0.f, border_uv_scale.mV[VY]); gGL.vertex2fv(border_height_bottom.mV); @@ -1036,6 +1165,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(1.f - border_uv_scale.mV[VX], border_uv_scale.mV[VY]); gGL.vertex2fv((width_vec - border_width_right + border_height_bottom).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 0.f); + gGL.vertex2fv(border_width_left.mV); + + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], border_uv_scale.mV[VY]); + gGL.vertex2fv((width_vec - border_width_right + border_height_bottom).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], border_uv_scale.mV[VY]); gGL.vertex2fv((border_width_left + border_height_bottom).mV); @@ -1049,6 +1184,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(1.f, border_uv_scale.mV[VY]); gGL.vertex2fv((width_vec + border_height_bottom).mV); + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 0.f); + gGL.vertex2fv((width_vec - border_width_right).mV); + + gGL.texCoord2f(1.f, border_uv_scale.mV[VY]); + gGL.vertex2fv((width_vec + border_height_bottom).mV); + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], border_uv_scale.mV[VY]); gGL.vertex2fv((width_vec - border_width_right + border_height_bottom).mV); @@ -1062,6 +1203,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((border_width_left + height_vec - border_height_top).mV); + gGL.texCoord2f(0.f, border_uv_scale.mV[VY]); + gGL.vertex2fv(border_height_bottom.mV); + + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((border_width_left + height_vec - border_height_top).mV); + gGL.texCoord2f(0.f, 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((height_vec - border_height_top).mV); @@ -1075,6 +1222,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((width_vec - border_width_right + height_vec - border_height_top).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], border_uv_scale.mV[VY]); + gGL.vertex2fv((border_width_left + border_height_bottom).mV); + + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((width_vec - border_width_right + height_vec - border_height_top).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((border_width_left + height_vec - border_height_top).mV); @@ -1088,6 +1241,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(1.f, 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((width_vec + height_vec - border_height_top).mV); + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], border_uv_scale.mV[VY]); + gGL.vertex2fv((width_vec - border_width_right + border_height_bottom).mV); + + gGL.texCoord2f(1.f, 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((width_vec + height_vec - border_height_top).mV); + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((width_vec - border_width_right + height_vec - border_height_top).mV); @@ -1101,6 +1260,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(border_uv_scale.mV[VX], 1.f); gGL.vertex2fv((border_width_left + height_vec).mV); + gGL.texCoord2f(0.f, 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((height_vec - border_height_top).mV); + + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f); + gGL.vertex2fv((border_width_left + height_vec).mV); + gGL.texCoord2f(0.f, 1.f); gGL.vertex2fv((height_vec).mV); @@ -1114,6 +1279,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f); gGL.vertex2fv((width_vec - border_width_right + height_vec).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((border_width_left + height_vec - border_height_top).mV); + + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f); + gGL.vertex2fv((width_vec - border_width_right + height_vec).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f); gGL.vertex2fv((border_width_left + height_vec).mV); @@ -1127,6 +1298,12 @@ void gl_segmented_rect_2d_tex(const S32 left, gGL.texCoord2f(1.f, 1.f); gGL.vertex2fv((width_vec + height_vec).mV); + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((width_vec - border_width_right + height_vec - border_height_top).mV); + + gGL.texCoord2f(1.f, 1.f); + gGL.vertex2fv((width_vec + height_vec).mV); + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f); gGL.vertex2fv((width_vec - border_width_right + height_vec).mV); } @@ -1181,7 +1358,7 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, LLVector2 x_min; LLVector2 x_max; - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { if (start_fragment < middle_start) { @@ -1200,6 +1377,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(u_max, border_uv_scale.mV[VY]); gGL.vertex2fv((x_max + border_height_bottom).mV); + gGL.texCoord2f(u_min, 0.f); + gGL.vertex2fv(x_min.mV); + + gGL.texCoord2f(u_max, border_uv_scale.mV[VY]); + gGL.vertex2fv((x_max + border_height_bottom).mV); + gGL.texCoord2f(u_min, border_uv_scale.mV[VY]); gGL.vertex2fv((x_min + border_height_bottom).mV); @@ -1213,6 +1396,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(u_max, 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((x_max + height_vec - border_height_top).mV); + gGL.texCoord2f(u_min, border_uv_scale.mV[VY]); + gGL.vertex2fv((x_min + border_height_bottom).mV); + + gGL.texCoord2f(u_max, 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((x_max + height_vec - border_height_top).mV); + gGL.texCoord2f(u_min, 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((x_min + height_vec - border_height_top).mV); @@ -1226,6 +1415,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(u_max, 1.f); gGL.vertex2fv((x_max + height_vec).mV); + gGL.texCoord2f(u_min, 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((x_min + height_vec - border_height_top).mV); + + gGL.texCoord2f(u_max, 1.f); + gGL.vertex2fv((x_max + height_vec).mV); + gGL.texCoord2f(u_min, 1.f); gGL.vertex2fv((x_min + height_vec).mV); } @@ -1245,6 +1440,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(1.f - border_uv_scale.mV[VX], border_uv_scale.mV[VY]); gGL.vertex2fv((x_max + border_height_bottom).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 0.f); + gGL.vertex2fv(x_min.mV); + + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], border_uv_scale.mV[VY]); + gGL.vertex2fv((x_max + border_height_bottom).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], border_uv_scale.mV[VY]); gGL.vertex2fv((x_min + border_height_bottom).mV); @@ -1258,6 +1459,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((x_max + height_vec - border_height_top).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], border_uv_scale.mV[VY]); + gGL.vertex2fv((x_min + border_height_bottom).mV); + + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((x_max + height_vec - border_height_top).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((x_min + height_vec - border_height_top).mV); @@ -1271,6 +1478,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f); gGL.vertex2fv((x_max + height_vec).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((x_min + height_vec - border_height_top).mV); + + gGL.texCoord2f(1.f - border_uv_scale.mV[VX], 1.f); + gGL.vertex2fv((x_max + height_vec).mV); + gGL.texCoord2f(border_uv_scale.mV[VX], 1.f); gGL.vertex2fv((x_min + height_vec).mV); } @@ -1292,6 +1505,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(u_max, border_uv_scale.mV[VY]); gGL.vertex2fv((x_max + border_height_bottom).mV); + gGL.texCoord2f(u_min, 0.f); + gGL.vertex2fv((x_min).mV); + + gGL.texCoord2f(u_max, border_uv_scale.mV[VY]); + gGL.vertex2fv((x_max + border_height_bottom).mV); + gGL.texCoord2f(u_min, border_uv_scale.mV[VY]); gGL.vertex2fv((x_min + border_height_bottom).mV); @@ -1305,6 +1524,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(u_max, 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((x_max + height_vec - border_height_top).mV); + gGL.texCoord2f(u_min, border_uv_scale.mV[VY]); + gGL.vertex2fv((x_min + border_height_bottom).mV); + + gGL.texCoord2f(u_max, 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((x_max + height_vec - border_height_top).mV); + gGL.texCoord2f(u_min, 1.f - border_uv_scale.mV[VY]); gGL.vertex2fv((x_min + height_vec - border_height_top).mV); @@ -1318,6 +1543,12 @@ void gl_segmented_rect_2d_fragment_tex(const LLRect& rect, gGL.texCoord2f(u_max, 1.f); gGL.vertex2fv((x_max + height_vec).mV); + gGL.texCoord2f(u_min, 1.f - border_uv_scale.mV[VY]); + gGL.vertex2fv((x_min + height_vec - border_height_top).mV); + + gGL.texCoord2f(u_max, 1.f); + gGL.vertex2fv((x_max + height_vec).mV); + gGL.texCoord2f(u_min, 1.f); gGL.vertex2fv((x_min + height_vec).mV); } @@ -1332,7 +1563,7 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv { LL_PROFILE_ZONE_SCOPED_CATEGORY_UI; - gGL.begin(LLRender::QUADS); + gGL.begin(LLRender::TRIANGLES); { // draw bottom left gGL.texCoord2f(clip_rect.mLeft, clip_rect.mBottom); @@ -1344,6 +1575,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mBottom); gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mBottom * height_vec).mV); + gGL.texCoord2f(clip_rect.mLeft, clip_rect.mBottom); + gGL.vertex3f(0.f, 0.f, 0.f); + + gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mBottom); + gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mBottom * height_vec).mV); + gGL.texCoord2f(clip_rect.mLeft, center_uv_rect.mBottom); gGL.vertex3fv((center_draw_rect.mBottom * height_vec).mV); @@ -1357,6 +1594,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mBottom); gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mBottom * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mLeft, clip_rect.mBottom); + gGL.vertex3fv((center_draw_rect.mLeft * width_vec).mV); + + gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mBottom); + gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mBottom * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mBottom); gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mBottom * height_vec).mV); @@ -1370,6 +1613,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(clip_rect.mRight, center_uv_rect.mBottom); gGL.vertex3fv((width_vec + center_draw_rect.mBottom * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mRight, clip_rect.mBottom); + gGL.vertex3fv((center_draw_rect.mRight * width_vec).mV); + + gGL.texCoord2f(clip_rect.mRight, center_uv_rect.mBottom); + gGL.vertex3fv((width_vec + center_draw_rect.mBottom * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mBottom); gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mBottom * height_vec).mV); @@ -1383,6 +1632,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mTop); gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mTop * height_vec).mV); + gGL.texCoord2f(clip_rect.mLeft, center_uv_rect.mBottom); + gGL.vertex3fv((center_draw_rect.mBottom * height_vec).mV); + + gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mTop); + gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mTop * height_vec).mV); + gGL.texCoord2f(clip_rect.mLeft, center_uv_rect.mTop); gGL.vertex3fv((center_draw_rect.mTop * height_vec).mV); @@ -1396,6 +1651,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mTop); gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mTop * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mBottom); + gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mBottom * height_vec).mV); + + gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mTop); + gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mTop * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mTop); gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mTop * height_vec).mV); @@ -1409,6 +1670,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(clip_rect.mRight, center_uv_rect.mTop); gGL.vertex3fv((width_vec + center_draw_rect.mTop * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mBottom); + gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mBottom * height_vec).mV); + + gGL.texCoord2f(clip_rect.mRight, center_uv_rect.mTop); + gGL.vertex3fv((width_vec + center_draw_rect.mTop * height_vec).mV); + gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mTop); gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mTop * height_vec).mV); @@ -1422,6 +1689,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(center_uv_rect.mLeft, clip_rect.mTop); gGL.vertex3fv((center_draw_rect.mLeft * width_vec + height_vec).mV); + gGL.texCoord2f(clip_rect.mLeft, center_uv_rect.mTop); + gGL.vertex3fv((center_draw_rect.mTop * height_vec).mV); + + gGL.texCoord2f(center_uv_rect.mLeft, clip_rect.mTop); + gGL.vertex3fv((center_draw_rect.mLeft * width_vec + height_vec).mV); + gGL.texCoord2f(clip_rect.mLeft, clip_rect.mTop); gGL.vertex3fv((height_vec).mV); @@ -1435,6 +1708,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(center_uv_rect.mRight, clip_rect.mTop); gGL.vertex3fv((center_draw_rect.mRight * width_vec + height_vec).mV); + gGL.texCoord2f(center_uv_rect.mLeft, center_uv_rect.mTop); + gGL.vertex3fv((center_draw_rect.mLeft * width_vec + center_draw_rect.mTop * height_vec).mV); + + gGL.texCoord2f(center_uv_rect.mRight, clip_rect.mTop); + gGL.vertex3fv((center_draw_rect.mRight * width_vec + height_vec).mV); + gGL.texCoord2f(center_uv_rect.mLeft, clip_rect.mTop); gGL.vertex3fv((center_draw_rect.mLeft * width_vec + height_vec).mV); @@ -1448,6 +1727,12 @@ void gl_segmented_rect_3d_tex(const LLRectf& clip_rect, const LLRectf& center_uv gGL.texCoord2f(clip_rect.mRight, clip_rect.mTop); gGL.vertex3fv((width_vec + height_vec).mV); + gGL.texCoord2f(center_uv_rect.mRight, center_uv_rect.mTop); + gGL.vertex3fv((center_draw_rect.mRight * width_vec + center_draw_rect.mTop * height_vec).mV); + + gGL.texCoord2f(clip_rect.mRight, clip_rect.mTop); + gGL.vertex3fv((width_vec + height_vec).mV); + gGL.texCoord2f(center_uv_rect.mRight, clip_rect.mTop); gGL.vertex3fv((center_draw_rect.mRight * width_vec + height_vec).mV); } diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 7caf20f40b..783794512c 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -359,7 +359,8 @@ public: mMisses++; name = gen_buffer(); glBindBuffer(type, name); - glBufferData(type, size, nullptr, GL_DYNAMIC_DRAW); + //glBufferData(type, size, nullptr, GL_DYNAMIC_DRAW); + glBufferData(type, size, nullptr, GL_STREAM_DRAW); if (type == GL_ELEMENT_ARRAY_BUFFER) { LLVertexBuffer::sGLRenderIndices = name; @@ -1147,20 +1148,63 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) { if (end != 0) { + //Note (observeur): I maintained the profile "glBufferSubData" names because i'm not sure if it would impact any statistics part somewhere in the code. LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); LL_PROFILE_ZONE_NUM(start); LL_PROFILE_ZONE_NUM(end); LL_PROFILE_ZONE_NUM(end-start); - constexpr U32 block_size = 8192; + U32 size = end-start+1; + + //Note (observeur): glBufferSubData() was causing synchronization stalls, specialy on Apple GPUs, possibly to the fact Apple GPU is a tiled gpu, resulting to heavy stutters, and spacialy when called several times per frame on the same buffer. + + //Note (observeur): I maintained the notion of block_size for testing purpose, but i think it's a bad idea. We don't know the overhead of glMapBufferRange() depending on the driver, so it's better avoiding calling it more than necessary.(0 -> loop is disabled, 8192 -> original value, 524288 -> a resonable value). + constexpr U32 block_size = 0; + + if(block_size == 0) + { + U8 * mptr = NULL; + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); + + mptr = (U8*) glMapBufferRange( target, start, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + if(mptr) + { + std::memcpy(mptr, (U8*) data, size); + glUnmapBuffer(target); + } + else + { + LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; + } + return; + } + + //Note (observeur): This is for analysis purpose only + if(size > block_size) + { + LL_INFOS() << "Large data range : " << size << LL_ENDL; + } + + U8 * mptr = NULL; for (U32 i = start; i <= end; i += block_size) { LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); - //LL_PROFILE_GPU_ZONE("glBufferSubData"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); U32 tend = llmin(i + block_size, end); - U32 size = tend - i + 1; - glBufferSubData(target, i, size, (U8*) data + (i-start)); + size = tend - i + 1; + + mptr = (U8*) glMapBufferRange( target, i, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + if(mptr) + { + std::memcpy(mptr, (U8*) data + (i-start), size); + glUnmapBuffer(target); + } + else + { + LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; + } } } } -- cgit v1.2.3 From 714038e0f35329b614d12cf467230f9cb6eea017 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Sat, 29 Jun 2024 22:48:41 +0200 Subject: Apple GPU Detection and Apple GPU performance optimisations This commit adds a mIsApple member to the gl manager and refines the buffers optimisations for Apple GPU in llvertexbuffer.cpp --- indra/llrender/llgl.cpp | 7 +++++++ indra/llrender/llgl.h | 1 + 2 files changed, 8 insertions(+) (limited to 'indra/llrender') diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 10920f1de3..9207e6ad73 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -1001,6 +1001,7 @@ LLGLManager::LLGLManager() : mIsAMD(FALSE), mIsNVIDIA(FALSE), mIsIntel(FALSE), + mIsApple(FALSE), #if LL_DARWIN mIsMobileGF(FALSE), #endif @@ -1174,6 +1175,11 @@ bool LLGLManager::initGL() mGLVendorShort = "INTEL"; mIsIntel = TRUE; } + else if(mGLVendor.find("APPLE") != std::string::npos) + { + mGLVendorShort = "APPLE"; + mIsApple = TRUE; + } else { mGLVendorShort = "MISC"; @@ -1373,6 +1379,7 @@ void LLGLManager::asLLSD(LLSD& info) info["is_ati"] = mIsAMD; // note, do not rename is_ati to is_amd without coordinating with DW info["is_nvidia"] = mIsNVIDIA; info["is_intel"] = mIsIntel; + info["is_apple"] = mIsApple; info["gl_renderer"] = mGLRenderer; } diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 5a7ad943df..e4b106c999 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -100,6 +100,7 @@ public: BOOL mIsAMD; BOOL mIsNVIDIA; BOOL mIsIntel; + BOOL mIsApple; #if LL_DARWIN // Needed to distinguish problem cards on older Macs that break with Materials -- cgit v1.2.3 From 2e16d1b365e465c0c3e505770e213200ec25fc12 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Sat, 29 Jun 2024 22:57:51 +0200 Subject: vertex buffer optimisations for Apple GPU The vertex buffer will use the original mapping for non Apple GPUs and the new optimized mapping for Apple GPUs. It needs the Apple gpu detection in the gl manager --- indra/llrender/llvertexbuffer.cpp | 42 ++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'indra/llrender') diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 783794512c..52fb58187a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -1155,19 +1155,40 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) LL_PROFILE_ZONE_NUM(end-start); U32 size = end-start+1; + U32 block_size = 65536; - //Note (observeur): glBufferSubData() was causing synchronization stalls, specialy on Apple GPUs, possibly to the fact Apple GPU is a tiled gpu, resulting to heavy stutters, and spacialy when called several times per frame on the same buffer. + //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus). + if(!gGLManager.mIsApple) + { + for (U32 i = start; i <= end; i += block_size) + { + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); + U32 tend = llmin(i + block_size, end); + U32 size = tend - i + 1; + glBufferSubData(target, i, size, (U8*) data + (i-start)); + } + + return; + } + + //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously. - //Note (observeur): I maintained the notion of block_size for testing purpose, but i think it's a bad idea. We don't know the overhead of glMapBufferRange() depending on the driver, so it's better avoiding calling it more than necessary.(0 -> loop is disabled, 8192 -> original value, 524288 -> a resonable value). - constexpr U32 block_size = 0; + //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way. + U32 MapBits = GL_MAP_WRITE_BIT; + //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes. + block_size = 524288; + + //Note (observeur): This is called in case block_size is set to 0 (All in one mapping). if(block_size == 0) { U8 * mptr = NULL; LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); LL_PROFILE_GPU_ZONE("glBufferSubData"); - mptr = (U8*) glMapBufferRange( target, start, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + mptr = (U8*) glMapBufferRange( target, start, size, MapBits); + if(mptr) { std::memcpy(mptr, (U8*) data, size); @@ -1180,11 +1201,13 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) return; } + //Note (observeur): The following code is executed in case of block_size is superior to 0 + //Note (observeur): This is for analysis purpose only - if(size > block_size) - { - LL_INFOS() << "Large data range : " << size << LL_ENDL; - } + //if(size > block_size) + //{ + // LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL; + //} U8 * mptr = NULL; @@ -1195,7 +1218,8 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) U32 tend = llmin(i + block_size, end); size = tend - i + 1; - mptr = (U8*) glMapBufferRange( target, i, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + mptr = (U8*) glMapBufferRange( target, i, size, MapBits ); + if(mptr) { std::memcpy(mptr, (U8*) data + (i-start), size); -- cgit v1.2.3