diff options
author | Dave Parks <davep@lindenlab.com> | 2010-05-21 04:49:12 -0500 |
---|---|---|
committer | Dave Parks <davep@lindenlab.com> | 2010-05-21 04:49:12 -0500 |
commit | 05a23f8dbaa45c64bcf6c55dd09a468ba2b1f144 (patch) | |
tree | 2e4ad0ae03f1c48e1a325501ebdb96bd843fde67 /indra/llrender/llvertexbuffer.cpp | |
parent | bf5f215fbc29102cfd8b5418f29ea0ed6edd14ee (diff) |
Vectorized memcpy.
16-byte aligned vertex buffers.
(almost) fully vectorized avatar vertex buffer updating --- index buffers still need to be vectorized
Diffstat (limited to 'indra/llrender/llvertexbuffer.cpp')
-rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 195 |
1 files changed, 190 insertions, 5 deletions
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 7fa47cd171..a50eb7211c 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -39,6 +39,7 @@ #include "llglheaders.h" #include "llmemtype.h" #include "llrender.h" +#include "llvector4a.h" //============================================================================ @@ -66,6 +67,27 @@ S32 LLVertexBuffer::sWeight4Loc = -1; std::vector<U32> LLVertexBuffer::sDeleteList; +#define LL_ALIGNED_VB 1 + +#if LL_ALIGNED_VB + +S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] = +{ + sizeof(LLVector4), // TYPE_VERTEX, + sizeof(LLVector4), // TYPE_NORMAL, + sizeof(LLVector2), // TYPE_TEXCOORD0, + sizeof(LLVector2), // TYPE_TEXCOORD1, + sizeof(LLVector2), // TYPE_TEXCOORD2, + sizeof(LLVector2), // TYPE_TEXCOORD3, + sizeof(LLColor4U), // TYPE_COLOR, + sizeof(LLVector4), // TYPE_BINORMAL, + sizeof(F32), // TYPE_WEIGHT, + sizeof(LLVector4), // TYPE_WEIGHT4, + sizeof(LLVector4), // TYPE_CLOTHWEIGHT, +}; + +#else + S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] = { sizeof(LLVector3), // TYPE_VERTEX, @@ -81,6 +103,8 @@ S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] = sizeof(LLVector4), // TYPE_CLOTHWEIGHT, }; +#endif + U32 LLVertexBuffer::sGLMode[LLRender::NUM_MODES] = { GL_TRIANGLES, @@ -428,11 +452,41 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask, S32 usage) : mTypeMask = typemask; mStride = stride; + mAlignedOffset = 0; + sCount++; } +#if LL_ALIGNED_VB +//static +S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets, S32 num_vertices) +{ + S32 offset = 0; + for (S32 i=0; i<TYPE_MAX; i++) + { + U32 mask = 1<<i; + if (typemask & mask) + { + if (offsets) + { + offsets[i] = offset; + offset += LLVertexBuffer::sTypeOffsets[i]*num_vertices; + offset = (offset + 0xF) & ~0xF; + } + } + } + + return offset+16; +} + +S32 LLVertexBuffer::getSize() const +{ + return mStride; +} + +#else //static -S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets) +S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets, S32 num_vertices) { S32 stride = 0; for (S32 i=0; i<TYPE_MAX; i++) @@ -451,6 +505,12 @@ S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets) return stride; } +S32 LLVertexBuffer::getSize() const +{ + return mNumVerts*mStride; +} + +#endif // protected, use unref() //virtual LLVertexBuffer::~LLVertexBuffer() @@ -560,7 +620,7 @@ void LLVertexBuffer::createGLBuffer() { static int gl_buffer_idx = 0; mGLBuffer = ++gl_buffer_idx; - mMappedData = new U8[size]; + mMappedData = (U8*) _mm_malloc(size, 16); memset(mMappedData, 0, size); } } @@ -612,7 +672,7 @@ void LLVertexBuffer::destroyGLBuffer() } else { - delete [] mMappedData; + _mm_free(mMappedData); mMappedData = NULL; mEmpty = TRUE; } @@ -664,7 +724,7 @@ void LLVertexBuffer::updateNumVerts(S32 nverts) } mRequestedNumVerts = nverts; - + if (!mDynamicSize) { mNumVerts = nverts; @@ -679,6 +739,9 @@ void LLVertexBuffer::updateNumVerts(S32 nverts) } mNumVerts = nverts; } +#if LL_ALIGNED_VB + mStride = calcStride(mTypeMask, mOffsets, mNumVerts); +#endif } @@ -886,7 +949,11 @@ U8* LLVertexBuffer::mapBuffer(S32 access) setBuffer(0); mLocked = TRUE; stop_glerror(); - mMappedData = (U8*) glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + + U8* src = (U8*) glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + mMappedData = LL_NEXT_ALIGNED_ADDRESS<U8>(src); + mAlignedOffset = mMappedData - src; + stop_glerror(); } { @@ -975,6 +1042,45 @@ void LLVertexBuffer::unmapBuffer() //---------------------------------------------------------------------------- +#if LL_ALIGNED_VB + +template <class T,S32 type> struct VertexBufferStrider +{ + typedef LLStrider<T> strider_t; + static bool get(LLVertexBuffer& vbo, + strider_t& strider, + S32 index) + { + if (vbo.mapBuffer() == NULL) + { + llwarns << "mapBuffer failed!" << llendl; + return FALSE; + } + + if (type == LLVertexBuffer::TYPE_INDEX) + { + S32 stride = sizeof(T); + strider = (T*)(vbo.getMappedIndices() + index*stride); + strider.setStride(0); + return TRUE; + } + else if (vbo.hasDataType(type)) + { + S32 stride = LLVertexBuffer::sTypeOffsets[type]; + strider = (T*)(vbo.getMappedData() + vbo.getOffset(type)+index*stride); + strider.setStride(stride); + return TRUE; + } + else + { + llerrs << "VertexBufferStrider could not find valid vertex data." << llendl; + } + return FALSE; + } +}; + +#else + template <class T,S32 type> struct VertexBufferStrider { typedef LLStrider<T> strider_t; @@ -1010,6 +1116,7 @@ template <class T,S32 type> struct VertexBufferStrider } }; +#endif bool LLVertexBuffer::getVertexStrider(LLStrider<LLVector3>& strider, S32 index) { @@ -1272,6 +1379,82 @@ void LLVertexBuffer::setBuffer(U32 data_mask) } } +#if LL_ALIGNED_VB + +// virtual (default) +void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const +{ + LLMemType mt2(LLMemType::MTYPE_VERTEX_SETUP_VERTEX_BUFFER); + stop_glerror(); + U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData; + + if ((data_mask & mTypeMask) != data_mask) + { + llerrs << "LLVertexBuffer::setupVertexBuffer missing required components for supplied data mask." << llendl; + } + + + if (data_mask & MAP_NORMAL) + { + glNormalPointer(GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_NORMAL], (void*)(base + mOffsets[TYPE_NORMAL])); + } + if (data_mask & MAP_TEXCOORD3) + { + glClientActiveTextureARB(GL_TEXTURE3_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD3], (void*)(base + mOffsets[TYPE_TEXCOORD3])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD2) + { + glClientActiveTextureARB(GL_TEXTURE2_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD2], (void*)(base + mOffsets[TYPE_TEXCOORD2])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD1) + { + glClientActiveTextureARB(GL_TEXTURE1_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD1], (void*)(base + mOffsets[TYPE_TEXCOORD1])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_BINORMAL) + { + glClientActiveTextureARB(GL_TEXTURE2_ARB); + glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_BINORMAL], (void*)(base + mOffsets[TYPE_BINORMAL])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD0) + { + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0])); + } + if (data_mask & MAP_COLOR) + { + glColorPointer(4, GL_UNSIGNED_BYTE, LLVertexBuffer::sTypeOffsets[TYPE_COLOR], (void*)(base + mOffsets[TYPE_COLOR])); + } + + if (data_mask & MAP_WEIGHT) + { + glVertexAttribPointerARB(1, 1, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT], (void*)(base + mOffsets[TYPE_WEIGHT])); + } + + if (data_mask & MAP_WEIGHT4 && sWeight4Loc != -1) + { + glVertexAttribPointerARB(sWeight4Loc, 4, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT4], (void*)(base+mOffsets[TYPE_WEIGHT4])); + } + + if (data_mask & MAP_CLOTHWEIGHT) + { + glVertexAttribPointerARB(4, 4, GL_FLOAT, TRUE, LLVertexBuffer::sTypeOffsets[TYPE_CLOTHWEIGHT], (void*)(base + mOffsets[TYPE_CLOTHWEIGHT])); + } + if (data_mask & MAP_VERTEX) + { + glVertexPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_VERTEX], (void*)(base + 0)); + } + + llglassertok(); +} + +#else + // virtual (default) void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const { @@ -1344,6 +1527,8 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const llglassertok(); } +#endif + void LLVertexBuffer::markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count) { // TODO: use GL_APPLE_flush_buffer_range here |