| author | Steven Bennetts <steve@lindenlab.com> | 2007-03-02 21:25:50 +0000 |
|---|---|---|
| committer | Steven Bennetts <steve@lindenlab.com> | 2007-03-02 21:25:50 +0000 |
| commit | 4dabd9c0472deb49573fdafef2fa413e59703f19 (patch) | |
| tree | 06c680d6a2047e03838d6548bccd26c7baf9d652 /indra/newview/llviewerjointmesh.cpp | |
| parent | d4462963c6ba5db2088723bbedc7b60f1184c594 (diff) | |
merge release@58699 beta-1-14-0@58707 -> release
Diffstat (limited to 'indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- | indra/newview/llviewerjointmesh.cpp | 824
1 file changed, 149 insertions, 675 deletions
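
The skinning math in this diff — both in the removed SSE/AltiVec blend paths and in the new updateGeometry() — relies on a packed per-vertex weight: the integer part selects a joint in the gJointMat palette, the fractional part blends toward the next joint, and the blend is applied once to the matrices rather than twice to each vertex and normal. Below is a minimal standalone sketch of that convention; the plain float[4][4] matrices and helper names are illustrative, not the viewer's LLMatrix4/gJointMat API.

```cpp
#include <cmath>
#include <cstdio>

typedef float Mat4[4][4];

// out = a + (b - a) * w, element by element -- the same lerp the diff spells
// out on every element of gBlendMat / gBlendRotMat.
void lerp_matrix(Mat4 a, Mat4 b, float w, Mat4 out)
{
    for (int r = 0; r < 4; ++r)
        for (int c = 0; c < 4; ++c)
            out[r][c] = a[r][c] + (b[r][c] - a[r][c]) * w;
}

// 'palette' stands in for gJointMat; 'packed_weight' is one entry of getWeights().
void blend_joint_matrix(Mat4* palette, float packed_weight, Mat4 out)
{
    int   joint = (int)std::floor(packed_weight); // integer part: lower joint index
    float w     = packed_weight - (float)joint;   // fractional part: blend factor

    // Equivalent to lerping gJointMat[joint] toward gJointMat[joint + 1] by w,
    // i.e. blending matrices instead of blending two transformed vertices.
    lerp_matrix(palette[joint], palette[joint + 1], w, out);
}

int main()
{
    Mat4 palette[2] = {}; // two dummy joints: identity, and identity translated in X
    for (int i = 0; i < 4; ++i) { palette[0][i][i] = 1.f; palette[1][i][i] = 1.f; }
    palette[1][3][0] = 2.f; // row-major translation row, as in LLMatrix4

    Mat4 blended;
    blend_joint_matrix(palette, 0.25f, blended);  // joint 0, 25% of the way to joint 1
    std::printf("blended x-translation = %f\n", blended[3][0]); // prints 0.500000
    return 0;
}
```

The per-vertex loops in the diff additionally cache the most recently blended matrix (the last_weight checks), so runs of vertices that share a packed weight skip the lerp entirely.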
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index aec15a8d6c..512ddc8565 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -19,7 +19,6 @@ #include "llfasttimer.h" #include "llagent.h" -#include "llagparray.h" #include "llbox.h" #include "lldrawable.h" #include "lldrawpoolavatar.h" @@ -43,6 +42,10 @@ extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB; #endif extern BOOL gRenderForSelect; +static LLPointer<LLVertexBuffer> sRenderBuffer = NULL; +static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX | + LLVertexBuffer::MAP_NORMAL | + LLVertexBuffer::MAP_TEXCOORD; LLMatrix4 gBlendMat; //----------------------------------------------------------------------------- @@ -375,11 +378,11 @@ void LLViewerJointMesh::setupJoint(LLViewerJoint* current_joint) } // depth-first traversal - for (LLJoint *child_joint = current_joint->mChildren.getFirstData(); - child_joint; - child_joint = current_joint->mChildren.getNextData()) + for (LLJoint::child_list_t::iterator iter = current_joint->mChildren.begin(); + iter != current_joint->mChildren.end(); ++iter) { - setupJoint((LLViewerJoint*)child_joint); + LLViewerJoint* child_joint = (LLViewerJoint*)(*iter); + setupJoint(child_joint); } } @@ -412,7 +415,7 @@ void LLViewerJointMesh::uploadJointMatrices() if (hardware_skinning) { - joint_mat *= gCamera->getModelview(); + joint_mat *= LLDrawPoolAvatar::getModelView(); } gJointMat[joint_num] = joint_mat; gJointRot[joint_num] = joint_mat.getMat3(); @@ -513,620 +516,39 @@ int compare_int(const void *a, const void *b) else return 0; } -#if LL_WINDOWS || (LL_DARWIN && __i386__) // SSE optimizations in avatar code - -#if LL_DARWIN -#include <xmmintrin.h> - - // On Windows, this class is defined in fvec.h. I've only reproduced the parts of it we use here for now. 
- #pragma pack(push,16) /* Must ensure class & union 16-B aligned */ - class F32vec4 - { - protected: - __m128 vec; - public: - - /* Constructors: __m128, 4 floats, 1 float */ - F32vec4() {} - - /* initialize 4 SP FP with __m128 data type */ - F32vec4(__m128 m) { vec = m;} - - /* Explicitly initialize each of 4 SP FPs with same float */ - explicit F32vec4(float f) { vec = _mm_set_ps1(f); } - }; - #pragma pack(pop) /* 16-B aligned */ - - -#endif - -void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, - LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) +void llDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) { - F32 last_weight = F32_MAX; - LLMatrix4 *blend_mat = &gBlendMat; - - for (S32 index = vert_offset; index < vert_offset + vert_count; index++) + if (end-start+1 > (U32) gGLManager.mGLMaxVertexRange || + count > gGLManager.mGLMaxIndexRange) { - F32 w = weights [index]; // register copy of weight - F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever - F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned - - if (w == last_weight) - { - // load input and output vertices, and last blended matrix - __asm { - mov esi, vin - mov edi, vout - - mov edx, blend_mat - movaps xmm4, [edx] - movaps xmm5, [edx+0x10] - movaps xmm6, [edx+0x20] - movaps xmm7, [edx+0x30] - } - } - else - { - last_weight = w; - S32 joint = llfloor(w); - w -= joint; - - LLMatrix4 *m0 = &(gJointMat[joint+1]); - LLMatrix4 *m1 = &(gJointMat[joint+0]); - - // some initial code to load Matrix 0 into SSE registers - __asm { - mov esi, vin - mov edi, vout - - //matrix2 - mov edx, m0 - movaps xmm4, [edx] - movaps xmm5, [edx+0x10] - movaps xmm6, [edx+0x20] - movaps xmm7, [edx+0x30] - }; - - // if w == 1.0f, we don't need to blend. - // but since we do the trick of blending the matrices, here, if w != 1.0, - // we load Matrix 1 into the other 4 SSE registers and blend both matrices - // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w) - - if (w != 1.0f) - { - F32vec4 weight(w); - - __asm { // do blending of matrices instead of verts and normals -- faster - mov edx, m1 - movaps xmm0, [edx] - movaps xmm1, [edx+0x10] - movaps xmm2, [edx+0x20] - movaps xmm3, [edx+0x30] - - subps xmm4, xmm0 // do blend for each matrix column - subps xmm5, xmm1 // diff, then multiply weight and re-add - subps xmm6, xmm2 - subps xmm7, xmm3 - - mulps xmm4, weight - mulps xmm5, weight - mulps xmm6, weight - mulps xmm7, weight - - addps xmm4, xmm0 - addps xmm5, xmm1 - addps xmm6, xmm2 - addps xmm7, xmm3 - }; - } - - __asm { - // save off blended matrix - mov edx, blend_mat; - movaps [edx], xmm4; - movaps [edx+0x10], xmm5; - movaps [edx+0x20], xmm6; - movaps [edx+0x30], xmm7; - } - } - - // now, we have either a blended matrix in xmm4-7 or the original Matrix 0 - // we then multiply each vertex and normal by this one matrix. - - // For SSE2, we would try to keep the original two matrices in other registers - // and avoid reloading them. However, they should ramain in L1 cache in the - // current case. - - // One possible optimization would be to sort the vertices by weight instead - // of just index (we still want to uniqify). 
If we note when two or more vertices - // share the same weight, we can avoid doing the middle SSE code above and just - // re-use the blended matrix for those vertices - - - // now, we do the actual vertex blending - __asm { - // load Vertex into xmm0. - movaps xmm0, [esi] // change aps to ups when input is no longer 16-baligned - movaps xmm1, xmm0 // copy vector into xmm0 through xmm2 (x,y,z) - movaps xmm2, xmm0 - shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // clone vertex (x) across vector - shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); // clone vertex (y) across vector - shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); // same for Z - mulps xmm0, xmm4 // do the actual matrix multipication for r0 - mulps xmm1, xmm5 // for r1 - mulps xmm2, xmm6 // for r2 - addps xmm0, xmm1 // accumulate - addps xmm0, xmm2 // accumulate - addps xmm0, xmm7 // add in the row 4 which holds the x,y,z translation. assumes w=1 (vertex-w, not weight) - - movaps [edi], xmm0 // store aligned in output array - - // load Normal into xmm0. - movaps xmm0, [esi + 0x10] // change aps to ups when input no longer 16-byte aligned - movaps xmm1, xmm0 // - movaps xmm2, xmm0 - shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // since UV sits between vertex and normal, normal starts at element 1, not 0 - shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); - shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); - mulps xmm0, xmm4 // multiply by matrix - mulps xmm1, xmm5 // multiply - mulps xmm2, xmm6 // multiply - addps xmm0, xmm1 // accumulate - addps xmm0, xmm2 // accumulate. note: do not add translation component to normals, save time too - movaps [edi + 0x10], xmm0 // store aligned - } - - *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot. - } -} - -#elif LL_LINUX - -void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, - LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) -{ - assert(0); -} - -#elif LL_DARWIN -// AltiVec versions of the same... 
- -static inline vector float loadAlign(int offset, vector float *addr) -{ - vector float in0 = vec_ld(offset, addr); - vector float in1 = vec_ld(offset + 16, addr); - vector unsigned char perm = vec_lvsl(0, (unsigned char*)addr); - - return(vec_perm(in0, in1, perm)); -} - -static inline void storeAlign(vector float v, int offset, vector float *addr) -{ - vector float in0 = vec_ld(offset, addr); - vector float in1 = vec_ld(offset + 16, addr); - vector unsigned char perm = vec_lvsr(0, (unsigned char *)addr); - vector float temp = vec_perm(v, v, perm); - vector unsigned char mask = (vector unsigned char)vec_cmpgt(perm, vec_splat_u8(15)); - - in0 = vec_sel(in0, temp, (vector unsigned int)mask); - in1 = vec_sel(temp, in1, (vector unsigned int)mask); - - vec_st(in0, offset, addr); - vec_st(in1, offset + 16, addr); -} - -void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output, - LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights) -{ - F32 last_weight = F32_MAX; -// LLMatrix4 &blend_mat = gBlendMat; - - vector float matrix0_0, matrix0_1, matrix0_2, matrix0_3; - vector unsigned char out0perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F ); -// vector unsigned char out1perm = (vector unsigned char) ( 0x00,0x01,0x02,0x03, 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B ); - vector unsigned char out1perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F ); - - vector float zero = (vector float)vec_splat_u32(0); - - for (U32 index = vert_offset; index < vert_offset + vert_count; index++) - { - F32 w = weights [index]; // register copy of weight - F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever - F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned - - // MBW -- XXX -- If this isn't the case, this code gets more complicated. - if(0x0000000F & (U32)vin) - { - llerrs << "blend_SSE_batch: input not 16-byte aligned!" << llendl; - } - if(0x0000000F & (U32)vout) - { - llerrs << "blend_SSE_batch: output not 16-byte aligned!" << llendl; - } -// if(0x0000000F & (U32)&(blend_mat.mMatrix)) -// { -// llerrs << "blend_SSE_batch: blend_mat not 16-byte aligned!" << llendl; -// } - - if (w == last_weight) - { - // load last blended matrix - // Still loaded from last time through the loop. -// matrix0_0 = vec_ld(0x00, (vector float*)&(blend_mat.mMatrix)); -// matrix0_1 = vec_ld(0x10, (vector float*)&(blend_mat.mMatrix)); -// matrix0_2 = vec_ld(0x20, (vector float*)&(blend_mat.mMatrix)); -// matrix0_3 = vec_ld(0x30, (vector float*)&(blend_mat.mMatrix)); - } - else - { - last_weight = w; - S32 joint = llfloor(w); - w -= joint; - - LLMatrix4 &m0 = gJointMat[joint+1]; - LLMatrix4 &m1 = gJointMat[joint+0]; - - // load Matrix 0 into vector registers - matrix0_0 = vec_ld(0x00, (vector float*)&(m0.mMatrix)); - matrix0_1 = vec_ld(0x10, (vector float*)&(m0.mMatrix)); - matrix0_2 = vec_ld(0x20, (vector float*)&(m0.mMatrix)); - matrix0_3 = vec_ld(0x30, (vector float*)&(m0.mMatrix)); - - // if w == 1.0f, we don't need to blend. 
- // but since we do the trick of blending the matrices, here, if w != 1.0, - // we load Matrix 1 into the other 4 SSE registers and blend both matrices - // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w) - - if (w != 1.0f) - { - vector float matrix1_0, matrix1_1, matrix1_2, matrix1_3; - - // This loads the weight somewhere in the vector register - vector float weight = vec_lde(0, &(w)); - // and this splats it to all elements. - weight = vec_splat(vec_perm(weight, weight, vec_lvsl(0, &(w))), 0); - - // do blending of matrices instead of verts and normals -- faster - matrix1_0 = vec_ld(0x00, (vector float*)&(m1.mMatrix)); - matrix1_1 = vec_ld(0x10, (vector float*)&(m1.mMatrix)); - matrix1_2 = vec_ld(0x20, (vector float*)&(m1.mMatrix)); - matrix1_3 = vec_ld(0x30, (vector float*)&(m1.mMatrix)); - - // m0[col] = ((m0[col] - m1[col]) * weight) + m1[col]; - matrix0_0 = vec_madd(vec_sub(matrix0_0, matrix1_0), weight, matrix1_0); - matrix0_1 = vec_madd(vec_sub(matrix0_1, matrix1_1), weight, matrix1_1); - matrix0_2 = vec_madd(vec_sub(matrix0_2, matrix1_2), weight, matrix1_2); - matrix0_3 = vec_madd(vec_sub(matrix0_3, matrix1_3), weight, matrix1_3); - } - - // save off blended matrix -// vec_st(matrix0_0, 0x00, (vector float*)&(blend_mat.mMatrix)); -// vec_st(matrix0_1, 0x10, (vector float*)&(blend_mat.mMatrix)); -// vec_st(matrix0_2, 0x20, (vector float*)&(blend_mat.mMatrix)); -// vec_st(matrix0_3, 0x30, (vector float*)&(blend_mat.mMatrix)); - } - - // now, we have either a blended matrix in matrix0_0-3 or the original Matrix 0 - // we then multiply each vertex and normal by this one matrix. - - // For SSE2, we would try to keep the original two matrices in other registers - // and avoid reloading them. However, they should ramain in L1 cache in the - // current case. - - // One possible optimization would be to sort the vertices by weight instead - // of just index (we still want to uniqify). If we note when two or more vertices - // share the same weight, we can avoid doing the middle SSE code above and just - // re-use the blended matrix for those vertices - - - // now, we do the actual vertex blending - - vector float in0 = vec_ld(AVATAR_OFFSET_POS, (vector float*)vin); - vector float in1 = vec_ld(AVATAR_OFFSET_NORMAL, (vector float*)vin); - - // Matrix multiply vertex - vector float out0 = vec_madd - ( - vec_splat(in0, 0), - matrix0_0, - vec_madd - ( - vec_splat(in0, 1), - matrix0_1, - vec_madd - ( - vec_splat(in0, 2), - matrix0_2, - matrix0_3 - ) - ) - ); - - // Matrix multiply normal - vector float out1 = vec_madd - ( - vec_splat(in1, 0), - matrix0_0, - vec_madd - ( - vec_splat(in1, 1), - matrix0_1, - vec_madd - ( - vec_splat(in1, 2), - matrix0_2, - // no translation for normals - (vector float)vec_splat_u32(0) - ) - ) - ); - - // indexed store - vec_stl(vec_perm(in0, out0, out0perm), AVATAR_OFFSET_POS, (vector float*)vout); // Pos - vec_stl(vec_perm(in1, out1, out1perm), AVATAR_OFFSET_NORMAL, (vector float*)vout); // Norm - *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot. 
- } -} - -#endif - - -void llDrawElementsBatchBlend(const U32 vert_offset, const U32 vert_count, LLFace *face, const S32 index_count, const U32 *indices) -{ - U8* gAGPVertices = gPipeline.bufferGetScratchMemory(); - - if (gAGPVertices) - { - LLStrider<LLVector3> vertices; - LLStrider<LLVector3> normals; - LLStrider<LLVector2> tcoords0; - LLStrider<F32> weights; - - LLStrider<LLVector3> o_vertices; - LLStrider<LLVector3> o_normals; - LLStrider<LLVector2> o_texcoords0; - - - LLStrider<LLVector3> binormals; - LLStrider<LLVector2> o_texcoords1; - // get the source vertices from the draw pool. We index these ourselves, as there was - // no guarantee the indices for a single jointmesh were contigious - - LLDrawPool *pool = face->getPool(); - pool->getVertexStrider (vertices, 0); - pool->getTexCoordStrider (tcoords0, 0, 0); - pool->getNormalStrider (normals, 0); - pool->getBinormalStrider (binormals, 0); - pool->getVertexWeightStrider(weights, 0); - - // load the addresses of the output striders - o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES); - o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES); - o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES); - o_texcoords1= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX1); o_texcoords1.setStride(AVATAR_VERTEX_BYTES); - -#if !LL_LINUX // !!! *TODO: do the linux implementation - if (gGLManager.mSoftwareBlendSSE) - { - // do SSE blend without binormals or extra texcoords - blend_SSE_32_32_batch(vert_offset, vert_count, (float*)gAGPVertices, - vertices, tcoords0, normals, weights); - } - else // fully backwards compatible software blending, no SSE -#endif - { - LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1; - F32 last_weight = F32_MAX; - LLMatrix3 gBlendRotMat; - - { - for (U32 index=vert_offset; index < vert_offset + vert_count; index++) - { - // blend by first matrix - F32 w = weights [index]; - - if (w != last_weight) - { - last_weight = w; - - S32 joint = llfloor(w); - w -= joint; - - LLMatrix4 &m0 = gJointMat[joint+1]; - LLMatrix4 &m1 = gJointMat[joint+0]; - LLMatrix3 &n0 = gJointRot[joint+1]; - LLMatrix3 &n1 = gJointRot[joint+0]; - - if (w == 1.0f) - { - gBlendMat = m0; - gBlendRotMat = n0; - } - else - { - gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); - gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); - gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); - - gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); - gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); - gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); - - gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); - gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); - gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); - - gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); - gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); - gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); - - gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); - gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); - gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], 
n0.mMatrix[VX][VZ], w); - - gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); - gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); - gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); - - gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); - gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); - gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); - } - } - - // write result - o_vertices [index] = vertices[index] * gBlendMat; - o_normals [index] = normals [index] * gBlendRotMat; - o_texcoords0[index] = tcoords0[index]; - - /* - // Verification code. Leave this here. It's useful for keeping the SSE and non-SSE versions in sync. - LLVector3 temp; - temp = tpos0; - if( (o_vertices[index] - temp).magVecSquared() > 0.001f ) - { - llerrs << "V SSE: " << o_vertices[index] << " v. " << temp << llendl; - } - - temp = tnorm0; - if( (o_normals[index] - temp).magVecSquared() > 0.001f ) - { - llerrs << "N SSE: " << o_normals[index] << " v. " << temp << llendl; - } - - if( (o_texcoords0[index] - tcoords0[index]).magVecSquared() > 0.001f ) - { - llerrs << "T0 SSE: " << o_texcoords0[index] << " v. " << tcoords0[index] << llendl; - } - */ - } - } - } - -#if LL_DARWIN - // *HACK* *CHOKE* *PUKE* - // No way does this belong here. - glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * vert_count, gAGPVertices + (AVATAR_VERTEX_BYTES * vert_offset)); -#endif - glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); // draw it! + glDrawElements(mode,count,type,indices); } else { - glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); + glDrawRangeElements(mode,start,end,count,type,indices); } } - - -//-------------------------------------------------------------------- -// DrawElements - -// works just like glDrawElements, except it assumes GL_TRIANGLES and GL_UNSIGNED_INT indices - -// why? because the destination buffer may not be the AGP buffer and the eyes do not use blending -// separate the eyes into their own drawpools and this code goes away. 
- -//-------------------------------------------------------------------- - -void llDrawElements(const S32 count, const U32 *indices, LLFace *face) -{ - U8* gAGPVertices = gPipeline.bufferGetScratchMemory(); - - if (gAGPVertices) - { -#if LL_DARWIN - U32 minIndex = indices[0]; - U32 maxIndex = indices[0]; -#endif - { - LLStrider<LLVector3> vertices; - LLStrider<LLVector3> normals; - LLStrider<LLVector2> tcoords; - LLStrider<F32> weights; - - LLStrider<LLVector3> o_vertices; - LLStrider<LLVector3> o_normals; - LLStrider<LLVector2> o_texcoords0; - - LLDrawPool *pool = face->getPool(); - pool->getVertexStrider (vertices,0); - pool->getNormalStrider (normals, 0); - pool->getTexCoordStrider (tcoords, 0); - - o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES); - o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES); - o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES); - - for (S32 i=0; i < count; i++) - { - U32 index = indices[i]; - - o_vertices [index] = vertices[index]; - o_normals [index] = normals [index]; - o_texcoords0[index] = tcoords [index]; - -#if LL_DARWIN - maxIndex = llmax(index, maxIndex); - minIndex = llmin(index, minIndex); -#endif - } - } - -#if LL_DARWIN - // *HACK* *CHOKE* *PUKE* - // No way does this belong here. - glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * (maxIndex + 1 - minIndex), gAGPVertices + (AVATAR_VERTEX_BYTES * minIndex)); -#endif - - glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices); - } - else - { - glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices); - } -} - - //-------------------------------------------------------------------- // LLViewerJointMesh::drawShape() //-------------------------------------------------------------------- -U32 LLViewerJointMesh::drawShape( F32 pixelArea ) +U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass) { - if (!mValid || !mVisible) return 0; - - U32 triangle_count = 0; - - //---------------------------------------------------------------- - // if no mesh bail out now - //---------------------------------------------------------------- - if ( !mMesh || !mFace) + if (!mValid || !mMesh || !mFace || !mVisible || + mFace->mVertexBuffer.isNull() || + mMesh->getNumFaces() == 0) { return 0; } - //---------------------------------------------------------------- - // if we have no faces, bail out now - //---------------------------------------------------------------- - if ( mMesh->getNumFaces() == 0 ) - { - return 0; - } + U32 triangle_count = 0; stop_glerror(); //---------------------------------------------------------------- // setup current color //---------------------------------------------------------------- - if (gRenderForSelect) - { - S32 name = mFace->getDrawable() ? mFace->getDrawable()->getVObj()->mGLName : 0; - LLColor4U color((U8)(name >> 16), (U8)(name >> 8), (U8)name, 0xff); - LLColor4 color_float(color); - - glColor4f(color_float.mV[0], color_float.mV[1], color_float.mV[2], 1.f); - } - else + if (!gRenderForSelect) { if ((mFace->getPool()->getVertexShaderLevel() > 0)) { @@ -1150,7 +572,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) stop_glerror(); -// LLGLSSpecular specular(mSpecular, gRenderForSelect ? 0.0f : mShiny); LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), gRenderForSelect ? 
0.0f : mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0)); LLGLEnable texture_2d((gRenderForSelect && isTransparent()) ? GL_TEXTURE_2D : 0); @@ -1160,11 +581,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) //---------------------------------------------------------------- llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive - //GLuint test_image_name = 0; - - // - LLGLState force_alpha_test(GL_ALPHA_TEST, isTransparent()); - if (mTestImageName) { LLImageGL::bindExternalTexture( mTestImageName, 0, GL_TEXTURE_2D ); @@ -1217,11 +633,12 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) gImageList.getImage(IMG_DEFAULT_AVATAR)->bind(); } + LLGLDisable tex(gRenderForSelect && !isTransparent() ? GL_TEXTURE_2D : 0); + if (gRenderForSelect) { if (isTransparent()) { - //gGLSObjectSelectDepthAlpha.set(); glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_ARB); glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_REPLACE); glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA_ARB, GL_MODULATE); @@ -1232,19 +649,14 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); // GL_TEXTURE_ENV_COLOR is set in renderPass1 glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); } - else - { - //gGLSObjectSelectDepth.set(); - } } else { //---------------------------------------------------------------- // by default, backface culling is enabled //---------------------------------------------------------------- - if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER) + /*if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER) { - //LLGLSPipelineAvatar gls_pipeline_avatar; LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); glClientActiveTextureARB(GL_TEXTURE0_ARB); @@ -1284,7 +696,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) } else if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_OUTER) { - //gGLSPipelineAvatarAlphaPass1.set(); glAlphaFunc(GL_GREATER, 0.1f); LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D ); @@ -1315,81 +726,48 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA); - } - else if ( isTransparent()) - { - //gGLSNoCullFaces.set(); - } - else - { - //gGLSCullFaces.set(); - } + }*/ } - if (mMesh->hasWeights()) - { - uploadJointMatrices(); + mFace->mVertexBuffer->setBuffer(sRenderMask); + U32 start = mMesh->mFaceVertexOffset; + U32 end = start + mMesh->mFaceVertexCount - 1; + U32 count = mMesh->mFaceIndexCount; + U32* indicesp = ((U32*) mFace->mVertexBuffer->getIndicesPointer()) + mMesh->mFaceIndexOffset; + if (mMesh->hasWeights()) + { if ((mFace->getPool()->getVertexShaderLevel() > 0)) { - glMatrixMode(GL_MODELVIEW); - glPushMatrix(); - glLoadIdentity(); - - glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices()); - - glPopMatrix(); + if (first_pass) + { + uploadJointMatrices(); + } + llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); } else { - if (mFace->getGeomIndex() < 0) - { - llerrs << "Invalid geometry index in LLViewerJointMesh::drawShape() " << mFace->getGeomIndex() << llendl; - } - - if ((S32)(mMesh->mFaceVertexOffset + mMesh->mFaceVertexCount) > mFace->getGeomCount()) - { - ((LLVOAvatar*)mFace->getDrawable()->getVObj())->mRoot.dump(); - llerrs << "Rendering outside of vertex bounds with mesh " << mName << " at pixel area " << pixelArea << llendl; - } - 
llDrawElementsBatchBlend(mMesh->mFaceVertexOffset, mMesh->mFaceVertexCount, - mFace, mMesh->mFaceIndexCount, mMesh->getIndices()); + llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); } - } else { glPushMatrix(); LLMatrix4 jointToWorld = getWorldMatrix(); - jointToWorld *= gCamera->getModelview(); - glLoadMatrixf((GLfloat*)jointToWorld.mMatrix); - - if ((mFace->getPool()->getVertexShaderLevel() > 0)) - { - glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices()); - } - else // this else clause handles non-weighted vertices. llDrawElements just copies and draws - { - llDrawElements(mMesh->mFaceIndexCount, mMesh->getIndices(), mFace); - } - + glMultMatrixf((GLfloat*)jointToWorld.mMatrix); + llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp); glPopMatrix(); } triangle_count += mMesh->mFaceIndexCount; - - if (gRenderForSelect) - { - glColor4fv(mColor.mV); - } - + if (mTestImageName) { glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); } - if (sRenderPass != AVATAR_RENDER_PASS_SINGLE) + /*if (sRenderPass != AVATAR_RENDER_PASS_SINGLE) { LLImageGL::unbindTexture(1, GL_TEXTURE_2D); glActiveTextureARB(GL_TEXTURE1_ARB); @@ -1402,7 +780,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_MODULATE); glAlphaFunc(GL_GREATER, 0.01f); - } + }*/ if (mTexture.notNull()) { if (!mTexture->getClampS()) { @@ -1419,19 +797,20 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea ) //----------------------------------------------------------------------------- // updateFaceSizes() //----------------------------------------------------------------------------- -void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area) +void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area) { // Do a pre-alloc pass to determine sizes of data. if (mMesh && mValid) { mMesh->mFaceVertexOffset = num_vertices; mMesh->mFaceVertexCount = mMesh->getNumVertices(); + mMesh->mFaceIndexOffset = num_indices; + mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices; + mMesh->getReferenceMesh()->mCurVertexCount = mMesh->mFaceVertexCount; - num_vertices += mMesh->getNumVertices(); - mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices; - - mMesh->getSharedData()->genIndices(mMesh->mFaceVertexOffset); + num_vertices += mMesh->getNumVertices(); + num_indices += mMesh->mFaceIndexCount; } } @@ -1441,9 +820,7 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area) void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind) { U32 i; - - if (!mValid) return; - + mFace = face; LLStrider<LLVector3> verticesp; @@ -1452,13 +829,15 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w LLStrider<LLVector2> tex_coordsp; LLStrider<F32> vertex_weightsp; LLStrider<LLVector4> clothing_weightsp; + LLStrider<U32> indicesp; // Copy data into the faces from the polymesh data. 
- if (mMesh) + if (mMesh && mValid) { if (mMesh->getNumVertices()) { S32 index = face->getGeometryAvatar(verticesp, normalsp, binormalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); + face->mVertexBuffer->getIndexStrider(indicesp); if (-1 == index) { @@ -1474,11 +853,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i); if (damp_wind) { - clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(0,0,0,0); + clothing_weightsp[mMesh->mFaceVertexOffset + i] = LLVector4(0,0,0,0); } else { - clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(*(mMesh->getClothingWeights() + i)); + clothing_weightsp[mMesh->mFaceVertexOffset + i] = (*(mMesh->getClothingWeights() + i)); + } + } + + for (S32 i = 0; i < mMesh->getNumFaces(); i++) + { + for (U32 j = 0; j < 3; j++) + { + U32 k = i*3+j+mMesh->mFaceIndexOffset; + indicesp[k] = mMesh->getFaces()[i][j] + mMesh->mFaceVertexOffset; } } } @@ -1495,6 +883,92 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate) return (valid != activate); } +void LLViewerJointMesh::updateGeometry() +{ + if (mValid && mMesh && mFace && + mMesh->hasWeights() && + mFace->mVertexBuffer.notNull() && + gPipeline.getVertexShaderLevel(LLPipeline::SHADER_AVATAR) == 0) + { + uploadJointMatrices(); + LLStrider<LLVector3> o_vertices; + LLStrider<LLVector3> o_normals; + + //get vertex and normal striders + LLVertexBuffer *buffer = mFace->mVertexBuffer; + buffer->getVertexStrider(o_vertices, 0); + buffer->getNormalStrider(o_normals, 0); + + { + LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1; + F32 last_weight = F32_MAX; + LLMatrix3 gBlendRotMat; + + + for (U32 index= 0; index < mMesh->getNumVertices(); index++) + { + // blend by first matrix + F32 w = mMesh->getWeights()[index]; + + if (w != last_weight) + { + last_weight = w; + + S32 joint = llfloor(w); + w -= joint; + + LLMatrix4 &m0 = gJointMat[joint+1]; + LLMatrix4 &m1 = gJointMat[joint+0]; + LLMatrix3 &n0 = gJointRot[joint+1]; + LLMatrix3 &n1 = gJointRot[joint+0]; + + if (w == 1.0f) + { + gBlendMat = m0; + gBlendRotMat = n0; + } + else + { + gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w); + gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w); + gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w); + + gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w); + gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w); + gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w); + + gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w); + gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w); + gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w); + + gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w); + gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w); + gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w); + + gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w); + gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w); + gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w); + + gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w); + gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w); + 
gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w); + + gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w); + gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w); + gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w); + } + } + + // write result + U32 bidx = index + mMesh->mFaceVertexOffset; + + o_vertices[bidx] = mMesh->getCoords()[index] * gBlendMat; + o_normals[bidx] = mMesh->getNormals()[index] * gBlendRotMat; + } + } + } +} + void LLViewerJointMesh::dump() { if (mValid) |
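
The new llDrawRangeElements() helper above guards glDrawRangeElements() with the driver-reported limits in gGLManager (mGLMaxVertexRange / mGLMaxIndexRange) and falls back to plain glDrawElements() when a mesh exceeds them. The sketch below shows the same guard in isolation; querying GL_MAX_ELEMENTS_VERTICES / GL_MAX_ELEMENTS_INDICES inline is an assumption about where gGLManager's cached values come from, and the function name is hypothetical.

```cpp
// Assumes GL 1.2+ headers/entry points are available (glDrawRangeElements,
// GL_MAX_ELEMENTS_VERTICES/INDICES); on some platforms these come from an
// extension loader rather than <GL/gl.h>.
#include <GL/gl.h>

void draw_range_elements_safe(GLenum mode, GLuint start, GLuint end,
                              GLsizei count, GLenum type, const GLvoid* indices)
{
    GLint max_vertices = 0;
    GLint max_indices  = 0;
    glGetIntegerv(GL_MAX_ELEMENTS_VERTICES, &max_vertices); // driver's preferred vertex range
    glGetIntegerv(GL_MAX_ELEMENTS_INDICES,  &max_indices);  // driver's preferred index count

    if (end - start + 1 > (GLuint)max_vertices || count > max_indices)
    {
        // Too big for the driver's fast path: drop the range hint entirely.
        glDrawElements(mode, count, type, indices);
    }
    else
    {
        glDrawRangeElements(mode, start, end, count, type, indices);
    }
}
```

In a real renderer the two limits would be queried once at startup and cached, which appears to be what gGLManager does, rather than calling glGetIntegerv on every draw.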