summary | refs | log | tree | commit | diff
path: root/indra/newview/llviewerjointmesh.cpp
diff options
context:
space:
mode:
author: Steven Bennetts <steve@lindenlab.com>, 2007-03-02 21:25:50 +0000
committer: Steven Bennetts <steve@lindenlab.com>, 2007-03-02 21:25:50 +0000
commit: 4dabd9c0472deb49573fdafef2fa413e59703f19 (patch)
tree: 06c680d6a2047e03838d6548bccd26c7baf9d652 /indra/newview/llviewerjointmesh.cpp
parent: d4462963c6ba5db2088723bbedc7b60f1184c594 (diff)
merge release@58699 beta-1-14-0@58707 -> release
Diffstat (limited to 'indra/newview/llviewerjointmesh.cpp')
-rw-r--r-- indra/newview/llviewerjointmesh.cpp | 824
1 files changed, 149 insertions, 675 deletions
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index aec15a8d6c..512ddc8565 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -19,7 +19,6 @@
#include "llfasttimer.h"
#include "llagent.h"
-#include "llagparray.h"
#include "llbox.h"
#include "lldrawable.h"
#include "lldrawpoolavatar.h"
@@ -43,6 +42,10 @@ extern PFNGLVERTEXBLENDARBPROC glVertexBlendARB;
#endif
extern BOOL gRenderForSelect;
+static LLPointer<LLVertexBuffer> sRenderBuffer = NULL;
+static const U32 sRenderMask = LLVertexBuffer::MAP_VERTEX |
+ LLVertexBuffer::MAP_NORMAL |
+ LLVertexBuffer::MAP_TEXCOORD;
LLMatrix4 gBlendMat;
//-----------------------------------------------------------------------------
@@ -375,11 +378,11 @@ void LLViewerJointMesh::setupJoint(LLViewerJoint* current_joint)
}
// depth-first traversal
- for (LLJoint *child_joint = current_joint->mChildren.getFirstData();
- child_joint;
- child_joint = current_joint->mChildren.getNextData())
+ for (LLJoint::child_list_t::iterator iter = current_joint->mChildren.begin();
+ iter != current_joint->mChildren.end(); ++iter)
{
- setupJoint((LLViewerJoint*)child_joint);
+ LLViewerJoint* child_joint = (LLViewerJoint*)(*iter);
+ setupJoint(child_joint);
}
}
@@ -412,7 +415,7 @@ void LLViewerJointMesh::uploadJointMatrices()
if (hardware_skinning)
{
- joint_mat *= gCamera->getModelview();
+ joint_mat *= LLDrawPoolAvatar::getModelView();
}
gJointMat[joint_num] = joint_mat;
gJointRot[joint_num] = joint_mat.getMat3();
@@ -513,620 +516,39 @@ int compare_int(const void *a, const void *b)
else return 0;
}
-#if LL_WINDOWS || (LL_DARWIN && __i386__) // SSE optimizations in avatar code
-
-#if LL_DARWIN
-#include <xmmintrin.h>
-
- // On Windows, this class is defined in fvec.h. I've only reproduced the parts of it we use here for now.
- #pragma pack(push,16) /* Must ensure class & union 16-B aligned */
- class F32vec4
- {
- protected:
- __m128 vec;
- public:
-
- /* Constructors: __m128, 4 floats, 1 float */
- F32vec4() {}
-
- /* initialize 4 SP FP with __m128 data type */
- F32vec4(__m128 m) { vec = m;}
-
- /* Explicitly initialize each of 4 SP FPs with same float */
- explicit F32vec4(float f) { vec = _mm_set_ps1(f); }
- };
- #pragma pack(pop) /* 16-B aligned */
-
-
-#endif
-
-void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output,
- LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights)
+void llDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices)
{
- F32 last_weight = F32_MAX;
- LLMatrix4 *blend_mat = &gBlendMat;
-
- for (S32 index = vert_offset; index < vert_offset + vert_count; index++)
+ if (end-start+1 > (U32) gGLManager.mGLMaxVertexRange ||
+ count > gGLManager.mGLMaxIndexRange)
{
- F32 w = weights [index]; // register copy of weight
- F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever
- F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned
-
- if (w == last_weight)
- {
- // load input and output vertices, and last blended matrix
- __asm {
- mov esi, vin
- mov edi, vout
-
- mov edx, blend_mat
- movaps xmm4, [edx]
- movaps xmm5, [edx+0x10]
- movaps xmm6, [edx+0x20]
- movaps xmm7, [edx+0x30]
- }
- }
- else
- {
- last_weight = w;
- S32 joint = llfloor(w);
- w -= joint;
-
- LLMatrix4 *m0 = &(gJointMat[joint+1]);
- LLMatrix4 *m1 = &(gJointMat[joint+0]);
-
- // some initial code to load Matrix 0 into SSE registers
- __asm {
- mov esi, vin
- mov edi, vout
-
- //matrix2
- mov edx, m0
- movaps xmm4, [edx]
- movaps xmm5, [edx+0x10]
- movaps xmm6, [edx+0x20]
- movaps xmm7, [edx+0x30]
- };
-
- // if w == 1.0f, we don't need to blend.
- // but since we do the trick of blending the matrices, here, if w != 1.0,
- // we load Matrix 1 into the other 4 SSE registers and blend both matrices
- // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w)
-
- if (w != 1.0f)
- {
- F32vec4 weight(w);
-
- __asm { // do blending of matrices instead of verts and normals -- faster
- mov edx, m1
- movaps xmm0, [edx]
- movaps xmm1, [edx+0x10]
- movaps xmm2, [edx+0x20]
- movaps xmm3, [edx+0x30]
-
- subps xmm4, xmm0 // do blend for each matrix column
- subps xmm5, xmm1 // diff, then multiply weight and re-add
- subps xmm6, xmm2
- subps xmm7, xmm3
-
- mulps xmm4, weight
- mulps xmm5, weight
- mulps xmm6, weight
- mulps xmm7, weight
-
- addps xmm4, xmm0
- addps xmm5, xmm1
- addps xmm6, xmm2
- addps xmm7, xmm3
- };
- }
-
- __asm {
- // save off blended matrix
- mov edx, blend_mat;
- movaps [edx], xmm4;
- movaps [edx+0x10], xmm5;
- movaps [edx+0x20], xmm6;
- movaps [edx+0x30], xmm7;
- }
- }
-
- // now, we have either a blended matrix in xmm4-7 or the original Matrix 0
- // we then multiply each vertex and normal by this one matrix.
-
- // For SSE2, we would try to keep the original two matrices in other registers
- // and avoid reloading them. However, they should ramain in L1 cache in the
- // current case.
-
- // One possible optimization would be to sort the vertices by weight instead
- // of just index (we still want to uniqify). If we note when two or more vertices
- // share the same weight, we can avoid doing the middle SSE code above and just
- // re-use the blended matrix for those vertices
-
-
- // now, we do the actual vertex blending
- __asm {
- // load Vertex into xmm0.
- movaps xmm0, [esi] // change aps to ups when input is no longer 16-baligned
- movaps xmm1, xmm0 // copy vector into xmm0 through xmm2 (x,y,z)
- movaps xmm2, xmm0
- shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // clone vertex (x) across vector
- shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1); // clone vertex (y) across vector
- shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2); // same for Z
- mulps xmm0, xmm4 // do the actual matrix multipication for r0
- mulps xmm1, xmm5 // for r1
- mulps xmm2, xmm6 // for r2
- addps xmm0, xmm1 // accumulate
- addps xmm0, xmm2 // accumulate
- addps xmm0, xmm7 // add in the row 4 which holds the x,y,z translation. assumes w=1 (vertex-w, not weight)
-
- movaps [edi], xmm0 // store aligned in output array
-
- // load Normal into xmm0.
- movaps xmm0, [esi + 0x10] // change aps to ups when input no longer 16-byte aligned
- movaps xmm1, xmm0 //
- movaps xmm2, xmm0
- shufps xmm0, xmm0, _MM_SHUFFLE(0,0,0,0); // since UV sits between vertex and normal, normal starts at element 1, not 0
- shufps xmm1, xmm1, _MM_SHUFFLE(1,1,1,1);
- shufps xmm2, xmm2, _MM_SHUFFLE(2,2,2,2);
- mulps xmm0, xmm4 // multiply by matrix
- mulps xmm1, xmm5 // multiply
- mulps xmm2, xmm6 // multiply
- addps xmm0, xmm1 // accumulate
- addps xmm0, xmm2 // accumulate. note: do not add translation component to normals, save time too
- movaps [edi + 0x10], xmm0 // store aligned
- }
-
- *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot.
- }
-}
-
-#elif LL_LINUX
-
-void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output,
- LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights)
-{
- assert(0);
-}
-
-#elif LL_DARWIN
-// AltiVec versions of the same...
-
-static inline vector float loadAlign(int offset, vector float *addr)
-{
- vector float in0 = vec_ld(offset, addr);
- vector float in1 = vec_ld(offset + 16, addr);
- vector unsigned char perm = vec_lvsl(0, (unsigned char*)addr);
-
- return(vec_perm(in0, in1, perm));
-}
-
-static inline void storeAlign(vector float v, int offset, vector float *addr)
-{
- vector float in0 = vec_ld(offset, addr);
- vector float in1 = vec_ld(offset + 16, addr);
- vector unsigned char perm = vec_lvsr(0, (unsigned char *)addr);
- vector float temp = vec_perm(v, v, perm);
- vector unsigned char mask = (vector unsigned char)vec_cmpgt(perm, vec_splat_u8(15));
-
- in0 = vec_sel(in0, temp, (vector unsigned int)mask);
- in1 = vec_sel(temp, in1, (vector unsigned int)mask);
-
- vec_st(in0, offset, addr);
- vec_st(in1, offset + 16, addr);
-}
-
-void blend_SSE_32_32_batch(const int vert_offset, const int vert_count, float* output,
- LLStrider<LLVector3>& vertices, LLStrider<LLVector2>& texcoords, LLStrider<LLVector3>& normals, LLStrider<F32>& weights)
-{
- F32 last_weight = F32_MAX;
-// LLMatrix4 &blend_mat = gBlendMat;
-
- vector float matrix0_0, matrix0_1, matrix0_2, matrix0_3;
- vector unsigned char out0perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F );
-// vector unsigned char out1perm = (vector unsigned char) ( 0x00,0x01,0x02,0x03, 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B );
- vector unsigned char out1perm = (vector unsigned char) ( 0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17, 0x18,0x19,0x1A,0x1B, 0x0C,0x0D,0x0E,0x0F );
-
- vector float zero = (vector float)vec_splat_u32(0);
-
- for (U32 index = vert_offset; index < vert_offset + vert_count; index++)
- {
- F32 w = weights [index]; // register copy of weight
- F32 *vin = &vertices[index].mV[0]; // pointer to input vertex data, assumed to be V3+T2+N3+whatever
- F32 *vout = output + index * (AVATAR_VERTEX_BYTES/sizeof(F32)); // pointer to the output vertex data, assumed to be 16 byte aligned
-
- // MBW -- XXX -- If this isn't the case, this code gets more complicated.
- if(0x0000000F & (U32)vin)
- {
- llerrs << "blend_SSE_batch: input not 16-byte aligned!" << llendl;
- }
- if(0x0000000F & (U32)vout)
- {
- llerrs << "blend_SSE_batch: output not 16-byte aligned!" << llendl;
- }
-// if(0x0000000F & (U32)&(blend_mat.mMatrix))
-// {
-// llerrs << "blend_SSE_batch: blend_mat not 16-byte aligned!" << llendl;
-// }
-
- if (w == last_weight)
- {
- // load last blended matrix
- // Still loaded from last time through the loop.
-// matrix0_0 = vec_ld(0x00, (vector float*)&(blend_mat.mMatrix));
-// matrix0_1 = vec_ld(0x10, (vector float*)&(blend_mat.mMatrix));
-// matrix0_2 = vec_ld(0x20, (vector float*)&(blend_mat.mMatrix));
-// matrix0_3 = vec_ld(0x30, (vector float*)&(blend_mat.mMatrix));
- }
- else
- {
- last_weight = w;
- S32 joint = llfloor(w);
- w -= joint;
-
- LLMatrix4 &m0 = gJointMat[joint+1];
- LLMatrix4 &m1 = gJointMat[joint+0];
-
- // load Matrix 0 into vector registers
- matrix0_0 = vec_ld(0x00, (vector float*)&(m0.mMatrix));
- matrix0_1 = vec_ld(0x10, (vector float*)&(m0.mMatrix));
- matrix0_2 = vec_ld(0x20, (vector float*)&(m0.mMatrix));
- matrix0_3 = vec_ld(0x30, (vector float*)&(m0.mMatrix));
-
- // if w == 1.0f, we don't need to blend.
- // but since we do the trick of blending the matrices, here, if w != 1.0,
- // we load Matrix 1 into the other 4 SSE registers and blend both matrices
- // based on the weight (which we load ingo a 16-byte aligned vector: w,w,w,w)
-
- if (w != 1.0f)
- {
- vector float matrix1_0, matrix1_1, matrix1_2, matrix1_3;
-
- // This loads the weight somewhere in the vector register
- vector float weight = vec_lde(0, &(w));
- // and this splats it to all elements.
- weight = vec_splat(vec_perm(weight, weight, vec_lvsl(0, &(w))), 0);
-
- // do blending of matrices instead of verts and normals -- faster
- matrix1_0 = vec_ld(0x00, (vector float*)&(m1.mMatrix));
- matrix1_1 = vec_ld(0x10, (vector float*)&(m1.mMatrix));
- matrix1_2 = vec_ld(0x20, (vector float*)&(m1.mMatrix));
- matrix1_3 = vec_ld(0x30, (vector float*)&(m1.mMatrix));
-
- // m0[col] = ((m0[col] - m1[col]) * weight) + m1[col];
- matrix0_0 = vec_madd(vec_sub(matrix0_0, matrix1_0), weight, matrix1_0);
- matrix0_1 = vec_madd(vec_sub(matrix0_1, matrix1_1), weight, matrix1_1);
- matrix0_2 = vec_madd(vec_sub(matrix0_2, matrix1_2), weight, matrix1_2);
- matrix0_3 = vec_madd(vec_sub(matrix0_3, matrix1_3), weight, matrix1_3);
- }
-
- // save off blended matrix
-// vec_st(matrix0_0, 0x00, (vector float*)&(blend_mat.mMatrix));
-// vec_st(matrix0_1, 0x10, (vector float*)&(blend_mat.mMatrix));
-// vec_st(matrix0_2, 0x20, (vector float*)&(blend_mat.mMatrix));
-// vec_st(matrix0_3, 0x30, (vector float*)&(blend_mat.mMatrix));
- }
-
- // now, we have either a blended matrix in matrix0_0-3 or the original Matrix 0
- // we then multiply each vertex and normal by this one matrix.
-
- // For SSE2, we would try to keep the original two matrices in other registers
- // and avoid reloading them. However, they should ramain in L1 cache in the
- // current case.
-
- // One possible optimization would be to sort the vertices by weight instead
- // of just index (we still want to uniqify). If we note when two or more vertices
- // share the same weight, we can avoid doing the middle SSE code above and just
- // re-use the blended matrix for those vertices
-
-
- // now, we do the actual vertex blending
-
- vector float in0 = vec_ld(AVATAR_OFFSET_POS, (vector float*)vin);
- vector float in1 = vec_ld(AVATAR_OFFSET_NORMAL, (vector float*)vin);
-
- // Matrix multiply vertex
- vector float out0 = vec_madd
- (
- vec_splat(in0, 0),
- matrix0_0,
- vec_madd
- (
- vec_splat(in0, 1),
- matrix0_1,
- vec_madd
- (
- vec_splat(in0, 2),
- matrix0_2,
- matrix0_3
- )
- )
- );
-
- // Matrix multiply normal
- vector float out1 = vec_madd
- (
- vec_splat(in1, 0),
- matrix0_0,
- vec_madd
- (
- vec_splat(in1, 1),
- matrix0_1,
- vec_madd
- (
- vec_splat(in1, 2),
- matrix0_2,
- // no translation for normals
- (vector float)vec_splat_u32(0)
- )
- )
- );
-
- // indexed store
- vec_stl(vec_perm(in0, out0, out0perm), AVATAR_OFFSET_POS, (vector float*)vout); // Pos
- vec_stl(vec_perm(in1, out1, out1perm), AVATAR_OFFSET_NORMAL, (vector float*)vout); // Norm
- *(LLVector2*)(vout + (AVATAR_OFFSET_TEX0/sizeof(F32))) = texcoords[index]; // write texcoord into appropriate spot.
- }
-}
-
-#endif
-
-
-void llDrawElementsBatchBlend(const U32 vert_offset, const U32 vert_count, LLFace *face, const S32 index_count, const U32 *indices)
-{
- U8* gAGPVertices = gPipeline.bufferGetScratchMemory();
-
- if (gAGPVertices)
- {
- LLStrider<LLVector3> vertices;
- LLStrider<LLVector3> normals;
- LLStrider<LLVector2> tcoords0;
- LLStrider<F32> weights;
-
- LLStrider<LLVector3> o_vertices;
- LLStrider<LLVector3> o_normals;
- LLStrider<LLVector2> o_texcoords0;
-
-
- LLStrider<LLVector3> binormals;
- LLStrider<LLVector2> o_texcoords1;
- // get the source vertices from the draw pool. We index these ourselves, as there was
- // no guarantee the indices for a single jointmesh were contigious
-
- LLDrawPool *pool = face->getPool();
- pool->getVertexStrider (vertices, 0);
- pool->getTexCoordStrider (tcoords0, 0, 0);
- pool->getNormalStrider (normals, 0);
- pool->getBinormalStrider (binormals, 0);
- pool->getVertexWeightStrider(weights, 0);
-
- // load the addresses of the output striders
- o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES);
- o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES);
- o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES);
- o_texcoords1= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX1); o_texcoords1.setStride(AVATAR_VERTEX_BYTES);
-
-#if !LL_LINUX // !!! *TODO: do the linux implementation
- if (gGLManager.mSoftwareBlendSSE)
- {
- // do SSE blend without binormals or extra texcoords
- blend_SSE_32_32_batch(vert_offset, vert_count, (float*)gAGPVertices,
- vertices, tcoords0, normals, weights);
- }
- else // fully backwards compatible software blending, no SSE
-#endif
- {
- LLVector4 tpos0, tnorm0, tpos1, tnorm1, tbinorm0, tbinorm1;
- F32 last_weight = F32_MAX;
- LLMatrix3 gBlendRotMat;
-
- {
- for (U32 index=vert_offset; index < vert_offset + vert_count; index++)
- {
- // blend by first matrix
- F32 w = weights [index];
-
- if (w != last_weight)
- {
- last_weight = w;
-
- S32 joint = llfloor(w);
- w -= joint;
-
- LLMatrix4 &m0 = gJointMat[joint+1];
- LLMatrix4 &m1 = gJointMat[joint+0];
- LLMatrix3 &n0 = gJointRot[joint+1];
- LLMatrix3 &n1 = gJointRot[joint+0];
-
- if (w == 1.0f)
- {
- gBlendMat = m0;
- gBlendRotMat = n0;
- }
- else
- {
- gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
- gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
- gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w);
-
- gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w);
- gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w);
- gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w);
-
- gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w);
- gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w);
- gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w);
-
- gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w);
- gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w);
- gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w);
-
- gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
- gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
- gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w);
-
- gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w);
- gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w);
- gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w);
-
- gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w);
- gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w);
- gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w);
- }
- }
-
- // write result
- o_vertices [index] = vertices[index] * gBlendMat;
- o_normals [index] = normals [index] * gBlendRotMat;
- o_texcoords0[index] = tcoords0[index];
-
- /*
- // Verification code. Leave this here. It's useful for keeping the SSE and non-SSE versions in sync.
- LLVector3 temp;
- temp = tpos0;
- if( (o_vertices[index] - temp).magVecSquared() > 0.001f )
- {
- llerrs << "V SSE: " << o_vertices[index] << " v. " << temp << llendl;
- }
-
- temp = tnorm0;
- if( (o_normals[index] - temp).magVecSquared() > 0.001f )
- {
- llerrs << "N SSE: " << o_normals[index] << " v. " << temp << llendl;
- }
-
- if( (o_texcoords0[index] - tcoords0[index]).magVecSquared() > 0.001f )
- {
- llerrs << "T0 SSE: " << o_texcoords0[index] << " v. " << tcoords0[index] << llendl;
- }
- */
- }
- }
- }
-
-#if LL_DARWIN
- // *HACK* *CHOKE* *PUKE*
- // No way does this belong here.
- glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * vert_count, gAGPVertices + (AVATAR_VERTEX_BYTES * vert_offset));
-#endif
- glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices); // draw it!
+ glDrawElements(mode,count,type,indices);
}
else
{
- glDrawElements(GL_TRIANGLES, index_count, GL_UNSIGNED_INT, indices);
+ glDrawRangeElements(mode,start,end,count,type,indices);
}
}
-
-
-//--------------------------------------------------------------------
-// DrawElements
-
-// works just like glDrawElements, except it assumes GL_TRIANGLES and GL_UNSIGNED_INT indices
-
-// why? because the destination buffer may not be the AGP buffer and the eyes do not use blending
-// separate the eyes into their own drawpools and this code goes away.
-
-//--------------------------------------------------------------------
-
-void llDrawElements(const S32 count, const U32 *indices, LLFace *face)
-{
- U8* gAGPVertices = gPipeline.bufferGetScratchMemory();
-
- if (gAGPVertices)
- {
-#if LL_DARWIN
- U32 minIndex = indices[0];
- U32 maxIndex = indices[0];
-#endif
- {
- LLStrider<LLVector3> vertices;
- LLStrider<LLVector3> normals;
- LLStrider<LLVector2> tcoords;
- LLStrider<F32> weights;
-
- LLStrider<LLVector3> o_vertices;
- LLStrider<LLVector3> o_normals;
- LLStrider<LLVector2> o_texcoords0;
-
- LLDrawPool *pool = face->getPool();
- pool->getVertexStrider (vertices,0);
- pool->getNormalStrider (normals, 0);
- pool->getTexCoordStrider (tcoords, 0);
-
- o_vertices = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_POS); o_vertices.setStride( AVATAR_VERTEX_BYTES);
- o_normals = (LLVector3*)(gAGPVertices + AVATAR_OFFSET_NORMAL); o_normals.setStride( AVATAR_VERTEX_BYTES);
- o_texcoords0= (LLVector2*)(gAGPVertices + AVATAR_OFFSET_TEX0); o_texcoords0.setStride(AVATAR_VERTEX_BYTES);
-
- for (S32 i=0; i < count; i++)
- {
- U32 index = indices[i];
-
- o_vertices [index] = vertices[index];
- o_normals [index] = normals [index];
- o_texcoords0[index] = tcoords [index];
-
-#if LL_DARWIN
- maxIndex = llmax(index, maxIndex);
- minIndex = llmin(index, minIndex);
-#endif
- }
- }
-
-#if LL_DARWIN
- // *HACK* *CHOKE* *PUKE*
- // No way does this belong here.
- glFlushVertexArrayRangeAPPLE(AVATAR_VERTEX_BYTES * (maxIndex + 1 - minIndex), gAGPVertices + (AVATAR_VERTEX_BYTES * minIndex));
-#endif
-
- glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices);
- }
- else
- {
- glDrawElements(GL_TRIANGLES, count, GL_UNSIGNED_INT, indices);
- }
-}
-
-
//--------------------------------------------------------------------
// LLViewerJointMesh::drawShape()
//--------------------------------------------------------------------
-U32 LLViewerJointMesh::drawShape( F32 pixelArea )
+U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass)
{
- if (!mValid || !mVisible) return 0;
-
- U32 triangle_count = 0;
-
- //----------------------------------------------------------------
- // if no mesh bail out now
- //----------------------------------------------------------------
- if ( !mMesh || !mFace)
+ if (!mValid || !mMesh || !mFace || !mVisible ||
+ mFace->mVertexBuffer.isNull() ||
+ mMesh->getNumFaces() == 0)
{
return 0;
}
- //----------------------------------------------------------------
- // if we have no faces, bail out now
- //----------------------------------------------------------------
- if ( mMesh->getNumFaces() == 0 )
- {
- return 0;
- }
+ U32 triangle_count = 0;
stop_glerror();
//----------------------------------------------------------------
// setup current color
//----------------------------------------------------------------
- if (gRenderForSelect)
- {
- S32 name = mFace->getDrawable() ? mFace->getDrawable()->getVObj()->mGLName : 0;
- LLColor4U color((U8)(name >> 16), (U8)(name >> 8), (U8)name, 0xff);
- LLColor4 color_float(color);
-
- glColor4f(color_float.mV[0], color_float.mV[1], color_float.mV[2], 1.f);
- }
- else
+ if (!gRenderForSelect)
{
if ((mFace->getPool()->getVertexShaderLevel() > 0))
{
@@ -1150,7 +572,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
stop_glerror();
-// LLGLSSpecular specular(mSpecular, gRenderForSelect ? 0.0f : mShiny);
LLGLSSpecular specular(LLColor4(1.f,1.f,1.f,1.f), gRenderForSelect ? 0.0f : mShiny && !(mFace->getPool()->getVertexShaderLevel() > 0));
LLGLEnable texture_2d((gRenderForSelect && isTransparent()) ? GL_TEXTURE_2D : 0);
@@ -1160,11 +581,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
//----------------------------------------------------------------
llassert( !(mTexture.notNull() && mLayerSet) ); // mutually exclusive
- //GLuint test_image_name = 0;
-
- //
- LLGLState force_alpha_test(GL_ALPHA_TEST, isTransparent());
-
if (mTestImageName)
{
LLImageGL::bindExternalTexture( mTestImageName, 0, GL_TEXTURE_2D );
@@ -1217,11 +633,12 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
gImageList.getImage(IMG_DEFAULT_AVATAR)->bind();
}
+ LLGLDisable tex(gRenderForSelect && !isTransparent() ? GL_TEXTURE_2D : 0);
+
if (gRenderForSelect)
{
if (isTransparent())
{
- //gGLSObjectSelectDepthAlpha.set();
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE_ARB);
glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_REPLACE);
glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA_ARB, GL_MODULATE);
@@ -1232,19 +649,14 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE); // GL_TEXTURE_ENV_COLOR is set in renderPass1
glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA);
}
- else
- {
- //gGLSObjectSelectDepth.set();
- }
}
else
{
//----------------------------------------------------------------
// by default, backface culling is enabled
//----------------------------------------------------------------
- if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER)
+ /*if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_INNER)
{
- //LLGLSPipelineAvatar gls_pipeline_avatar;
LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D );
glClientActiveTextureARB(GL_TEXTURE0_ARB);
@@ -1284,7 +696,6 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
}
else if (sRenderPass == AVATAR_RENDER_PASS_CLOTHING_OUTER)
{
- //gGLSPipelineAvatarAlphaPass1.set();
glAlphaFunc(GL_GREATER, 0.1f);
LLImageGL::bindExternalTexture( sClothingMaskImageName, 1, GL_TEXTURE_2D );
@@ -1315,81 +726,48 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
glTexEnvi(GL_TEXTURE_ENV, GL_SOURCE0_ALPHA_ARB, GL_TEXTURE);
glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA_ARB, GL_SRC_ALPHA);
- }
- else if ( isTransparent())
- {
- //gGLSNoCullFaces.set();
- }
- else
- {
- //gGLSCullFaces.set();
- }
+ }*/
}
- if (mMesh->hasWeights())
- {
- uploadJointMatrices();
+ mFace->mVertexBuffer->setBuffer(sRenderMask);
+ U32 start = mMesh->mFaceVertexOffset;
+ U32 end = start + mMesh->mFaceVertexCount - 1;
+ U32 count = mMesh->mFaceIndexCount;
+ U32* indicesp = ((U32*) mFace->mVertexBuffer->getIndicesPointer()) + mMesh->mFaceIndexOffset;
+ if (mMesh->hasWeights())
+ {
if ((mFace->getPool()->getVertexShaderLevel() > 0))
{
- glMatrixMode(GL_MODELVIEW);
- glPushMatrix();
- glLoadIdentity();
-
- glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices());
-
- glPopMatrix();
+ if (first_pass)
+ {
+ uploadJointMatrices();
+ }
+ llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp);
}
else
{
- if (mFace->getGeomIndex() < 0)
- {
- llerrs << "Invalid geometry index in LLViewerJointMesh::drawShape() " << mFace->getGeomIndex() << llendl;
- }
-
- if ((S32)(mMesh->mFaceVertexOffset + mMesh->mFaceVertexCount) > mFace->getGeomCount())
- {
- ((LLVOAvatar*)mFace->getDrawable()->getVObj())->mRoot.dump();
- llerrs << "Rendering outside of vertex bounds with mesh " << mName << " at pixel area " << pixelArea << llendl;
- }
- llDrawElementsBatchBlend(mMesh->mFaceVertexOffset, mMesh->mFaceVertexCount,
- mFace, mMesh->mFaceIndexCount, mMesh->getIndices());
+ llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp);
}
-
}
else
{
glPushMatrix();
LLMatrix4 jointToWorld = getWorldMatrix();
- jointToWorld *= gCamera->getModelview();
- glLoadMatrixf((GLfloat*)jointToWorld.mMatrix);
-
- if ((mFace->getPool()->getVertexShaderLevel() > 0))
- {
- glDrawElements(GL_TRIANGLES, mMesh->mFaceIndexCount, GL_UNSIGNED_INT, mMesh->getIndices());
- }
- else // this else clause handles non-weighted vertices. llDrawElements just copies and draws
- {
- llDrawElements(mMesh->mFaceIndexCount, mMesh->getIndices(), mFace);
- }
-
+ glMultMatrixf((GLfloat*)jointToWorld.mMatrix);
+ llDrawRangeElements(GL_TRIANGLES, start, end, count, GL_UNSIGNED_INT, indicesp);
glPopMatrix();
}
triangle_count += mMesh->mFaceIndexCount;
-
- if (gRenderForSelect)
- {
- glColor4fv(mColor.mV);
- }
-
+
if (mTestImageName)
{
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
}
- if (sRenderPass != AVATAR_RENDER_PASS_SINGLE)
+ /*if (sRenderPass != AVATAR_RENDER_PASS_SINGLE)
{
LLImageGL::unbindTexture(1, GL_TEXTURE_2D);
glActiveTextureARB(GL_TEXTURE1_ARB);
@@ -1402,7 +780,7 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB_ARB, GL_MODULATE);
glAlphaFunc(GL_GREATER, 0.01f);
- }
+ }*/
if (mTexture.notNull()) {
if (!mTexture->getClampS()) {
@@ -1419,19 +797,20 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea )
//-----------------------------------------------------------------------------
// updateFaceSizes()
//-----------------------------------------------------------------------------
-void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area)
+void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area)
{
// Do a pre-alloc pass to determine sizes of data.
if (mMesh && mValid)
{
mMesh->mFaceVertexOffset = num_vertices;
mMesh->mFaceVertexCount = mMesh->getNumVertices();
+ mMesh->mFaceIndexOffset = num_indices;
+ mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices;
+
mMesh->getReferenceMesh()->mCurVertexCount = mMesh->mFaceVertexCount;
- num_vertices += mMesh->getNumVertices();
- mMesh->mFaceIndexCount = mMesh->getSharedData()->mNumTriangleIndices;
-
- mMesh->getSharedData()->genIndices(mMesh->mFaceVertexOffset);
+ num_vertices += mMesh->getNumVertices();
+ num_indices += mMesh->mFaceIndexCount;
}
}
@@ -1441,9 +820,7 @@ void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, F32 pixel_area)
void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind)
{
U32 i;
-
- if (!mValid) return;
-
+
mFace = face;
LLStrider<LLVector3> verticesp;
@@ -1452,13 +829,15 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
LLStrider<LLVector2> tex_coordsp;
LLStrider<F32> vertex_weightsp;
LLStrider<LLVector4> clothing_weightsp;
+ LLStrider<U32> indicesp;
// Copy data into the faces from the polymesh data.
- if (mMesh)
+ if (mMesh && mValid)
{
if (mMesh->getNumVertices())
{
S32 index = face->getGeometryAvatar(verticesp, normalsp, binormalsp, tex_coordsp, vertex_weightsp, clothing_weightsp);
+ face->mVertexBuffer->getIndexStrider(indicesp);
if (-1 == index)
{
@@ -1474,11 +853,20 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
vertex_weightsp[mMesh->mFaceVertexOffset + i] = *(mMesh->getWeights() + i);
if (damp_wind)
{
- clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(0,0,0,0);
+ clothing_weightsp[mMesh->mFaceVertexOffset + i] = LLVector4(0,0,0,0);
}
else
{
- clothing_weightsp[mMesh->mFaceVertexOffset + i].setVec(*(mMesh->getClothingWeights() + i));
+ clothing_weightsp[mMesh->mFaceVertexOffset + i] = (*(mMesh->getClothingWeights() + i));
+ }
+ }
+
+ for (S32 i = 0; i < mMesh->getNumFaces(); i++)
+ {
+ for (U32 j = 0; j < 3; j++)
+ {
+ U32 k = i*3+j+mMesh->mFaceIndexOffset;
+ indicesp[k] = mMesh->getFaces()[i][j] + mMesh->mFaceVertexOffset;
}
}
}
@@ -1495,6 +883,92 @@ BOOL LLViewerJointMesh::updateLOD(F32 pixel_area, BOOL activate)
return (valid != activate);
}
+//-----------------------------------------------------------------------------
+// updateGeometry()
+//
+// CPU skinning path. Does nothing unless this joint mesh is valid, has a mesh
+// with weights, has a face whose vertex buffer is non-null, and the avatar
+// vertex shader level is 0. In that case every mesh vertex and normal is
+// transformed by a blend of two joint matrices and written into the face's
+// vertex buffer at mFaceVertexOffset + vertex index.
+//
+// Side effects: calls uploadJointMatrices() and overwrites the file-level
+// gBlendMat.
+//-----------------------------------------------------------------------------
+void LLViewerJointMesh::updateGeometry()
+{
+    if (mValid && mMesh && mFace &&
+        mMesh->hasWeights() &&
+        mFace->mVertexBuffer.notNull() &&
+        gPipeline.getVertexShaderLevel(LLPipeline::SHADER_AVATAR) == 0)
+    {
+        // The blend below reads gJointMat / gJointRot, so refresh them first.
+        uploadJointMatrices();
+        LLStrider<LLVector3> o_vertices;
+        LLStrider<LLVector3> o_normals;
+
+        //get vertex and normal striders
+        LLVertexBuffer *buffer = mFace->mVertexBuffer;
+        buffer->getVertexStrider(o_vertices, 0);
+        buffer->getNormalStrider(o_normals, 0);
+
+        // Loop invariants: read once instead of on every iteration.
+        const U32 vertex_count = mMesh->getNumVertices();
+        const U32 vertex_offset = mMesh->mFaceVertexOffset;
+
+        // F32_MAX acts as a "no weight seen yet" sentinel so the first vertex
+        // always builds the blend matrices.
+        F32 last_weight = F32_MAX;
+
+        // Rotation-only counterpart of gBlendMat, used for normals. It is a
+        // plain local, so it does not carry the file's 'g' (global) prefix.
+        LLMatrix3 blend_rot_mat;
+
+        for (U32 index = 0; index < vertex_count; index++)
+        {
+            // The integer part of the weight selects the joint pair; the
+            // fractional part blends from joint (m1/n1) toward joint+1 (m0/n0).
+            F32 w = mMesh->getWeights()[index];
+
+            // Only rebuild the blend matrices when the weight differs from
+            // the previous vertex's weight.
+            if (w != last_weight)
+            {
+                last_weight = w;
+
+                S32 joint = llfloor(w);
+                w -= joint;
+
+                LLMatrix4 &m0 = gJointMat[joint+1];
+                LLMatrix4 &m1 = gJointMat[joint+0];
+                LLMatrix3 &n0 = gJointRot[joint+1];
+                LLMatrix3 &n1 = gJointRot[joint+0];
+
+                if (w == 1.0f)
+                {
+                    gBlendMat = m0;
+                    blend_rot_mat = n0;
+                }
+                else
+                {
+                    // Only the VX..VZ columns of each row are interpolated;
+                    // the VW column of gBlendMat keeps its previous value.
+                    gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX], m0.mMatrix[VX][VX], w);
+                    gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY], m0.mMatrix[VX][VY], w);
+                    gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ], m0.mMatrix[VX][VZ], w);
+
+                    gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX], m0.mMatrix[VY][VX], w);
+                    gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY], m0.mMatrix[VY][VY], w);
+                    gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ], m0.mMatrix[VY][VZ], w);
+
+                    gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX], m0.mMatrix[VZ][VX], w);
+                    gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY], m0.mMatrix[VZ][VY], w);
+                    gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ], m0.mMatrix[VZ][VZ], w);
+
+                    gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX], m0.mMatrix[VW][VX], w);
+                    gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY], m0.mMatrix[VW][VY], w);
+                    gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ], m0.mMatrix[VW][VZ], w);
+
+                    blend_rot_mat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX], n0.mMatrix[VX][VX], w);
+                    blend_rot_mat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY], n0.mMatrix[VX][VY], w);
+                    blend_rot_mat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ], n0.mMatrix[VX][VZ], w);
+
+                    blend_rot_mat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX], n0.mMatrix[VY][VX], w);
+                    blend_rot_mat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY], n0.mMatrix[VY][VY], w);
+                    blend_rot_mat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ], n0.mMatrix[VY][VZ], w);
+
+                    blend_rot_mat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX], n0.mMatrix[VZ][VX], w);
+                    blend_rot_mat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY], n0.mMatrix[VZ][VY], w);
+                    blend_rot_mat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ], n0.mMatrix[VZ][VZ], w);
+                }
+            }
+
+            // write result: mesh-local vertex index -> slot in the face's buffer
+            const U32 bidx = index + vertex_offset;
+
+            o_vertices[bidx] = mMesh->getCoords()[index] * gBlendMat;
+            o_normals[bidx] = mMesh->getNormals()[index] * blend_rot_mat;
+        }
+    }
+}
+
void LLViewerJointMesh::dump()
{
if (mValid)