/**
* @file llviewerjointmesh_sse.cpp
* @brief SSE vectorized joint skinning code, only used when video card does
* not support avatar vertex programs.
*
* *NOTE: Disabled on Windows builds. See llv4math.h for details.
*
* $LicenseInfo:firstyear=2007&license=viewerlgpl$
* Second Life Viewer Source Code
* Copyright (C) 2010, Linden Research, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License only.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
* $/LicenseInfo$
*/
//-----------------------------------------------------------------------------
// Header Files
//-----------------------------------------------------------------------------
#include "llviewerprecompiledheaders.h"
#include "llviewerjointmesh.h"
// project includes
#include "llface.h"
#include "llpolymesh.h"
// library includes
#include "lldarray.h"
#include "llv4math.h" // for LL_VECTORIZE
#include "llv4matrix3.h"
#include "llv4matrix4.h"
#include "v3math.h"
#if LL_VECTORIZE
/**
 * Copies the world matrix *w into the SSE matrix m and folds the offset j
 * into its last row:
 *
 *     m.W = w.W + j.x * w.X + j.y * w.Y + j.z * w.Z
 *
 * Rows X, Y and Z are copied unchanged.
 *
 * @param m  Destination matrix; all four rows are overwritten.
 * @param w  Source matrix. Dereferenced unconditionally, so it must not be
 *           null. Read with unaligned loads.
 * @param j  Offset whose X/Y/Z components scale the first three rows before
 *           they are added to row W.
 */
inline void matrix_translate(LLV4Matrix4& m, const LLMatrix4* w, const LLVector3& j)
{
    // Pull all four source rows into registers first.
    const __m128 row_x = _mm_loadu_ps(w->mMatrix[VX]);
    const __m128 row_y = _mm_loadu_ps(w->mMatrix[VY]);
    const __m128 row_z = _mm_loadu_ps(w->mMatrix[VZ]);
    __m128 row_w = _mm_loadu_ps(w->mMatrix[VW]);

    // Accumulate strictly in X, Y, Z order so the floating-point rounding
    // sequence stays the same: ((w + jx * x) + jy * y) + jz * z.
    row_w = _mm_add_ps(row_w, _mm_mul_ps(_mm_set1_ps(j.mV[VX]), row_x));
    row_w = _mm_add_ps(row_w, _mm_mul_ps(_mm_set1_ps(j.mV[VY]), row_y));
    row_w = _mm_add_ps(row_w, _mm_mul_ps(_mm_set1_ps(j.mV[VZ]), row_z));

    m.mV[VX] = row_x;
    m.mV[VY] = row_y;
    m.mV[VZ] = row_z;
    m.mV[VW] = row_w;
}
/**
 * SSE joint-skinning path: transforms every vertex and normal of the mesh by
 * a blend of two adjacent joint matrices and writes the results into the
 * face's vertex buffer.
 *
 * Each per-vertex weight encodes both the joint and the blend factor: its
 * integer part (floor) selects joint matrix N, and its fractional part is the
 * interpolation factor between matrix N and matrix N + 1.
 *
 * @param face  Face whose vertex buffer receives the skinned output.
 * @param mesh  Mesh supplying the source coordinates, normals, weights and
 *              (through its reference mesh) the joint render data.
 */
// static
void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh)
{
    // Deliberately function-local: a file-level static would be constructed
    // before main() using SSE code, which would crash on non-SSE processors.
    // NOTE(review): the table holds 32 matrices and nothing in this function
    // range-checks the joint count or the indices derived from the weights;
    // presumably the mesh data guarantees they fit -- confirm.
    static LLV4Matrix4 sJointMat[32];

    LLDynamicArray<LLJointRenderData*>& render_joints = mesh->getReferenceMesh()->mJointRenderData;

    // Upload joint pivots/matrices.
    const S32 joint_count = render_joints.count();
    for (S32 slot = 0; slot < joint_count; ++slot)
    {
        LLJointRenderData* entry = render_joints[slot];

        // An entry without a skin joint borrows the parent-joint offset of
        // the entry that follows it.
        // NOTE(review): this assumes such an entry is never the last one and
        // that its successor has a skin joint -- confirm against the code
        // that builds mJointRenderData.
        const LLVector3& pivot = entry->mSkinJoint
            ? entry->mSkinJoint->mRootToJointSkinOffset
            : render_joints[slot + 1]->mSkinJoint->mRootToParentJointSkinOffset;

        matrix_translate(sJointMat[slot], entry->mWorldMatrix, pivot);
    }

    // Weight the current blend matrix was built for. F32_MAX is the initial
    // "nothing cached yet" value, so the first vertex normally triggers a
    // rebuild.
    F32 cached_weight = F32_MAX;
    LLV4Matrix4 blend_mat;

    LLStrider<LLVector3> out_positions;
    LLStrider<LLVector3> out_normals;

    LLVertexBuffer *vertex_buffer = face->mVertexBuffer;
    vertex_buffer->getVertexStrider(out_positions, mesh->mFaceVertexOffset);
    vertex_buffer->getNormalStrider(out_normals, mesh->mFaceVertexOffset);

    const F32* weights = mesh->getWeights();
    const LLVector3* src_positions = (const LLVector3*)mesh->getCoords();
    const LLVector3* src_normals = (const LLVector3*)mesh->getNormals();

    const U32 vertex_count = mesh->getNumVertices();
    for (U32 v = 0; v < vertex_count; ++v)
    {
        // Rebuild the blend matrix only when the weight differs from the one
        // used for the previous vertex.
        const F32 vertex_weight = weights[v];
        if (vertex_weight != cached_weight)
        {
            cached_weight = vertex_weight;
            const S32 lower_joint = llfloor(vertex_weight);
            blend_mat.lerp(sJointMat[lower_joint], sJointMat[lower_joint + 1], vertex_weight - lower_joint);
        }

        // Positions go through the full 4x4 blend matrix; normals go through
        // its conversion to LLV4Matrix3.
        blend_mat.multiply(src_positions[v], out_positions[v]);
        static_cast<LLV4Matrix3>(blend_mat).multiply(src_normals[v], out_normals[v]);
    }

    vertex_buffer->setBuffer(0);
}
#else
/**
 * Fallback used when LL_VECTORIZE is off: no SSE implementation is compiled
 * in, so the call is forwarded unchanged to updateGeometryVectorized().
 *
 * @param face  Passed through as-is.
 * @param mesh  Passed through as-is.
 */
void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh)
{
    // Already inside LLViewerJointMesh scope, so no class qualifier is needed.
    updateGeometryVectorized(face, mesh);
}
#endif
|