/** * @file llviewerjointmesh_sse.cpp * @brief SSE vectorized joint skinning code, only used when video card does * not support avatar vertex programs. * * *NOTE: Disabled on Windows builds. See llv4math.h for details. * * $LicenseInfo:firstyear=2007&license=viewerlgpl$ * Second Life Viewer Source Code * Copyright (C) 2010, Linden Research, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License only. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ //----------------------------------------------------------------------------- // Header Files //----------------------------------------------------------------------------- #include "llviewerprecompiledheaders.h" #include "llviewerjointmesh.h" // project includes #include "llface.h" #include "llpolymesh.h" // library includes #include "lldarray.h" #include "llv4math.h" // for LL_VECTORIZE #include "llv4matrix3.h" #include "llv4matrix4.h" #include "v3math.h" #if LL_VECTORIZE inline void matrix_translate(LLV4Matrix4& m, const LLMatrix4* w, const LLVector3& j) { m.mV[VX] = _mm_loadu_ps(w->mMatrix[VX]); m.mV[VY] = _mm_loadu_ps(w->mMatrix[VY]); m.mV[VZ] = _mm_loadu_ps(w->mMatrix[VZ]); m.mV[VW] = _mm_loadu_ps(w->mMatrix[VW]); m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VX]), m.mV[VX])); // ( ax * vx ) + vw m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VY]), m.mV[VY])); m.mV[VW] = _mm_add_ps(m.mV[VW], _mm_mul_ps(_mm_set1_ps(j.mV[VZ]), m.mV[VZ])); } // static void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh) { // This cannot be a file-level static because it will be initialized // before main() using SSE code, which will crash on non-SSE processors. static LLV4Matrix4 sJointMat[32]; LLDynamicArray<LLJointRenderData*>& joint_data = mesh->getReferenceMesh()->mJointRenderData; //upload joint pivots/matrices for(S32 j = 0, jend = joint_data.count(); j < jend ; ++j ) { matrix_translate(sJointMat[j], joint_data[j]->mWorldMatrix, joint_data[j]->mSkinJoint ? joint_data[j]->mSkinJoint->mRootToJointSkinOffset : joint_data[j+1]->mSkinJoint->mRootToParentJointSkinOffset); } F32 weight = F32_MAX; LLV4Matrix4 blend_mat; LLStrider<LLVector3> o_vertices; LLStrider<LLVector3> o_normals; LLVertexBuffer *buffer = face->mVertexBuffer; buffer->getVertexStrider(o_vertices, mesh->mFaceVertexOffset); buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset); const F32* weights = mesh->getWeights(); const LLVector3* coords = mesh->getCoords(); const LLVector3* normals = mesh->getNormals(); for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index) { if( weight != weights[index]) { S32 joint = llfloor(weight = weights[index]); blend_mat.lerp(sJointMat[joint], sJointMat[joint+1], weight - joint); } blend_mat.multiply(coords[index], o_vertices[index]); ((LLV4Matrix3)blend_mat).multiply(normals[index], o_normals[index]); } buffer->setBuffer(0); } #else void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh) { LLViewerJointMesh::updateGeometryVectorized(face, mesh); } #endif