summary | refs | log | tree | commit | diff
path: root/indra/llmath/llvolume.cpp
diff options
context:
space:
mode:
author: Dave Parks <davep@lindenlab.com> 2010-08-19 12:25:15 -0500
committer: Dave Parks <davep@lindenlab.com> 2010-08-19 12:25:15 -0500
commit: 2fea1d5d33ec1b41a3cfa4307a1bfa58d8014f88 (patch)
tree: 0438f2363b2a91a5ffe970a8130faa118f260e7e /indra/llmath/llvolume.cpp
parent: bd0b3a2ddeafaf0d1669ede7ab5aee22d8da9af7 (diff)
Integrate SIMD API from oreh/server-trunk-oreh
Diffstat (limited to 'indra/llmath/llvolume.cpp')
-rw-r--r-- indra/llmath/llvolume.cpp 123
1 files changed, 62 insertions, 61 deletions
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index bba0a6d089..ab9f8c4c24 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -45,7 +45,7 @@
#include "v4math.h"
#include "m4math.h"
#include "m3math.h"
-#include "llmatrix4a.h"
+#include "llmatrix3a.h"
#include "lloctree.h"
#include "lldarray.h"
#include "llvolume.h"
@@ -53,6 +53,7 @@
#include "llstl.h"
#include "llsdserialize.h"
#include "llvector4a.h"
+#include "llmatrix4a.h"
#define DEBUG_SILHOUETTE_BINORMALS 0
#define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette
@@ -161,7 +162,7 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co
LLVector4a det;
det.setAllDot3(edge1, pvec);
- if (det.greaterEqual4(LLVector4a::getApproximatelyZero()).getComparisonMask() & 0x7)
+ if (det.greaterEqual(LLVector4a::getEpsilon()).getGatheredBits() & 0x7)
{
/* calculate distance from vert0 to ray origin */
LLVector4a tvec;
@@ -171,8 +172,8 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co
LLVector4a u;
u.setAllDot3(tvec,pvec);
- if ((u.greaterEqual4(LLVector4a::getZero()).getComparisonMask() & 0x7) &&
- (u.lessEqual4(det).getComparisonMask() & 0x7))
+ if ((u.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7) &&
+ (u.lessEqual(det).getGatheredBits() & 0x7))
{
/* prepare to test V parameter */
LLVector4a qvec;
@@ -188,8 +189,8 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co
LLVector4a sum_uv;
sum_uv.setAdd(u, v);
- S32 v_gequal = v.greaterEqual4(LLVector4a::getZero()).getComparisonMask() & 0x7;
- S32 sum_lequal = sum_uv.lessEqual4(det).getComparisonMask() & 0x7;
+ S32 v_gequal = v.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7;
+ S32 sum_lequal = sum_uv.lessEqual(det).getGatheredBits() & 0x7;
if (v_gequal && sum_lequal)
{
@@ -230,7 +231,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
pvec.setCross3(dir, edge2);
/* if determinant is near zero, ray lies in plane of triangle */
- F32 det = edge1.dot3(pvec);
+ F32 det = edge1.dot3(pvec).getF32();
if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO)
@@ -245,7 +246,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
tvec.setSub(orig, vert0);
/* calculate U parameter and test bounds */
- u = (tvec.dot3(pvec)) * inv_det;
+ u = (tvec.dot3(pvec).getF32()) * inv_det;
if (u < 0.f || u > 1.f)
{
return FALSE;
@@ -255,7 +256,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
tvec.sub(edge1);
/* calculate V parameter and test bounds */
- v = (dir.dot3(tvec)) * inv_det;
+ v = (dir.dot3(tvec).getF32()) * inv_det;
if (v < 0.f || u + v > 1.f)
{
@@ -263,7 +264,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
}
/* calculate t, ray intersects triangle */
- t = (edge2.dot3(tvec)) * inv_det;
+ t = (edge2.dot3(tvec).getF32()) * inv_det;
intersection_a = u;
intersection_b = v;
@@ -326,20 +327,20 @@ public:
//stretch by triangles in node
tri = *iter;
- min.setMin(*tri->mV[0]);
- min.setMin(*tri->mV[1]);
- min.setMin(*tri->mV[2]);
+ min.setMin(min, *tri->mV[0]);
+ min.setMin(min, *tri->mV[1]);
+ min.setMin(min, *tri->mV[2]);
- max.setMax(*tri->mV[0]);
- max.setMax(*tri->mV[1]);
- max.setMax(*tri->mV[2]);
+ max.setMax(max, *tri->mV[0]);
+ max.setMax(max, *tri->mV[1]);
+ max.setMax(max, *tri->mV[2]);
}
for (S32 i = 0; i < branch->getChildCount(); ++i)
{ //stretch by child extents
LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0);
- min.setMin(child->mExtents[0]);
- max.setMax(child->mExtents[1]);
+ min.setMin(min, child->mExtents[0]);
+ max.setMax(max, child->mExtents[1]);
}
}
else if (branch->getChildCount() != 0)
@@ -352,8 +353,8 @@ public:
for (S32 i = 1; i < branch->getChildCount(); ++i)
{ //stretch by child extents
child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0);
- min.setMin(child->mExtents[0]);
- max.setMax(child->mExtents[1]);
+ min.setMin(min, child->mExtents[0]);
+ max.setMax(max, child->mExtents[1]);
}
}
else
@@ -2011,7 +2012,7 @@ const LLVolumeFace::VertexData& LLVolumeFace::VertexData::operator=(const LLVolu
if (this != &rhs)
{
init();
- LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8);
+ LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8*sizeof(F32));
mTexCoord = rhs.mTexCoord;
}
return *this;
@@ -2055,8 +2056,8 @@ void LLVolumeFace::VertexData::setNormal(const LLVector4a& norm)
bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)const
{
- const F32* lp = this->getPosition().getF32();
- const F32* rp = rhs.getPosition().getF32();
+ const F32* lp = this->getPosition().getF32ptr();
+ const F32* rp = rhs.getPosition().getF32ptr();
if (lp[0] != rp[0])
{
@@ -2073,8 +2074,8 @@ bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)con
return lp[2] < rp[2];
}
- lp = getNormal().getF32();
- rp = rhs.getNormal().getF32();
+ lp = getNormal().getF32ptr();
+ rp = rhs.getNormal().getF32ptr();
if (lp[0] != rp[0])
{
@@ -2101,23 +2102,23 @@ bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)con
bool LLVolumeFace::VertexData::operator==(const LLVolumeFace::VertexData& rhs)const
{
- return mData[POSITION].equal3(rhs.getPosition()) &&
- mData[NORMAL].equal3(rhs.getNormal()) &&
+ return mData[POSITION].equals3(rhs.getPosition()) &&
+ mData[NORMAL].equals3(rhs.getNormal()) &&
mTexCoord == rhs.mTexCoord;
}
bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs, F32 angle_cutoff) const
{
bool retval = false;
- if (rhs.mData[POSITION].equal3(mData[POSITION]) && rhs.mTexCoord == mTexCoord)
+ if (rhs.mData[POSITION].equals3(mData[POSITION]) && rhs.mTexCoord == mTexCoord)
{
if (angle_cutoff > 1.f)
{
- retval = (mData[NORMAL].equal3(rhs.mData[NORMAL]));
+ retval = (mData[NORMAL].equals3(rhs.mData[NORMAL]));
}
else
{
- F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]);
+ F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]).getF32();
retval = cur_angle > angle_cutoff;
}
}
@@ -2331,8 +2332,8 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
}
else
{
- min.setMin(*pos_out);
- max.setMax(*pos_out);
+ min.setMin(min, *pos_out);
+ max.setMax(max, *pos_out);
}
pos_out++;
@@ -2944,7 +2945,7 @@ void sculpt_calc_mesh_resolution(U16 width, U16 height, U8 type, F32 detail, S32
ratio = (F32) width / (F32) height;
- s = (S32)fsqrtf(((F32)vertices / ratio));
+ s = (S32)(F32) sqrt(((F32)vertices / ratio));
s = llmax(s, 4); // no degenerate sizes, please
t = vertices / s;
@@ -5280,16 +5281,15 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
freeData();
- LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 12);
+ LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 12*sizeof(F32));
resizeVertices(src.mNumVertices);
resizeIndices(src.mNumIndices);
if (mNumVertices)
{
- S32 vert_size = mNumVertices*4;
+ S32 vert_size = mNumVertices*4*sizeof(F32);
S32 tc_size = (mNumVertices*8+0xF) & ~0xF;
- tc_size /= 4;
LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size);
LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size);
@@ -5322,8 +5322,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
if (mNumIndices)
{
S32 idx_size = (mNumIndices*2+0xF) & ~0xF;
- idx_size /= 4;
-
+
LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size);
}
@@ -5388,9 +5387,9 @@ void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv)
bool LLVolumeFace::VertexMapData::operator==(const LLVolumeFace::VertexData& rhs) const
{
- return getPosition().equal3(rhs.getPosition()) &&
+ return getPosition().equals3(rhs.getPosition()) &&
mTexCoord == rhs.mTexCoord &&
- getNormal().equal3(rhs.getNormal());
+ getNormal().equals3(rhs.getNormal());
}
bool LLVolumeFace::VertexMapData::ComparePosition::operator()(const LLVector3& a, const LLVector3& b) const
@@ -5423,7 +5422,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
getVertexData(index, cv);
BOOL found = FALSE;
- VertexMapData::PointMap::iterator point_iter = point_map.find(LLVector3(cv.getPosition().getF32()));
+ VertexMapData::PointMap::iterator point_iter = point_map.find(LLVector3(cv.getPosition().getF32ptr()));
if (point_iter != point_map.end())
{ //duplicate point might exist
for (U32 j = 0; j < point_iter->second.size(); ++j)
@@ -5455,7 +5454,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
}
else
{
- point_map[LLVector3(d.getPosition().getF32())].push_back(d);
+ point_map[LLVector3(d.getPosition().getF32ptr())].push_back(d);
}
}
}
@@ -5491,12 +5490,12 @@ void LLVolumeFace::createOctree()
tri->mIndex[2] = mIndices[i+2];
LLVector4a min = v0;
- min.setMin(v1);
- min.setMin(v2);
+ min.setMin(min, v1);
+ min.setMin(min, v2);
LLVector4a max = v0;
- max.setMax(v1);
- max.setMax(v2);
+ max.setMax(max, v1);
+ max.setMax(max, v2);
LLVector4a center;
center.setAdd(min, max);
@@ -5507,7 +5506,7 @@ void LLVolumeFace::createOctree()
LLVector4a size;
size.setSub(max,min);
- tri->mRadius = size.length3() * 0.5f;
+ tri->mRadius = size.getLength3().getF32() * 0.5f;
mOctree->insert(tri);
}
@@ -5655,12 +5654,13 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
if (gx == 0 && gy == 0)
{
- min = max = newVert.getPosition();
+ min = newVert.getPosition();
+ max = min;
}
else
{
- min.setMin(newVert.getPosition());
- max.setMax(newVert.getPosition());
+ min.setMin(min, newVert.getPosition());
+ max.setMax(max, newVert.getPosition());
}
}
}
@@ -5795,7 +5795,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
if (i == 0)
{
- min = max = pos[i];
+ max = pos[i];
+ min = max;
min_uv = max_uv = tc[i];
}
else
@@ -5848,8 +5849,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
for (S32 i = 0; i < num_vertices; i++)
{
- binorm[i].load4a((F32*) &binormal.mQ);
- norm[i].load4a((F32*) &normal.mQ);
+ binorm[i].load4a(binormal.getF32ptr());
+ norm[i].load4a(normal.getF32ptr());
}
if (partial_build)
@@ -6186,7 +6187,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
LLVector4a* dst = (LLVector4a*) ll_aligned_malloc_16(new_size);
if (mPositions)
{
- LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, old_size/4);
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, old_size);
ll_aligned_free_16(mPositions);
}
mPositions = dst;
@@ -6195,7 +6196,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
dst = (LLVector4a*) ll_aligned_malloc_16(new_size);
if (mNormals)
{
- LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, old_size/4);
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, old_size);
ll_aligned_free_16(mNormals);
}
mNormals = dst;
@@ -6209,7 +6210,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
LLVector2* dst = (LLVector2*) ll_aligned_malloc_16(new_size);
if (mTexCoords)
{
- LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, old_size/4);
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, old_size);
ll_aligned_free_16(mTexCoords);
}
}
@@ -6268,7 +6269,7 @@ void LLVolumeFace::pushIndex(const U16& idx)
U16* dst = (U16*) ll_aligned_malloc_16(new_size);
if (mIndices)
{
- LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, old_size/4);
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, old_size);
ll_aligned_free_16(mIndices);
}
mIndices = dst;
@@ -6319,9 +6320,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
if (mNumVertices > 0)
{ //copy old buffers
- LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, mNumVertices*4);
- LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, mNumVertices*4);
- LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, mNumVertices*2);
+ LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, mNumVertices*4*sizeof(F32));
+ LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, mNumVertices*4*sizeof(F32));
+ LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, mNumVertices*2*sizeof(F32));
}
//free old buffer space
@@ -6382,7 +6383,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
if (mNumIndices > 0)
{ //copy old index buffer
S32 old_size = (mNumIndices*2+0xF) & ~0xF;
- LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, old_size/4);
+ LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, old_size);
}
//free old index buffer