summaryrefslogtreecommitdiff
path: root/indra/llmath
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llmath')
-rw-r--r--indra/llmath/llmatrix4a.h11
-rw-r--r--indra/llmath/llvector4a.inl13
-rw-r--r--indra/llmath/llvolume.cpp1593
-rw-r--r--indra/llmath/llvolume.h79
4 files changed, 608 insertions, 1088 deletions
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index c4cefdb4fa..d141298f69 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -107,15 +107,14 @@ public:
inline void rotate(const LLVector4a& v, LLVector4a& res)
{
+ LLVector4a y,z;
+
res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
- res.mul(mMatrix[0]);
-
- LLVector4a y;
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
- y.mul(mMatrix[1]);
-
- LLVector4a z;
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
+
+ res.mul(mMatrix[0]);
+ y.mul(mMatrix[1]);
z.mul(mMatrix[2]);
res.add(y);
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 7c52ffef21..35a67204ec 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -460,16 +460,13 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs)
mQ = _mm_max_ps(lhs.mQ, rhs.mQ);
}
-// Set this to (c * lhs) + rhs * ( 1 - c)
+// Set this to lhs + (rhs-lhs)*c
inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c)
{
- LLVector4a a = lhs;
- a.mul(c);
-
- LLVector4a b = rhs;
- b.mul(1.f-c);
-
- setAdd(a, b);
+ LLVector4a t;
+ t.setSub(rhs,lhs);
+ t.mul(c);
+ setAdd(lhs, t);
}
inline LLBool32 LLVector4a::isFinite3() const
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index f989e8ed17..9fc72fd801 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -94,6 +94,95 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1;
extern BOOL gDebugGL;
+bool less_than_max_mag(const LLVector4a& vec);
+
+template <class T, U32 alignment>
+LLAlignedArray<T, alignment>::LLAlignedArray()
+{
+ mArray = NULL;
+ mElementCount = 0;
+ mCapacity = 0;
+}
+
+template <class T, U32 alignment>
+LLAlignedArray<T, alignment>::~LLAlignedArray()
+{
+ ll_aligned_free(mArray);
+ mArray = NULL;
+ mElementCount = 0;
+ mCapacity = 0;
+}
+
+template <class T, U32 alignment>
+void LLAlignedArray<T, alignment>::push_back(const T& elem)
+{
+ T* old_buf = NULL;
+ if (mCapacity <= mElementCount)
+ {
+ mCapacity++;
+ mCapacity *= 2;
+ T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment);
+ if (mArray)
+ {
+ LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount);
+ }
+ old_buf = mArray;
+ mArray = new_buf;
+ }
+
+ mArray[mElementCount++] = elem;
+
+ //delete old array here to prevent error on a.push_back(a[0])
+ ll_aligned_free(old_buf);
+}
+
+template <class T, U32 alignment>
+void LLAlignedArray<T, alignment>::resize(U32 size)
+{
+ if (mCapacity < size)
+ {
+ mCapacity = size+mCapacity*2;
+ T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL;
+ if (mArray)
+ {
+ LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount);
+ ll_aligned_free(mArray);
+ }
+
+ /*for (U32 i = mElementCount; i < mCapacity; ++i)
+ {
+ new(new_buf+i) T();
+ }*/
+ mArray = new_buf;
+ }
+
+ mElementCount = size;
+}
+
+
+template <class T, U32 alignment>
+T& LLAlignedArray<T, alignment>::operator[](int idx)
+{
+ llassert(idx < mElementCount);
+ return mArray[idx];
+}
+
+template <class T, U32 alignment>
+const T& LLAlignedArray<T, alignment>::operator[](int idx) const
+{
+ llassert(idx < mElementCount);
+ return mArray[idx];
+}
+
+template <class T, U32 alignment>
+T* LLAlignedArray<T, alignment>::append(S32 N)
+{
+ U32 sz = size();
+ resize(sz+N);
+ return &((*this)[sz]);
+}
+
+
BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
{
LLVector3 test = (pt2-pt1)%(pt3-pt2);
@@ -474,7 +563,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f };
F32 scale = 0.5f;
F32 t, t_step, t_first, t_fraction, ang, ang_step;
- LLVector3 pt1,pt2;
+ LLVector4a pt1,pt2;
F32 begin = params.getBegin();
F32 end = params.getEnd();
@@ -497,20 +586,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
// Starting t and ang values for the first face
t = t_first;
ang = 2.0f*F_PI*(t*ang_scale + offset);
- pt1.setVec(cos(ang)*scale,sin(ang)*scale, t);
+ pt1.set(cos(ang)*scale,sin(ang)*scale, t);
// Increment to the next point.
// pt2 is the end point on the fractional face
t += t_step;
ang += ang_step;
- pt2.setVec(cos(ang)*scale,sin(ang)*scale,t);
+ pt2.set(cos(ang)*scale,sin(ang)*scale,t);
t_fraction = (begin - t_first)*sides;
// Only use if it's not almost exactly on an edge.
if (t_fraction < 0.9999f)
{
- LLVector3 new_pt = lerp(pt1, pt2, t_fraction);
+ LLVector4a new_pt;
+ new_pt.setLerp(pt1, pt2, t_fraction);
mProfile.push_back(new_pt);
}
@@ -518,12 +608,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
while (t < end)
{
// Iterate through all the integer steps of t.
- pt1.setVec(cos(ang)*scale,sin(ang)*scale,t);
+ pt1.set(cos(ang)*scale,sin(ang)*scale,t);
if (mProfile.size() > 0) {
- LLVector3 p = mProfile[mProfile.size()-1];
+ LLVector4a p = mProfile[mProfile.size()-1];
for (S32 i = 0; i < split && mProfile.size() > 0; i++) {
- mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1));
+ //mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1));
+ LLVector4a new_pt;
+ new_pt.setSub(pt1, p);
+ new_pt.mul(1.0f/(float)(split+1) * (float)(i+1));
+ new_pt.add(p);
+ mProfile.push_back(new_pt);
}
}
mProfile.push_back(pt1);
@@ -536,18 +631,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
// pt1 is the first point on the fractional face
// pt2 is the end point on the fractional face
- pt2.setVec(cos(ang)*scale,sin(ang)*scale,t);
+ pt2.set(cos(ang)*scale,sin(ang)*scale,t);
// Find the fraction that we need to add to the end point.
t_fraction = (end - (t - t_step))*sides;
if (t_fraction > 0.0001f)
{
- LLVector3 new_pt = lerp(pt1, pt2, t_fraction);
+ LLVector4a new_pt;
+ new_pt.setLerp(pt1, pt2, t_fraction);
if (mProfile.size() > 0) {
- LLVector3 p = mProfile[mProfile.size()-1];
+ LLVector4a p = mProfile[mProfile.size()-1];
for (S32 i = 0; i < split && mProfile.size() > 0; i++) {
- mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1));
+ //mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1));
+
+ LLVector4a pt1;
+ pt1.setSub(new_pt, p);
+ pt1.mul(1.0f/(float)(split+1) * (float)(i+1));
+ pt1.add(p);
+ mProfile.push_back(pt1);
}
}
mProfile.push_back(new_pt);
@@ -568,7 +670,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
if (params.getHollow() <= 0)
{
// put center point if not hollow.
- mProfile.push_back(LLVector3(0,0,0));
+ mProfile.push_back(LLVector4a(0,0,0));
}
}
else
@@ -581,103 +683,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3
mTotal = mProfile.size();
}
-void LLProfile::genNormals(const LLProfileParams& params)
-{
- S32 count = mProfile.size();
-
- S32 outer_count;
- if (mTotalOut)
- {
- outer_count = mTotalOut;
- }
- else
- {
- outer_count = mTotal / 2;
- }
-
- mEdgeNormals.resize(count * 2);
- mEdgeCenters.resize(count * 2);
- mNormals.resize(count);
-
- LLVector2 pt0,pt1;
-
- BOOL hollow = (params.getHollow() > 0);
-
- S32 i0, i1, i2, i3, i4;
-
- // Parametrically generate normal
- for (i2 = 0; i2 < count; i2++)
- {
- mNormals[i2].mV[0] = mProfile[i2].mV[0];
- mNormals[i2].mV[1] = mProfile[i2].mV[1];
- if (hollow && (i2 >= outer_count))
- {
- mNormals[i2] *= -1.f;
- }
- if (mNormals[i2].magVec() < 0.001)
- {
- // Special case for point at center, get adjacent points.
- i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1;
- i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1;
- i3 = (i2 + 1) < count ? i2 + 1 : 0;
- i4 = (i3 + 1) < count ? i3 + 1 : 0;
-
- pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX],
- mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]);
- pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX],
- mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]);
-
- mNormals[i2] = pt0 + pt1;
- mNormals[i2] *= 0.5f;
- }
- mNormals[i2].normVec();
- }
-
- S32 num_normal_sets = isConcave() ? 2 : 1;
- for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++)
- {
- S32 point_num;
- for (point_num = 0; point_num < mTotal; point_num++)
- {
- LLVector3 point_1 = mProfile[point_num];
- point_1.mV[VZ] = 0.f;
-
- LLVector3 point_2;
-
- if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2)
- {
- point_2 = mProfile[mTotal - 1];
- }
- else if (isConcave() && normal_set == 1 && point_num == mTotal - 1)
- {
- point_2 = mProfile[(mTotal - 1) / 2];
- }
- else
- {
- LLVector3 delta_pos;
- S32 neighbor_point = (point_num + 1) % mTotal;
- while(delta_pos.magVecSquared() < 0.01f * 0.01f)
- {
- point_2 = mProfile[neighbor_point];
- delta_pos = point_2 - point_1;
- neighbor_point = (neighbor_point + 1) % mTotal;
- if (neighbor_point == point_num)
- {
- break;
- }
- }
- }
-
- point_2.mV[VZ] = 0.f;
- LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis;
- face_normal.normVec();
- mEdgeNormals[normal_set * count + point_num] = face_normal;
- mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f);
- }
- }
-}
-
-
// Hollow is percent of the original bounding box, not of this particular
// profile's geometry. Thus, a swept triangle needs lower hollow values than
// a swept square.
@@ -693,12 +698,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3
Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat);
- std::vector<LLVector3> pt;
+ static LLAlignedArray<LLVector4a,64> pt;
pt.resize(mTotal) ;
for (S32 i=mTotalOut;i<mTotal;i++)
{
- pt[i] = mProfile[i] * box_hollow;
+ pt[i] = mProfile[i];
+ pt[i].mul(box_hollow);
}
S32 j=mTotal-1;
@@ -844,8 +850,8 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
detail = MIN_LOD;
}
- mProfile.clear();
- mFaces.clear();
+ mProfile.resize(0);
+ mFaces.resize(0);
// Generate the face data
S32 i;
@@ -877,10 +883,12 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
addFace((face_num++) * (split +1), split+2, 1, LL_FACE_OUTER_SIDE_0 << i, TRUE);
}
+ LLVector4a scale(1,1,4,1);
+
for (i = 0; i <(S32) mProfile.size(); i++)
{
// Scale by 4 to generate proper tex coords.
- mProfile[i].mV[2] *= 4.f;
+ mProfile[i].mul(scale);
}
if (hollow)
@@ -913,10 +921,11 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
case LL_PCODE_PROFILE_EQUALTRI:
{
genNGon(params, 3,0, 0, 1, split);
+ LLVector4a scale(1,1,3,1);
for (i = 0; i <(S32) mProfile.size(); i++)
{
// Scale by 3 to generate proper tex coords.
- mProfile[i].mV[2] *= 3.f;
+ mProfile[i].mul(scale);
}
if (path_open)
@@ -1094,8 +1103,6 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai
addFace(mTotal-2, 2,0.5,LL_FACE_PROFILE_END, TRUE);
}
}
-
- //genNormals(params);
return TRUE;
}
@@ -1379,25 +1386,29 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en
// the path begins at the correct cut.
F32 step= 1.0f / sides;
F32 t = params.getBegin();
- pt = vector_append(mPath, 1);
+ pt = mPath.append(1);
ang = 2.0f*F_PI*revolutions * t;
s = sin(ang)*lerp(radius_start, radius_end, t);
c = cos(ang)*lerp(radius_start, radius_end, t);
- pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s)
+ pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s)
+ lerp(-skew ,skew, t) * 0.5f,
c + lerp(0,params.getShear().mV[1],s),
s);
- pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
- pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
+ pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t),
+ hole_y * lerp(taper_y_begin, taper_y_end, t),
+ 0,1);
pt->mTexT = t;
// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
// Rotate the point around the circle's center.
qang.setQuat (ang,path_axis);
- pt->mRot = twist * qang;
+
+ LLMatrix3 rot(twist * qang);
+
+ pt->mRot.loadu(rot);
t+=step;
@@ -1408,51 +1419,55 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en
// Run through the non-cut dependent points.
while (t < params.getEnd())
{
- pt = vector_append(mPath, 1);
+ pt = mPath.append(1);
ang = 2.0f*F_PI*revolutions * t;
c = cos(ang)*lerp(radius_start, radius_end, t);
s = sin(ang)*lerp(radius_start, radius_end, t);
- pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s)
+ pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s)
+ lerp(-skew ,skew, t) * 0.5f,
c + lerp(0,params.getShear().mV[1],s),
s);
- pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
- pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
+ pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t),
+ hole_y * lerp(taper_y_begin, taper_y_end, t),
+ 0,1);
pt->mTexT = t;
// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
// Rotate the point around the circle's center.
qang.setQuat (ang,path_axis);
- pt->mRot = twist * qang;
-
+ LLMatrix3 tmp(twist*qang);
+ pt->mRot.loadu(tmp);
+
t+=step;
}
// Make one final pass for the end cut.
t = params.getEnd();
- pt = vector_append(mPath, 1);
+ pt = mPath.append(1);
ang = 2.0f*F_PI*revolutions * t;
c = cos(ang)*lerp(radius_start, radius_end, t);
s = sin(ang)*lerp(radius_start, radius_end, t);
- pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s)
+ pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s)
+ lerp(-skew ,skew, t) * 0.5f,
c + lerp(0,params.getShear().mV[1],s),
s);
- pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
- pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
+ pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t),
+ hole_y * lerp(taper_y_begin, taper_y_end, t),
+ 0,1);
pt->mTexT = t;
// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
// Rotate the point around the circle's center.
qang.setQuat (ang,path_axis);
- pt->mRot = twist * qang;
-
+ LLMatrix3 tmp(twist*qang);
+ pt->mRot.loadu(tmp);
+
mTotal = mPath.size();
}
@@ -1549,7 +1564,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
mDirty = FALSE;
S32 np = 2; // hardcode for line
- mPath.clear();
+ mPath.resize(0);
mOpen = TRUE;
// Is this 0xf0 mask really necessary? DK 03/02/05
@@ -1575,12 +1590,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
for (S32 i=0;i<np;i++)
{
F32 t = lerp(params.getBegin(),params.getEnd(),(F32)i * mStep);
- mPath[i].mPos.setVec(lerp(0,params.getShear().mV[0],t),
+ mPath[i].mPos.set(lerp(0,params.getShear().mV[0],t),
lerp(0,params.getShear().mV[1],t),
t - 0.5f);
- mPath[i].mRot.setQuat(lerp(F_PI * params.getTwistBegin(),F_PI * params.getTwist(),t),0,0,1);
- mPath[i].mScale.mV[0] = lerp(start_scale.mV[0],end_scale.mV[0],t);
- mPath[i].mScale.mV[1] = lerp(start_scale.mV[1],end_scale.mV[1],t);
+ LLQuaternion quat;
+ quat.setQuat(lerp(F_PI * params.getTwistBegin(),F_PI * params.getTwist(),t),0,0,1);
+ LLMatrix3 tmp(quat);
+ mPath[i].mRot.loadu(tmp);
+ mPath[i].mScale.set(lerp(start_scale.mV[0],end_scale.mV[0],t),
+ lerp(start_scale.mV[1],end_scale.mV[1],t),
+ 0,1);
mPath[i].mTexT = t;
}
}
@@ -1617,7 +1636,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
F32 toggle = 0.5f;
for (S32 i=0;i<(S32)mPath.size();i++)
{
- mPath[i].mPos.mV[0] = toggle;
+ mPath[i].mPos.getF32ptr()[0] = toggle;
if (toggle == 0.5f)
toggle = -0.5f;
else
@@ -1638,13 +1657,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
for (S32 i=0;i<np;i++)
{
F32 t = (F32)i * mStep;
- mPath[i].mPos.setVec(0,
+ mPath[i].mPos.set(0,
lerp(0, -sin(F_PI*params.getTwist()*t)*0.5f,t),
lerp(-0.5, cos(F_PI*params.getTwist()*t)*0.5f,t));
- mPath[i].mScale.mV[0] = lerp(1,params.getScale().mV[0],t);
- mPath[i].mScale.mV[1] = lerp(1,params.getScale().mV[1],t);
+ mPath[i].mScale.set(lerp(1,params.getScale().mV[0],t),
+ lerp(1,params.getScale().mV[1],t), 0,1);
mPath[i].mTexT = t;
- mPath[i].mRot.setQuat(F_PI * params.getTwist() * t,1,0,0);
+ LLQuaternion quat;
+ quat.setQuat(F_PI * params.getTwist() * t,1,0,0);
+ LLMatrix3 tmp(quat);
+ mPath[i].mRot.loadu(tmp);
}
break;
@@ -1668,11 +1690,15 @@ BOOL LLDynamicPath::generate(const LLPathParams& params, F32 detail, S32 split,
// Path hasn't been generated yet.
// Some algorithms later assume at least TWO path points.
resizePath(2);
+ LLQuaternion quat;
+ quat.setQuat(0,0,0);
+ LLMatrix3 tmp(quat);
+
for (U32 i = 0; i < 2; i++)
{
- mPath[i].mPos.setVec(0, 0, 0);
- mPath[i].mRot.setQuat(0, 0, 0);
- mPath[i].mScale.setVec(1, 1);
+ mPath[i].mPos.set(0, 0, 0);
+ mPath[i].mRot.loadu(tmp);
+ mPath[i].mScale.set(1, 1, 0, 1);
mPath[i].mTexT = 0;
}
}
@@ -2045,7 +2071,7 @@ LLVolume::LLVolume(const LLVolumeParams &params, const F32 detail, const BOOL ge
mHullIndices = NULL;
mNumHullPoints = 0;
mNumHullIndices = 0;
-
+
// set defaults
if (mParams.getPathParams().getCurveType() == LL_PCODE_PATH_FLEXIBLE)
{
@@ -2105,6 +2131,7 @@ LLVolume::~LLVolume()
BOOL LLVolume::generate()
{
+ LL_CHECK_MEMORY
llassert_always(mProfilep);
//Added 10.03.05 Dave Parks
@@ -2141,20 +2168,6 @@ BOOL LLVolume::generate()
mLODScaleBias.setVec(0.6f, 0.6f, 0.6f);
}
- //********************************************************************
- //debug info, to be removed
- if((U32)(mPathp->mPath.size() * mProfilep->mProfile.size()) > (1u << 20))
- {
- llinfos << "sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ;
- llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ;
- llinfos << mParams << llendl ;
- llinfos << "more info to check if mProfilep is deleted or not." << llendl ;
- llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ;
-
- llerrs << "LLVolume corrupted!" << llendl ;
- }
- //********************************************************************
-
BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split);
BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split);
@@ -2163,21 +2176,6 @@ BOOL LLVolume::generate()
S32 sizeS = mPathp->mPath.size();
S32 sizeT = mProfilep->mProfile.size();
- //********************************************************************
- //debug info, to be removed
- if((U32)(sizeS * sizeT) > (1u << 20))
- {
- llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ;
- llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ;
- llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ;
- llinfos << mParams << llendl ;
- llinfos << "more info to check if mProfilep is deleted or not." << llendl ;
- llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ;
-
- llerrs << "LLVolume corrupted!" << llendl ;
- }
- //********************************************************************
-
sNumMeshPoints -= mMesh.size();
mMesh.resize(sizeT * sizeS);
sNumMeshPoints += mMesh.size();
@@ -2185,22 +2183,39 @@ BOOL LLVolume::generate()
//generate vertex positions
// Run along the path.
+ LLVector4a* dst = mMesh.mArray;
+
for (S32 s = 0; s < sizeS; ++s)
{
- LLVector2 scale = mPathp->mPath[s].mScale;
- LLQuaternion rot = mPathp->mPath[s].mRot;
+ F32* scale = mPathp->mPath[s].mScale.getF32ptr();
+
+ F32 sc [] =
+ { scale[0], 0, 0, 0,
+ 0, scale[1], 0, 0,
+ 0, 0, scale[2], 0,
+ 0, 0, 0, 1 };
+
+ LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix);
+ LLMatrix4 scale_mat(sc);
+
+ scale_mat *= rot;
+
+ LLMatrix4a rot_mat;
+ rot_mat.loadu(scale_mat);
+
+ LLVector4a* profile = mProfilep->mProfile.mArray;
+ LLVector4a* end_profile = profile+sizeT;
+ LLVector4a offset = mPathp->mPath[s].mPos;
+
+ LLVector4a tmp;
// Run along the profile.
- for (S32 t = 0; t < sizeT; ++t)
+ while (profile < end_profile)
{
- S32 m = s*sizeT + t;
- Point& pt = mMesh[m];
-
- pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0];
- pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1];
- pt.mPos.mV[2] = 0.0f;
- pt.mPos = pt.mPos * rot;
- pt.mPos += mPathp->mPath[s].mPos;
+ rot_mat.rotate(*profile++, tmp);
+ dst->setAdd(tmp,offset);
+ llassert(less_than_max_mag(*dst));
+ ++dst;
}
}
@@ -2210,9 +2225,11 @@ BOOL LLVolume::generate()
LLFaceID id = iter->mFaceID;
mFaceMask |= id;
}
-
+ LL_CHECK_MEMORY
return TRUE;
}
+
+ LL_CHECK_MEMORY
return FALSE;
}
@@ -2790,14 +2807,16 @@ void LLVolume::createVolumeFaces()
}
-inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b)
+inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b)
{
// maps RGB values to vector values [0..255] -> [-0.5..0.5]
- LLVector3 value;
- value.mV[VX] = r / 255.f - 0.5f;
- value.mV[VY] = g / 255.f - 0.5f;
- value.mV[VZ] = b / 255.f - 0.5f;
+ LLVector4a value;
+ LLVector4a sub(0.5f, 0.5f, 0.5f);
+ value.set(r,g,b);
+ value.mul(1.f/255.f);
+ value.sub(sub);
+
return value;
}
@@ -2817,21 +2836,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w
}
-inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data)
+inline LLVector4a sculpt_index_to_vector(U32 index, const U8* sculpt_data)
{
- LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]);
+ LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]);
return v;
}
-inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
+inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
{
U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components);
return sculpt_index_to_vector(index, sculpt_data);
}
-inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
+inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data)
{
U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components);
@@ -2853,15 +2872,26 @@ F32 LLVolume::sculptGetSurfaceArea()
for (S32 t = 0; t < sizeT-1; t++)
{
// get four corners of quad
- LLVector3 p1 = mMesh[(s )*sizeT + (t )].mPos;
- LLVector3 p2 = mMesh[(s+1)*sizeT + (t )].mPos;
- LLVector3 p3 = mMesh[(s )*sizeT + (t+1)].mPos;
- LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos;
+ LLVector4a& p1 = mMesh[(s )*sizeT + (t )];
+ LLVector4a& p2 = mMesh[(s+1)*sizeT + (t )];
+ LLVector4a& p3 = mMesh[(s )*sizeT + (t+1)];
+ LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)];
// compute the area of the quad by taking the length of the cross product of the two triangles
- LLVector3 cross1 = (p1 - p2) % (p1 - p3);
- LLVector3 cross2 = (p4 - p2) % (p4 - p3);
- area += (cross1.magVec() + cross2.magVec()) / 2.f;
+ LLVector4a v0,v1,v2,v3;
+ v0.setSub(p1,p2);
+ v1.setSub(p1,p3);
+ v2.setSub(p4,p2);
+ v3.setSub(p4,p3);
+
+ LLVector4a cross1, cross2;
+ cross1.setCross3(v0,v1);
+ cross2.setCross3(v2,v3);
+
+ //LLVector3 cross1 = (p1 - p2) % (p1 - p3);
+ //LLVector3 cross2 = (p4 - p2) % (p4 - p3);
+
+ area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f;
}
}
@@ -2882,17 +2912,19 @@ void LLVolume::sculptGeneratePlaceholder()
for (S32 t = 0; t < sizeT; t++)
{
S32 i = t + line;
- Point& pt = mMesh[i];
+ LLVector4a& pt = mMesh[i];
F32 u = (F32)s/(sizeS-1);
F32 v = (F32)t/(sizeT-1);
const F32 RADIUS = (F32) 0.3;
-
- pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS);
- pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS);
- pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS);
+
+ F32* p = pt.getF32ptr();
+
+ p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS);
+ p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS);
+ p[2] = (F32)(cos(F_PI * v) * RADIUS);
}
line += sizeT;
@@ -2917,7 +2949,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8
for (S32 t = 0; t < sizeT; t++)
{
S32 i = t + line;
- Point& pt = mMesh[i];
+ LLVector4a& pt = mMesh[i];
S32 reversed_t = t;
@@ -2974,11 +3006,12 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8
}
}
- pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data);
+ pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data);
if (sculpt_mirror)
{
- pt.mPos.mV[VX] *= -1.f;
+ LLVector4a scale(-1.f,1,1,1);
+ pt.mul(scale);
}
}
@@ -3560,627 +3593,6 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h
return true;
}
-S32 *LLVolume::getTriangleIndices(U32 &num_indices) const
-{
- S32 expected_num_triangle_indices = getNumTriangleIndices();
- if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES)
- {
- // we don't allow LLVolumes with this many vertices
- llwarns << "Couldn't allocate triangle indices" << llendl;
- num_indices = 0;
- return NULL;
- }
-
- S32* index = new S32[expected_num_triangle_indices];
- S32 count = 0;
-
- // Let's do this totally diffently, as we don't care about faces...
- // Counter-clockwise triangles are forward facing...
-
- BOOL open = getProfile().isOpen();
- BOOL hollow = (mParams.getProfileParams().getHollow() > 0);
- BOOL path_open = getPath().isOpen();
- S32 size_s, size_s_out, size_t;
- S32 s, t, i;
- size_s = getProfile().getTotal();
- size_s_out = getProfile().getTotalOut();
- size_t = getPath().mPath.size();
-
- // NOTE -- if the construction of the triangles below ever changes
- // then getNumTriangleIndices() method may also have to be updated.
-
- if (open) /* Flawfinder: ignore */
- {
- if (hollow)
- {
- // Open hollow -- much like the closed solid, except we
- // we need to stitch up the gap between s=0 and s=size_s-1
-
- for (t = 0; t < size_t - 1; t++)
- {
- // The outer face, first cut, and inner face
- for (s = 0; s < size_s - 1; s++)
- {
- i = s + t*size_s;
- index[count++] = i; // x,y
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s; // x,y+1
-
- index[count++] = i + size_s; // x,y+1
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s + 1; // x+1,y+1
- }
-
- // The other cut face
- index[count++] = s + t*size_s; // x,y
- index[count++] = 0 + t*size_s; // x+1,y
- index[count++] = s + (t+1)*size_s; // x,y+1
-
- index[count++] = s + (t+1)*size_s; // x,y+1
- index[count++] = 0 + t*size_s; // x+1,y
- index[count++] = 0 + (t+1)*size_s; // x+1,y+1
- }
-
- // Do the top and bottom caps, if necessary
- if (path_open)
- {
- // Top cap
- S32 pt1 = 0;
- S32 pt2 = size_s-1;
- S32 i = (size_t - 1)*size_s;
-
- while (pt2 - pt1 > 1)
- {
- // Use the profile points instead of the mesh, since you want
- // the un-transformed profile distances.
- LLVector3 p1 = getProfile().mProfile[pt1];
- LLVector3 p2 = getProfile().mProfile[pt2];
- LLVector3 pa = getProfile().mProfile[pt1+1];
- LLVector3 pb = getProfile().mProfile[pt2-1];
-
- p1.mV[VZ] = 0.f;
- p2.mV[VZ] = 0.f;
- pa.mV[VZ] = 0.f;
- pb.mV[VZ] = 0.f;
-
- // Use area of triangle to determine backfacing
- F32 area_1a2, area_1ba, area_21b, area_2ab;
- area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
- (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
- (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
- area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
- (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
- area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
- (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
- (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- BOOL use_tri1a2 = TRUE;
- BOOL tri_1a2 = TRUE;
- BOOL tri_21b = TRUE;
-
- if (area_1a2 < 0)
- {
- tri_1a2 = FALSE;
- }
- if (area_2ab < 0)
- {
- // Can't use, because it contains point b
- tri_1a2 = FALSE;
- }
- if (area_21b < 0)
- {
- tri_21b = FALSE;
- }
- if (area_1ba < 0)
- {
- // Can't use, because it contains point b
- tri_21b = FALSE;
- }
-
- if (!tri_1a2)
- {
- use_tri1a2 = FALSE;
- }
- else if (!tri_21b)
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- LLVector3 d1 = p1 - pa;
- LLVector3 d2 = p2 - pb;
-
- if (d1.magVecSquared() < d2.magVecSquared())
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- use_tri1a2 = FALSE;
- }
- }
-
- if (use_tri1a2)
- {
- index[count++] = pt1 + i;
- index[count++] = pt1 + 1 + i;
- index[count++] = pt2 + i;
- pt1++;
- }
- else
- {
- index[count++] = pt1 + i;
- index[count++] = pt2 - 1 + i;
- index[count++] = pt2 + i;
- pt2--;
- }
- }
-
- // Bottom cap
- pt1 = 0;
- pt2 = size_s-1;
- while (pt2 - pt1 > 1)
- {
- // Use the profile points instead of the mesh, since you want
- // the un-transformed profile distances.
- LLVector3 p1 = getProfile().mProfile[pt1];
- LLVector3 p2 = getProfile().mProfile[pt2];
- LLVector3 pa = getProfile().mProfile[pt1+1];
- LLVector3 pb = getProfile().mProfile[pt2-1];
-
- p1.mV[VZ] = 0.f;
- p2.mV[VZ] = 0.f;
- pa.mV[VZ] = 0.f;
- pb.mV[VZ] = 0.f;
-
- // Use area of triangle to determine backfacing
- F32 area_1a2, area_1ba, area_21b, area_2ab;
- area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
- (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
- (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
- area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
- (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
- area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
- (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
- (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- BOOL use_tri1a2 = TRUE;
- BOOL tri_1a2 = TRUE;
- BOOL tri_21b = TRUE;
-
- if (area_1a2 < 0)
- {
- tri_1a2 = FALSE;
- }
- if (area_2ab < 0)
- {
- // Can't use, because it contains point b
- tri_1a2 = FALSE;
- }
- if (area_21b < 0)
- {
- tri_21b = FALSE;
- }
- if (area_1ba < 0)
- {
- // Can't use, because it contains point b
- tri_21b = FALSE;
- }
-
- if (!tri_1a2)
- {
- use_tri1a2 = FALSE;
- }
- else if (!tri_21b)
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- LLVector3 d1 = p1 - pa;
- LLVector3 d2 = p2 - pb;
-
- if (d1.magVecSquared() < d2.magVecSquared())
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- use_tri1a2 = FALSE;
- }
- }
-
- if (use_tri1a2)
- {
- index[count++] = pt1;
- index[count++] = pt2;
- index[count++] = pt1 + 1;
- pt1++;
- }
- else
- {
- index[count++] = pt1;
- index[count++] = pt2;
- index[count++] = pt2 - 1;
- pt2--;
- }
- }
- }
- }
- else
- {
- // Open solid
-
- for (t = 0; t < size_t - 1; t++)
- {
- // Outer face + 1 cut face
- for (s = 0; s < size_s - 1; s++)
- {
- i = s + t*size_s;
-
- index[count++] = i; // x,y
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s; // x,y+1
-
- index[count++] = i + size_s; // x,y+1
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s + 1; // x+1,y+1
- }
-
- // The other cut face
- index[count++] = (size_s - 1) + (t*size_s); // x,y
- index[count++] = 0 + t*size_s; // x+1,y
- index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1
-
- index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1
- index[count++] = 0 + (t*size_s); // x+1,y
- index[count++] = 0 + (t+1)*size_s; // x+1,y+1
- }
-
- // Do the top and bottom caps, if necessary
- if (path_open)
- {
- for (s = 0; s < size_s - 2; s++)
- {
- index[count++] = s+1;
- index[count++] = s;
- index[count++] = size_s - 1;
- }
-
- // We've got a top cap
- S32 offset = (size_t - 1)*size_s;
- for (s = 0; s < size_s - 2; s++)
- {
- // Inverted ordering from bottom cap.
- index[count++] = offset + size_s - 1;
- index[count++] = offset + s;
- index[count++] = offset + s + 1;
- }
- }
- }
- }
- else if (hollow)
- {
- // Closed hollow
- // Outer face
-
- for (t = 0; t < size_t - 1; t++)
- {
- for (s = 0; s < size_s_out - 1; s++)
- {
- i = s + t*size_s;
-
- index[count++] = i; // x,y
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s; // x,y+1
-
- index[count++] = i + size_s; // x,y+1
- index[count++] = i + 1; // x+1,y
- index[count++] = i + 1 + size_s; // x+1,y+1
- }
- }
-
- // Inner face
- // Invert facing from outer face
- for (t = 0; t < size_t - 1; t++)
- {
- for (s = size_s_out; s < size_s - 1; s++)
- {
- i = s + t*size_s;
-
- index[count++] = i; // x,y
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s; // x,y+1
-
- index[count++] = i + size_s; // x,y+1
- index[count++] = i + 1; // x+1,y
- index[count++] = i + 1 + size_s; // x+1,y+1
- }
- }
-
- // Do the top and bottom caps, if necessary
- if (path_open)
- {
- // Top cap
- S32 pt1 = 0;
- S32 pt2 = size_s-1;
- S32 i = (size_t - 1)*size_s;
-
- while (pt2 - pt1 > 1)
- {
- // Use the profile points instead of the mesh, since you want
- // the un-transformed profile distances.
- LLVector3 p1 = getProfile().mProfile[pt1];
- LLVector3 p2 = getProfile().mProfile[pt2];
- LLVector3 pa = getProfile().mProfile[pt1+1];
- LLVector3 pb = getProfile().mProfile[pt2-1];
-
- p1.mV[VZ] = 0.f;
- p2.mV[VZ] = 0.f;
- pa.mV[VZ] = 0.f;
- pb.mV[VZ] = 0.f;
-
- // Use area of triangle to determine backfacing
- F32 area_1a2, area_1ba, area_21b, area_2ab;
- area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
- (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
- (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
- area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
- (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
- area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
- (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
- (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- BOOL use_tri1a2 = TRUE;
- BOOL tri_1a2 = TRUE;
- BOOL tri_21b = TRUE;
-
- if (area_1a2 < 0)
- {
- tri_1a2 = FALSE;
- }
- if (area_2ab < 0)
- {
- // Can't use, because it contains point b
- tri_1a2 = FALSE;
- }
- if (area_21b < 0)
- {
- tri_21b = FALSE;
- }
- if (area_1ba < 0)
- {
- // Can't use, because it contains point b
- tri_21b = FALSE;
- }
-
- if (!tri_1a2)
- {
- use_tri1a2 = FALSE;
- }
- else if (!tri_21b)
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- LLVector3 d1 = p1 - pa;
- LLVector3 d2 = p2 - pb;
-
- if (d1.magVecSquared() < d2.magVecSquared())
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- use_tri1a2 = FALSE;
- }
- }
-
- if (use_tri1a2)
- {
- index[count++] = pt1 + i;
- index[count++] = pt1 + 1 + i;
- index[count++] = pt2 + i;
- pt1++;
- }
- else
- {
- index[count++] = pt1 + i;
- index[count++] = pt2 - 1 + i;
- index[count++] = pt2 + i;
- pt2--;
- }
- }
-
- // Bottom cap
- pt1 = 0;
- pt2 = size_s-1;
- while (pt2 - pt1 > 1)
- {
- // Use the profile points instead of the mesh, since you want
- // the un-transformed profile distances.
- LLVector3 p1 = getProfile().mProfile[pt1];
- LLVector3 p2 = getProfile().mProfile[pt2];
- LLVector3 pa = getProfile().mProfile[pt1+1];
- LLVector3 pb = getProfile().mProfile[pt2-1];
-
- p1.mV[VZ] = 0.f;
- p2.mV[VZ] = 0.f;
- pa.mV[VZ] = 0.f;
- pb.mV[VZ] = 0.f;
-
- // Use area of triangle to determine backfacing
- F32 area_1a2, area_1ba, area_21b, area_2ab;
- area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
- (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
- (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
-
- area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
- (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
-
- area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
- (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
- (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
-
- BOOL use_tri1a2 = TRUE;
- BOOL tri_1a2 = TRUE;
- BOOL tri_21b = TRUE;
-
- if (area_1a2 < 0)
- {
- tri_1a2 = FALSE;
- }
- if (area_2ab < 0)
- {
- // Can't use, because it contains point b
- tri_1a2 = FALSE;
- }
- if (area_21b < 0)
- {
- tri_21b = FALSE;
- }
- if (area_1ba < 0)
- {
- // Can't use, because it contains point b
- tri_21b = FALSE;
- }
-
- if (!tri_1a2)
- {
- use_tri1a2 = FALSE;
- }
- else if (!tri_21b)
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- LLVector3 d1 = p1 - pa;
- LLVector3 d2 = p2 - pb;
-
- if (d1.magVecSquared() < d2.magVecSquared())
- {
- use_tri1a2 = TRUE;
- }
- else
- {
- use_tri1a2 = FALSE;
- }
- }
-
- if (use_tri1a2)
- {
- index[count++] = pt1;
- index[count++] = pt2;
- index[count++] = pt1 + 1;
- pt1++;
- }
- else
- {
- index[count++] = pt1;
- index[count++] = pt2;
- index[count++] = pt2 - 1;
- pt2--;
- }
- }
- }
- }
- else
- {
- // Closed solid. Easy case.
- for (t = 0; t < size_t - 1; t++)
- {
- for (s = 0; s < size_s - 1; s++)
- {
- // Should wrap properly, but for now...
- i = s + t*size_s;
-
- index[count++] = i; // x,y
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s; // x,y+1
-
- index[count++] = i + size_s; // x,y+1
- index[count++] = i + 1; // x+1,y
- index[count++] = i + size_s + 1; // x+1,y+1
- }
- }
-
- // Do the top and bottom caps, if necessary
- if (path_open)
- {
- // bottom cap
- for (s = 1; s < size_s - 2; s++)
- {
- index[count++] = s+1;
- index[count++] = s;
- index[count++] = 0;
- }
-
- // top cap
- S32 offset = (size_t - 1)*size_s;
- for (s = 1; s < size_s - 2; s++)
- {
- // Inverted ordering from bottom cap.
- index[count++] = offset;
- index[count++] = offset + s;
- index[count++] = offset + s + 1;
- }
- }
- }
-
-#ifdef LL_DEBUG
- // assert that we computed the correct number of indices
- if (count != expected_num_triangle_indices )
- {
- llerrs << "bad index count prediciton:"
- << " expected=" << expected_num_triangle_indices
- << " actual=" << count << llendl;
- }
-#endif
-
-#if 0
- // verify that each index does not point beyond the size of the mesh
- S32 num_vertices = mMesh.size();
- for (i = 0; i < count; i+=3)
- {
- llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl;
- llassert(index[i] < num_vertices);
- llassert(index[i+1] < num_vertices);
- llassert(index[i+2] < num_vertices);
- }
-#endif
-
- num_indices = count;
- return index;
-}
-
void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts)
{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the
//supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost
@@ -4198,63 +3610,6 @@ void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts)
}
}
-S32 LLVolume::getNumTriangleIndices() const
-{
- BOOL profile_open = getProfile().isOpen();
- BOOL hollow = (mParams.getProfileParams().getHollow() > 0);
- BOOL path_open = getPath().isOpen();
-
- S32 size_s, size_s_out, size_t;
- size_s = getProfile().getTotal();
- size_s_out = getProfile().getTotalOut();
- size_t = getPath().mPath.size();
-
- S32 count = 0;
- if (profile_open) /* Flawfinder: ignore */
- {
- if (hollow)
- {
- // Open hollow -- much like the closed solid, except we
- // we need to stitch up the gap between s=0 and s=size_s-1
- count = (size_t - 1) * (((size_s -1) * 6) + 6);
- }
- else
- {
- count = (size_t - 1) * (((size_s -1) * 6) + 6);
- }
- }
- else if (hollow)
- {
- // Closed hollow
- // Outer face
- count = (size_t - 1) * (size_s_out - 1) * 6;
-
- // Inner face
- count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6;
- }
- else
- {
- // Closed solid. Easy case.
- count = (size_t - 1) * (size_s - 1) * 6;
- }
-
- if (path_open)
- {
- S32 cap_triangle_count = size_s - 3;
- if ( profile_open
- || hollow )
- {
- cap_triangle_count = size_s - 2;
- }
- if ( cap_triangle_count > 0 )
- {
- // top and bottom caps
- count += cap_triangle_count * 2 * 3;
- }
- }
- return count;
-}
-
S32 LLVolume::getNumTriangles(S32* vcount) const
{
@@ -5243,8 +4598,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
freeData();
- LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a));
-
resizeVertices(src.mNumVertices);
resizeIndices(src.mNumIndices);
@@ -5307,7 +4660,7 @@ LLVolumeFace::~LLVolumeFace()
void LLVolumeFace::freeData()
{
- ll_aligned_free_16(mPositions);
+ ll_aligned_free(mPositions);
mPositions = NULL;
//normals and texture coordinates are part of the same buffer as mPositions, do not free them separately
@@ -5331,52 +4684,23 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)
delete mOctree;
mOctree = NULL;
+ LL_CHECK_MEMORY
BOOL ret = FALSE ;
if (mTypeMask & CAP_MASK)
{
ret = createCap(volume, partial_build);
+ LL_CHECK_MEMORY
}
else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK))
{
ret = createSide(volume, partial_build);
+ LL_CHECK_MEMORY
}
else
{
llerrs << "Unknown/uninitialized face type!" << llendl;
}
- //update the range of the texture coordinates
- if(ret)
- {
- mTexCoordExtents[0].setVec(1.f, 1.f) ;
- mTexCoordExtents[1].setVec(0.f, 0.f) ;
-
- for(U32 i = 0 ; i < mNumVertices ; i++)
- {
- if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0])
- {
- mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ;
- }
- if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0])
- {
- mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ;
- }
-
- if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1])
- {
- mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ;
- }
- if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1])
- {
- mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ;
- }
- }
- mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ;
- mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ;
- mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ;
- mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ;
- }
-
return ret ;
}
@@ -6068,8 +5392,10 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0,
BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
{
- const std::vector<LLVolume::Point>& mesh = volume->getMesh();
- const std::vector<LLVector3>& profile = volume->getProfile().mProfile;
+ LL_CHECK_MEMORY
+
+ const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh();
+ const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile;
S32 max_s = volume->getProfile().getTotal();
S32 max_t = volume->getPath().mPath.size();
@@ -6099,9 +5425,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
VertexData baseVert;
for(S32 t = 0; t < 4; t++)
{
- corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV);
- corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f;
- corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1];
+ corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr());
+ corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f;
+ corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1];
}
{
@@ -6182,6 +5508,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
mCenter->mul(0.5f);
}
+ llassert(less_than_max_mag(mExtents[0]));
+ llassert(less_than_max_mag(mExtents[1]));
+
if (!partial_build)
{
resizeIndices(grid_size*grid_size*6);
@@ -6212,6 +5541,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
}
}
+ LL_CHECK_MEMORY
return TRUE;
}
@@ -6230,8 +5560,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
S32 num_vertices = 0, num_indices = 0;
- const std::vector<LLVolume::Point>& mesh = volume->getMesh();
- const std::vector<LLVector3>& profile = volume->getProfile().mProfile;
+ const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh();
+ const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile;
// All types of caps have the same number of vertices and indices
num_vertices = profile.size();
@@ -6251,13 +5581,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
{
resizeVertices(num_vertices);
allocateBinormals(num_vertices);
-
if (!partial_build)
{
resizeIndices(num_indices);
}
}
+ LL_CHECK_MEMORY;
+
S32 max_s = volume->getProfile().getTotal();
S32 max_t = volume->getPath().mPath.size();
@@ -6288,35 +5619,68 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
LLVector4a* binorm = (LLVector4a*) mBinormals;
// Copy the vertices into the array
- for (S32 i = 0; i < num_vertices; i++)
+
+ const LLVector4a* src = mesh.mArray+offset;
+ const LLVector4a* end = src+num_vertices;
+
+ min = *src;
+ max = min;
+
+
+ const LLVector4a* p = profile.mArray;
+
+ if (mTypeMask & TOP_MASK)
{
- if (mTypeMask & TOP_MASK)
- {
- tc[i].mV[0] = profile[i].mV[0]+0.5f;
- tc[i].mV[1] = profile[i].mV[1]+0.5f;
- }
- else
+ min_uv.set((*p)[0]+0.5f,
+ (*p)[1]+0.5f);
+
+ max_uv = min_uv;
+
+ while(src < end)
{
- // Mirror for underside.
- tc[i].mV[0] = profile[i].mV[0]+0.5f;
- tc[i].mV[1] = 0.5f - profile[i].mV[1];
- }
+ tc->mV[0] = (*p)[0]+0.5f;
+ tc->mV[1] = (*p)[1]+0.5f;
- pos[i].load3(mesh[i + offset].mPos.mV);
+ llassert(less_than_max_mag(*src));
+ update_min_max(min,max,*src);
+ update_min_max(min_uv, max_uv, *tc);
- if (i == 0)
- {
- max = pos[i];
- min = max;
- min_uv = max_uv = tc[i];
+ *pos = *src;
+
+ ++p;
+ ++tc;
+ ++src;
+ ++pos;
}
- else
+ }
+ else
+ {
+
+ min_uv.set((*p)[0]+0.5f,
+ 0.5f - (*p)[1]);
+ max_uv = min_uv;
+
+ while(src < end)
{
- update_min_max(min,max,pos[i]);
- update_min_max(min_uv, max_uv, tc[i]);
+ // Mirror for underside.
+ tc->mV[0] = (*p)[0]+0.5f;
+ tc->mV[1] = 0.5f - (*p)[1];
+
+ llassert(less_than_max_mag(*src));
+ update_min_max(min,max,*src);
+ update_min_max(min_uv, max_uv, *tc);
+
+ *pos = *src;
+
+ ++p;
+ ++tc;
+ ++src;
+ ++pos;
}
}
+ LL_CHECK_MEMORY
+
mCenter->setAdd(min, max);
mCenter->mul(0.5f);
@@ -6353,15 +5717,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))
{
- pos[num_vertices] = *mCenter;
- tc[num_vertices] = cuv;
+ *pos++ = *mCenter;
+ *tc++ = cuv;
num_vertices++;
}
-
- for (S32 i = 0; i < num_vertices; i++)
+
+ LL_CHECK_MEMORY
+
+ F32* dst_binorm = (F32*) binorm;
+ F32* end_binorm = (F32*) (binorm+num_vertices);
+
+ F32* dst_norm = (F32*) norm;
+
+ while (dst_binorm < end_binorm)
{
- binorm[i].load4a(binormal.getF32ptr());
- norm[i].load4a(normal.getF32ptr());
+ binormal.store4a(dst_binorm);
+ normal.store4a(dst_norm);
+
+ dst_binorm += 4;
+ dst_norm += 4;
}
if (partial_build)
@@ -6382,33 +5756,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
{
// Use the profile points instead of the mesh, since you want
// the un-transformed profile distances.
- LLVector3 p1 = profile[pt1];
- LLVector3 p2 = profile[pt2];
- LLVector3 pa = profile[pt1+1];
- LLVector3 pb = profile[pt2-1];
+ const LLVector4a& p1 = profile[pt1];
+ const LLVector4a& p2 = profile[pt2];
+ const LLVector4a& pa = profile[pt1+1];
+ const LLVector4a& pb = profile[pt2-1];
+
+ const F32* p1V = p1.getF32ptr();
+ const F32* p2V = p2.getF32ptr();
+ const F32* paV = pa.getF32ptr();
+ const F32* pbV = pb.getF32ptr();
- p1.mV[VZ] = 0.f;
- p2.mV[VZ] = 0.f;
- pa.mV[VZ] = 0.f;
- pb.mV[VZ] = 0.f;
+ //p1.mV[VZ] = 0.f;
+ //p2.mV[VZ] = 0.f;
+ //pa.mV[VZ] = 0.f;
+ //pb.mV[VZ] = 0.f;
// Use area of triangle to determine backfacing
F32 area_1a2, area_1ba, area_21b, area_2ab;
- area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
- (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
- (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
+ area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) +
+ (paV[0]*p2V[1] - p2V[0]*paV[1]) +
+ (p2V[0]*p1V[1] - p1V[0]*p2V[1]);
- area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
- (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
+ area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+ (pbV[0]*paV[1] - paV[0]*pbV[1]) +
+ (paV[0]*p1V[1] - p1V[0]*paV[1]);
- area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
- (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+ area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) +
+ (p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+ (pbV[0]*p2V[1] - p2V[0]*pbV[1]);
- area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
- (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+ area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) +
+ (paV[0]*pbV[1] - pbV[0]*paV[1]) +
+ (pbV[0]*p2V[1] - p2V[0]*pbV[1]);
BOOL use_tri1a2 = TRUE;
BOOL tri_1a2 = TRUE;
@@ -6443,10 +5822,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
}
else
{
- LLVector3 d1 = p1 - pa;
- LLVector3 d2 = p2 - pb;
+ LLVector4a d1;
+ d1.setSub(p1, pa);
+
+ LLVector4a d2;
+ d2.setSub(p2, pb);
- if (d1.magVecSquared() < d2.magVecSquared())
+ if (d1.dot3(d1) < d2.dot3(d2))
{
use_tri1a2 = TRUE;
}
@@ -6485,33 +5867,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
{
// Use the profile points instead of the mesh, since you want
// the un-transformed profile distances.
- LLVector3 p1 = profile[pt1];
- LLVector3 p2 = profile[pt2];
- LLVector3 pa = profile[pt1+1];
- LLVector3 pb = profile[pt2-1];
-
- p1.mV[VZ] = 0.f;
- p2.mV[VZ] = 0.f;
- pa.mV[VZ] = 0.f;
- pb.mV[VZ] = 0.f;
-
+ const LLVector4a& p1 = profile[pt1];
+ const LLVector4a& p2 = profile[pt2];
+ const LLVector4a& pa = profile[pt1+1];
+ const LLVector4a& pb = profile[pt2-1];
+
+ const F32* p1V = p1.getF32ptr();
+ const F32* p2V = p2.getF32ptr();
+ const F32* paV = pa.getF32ptr();
+ const F32* pbV = pb.getF32ptr();
+
// Use area of triangle to determine backfacing
F32 area_1a2, area_1ba, area_21b, area_2ab;
- area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) +
- (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) +
- (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]);
+ area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) +
+ (paV[0]*p2V[1] - p2V[0]*paV[1]) +
+ (p2V[0]*p1V[1] - p1V[0]*p2V[1]);
- area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) +
- (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]);
+ area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+ (pbV[0]*paV[1] - paV[0]*pbV[1]) +
+ (paV[0]*p1V[1] - p1V[0]*paV[1]);
- area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) +
- (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+ area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) +
+ (p1V[0]*pbV[1] - pbV[0]*p1V[1]) +
+ (pbV[0]*p2V[1] - p2V[0]*pbV[1]);
- area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) +
- (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) +
- (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]);
+ area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) +
+ (paV[0]*pbV[1] - pbV[0]*paV[1]) +
+ (pbV[0]*p2V[1] - p2V[0]*pbV[1]);
BOOL use_tri1a2 = TRUE;
BOOL tri_1a2 = TRUE;
@@ -6546,10 +5928,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
}
else
{
- LLVector3 d1 = p1 - pa;
- LLVector3 d2 = p2 - pb;
+ LLVector4a d1;
+ d1.setSub(p1,pa);
+ LLVector4a d2;
+ d2.setSub(p2,pb);
- if (d1.magVecSquared() < d2.magVecSquared())
+ if (d1.dot3(d1) < d2.dot3(d2))
{
use_tri1a2 = TRUE;
}
@@ -6598,6 +5982,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
}
+
+ LL_CHECK_MEMORY
return TRUE;
}
@@ -6900,6 +6286,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
{
+ LL_CHECK_MEMORY
BOOL flat = mTypeMask & FLAT_MASK;
U8 sculpt_type = volume->getParams().getSculptType();
@@ -6910,9 +6297,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
S32 num_vertices, num_indices;
- const std::vector<LLVolume::Point>& mesh = volume->getMesh();
- const std::vector<LLVector3>& profile = volume->getProfile().mProfile;
- const std::vector<LLPath::PathPt>& path_data = volume->getPath().mPath;
+ const LLAlignedArray<LLVector4a,64>& mesh = volume->getMesh();
+ const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile;
+ const LLAlignedArray<LLPath::PathPt,64>& path_data = volume->getPath().mPath;
S32 max_s = volume->getProfile().getTotal();
@@ -6933,10 +6320,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
}
}
+ LL_CHECK_MEMORY
+
LLVector4a* pos = (LLVector4a*) mPositions;
- LLVector4a* norm = (LLVector4a*) mNormals;
LLVector2* tc = (LLVector2*) mTexCoords;
- F32 begin_stex = floorf(profile[mBeginS].mV[2]);
+ F32 begin_stex = floorf(profile[mBeginS][2]);
S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS;
S32 cur_vertex = 0;
@@ -6965,11 +6353,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
// Get s value for tex-coord.
if (!flat)
{
- ss = profile[mBeginS + s].mV[2];
+ ss = profile[mBeginS + s][2];
}
else
{
- ss = profile[mBeginS + s].mV[2] - begin_stex;
+ ss = profile[mBeginS + s][2] - begin_stex;
}
}
@@ -6989,19 +6377,17 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
i = mBeginS + s + max_s*t;
}
- pos[cur_vertex].load3(mesh[i].mPos.mV);
- tc[cur_vertex] = LLVector2(ss,tt);
+ llassert(less_than_max_mag(mesh[i]));
+ mesh[i].store4a((F32*)(pos+cur_vertex));
+ tc[cur_vertex].set(ss,tt);
- norm[cur_vertex].clear();
cur_vertex++;
if (test && s > 0)
{
- pos[cur_vertex].load3(mesh[i].mPos.mV);
- tc[cur_vertex] = LLVector2(ss,tt);
-
- norm[cur_vertex].clear();
-
+ llassert(less_than_max_mag(mesh[i]));
+ mesh[i].store4a((F32*)(pos+cur_vertex));
+ tc[cur_vertex].set(ss,tt);
cur_vertex++;
}
}
@@ -7018,28 +6404,66 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
}
i = mBeginS + s + max_s*t;
- ss = profile[mBeginS + s].mV[2] - begin_stex;
- pos[cur_vertex].load3(mesh[i].mPos.mV);
- tc[cur_vertex] = LLVector2(ss,tt);
- norm[cur_vertex].clear();
-
+ ss = profile[mBeginS + s][2] - begin_stex;
+
+ llassert(less_than_max_mag(mesh[i]));
+ mesh[i].store4a((F32*)(pos+cur_vertex));
+ tc[cur_vertex].set(ss,tt);
+
cur_vertex++;
}
}
+ LL_CHECK_MEMORY
- //get bounding box for this side
- LLVector4a& face_min = mExtents[0];
- LLVector4a& face_max = mExtents[1];
+
mCenter->clear();
- face_min = face_max = pos[0];
+ LLVector4a* cur_pos = pos;
+ LLVector4a* end_pos = pos + mNumVertices;
- for (U32 i = 1; i < mNumVertices; ++i)
+ //get bounding box for this side
+ LLVector4a face_min;
+ LLVector4a face_max;
+
+ face_min = face_max = *cur_pos++;
+
+ while (cur_pos < end_pos)
{
- update_min_max(face_min, face_max, pos[i]);
+ update_min_max(face_min, face_max, *cur_pos++);
+ }
+
+ mExtents[0] = face_min;
+ mExtents[1] = face_max;
+
+ U32 tc_count = mNumVertices;
+ if (tc_count%2 == 1)
+ { //odd number of texture coordinates, duplicate last entry to padded end of array
+ tc_count++;
+ mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1];
}
+ LLVector4a* cur_tc = (LLVector4a*) mTexCoords;
+ LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count);
+
+ LLVector4a tc_min;
+ LLVector4a tc_max;
+
+ tc_min = tc_max = *cur_tc++;
+
+ while (cur_tc < end_tc)
+ {
+ update_min_max(tc_min, tc_max, *cur_tc++);
+ }
+
+ F32* minp = tc_min.getF32ptr();
+ F32* maxp = tc_max.getF32ptr();
+
+ mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]);
+ mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]);
+ mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]);
+ mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]);
+
mCenter->setAdd(face_min, face_max);
mCenter->mul(0.5f);
@@ -7104,33 +6528,94 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
}
}
+ LL_CHECK_MEMORY
+
//clear normals
- for (U32 i = 0; i < mNumVertices; i++)
+ F32* dst = (F32*) mNormals;
+ F32* end = (F32*) (mNormals+mNumVertices);
+ LLVector4a zero = LLVector4a::getZero();
+
+ while (dst < end)
{
- mNormals[i].clear();
+ zero.store4a(dst);
+ dst += 4;
}
+ LL_CHECK_MEMORY
+
//generate normals
U32 count = mNumIndices/3;
- for (U32 i = 0; i < count; i++) //for each triangle
+ LLVector4a* norm = mNormals;
+
+ static LLAlignedArray<LLVector4a, 64> triangle_normals;
+ triangle_normals.resize(count);
+ LLVector4a* output = triangle_normals.mArray;
+ LLVector4a* end_output = output+count;
+
+ U16* idx = mIndices;
+
+ while (output < end_output)
{
- const U16* idx = &(mIndices[i*3]);
-
- LLVector4a& v0 = *(pos+idx[0]);
- LLVector4a& v1 = *(pos+idx[1]);
- LLVector4a& v2 = *(pos+idx[2]);
-
- LLVector4a& n0 = *(norm+idx[0]);
- LLVector4a& n1 = *(norm+idx[1]);
- LLVector4a& n2 = *(norm+idx[2]);
+ LLVector4a b,v1,v2;
+ b.load4a((F32*) (pos+idx[0]));
+ v1.load4a((F32*) (pos+idx[1]));
+ v2.load4a((F32*) (pos+idx[2]));
//calculate triangle normal
- LLVector4a a, b, c;
+ LLVector4a a;
- a.setSub(v0, v1);
- b.setSub(v0, v2);
- c.setCross3(a,b);
+ a.setSub(b, v1);
+ b.sub(v2);
+
+
+ LLQuad& vector1 = *((LLQuad*) &v1);
+ LLQuad& vector2 = *((LLQuad*) &v2);
+
+ LLQuad& amQ = *((LLQuad*) &a);
+ LLQuad& bmQ = *((LLQuad*) &b);
+
+ //v1.setCross3(t,v0);
+ //setCross3(const LLVector4a& a, const LLVector4a& b)
+ // Vectors are stored in memory in w, z, y, x order from high to low
+ // Set vector1 = { a[W], a[X], a[Z], a[Y] }
+ vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 ));
+ // Set vector2 = { b[W], b[Y], b[X], b[Z] }
+ vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 ));
+ // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] }
+ vector2 = _mm_mul_ps( vector1, vector2 );
+ // vector3 = { a[W], a[Y], a[X], a[Z] }
+ amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 ));
+ // vector4 = { b[W], b[X], b[Z], b[Y] }
+ bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 ));
+ // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] }
+ vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ ));
+
+ v1.store4a((F32*) output);
+
+ output++;
+ idx += 3;
+ }
+
+ idx = mIndices;
+
+ LLVector4a* src = triangle_normals.mArray;
+
+ for (U32 i = 0; i < count; i++) //for each triangle
+ {
+ LLVector4a c;
+ c.load4a((F32*) (src++));
+
+ LLVector4a* n0p = norm+idx[0];
+ LLVector4a* n1p = norm+idx[1];
+ LLVector4a* n2p = norm+idx[2];
+
+ idx += 3;
+
+ LLVector4a n0,n1,n2;
+ n0.load4a((F32*) n0p);
+ n1.load4a((F32*) n1p);
+ n2.load4a((F32*) n2p);
n0.add(c);
n1.add(c);
@@ -7143,8 +6628,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
case 1: n1.add(c); break;
case 2: n2.add(c); break;
};
+
+ n0.store4a((F32*) n0p);
+ n1.store4a((F32*) n1p);
+ n2.store4a((F32*) n2p);
}
+ LL_CHECK_MEMORY
+
// adjust normals based on wrapping and stitching
LLVector4a top;
@@ -7276,6 +6767,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
}
+ LL_CHECK_MEMORY
+
return TRUE;
}
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 1d3b0fe52f..5e43af92ec 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -37,7 +37,6 @@ class LLPath;
template <class T> class LLOctreeNode;
-class LLVector4a;
class LLVolumeFace;
class LLVolume;
class LLVolumeTriangle;
@@ -50,6 +49,8 @@ class LLVolumeTriangle;
#include "v3math.h"
#include "v3dmath.h"
#include "v4math.h"
+#include "llvector4a.h"
+#include "llmatrix4a.h"
#include "llquaternion.h"
#include "llstrider.h"
#include "v4coloru.h"
@@ -194,6 +195,26 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128;
const S32 LL_SCULPT_MESH_MAX_FACES = 8;
+template <class T, U32 alignment>
+class LLAlignedArray
+{
+public:
+ T* mArray;
+ U32 mElementCount;
+ U32 mCapacity;
+
+ LLAlignedArray();
+ ~LLAlignedArray();
+
+ void push_back(const T& elem);
+ U32 size() const { return mElementCount; }
+ void resize(U32 size);
+ T* append(S32 N);
+ T& operator[](int idx);
+ const T& operator[](int idx) const;
+};
+
+
class LLProfileParams
{
public:
@@ -708,16 +729,16 @@ public:
LLFaceID mFaceID;
};
- std::vector<LLVector3> mProfile;
- std::vector<LLVector2> mNormals;
+ LLAlignedArray<LLVector4a, 64> mProfile;
+ //LLAlignedArray<LLVector4a, 64> mNormals;
std::vector<Face> mFaces;
- std::vector<LLVector3> mEdgeNormals;
- std::vector<LLVector3> mEdgeCenters;
+
+ //LLAlignedArray<LLVector4a, 64> mEdgeNormals;
+ //LLAlignedArray<LLVector4a, 64> mEdgeCenters;
friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile);
protected:
- void genNormals(const LLProfileParams& params);
static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);
void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);
@@ -741,13 +762,29 @@ protected:
class LLPath
{
public:
- struct PathPt
+ class PathPt
{
- LLVector3 mPos;
- LLVector2 mScale;
- LLQuaternion mRot;
+ public:
+ LLMatrix4a mRot;
+ LLVector4a mPos;
+
+ LLVector4a mScale;
F32 mTexT;
- PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); }
+ F32 pad[3]; //for alignment
+ PathPt()
+ {
+ mPos.clear();
+ mTexT = 0;
+ mScale.clear();
+ mRot.setRows(LLVector4a(1,0,0,0),
+ LLVector4a(0,1,0,0),
+ LLVector4a(0,0,1,0));
+
+ //distinguished data in the pad for debugging
+ pad[0] = 3.14159f;
+ pad[1] = -3.14159f;
+ pad[2] = 0.585f;
+ }
};
public:
@@ -779,7 +816,7 @@ public:
friend std::ostream& operator<<(std::ostream &s, const LLPath &path);
public:
- std::vector<PathPt> mPath;
+ LLAlignedArray<PathPt, 64> mPath;
protected:
BOOL mOpen;
@@ -951,11 +988,7 @@ protected:
~LLVolume(); // use unref
public:
- struct Point
- {
- LLVector3 mPos;
- };
-
+
struct FaceParams
{
LLFaceID mFaceID;
@@ -978,8 +1011,8 @@ public:
const LLProfile& getProfile() const { return *mProfilep; }
LLPath& getPath() const { return *mPathp; }
void resizePath(S32 length);
- const std::vector<Point>& getMesh() const { return mMesh; }
- const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; }
+ const LLAlignedArray<LLVector4a,64>& getMesh() const { return mMesh; }
+ const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; }
void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); }
@@ -994,10 +1027,7 @@ public:
S32 getSculptLevel() const { return mSculptLevel; }
void setSculptLevel(S32 level) { mSculptLevel = level; }
- S32 *getTriangleIndices(U32 &num_indices) const;
-
- // returns number of triangle indeces required for path/profile mesh
- S32 getNumTriangleIndices() const;
+
static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts);
S32 getNumTriangles(S32* vcount = NULL) const;
@@ -1070,7 +1100,8 @@ public:
LLVolumeParams mParams;
LLPath *mPathp;
LLProfile *mProfilep;
- std::vector<Point> mMesh;
+ LLAlignedArray<LLVector4a,64> mMesh;
+
BOOL mGenerateSingleFace;
typedef std::vector<LLVolumeFace> face_list_t;