summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Parks <davep@lindenlab.com>2010-05-25 03:55:01 -0500
committerDave Parks <davep@lindenlab.com>2010-05-25 03:55:01 -0500
commite6fe3b1f1aa888e4594c89154ef895b3cf5498e9 (patch)
treeaa40dffc20582b66ba7263a4215d71fd8c2297eb
parent051d55e9e417d1f70e4a0dcee0035f6e2a413792 (diff)
Better vectorization of various things. Turn off debug gl by default.
-rw-r--r--indra/llcommon/lldefs.h8
-rw-r--r--indra/llmath/llvolume.cpp383
-rw-r--r--indra/llmath/llvolume.h110
-rw-r--r--indra/newview/app_settings/settings.xml4
-rw-r--r--indra/newview/llface.cpp8
-rw-r--r--indra/newview/llfloaterimagepreview.cpp20
-rw-r--r--indra/newview/llhudicon.cpp43
-rw-r--r--indra/newview/llhudtext.cpp5
-rw-r--r--indra/newview/llviewercamera.cpp16
-rw-r--r--indra/newview/llvograss.cpp7
-rw-r--r--indra/newview/llvovolume.cpp2
11 files changed, 404 insertions, 202 deletions
diff --git a/indra/llcommon/lldefs.h b/indra/llcommon/lldefs.h
index f3b5ca361f..10e6cb34bf 100644
--- a/indra/llcommon/lldefs.h
+++ b/indra/llcommon/lldefs.h
@@ -242,5 +242,13 @@ inline LLDATATYPE llclampb(const LLDATATYPE& a)
return llmin(llmax(a, (LLDATATYPE)0), (LLDATATYPE)255);
}
+template <class LLDATATYPE>
+inline void llswap(LLDATATYPE& lhs, LLDATATYPE& rhs)
+{
+ LLDATATYPE tmp = lhs;
+ lhs = rhs;
+ rhs = tmp;
+}
+
#endif // LL_LLDEFS_H
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index f05e6eb9d9..d8fbc081fa 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -237,6 +237,21 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co
return TRUE;
}
+//helper for non-aligned vectors
+BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
+ F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided)
+{
+ LLVector4a vert0a, vert1a, vert2a, origa, dira;
+ vert0a.load3(vert0.mV);
+ vert1a.load3(vert1.mV);
+ vert2a.load3(vert2.mV);
+ origa.load3(orig.mV);
+ dira.load3(dir.mV);
+
+ return LLTriangleRayIntersect(vert0a, vert1a, vert2a, origa, dira,
+ intersection_a, intersection_b, intersection_t, two_sided);
+}
+
//-------------------------------------------------------------------
// statics
@@ -1889,15 +1904,15 @@ bool LLVolumeFace::VertexData::operator==(const LLVolumeFace::VertexData& rhs)co
bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs, F32 angle_cutoff) const
{
bool retval = false;
- if (rhs.mPosition == mPosition && rhs.mTexCoord == mTexCoord)
+ if (rhs.mData[POSITION].equal3(mData[POSITION]) && rhs.mTexCoord == mTexCoord)
{
if (angle_cutoff > 1.f)
{
- retval = (mNormal == rhs.mNormal);
+ retval = (mData[NORMAL].equal3(rhs.mData[NORMAL]));
}
else
{
- F32 cur_angle = rhs.mNormal*mNormal;
+ F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]);
retval = cur_angle > angle_cutoff;
}
}
@@ -2081,9 +2096,9 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
min = max = LLVector3(0,0,0);
- F32* pos_out = face.mPositions;
- F32* norm_out = face.mNormals;
- F32* tc_out = face.mTexCoords;
+ F32* pos_out = (F32*) face.mPositions;
+ F32* norm_out = (F32*) face.mNormals;
+ F32* tc_out = (F32*) face.mTexCoords;
for (U32 j = 0; j < num_verts; ++j)
{
@@ -2188,13 +2203,15 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
void tetrahedron_set_normal(LLVolumeFace::VertexData* cv)
{
- LLVector3 nrm = (cv[1].mPosition-cv[0].mPosition)%(cv[2].mPosition-cv[0].mPosition);
-
- nrm.normVec();
-
- cv[0].mNormal = nrm;
- cv[1].mNormal = nrm;
- cv[2].mNormal = nrm;
+ LLVector4a v0;
+ v0.setSub(cv[1].getPosition(), cv[0].getNormal());
+ LLVector4a v1;
+ v1.setSub(cv[2].getNormal(), cv[0].getPosition());
+
+ cv[0].getNormal().setCross3(v0,v1);
+ cv[0].getNormal().normalize3fast();
+ cv[1].setNormal(cv[0].getNormal());
+ cv[2].setNormal(cv[1].getNormal());
}
BOOL LLVolume::isTetrahedron()
@@ -2209,12 +2226,12 @@ void LLVolume::makeTetrahedron()
LLVolumeFace face;
F32 x = 0.25f;
- LLVector3 p[] =
+ LLVector4a p[] =
{ //unit tetrahedron corners
- LLVector3(x,x,x),
- LLVector3(-x,-x,x),
- LLVector3(-x,x,-x),
- LLVector3(x,-x,-x)
+ LLVector4a(x,x,x),
+ LLVector4a(-x,-x,x),
+ LLVector4a(-x,x,-x),
+ LLVector4a(x,-x,-x)
};
face.mExtents[0].setVec(-x,-x,-x);
@@ -2229,9 +2246,9 @@ void LLVolume::makeTetrahedron()
//side 1
- cv[0].mPosition = p[1];
- cv[1].mPosition = p[0];
- cv[2].mPosition = p[2];
+ cv[0].setPosition(p[1]);
+ cv[1].setPosition(p[0]);
+ cv[2].setPosition(p[2]);
tetrahedron_set_normal(cv);
@@ -2242,14 +2259,14 @@ void LLVolume::makeTetrahedron()
LLVector4a* n = (LLVector4a*) face.mNormals;
LLVector2* tc = (LLVector2*) face.mTexCoords;
- v[0].load3(cv[0].mPosition.mV);
- v[1].load3(cv[1].mPosition.mV);
- v[2].load3(cv[2].mPosition.mV);
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
v += 3;
- n[0].load3(cv[0].mNormal.mV);
- n[1].load3(cv[1].mNormal.mV);
- n[2].load3(cv[2].mNormal.mV);
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
n += 3;
tc[0] = cv[0].mTexCoord;
@@ -2259,20 +2276,20 @@ void LLVolume::makeTetrahedron()
//side 2
- cv[0].mPosition = p[3];
- cv[1].mPosition = p[0];
- cv[2].mPosition = p[1];
+ cv[0].setPosition(p[3]);
+ cv[1].setPosition(p[0]);
+ cv[2].setPosition(p[1]);
tetrahedron_set_normal(cv);
- v[0].load3(cv[0].mPosition.mV);
- v[1].load3(cv[1].mPosition.mV);
- v[2].load3(cv[2].mPosition.mV);
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
v += 3;
- n[0].load3(cv[0].mNormal.mV);
- n[1].load3(cv[1].mNormal.mV);
- n[2].load3(cv[2].mNormal.mV);
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
n += 3;
tc[0] = cv[0].mTexCoord;
@@ -2281,20 +2298,20 @@ void LLVolume::makeTetrahedron()
tc += 3;
//side 3
- cv[0].mPosition = p[3];
- cv[1].mPosition = p[1];
- cv[2].mPosition = p[2];
+ cv[0].setPosition(p[3]);
+ cv[1].setPosition(p[1]);
+ cv[2].setPosition(p[2]);
tetrahedron_set_normal(cv);
- v[0].load3(cv[0].mPosition.mV);
- v[1].load3(cv[1].mPosition.mV);
- v[2].load3(cv[2].mPosition.mV);
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
v += 3;
- n[0].load3(cv[0].mNormal.mV);
- n[1].load3(cv[1].mNormal.mV);
- n[2].load3(cv[2].mNormal.mV);
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
n += 3;
tc[0] = cv[0].mTexCoord;
@@ -2303,20 +2320,20 @@ void LLVolume::makeTetrahedron()
tc += 3;
//side 4
- cv[0].mPosition = p[2];
- cv[1].mPosition = p[0];
- cv[2].mPosition = p[3];
+ cv[0].setPosition(p[2]);
+ cv[1].setPosition(p[0]);
+ cv[2].setPosition(p[3]);
tetrahedron_set_normal(cv);
- v[0].load3(cv[0].mPosition.mV);
- v[1].load3(cv[1].mPosition.mV);
- v[2].load3(cv[2].mPosition.mV);
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
v += 3;
- n[0].load3(cv[0].mNormal.mV);
- n[1].load3(cv[1].mNormal.mV);
- n[2].load3(cv[2].mNormal.mV);
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
n += 3;
tc[0] = cv[0].mTexCoord;
@@ -3974,9 +3991,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
S32 v3 = face.mIndices[j*3+2];
//get current face center
- LLVector3 cCenter = (face.mVertices[v1].mPosition +
- face.mVertices[v2].mPosition +
- face.mVertices[v3].mPosition) / 3.0f;
+ LLVector3 cCenter = (face.mVertices[v1].getPosition() +
+ face.mVertices[v2].getPosition() +
+ face.mVertices[v3].getPosition()) / 3.0f;
//for each edge
for (S32 k = 0; k < 3; k++) {
@@ -3994,9 +4011,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
v3 = face.mIndices[nIndex*3+2];
//get neighbor face center
- LLVector3 nCenter = (face.mVertices[v1].mPosition +
- face.mVertices[v2].mPosition +
- face.mVertices[v3].mPosition) / 3.0f;
+ LLVector3 nCenter = (face.mVertices[v1].getPosition() +
+ face.mVertices[v2].getPosition() +
+ face.mVertices[v3].getPosition()) / 3.0f;
//draw line
vertices.push_back(cCenter);
@@ -4020,14 +4037,14 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
//for each vertex
for (U32 j = 0; j < face.mNumVertices; j++) {
- vertices.push_back(face.mVertices[j].mPosition);
- vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mNormal*0.1f);
+ vertices.push_back(face.mVertices[j].getPosition());
+ vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].getNormal()*0.1f);
normals.push_back(LLVector3(0,0,1));
normals.push_back(LLVector3(0,0,1));
segments.push_back(vertices.size());
#if DEBUG_SILHOUETTE_BINORMALS
- vertices.push_back(face.mVertices[j].mPosition);
- vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mBinormal*0.1f);
+ vertices.push_back(face.mVertices[j].getPosition());
+ vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f);
normals.push_back(LLVector3(0,0,1));
normals.push_back(LLVector3(0,0,1));
segments.push_back(vertices.size());
@@ -5038,9 +5055,15 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)
}
}
+void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv)
+{
+ cv.setPosition(mPositions[index]);
+ cv.setNormal(mNormals[index]);
+ cv.mTexCoord = mTexCoords[index];
+}
+
void LLVolumeFace::optimize(F32 angle_cutoff)
{
-#if 0 //disabling until a vectorized version is available
LLVolumeFace new_face;
VertexMapData::PointMap point_map;
@@ -5050,10 +5073,11 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
{
U16 index = mIndices[i];
- LLVolumeFace::VertexData cv = mVertices[index];
-
+ LLVolumeFace::VertexData cv;
+ getVertexData(index, cv);
+
BOOL found = FALSE;
- VertexMapData::PointMap::iterator point_iter = point_map.find(cv.mPosition);
+ VertexMapData::PointMap::iterator point_iter = point_map.find(cv.getPosition());
if (point_iter != point_map.end())
{ //duplicate point might exist
for (U32 j = 0; j < point_iter->second.size(); ++j)
@@ -5062,7 +5086,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
if (tv.compareNormal(cv, angle_cutoff))
{
found = TRUE;
- new_face.mIndices.push_back((point_iter->second)[j].mIndex);
+ new_face.pushIndex((point_iter->second)[j].mIndex);
break;
}
}
@@ -5070,14 +5094,14 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
if (!found)
{
- new_face.mVertices.push_back(cv);
+ new_face.pushVertex(cv);
U16 index = (U16) new_face.mNumVertices-1;
- new_face.mIndices.push_back(index);
+ new_face.pushIndex(index);
VertexMapData d;
- d.mPosition = cv.mPosition;
+ d.setPosition(cv.getPosition());
d.mTexCoord = cv.mTexCoord;
- d.mNormal = cv.mNormal;
+ d.setNormal(cv.getNormal());
d.mIndex = index;
if (point_iter != point_map.end())
{
@@ -5085,14 +5109,23 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
}
else
{
- point_map[d.mPosition].push_back(d);
+ point_map[d.getPosition()].push_back(d);
}
}
}
- mVertices = new_face.mVertices;
- mIndices = new_face.mIndices;
-#endif
+ swapData(new_face);
+}
+
+void LLVolumeFace::swapData(LLVolumeFace& rhs)
+{
+ llswap(rhs.mPositions, mPositions);
+ llswap(rhs.mNormals, mNormals);
+ llswap(rhs.mBinormals, mBinormals);
+ llswap(rhs.mTexCoords, mTexCoords);
+ llswap(rhs.mIndices,mIndices);
+ llswap(rhs.mNumVertices, mNumVertices);
+ llswap(rhs.mNumIndices, mNumIndices);
}
void LerpPlanarVertex(LLVolumeFace::VertexData& v0,
@@ -5102,10 +5135,21 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0,
F32 coef01,
F32 coef02)
{
- vout.mPosition = v0.mPosition + ((v1.mPosition-v0.mPosition)*coef01)+((v2.mPosition-v0.mPosition)*coef02);
+
+ LLVector4a lhs;
+ lhs.setSub(v1.getPosition(), v0.getPosition());
+ lhs.mul(coef01);
+ LLVector4a rhs;
+ rhs.setSub(v2.getPosition(), v0.getPosition());
+ rhs.mul(coef02);
+
+ rhs.add(lhs);
+ rhs.add(v0.getPosition());
+
+ vout.setPosition(rhs);
+
vout.mTexCoord = v0.mTexCoord + ((v1.mTexCoord-v0.mTexCoord)*coef01)+((v2.mTexCoord-v0.mTexCoord)*coef02);
- vout.mNormal = v0.mNormal;
- vout.mBinormal = v0.mBinormal;
+ vout.setNormal(v0.getNormal());
}
BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
@@ -5137,16 +5181,22 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
VertexData corners[4];
VertexData baseVert;
for(int t = 0; t < 4; t++){
- corners[t].mPosition = mesh[offset + (grid_size*t)].mPos;
+ corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV);
corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f;
corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1];
}
- baseVert.mNormal =
- ((corners[1].mPosition-corners[0].mPosition) %
- (corners[2].mPosition-corners[1].mPosition));
- baseVert.mNormal.normVec();
+
+ {
+ LLVector4a lhs;
+ lhs.setSub(corners[1].getPosition(), corners[0].getPosition());
+ LLVector4a rhs;
+ rhs.setSub(corners[2].getPosition(), corners[1].getPosition());
+ baseVert.getNormal().setCross3(lhs, rhs);
+ baseVert.getNormal().normalize3fast();
+ }
+
if(!(mTypeMask & TOP_MASK)){
- baseVert.mNormal *= -1.0f;
+ baseVert.getNormal().mul(-1.0f);
}else{
//Swap the UVs on the U(X) axis for top face
LLVector2 swap;
@@ -5161,9 +5211,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
LLVector4a binormal;
calc_binormal_from_triangle( binormal,
- corners[0].mPosition, corners[0].mTexCoord,
- corners[1].mPosition, corners[1].mTexCoord,
- corners[2].mPosition, corners[2].mTexCoord);
+ corners[0].getPosition(), corners[0].mTexCoord,
+ corners[1].getPosition(), corners[1].mTexCoord,
+ corners[2].getPosition(), corners[2].mTexCoord);
S32 size = (grid_size+1)*(grid_size+1);
resizeVertices(size);
@@ -5185,18 +5235,18 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
(F32)gx/(F32)grid_size,
(F32)gy/(F32)grid_size);
- (*pos++).load3(newVert.mPosition.mV);
- (*norm++).load3(baseVert.mNormal.mV);
- (*tc++) = newVert.mTexCoord;
- (*binorm++).load4a((F32*) &binormal.mQ);
+ *pos++ = newVert.getPosition();
+ *norm++ = baseVert.getNormal();
+ *tc++ = newVert.mTexCoord;
+ *binorm++ = binormal;
if (gx == 0 && gy == 0)
{
- min = max = newVert.mPosition;
+ min = max = LLVector3(newVert.getPosition().getF32());
}
else
{
- update_min_max(min,max,newVert.mPosition);
+ update_min_max(min,max,newVert.getPosition().getF32());
}
}
}
@@ -5343,18 +5393,19 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
mCenter = (min+max)*0.5f;
cuv = (min_uv + max_uv)*0.5f;
+ LLVector4a center;
+ center.load3(mCenter.mV);
+
LLVector4a binormal;
calc_binormal_from_triangle(binormal,
- mCenter, cuv,
- mesh[0+offset].mPos, tc[0],
- mesh[1+offset].mPos, tc[1]);
+ center, cuv,
+ pos[0], tc[0],
+ pos[1], tc[1]);
binormal.normalize3fast();
LLVector4a normal;
LLVector4a d0, d1;
- LLVector4a center;
-
- center.load3(mCenter.mV);
+
d0.setSub(center, pos[0]);
d1.setSub(center, pos[1]);
@@ -5371,7 +5422,7 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
normal.normalize3fast();
VertexData vd;
- vd.mPosition = mCenter;
+ vd.getPosition().load3(mCenter.mV);
vd.mTexCoord = cuv;
if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))
@@ -5634,10 +5685,7 @@ void LLVolumeFace::createBinormals()
allocateBinormals(mNumVertices);
//generate binormals
- LLStrider<LLVector3> pos;
- pos = (LLVector3*) mPositions;
- pos.setStride(16);
-
+ LLVector4a* pos = mPositions;
LLVector2* tc = (LLVector2*) mTexCoords;
LLVector4a* binorm = (LLVector4a*) mBinormals;
@@ -5690,12 +5738,12 @@ void LLVolumeFace::resizeVertices(S32 num_verts)
if (num_verts)
{
- mPositions = (F32*) _mm_malloc(num_verts*16, 16);
- mNormals = (F32*) _mm_malloc(num_verts*16, 16);
+ mPositions = (LLVector4a*) _mm_malloc(num_verts*16, 16);
+ mNormals = (LLVector4a*) _mm_malloc(num_verts*16, 16);
//pad texture coordinate block end to allow for QWORD reads
S32 size = ((num_verts*8) + 0xF) & ~0xF;
- mTexCoords = (F32*) _mm_malloc(size, 16);
+ mTexCoords = (LLVector2*) _mm_malloc(size, 16);
}
else
{
@@ -5707,10 +5755,61 @@ void LLVolumeFace::resizeVertices(S32 num_verts)
mNumVertices = num_verts;
}
+void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv)
+{
+ pushVertex(cv.getPosition(), cv.getNormal(), cv.mTexCoord);
+}
+
+void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc)
+{
+ S32 new_verts = mNumVertices+1;
+ S32 new_size = new_verts*16;
+
+ //positions
+ LLVector4a* dst = (LLVector4a*) _mm_malloc(new_size, 16);
+ if (mPositions)
+ {
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, new_size/4);
+ _mm_free(mPositions);
+ }
+ mPositions = dst;
+
+ //normals
+ dst = (LLVector4a*) _mm_malloc(new_size, 16);
+ if (mNormals)
+ {
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, new_size/4);
+ _mm_free(mNormals);
+ }
+ mNormals = dst;
+
+ //tex coords
+ new_size = ((new_verts*8)+0xF) & ~0xF;
+
+ {
+ LLVector2* dst = (LLVector2*) _mm_malloc(new_size, 16);
+ if (mTexCoords)
+ {
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, new_size/4);
+ _mm_free(mTexCoords);
+ }
+ }
+
+ //just clear binormals
+ _mm_free(mBinormals);
+ mBinormals = NULL;
+
+ mPositions[mNumVertices] = pos;
+ mNormals[mNumVertices] = norm;
+ mTexCoords[mNumVertices] = tc;
+
+ mNumVertices++;
+}
+
void LLVolumeFace::allocateBinormals(S32 num_verts)
{
_mm_free(mBinormals);
- mBinormals = (F32*) _mm_malloc(num_verts*16, 16);
+ mBinormals = (LLVector4a*) _mm_malloc(num_verts*16, 16);
}
@@ -5733,6 +5832,23 @@ void LLVolumeFace::resizeIndices(S32 num_indices)
mNumIndices = num_indices;
}
+void LLVolumeFace::pushIndex(const U16& idx)
+{
+ S32 new_count = mNumIndices + 1;
+ S32 new_size = ((new_count*2)+0xF) & ~0xF;
+
+ S32 old_size = (mNumIndices+0xF) & ~0xF;
+ if (new_size != old_size)
+ {
+ U16* dst = (U16*) _mm_malloc(new_size, 16);
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, new_size/4);
+ _mm_free(mIndices);
+ mIndices = dst;
+ }
+
+ mIndices[mNumIndices++] = idx;
+}
+
void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx)
{
resizeVertices(v.size());
@@ -5740,14 +5856,9 @@ void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v,
for (U32 i = 0; i < v.size(); ++i)
{
- for (U32 j = 0; j < 3; ++j)
- {
- mPositions[i*4+j] = v[i].mPosition[j];
- mNormals[i*4+j] = v[i].mNormal[j];
- }
-
- mTexCoords[i*2+0] = v[i].mTexCoord.mV[0];
- mTexCoords[i*2+1] = v[i].mTexCoord.mV[1];
+ mPositions[i] = v[i].getPosition();
+ mNormals[i] = v[i].getNormal();
+ mTexCoords[i] = v[i].mTexCoord;
}
for (U32 i = 0; i < idx.size(); ++i)
@@ -5768,13 +5879,13 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
}
- F32* new_pos = (F32*) _mm_malloc(new_count*16, 16);
- F32* new_norm = (F32*) _mm_malloc(new_count*16, 16);
- F32* new_tc = (F32*) _mm_malloc((new_count*8+0xF) & ~0xF, 16);
+ LLVector4a* new_pos = (LLVector4a*) _mm_malloc(new_count*16, 16);
+ LLVector4a* new_norm = (LLVector4a*) _mm_malloc(new_count*16, 16);
+ LLVector2* new_tc = (LLVector2*) _mm_malloc((new_count*8+0xF) & ~0xF, 16);
- LLVector4a::memcpyNonAliased16(new_pos, mPositions, new_count*4);
- LLVector4a::memcpyNonAliased16(new_norm, mNormals, new_count*4);
- LLVector4a::memcpyNonAliased16(new_tc, mTexCoords, new_count*2);
+ LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, new_count*4);
+ LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, new_count*4);
+ LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, new_count*2);
_mm_free(mPositions);
_mm_free(mNormals);
@@ -6205,24 +6316,24 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
// Fills in dummy values if the triangle has degenerate texture coordinates.
void calc_binormal_from_triangle(LLVector4a& binormal,
- const LLVector3& pos0,
+ const LLVector4a& pos0,
const LLVector2& tex0,
- const LLVector3& pos1,
+ const LLVector4a& pos1,
const LLVector2& tex1,
- const LLVector3& pos2,
+ const LLVector4a& pos2,
const LLVector2& tex2)
{
- LLVector4a rx0; rx0.set( pos0.mV[VX], tex0.mV[VX], tex0.mV[VY] );
- LLVector4a rx1; rx1.set( pos1.mV[VX], tex1.mV[VX], tex1.mV[VY] );
- LLVector4a rx2; rx2.set( pos2.mV[VX], tex2.mV[VX], tex2.mV[VY] );
+ LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] );
+ LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] );
+ LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] );
- LLVector4a ry0; ry0.set( pos0.mV[VY], tex0.mV[VX], tex0.mV[VY] );
- LLVector4a ry1; ry1.set( pos1.mV[VY], tex1.mV[VX], tex1.mV[VY] );
- LLVector4a ry2; ry2.set( pos2.mV[VY], tex2.mV[VX], tex2.mV[VY] );
+ LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] );
+ LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] );
+ LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] );
- LLVector4a rz0; rz0.set( pos0.mV[VZ], tex0.mV[VX], tex0.mV[VY] );
- LLVector4a rz1; rz1.set( pos1.mV[VZ], tex1.mV[VX], tex1.mV[VY] );
- LLVector4a rz2; rz2.set( pos2.mV[VZ], tex2.mV[VX], tex2.mV[VY] );
+ LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] );
+ LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] );
+ LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] );
LLVector4a lhs, rhs;
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index 911db6f94b..aa58d6d114 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -40,7 +40,6 @@ class LLPathParams;
class LLVolumeParams;
class LLProfile;
class LLPath;
-class LLVector4a;
class LLVolumeFace;
class LLVolume;
@@ -56,6 +55,7 @@ class LLVolume;
#include "v4coloru.h"
#include "llrefcount.h"
#include "llfile.h"
+#include "llvector4a.h"
//============================================================================
@@ -794,15 +794,74 @@ class LLVolumeFace
public:
class VertexData
{
+ enum
+ {
+ POSITION = 0,
+ NORMAL = 1
+ };
+
+ private:
+ void init()
+ {
+ mData = (LLVector4a*) _mm_malloc(32, 16);
+ }
public:
- LLVector3 mPosition;
- LLVector3 mNormal;
- LLVector3 mBinormal;
+ VertexData()
+ {
+ init();
+ }
+
+ VertexData(const VertexData& rhs)
+ {
+ init();
+ LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8);
+ mTexCoord = rhs.mTexCoord;
+ }
+
+ ~VertexData()
+ {
+ _mm_free(mData);
+ }
+
+ LLVector4a& getPosition()
+ {
+ return mData[POSITION];
+ }
+
+ LLVector4a& getNormal()
+ {
+ return mData[NORMAL];
+ }
+
+ const LLVector4a& getPosition() const
+ {
+ return mData[POSITION];
+ }
+
+ const LLVector4a& getNormal() const
+ {
+ return mData[NORMAL];
+ }
+
+
+ void setPosition(const LLVector4a& pos)
+ {
+ mData[POSITION] = pos;
+ }
+
+ void setNormal(const LLVector4a& norm)
+ {
+ mData[NORMAL] = norm;
+ }
+
LLVector2 mTexCoord;
bool operator<(const VertexData& rhs) const;
bool operator==(const VertexData& rhs) const;
bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const;
+
+ private:
+ LLVector4a* mData;
};
LLVolumeFace() :
@@ -834,6 +893,13 @@ public:
void resizeIndices(S32 num_indices);
void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx);
+ void pushVertex(const VertexData& cv);
+ void pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc);
+ void pushIndex(const U16& idx);
+
+ void swapData(LLVolumeFace& rhs);
+
+ void getVertexData(U16 indx, LLVolumeFace::VertexData& cv);
class VertexMapData : public LLVolumeFace::VertexData
{
@@ -842,28 +908,20 @@ public:
bool operator==(const LLVolumeFace::VertexData& rhs) const
{
- return mPosition == rhs.mPosition &&
+ return getPosition().equal3(rhs.getPosition()) &&
mTexCoord == rhs.mTexCoord &&
- mNormal == rhs.mNormal;
+ getNormal().equal3(rhs.getNormal());
}
struct ComparePosition
{
- bool operator()(const LLVector3& a, const LLVector3& b) const
+ bool operator()(const LLVector4a& a, const LLVector4a& b) const
{
- if (a.mV[0] != b.mV[0])
- {
- return a.mV[0] < b.mV[0];
- }
- if (a.mV[1] != b.mV[1])
- {
- return a.mV[1] < b.mV[1];
- }
- return a.mV[2] < b.mV[2];
+ return a.less3(b);
}
};
- typedef std::map<LLVector3, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap;
+ typedef std::map<LLVector4a, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap;
};
void optimize(F32 angle_cutoff = 2.f);
@@ -899,10 +957,10 @@ public:
S32 mNumVertices;
S32 mNumIndices;
- F32* mPositions;
- F32* mNormals;
- F32* mBinormals;
- F32* mTexCoords;
+ LLVector4a* mPositions;
+ LLVector4a* mNormals;
+ LLVector4a* mBinormals;
+ LLVector2* mTexCoords;
U16* mIndices;
std::vector<S32> mEdge;
@@ -1059,14 +1117,18 @@ std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params);
void calc_binormal_from_triangle(
LLVector4a& binormal,
- const LLVector3& pos0,
+ const LLVector4a& pos0,
const LLVector2& tex0,
- const LLVector3& pos1,
+ const LLVector4a& pos1,
const LLVector2& tex1,
- const LLVector3& pos2,
+ const LLVector4a& pos2,
const LLVector2& tex2);
BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size);
+
+BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
+ F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided);
+
BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,
F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided);
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index b0a4c02a43..9b7cc04120 100644
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -6278,7 +6278,7 @@
<key>Type</key>
<string>Boolean</string>
<key>Value</key>
- <integer>1</integer>
+ <integer>0</integer>
</map>
<key>RenderDebugPipeline</key>
<map>
@@ -7735,7 +7735,7 @@
<key>Type</key>
<string>Boolean</string>
<key>Value</key>
- <integer>0</integer>
+ <integer>1</integer>
</map>
<key>RenderUseStreamVBO</key>
<map>
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 679875e6bd..db3c5cca33 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -1216,18 +1216,18 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
LLVector3 vf_binormal;
if (vf.mBinormals)
{
- vf_binormal.set(vf.mBinormals+i*4);
+ vf_binormal.setVec(vf.mBinormals[i].getF32());
}
LLVector3 vf_normal;
- vf_normal.set(vf.mNormals+i*4);
+ vf_normal.set(vf.mNormals[i].getF32());
LLVector3 vf_position;
- vf_position.set(vf.mPositions+i*4);
+ vf_position.set(vf.mPositions[i].getF32());
if (rebuild_tcoord)
{
- LLVector2 tc(vf.mTexCoords+i*2);
+ LLVector2 tc(vf.mTexCoords[i]);
if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
{
diff --git a/indra/newview/llfloaterimagepreview.cpp b/indra/newview/llfloaterimagepreview.cpp
index dae301ae29..28fe2a14b7 100644
--- a/indra/newview/llfloaterimagepreview.cpp
+++ b/indra/newview/llfloaterimagepreview.cpp
@@ -852,7 +852,6 @@ S8 LLImagePreviewSculpted::getType() const
void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
{
-#if 0 //VECTORIZE THIS
mCameraDistance = distance;
mCameraZoom = 1.f;
mCameraPitch = 0.f;
@@ -865,8 +864,8 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
}
const LLVolumeFace &vf = mVolume->getVolumeFace(0);
- U32 num_indices = vf.mIndices.size();
- U32 num_vertices = vf.mVertices.size();
+ U32 num_indices = vf.mNumIndices;
+ U32 num_vertices = vf.mNumVertices;
mVertexBuffer = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL, 0);
mVertexBuffer->allocateBuffer(num_vertices, num_indices, TRUE);
@@ -880,10 +879,16 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
mVertexBuffer->getIndexStrider(index_strider);
// build vertices and normals
+ LLStrider<LLVector3> pos;
+ pos = (LLVector3*) vf.mPositions; pos.setStride(16);
+ LLStrider<LLVector3> norm;
+ norm = (LLVector3*) vf.mNormals; norm.setStride(16);
+
+
for (U32 i = 0; i < num_vertices; i++)
{
- *(vertex_strider++) = vf.mVertices[i].mPosition;
- LLVector3 normal = vf.mVertices[i].mNormal;
+ *(vertex_strider++) = *pos++;
+ LLVector3 normal = *norm++;
normal.normalize();
*(normal_strider++) = normal;
}
@@ -893,7 +898,6 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
{
*(index_strider++) = vf.mIndices[i];
}
-#endif
}
@@ -903,7 +907,6 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
BOOL LLImagePreviewSculpted::render()
{
mNeedsUpdate = FALSE;
-#if 0 //VECTORIZE THIS
LLGLSUIDefault def;
LLGLDisable no_blend(GL_BLEND);
LLGLEnable cull(GL_CULL_FACE);
@@ -948,7 +951,7 @@ BOOL LLImagePreviewSculpted::render()
LLViewerCamera::getInstance()->setPerspective(FALSE, mOrigin.mX, mOrigin.mY, mFullWidth, mFullHeight, FALSE);
const LLVolumeFace &vf = mVolume->getVolumeFace(0);
- U32 num_indices = vf.mIndices.size();
+ U32 num_indices = vf.mNumIndices;
mVertexBuffer->setBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL);
@@ -961,7 +964,6 @@ BOOL LLImagePreviewSculpted::render()
mVertexBuffer->draw(LLRender::TRIANGLES, num_indices, 0);
gGL.popMatrix();
-#endif
return TRUE;
}
diff --git a/indra/newview/llhudicon.cpp b/indra/newview/llhudicon.cpp
index c7ad0bde7e..3c5a4de7f8 100644
--- a/indra/newview/llhudicon.cpp
+++ b/indra/newview/llhudicon.cpp
@@ -39,6 +39,7 @@
#include "llviewerobject.h"
#include "lldrawable.h"
+#include "llvector4a.h"
#include "llviewercamera.h"
#include "llviewertexture.h"
#include "llviewerwindow.h"
@@ -266,26 +267,44 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
LLVector3 x_scale = image_aspect * (F32)gViewerWindow->getWindowHeightScaled() * mScale * scale_factor * x_pixel_vec;
LLVector3 y_scale = (F32)gViewerWindow->getWindowHeightScaled() * mScale * scale_factor * y_pixel_vec;
- LLVector3 lower_left = icon_position - (x_scale * 0.5f);
- LLVector3 lower_right = icon_position + (x_scale * 0.5f);
- LLVector3 upper_left = icon_position - (x_scale * 0.5f) + y_scale;
- LLVector3 upper_right = icon_position + (x_scale * 0.5f) + y_scale;
+ LLVector4a x_scalea;
+ LLVector4a icon_positiona;
+ LLVector4a y_scalea;
-#if 0 //VECTORIZE THIS
-
- F32 t = 0.f;
- LLVector3 dir = end-start;
+ x_scalea.load3(x_scale.mV);
+ x_scalea.mul(0.5f);
+ y_scalea.load3(y_scale.mV);
+
+ icon_positiona.load3(icon_position.mV);
- if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, start, dir, NULL, NULL, &t, FALSE) ||
- LLTriangleRayIntersect(upper_left, lower_left, lower_right, start, dir, NULL, NULL, &t, FALSE))
+ LLVector4a lower_left;
+ lower_left.setSub(icon_positiona, x_scalea);
+ LLVector4a lower_right;
+ lower_right.setAdd(icon_positiona, x_scalea);
+ LLVector4a upper_left;
+ upper_left.setAdd(lower_left, y_scalea);
+ LLVector4a upper_right;
+ upper_right.setAdd(lower_right, y_scalea);
+
+ F32 t = 0.f;
+ LLVector4a enda;
+ enda.load3(end.mV);
+ LLVector4a starta;
+ starta.load3(start.mV);
+ LLVector4a dir;
+ dir.setSub(enda, starta);
+
+ if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, starta, dir, NULL, NULL, &t, FALSE) ||
+ LLTriangleRayIntersect(upper_left, lower_left, lower_right, starta, dir, NULL, NULL, &t, FALSE))
{
if (intersection)
{
- *intersection = start + dir*t;
+ dir.mul(t);
+ starta.add(dir);
+ *intersection = LLVector3((F32*) &starta.mQ);
}
return TRUE;
}
-#endif
return FALSE;
}
diff --git a/indra/newview/llhudtext.cpp b/indra/newview/llhudtext.cpp
index 64c01d0937..9ed5d13831 100644
--- a/indra/newview/llhudtext.cpp
+++ b/indra/newview/llhudtext.cpp
@@ -206,8 +206,6 @@ BOOL LLHUDText::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
gGL.end();
}
-#if 0 //VECTORIZE THIS
-
LLVector3 dir = end-start;
F32 t = 0.f;
@@ -220,9 +218,6 @@ BOOL LLHUDText::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
return TRUE;
}
}
-
-#endif
-
}
return FALSE;
diff --git a/indra/newview/llviewercamera.cpp b/indra/newview/llviewercamera.cpp
index 7eca276358..cef7c4abbb 100644
--- a/indra/newview/llviewercamera.cpp
+++ b/indra/newview/llviewercamera.cpp
@@ -38,6 +38,7 @@
// Viewer includes
#include "llagent.h"
#include "llagentcamera.h"
+#include "llmatrix4a.h"
#include "llviewercontrol.h"
#include "llviewerobjectlist.h"
#include "llviewerregion.h"
@@ -787,22 +788,29 @@ BOOL LLViewerCamera::areVertsVisible(LLViewerObject* volumep, BOOL all_verts)
LLMatrix4 render_mat(vo_volume->getRenderRotation(), LLVector4(vo_volume->getRenderPosition()));
+ LLMatrix4a render_mata;
+ render_mata.loadu(render_mat);
+ LLMatrix4a mata;
+ mata.loadu(mat);
+
num_faces = volume->getNumVolumeFaces();
- //VECTORIZE THIS
for (i = 0; i < num_faces; i++)
{
const LLVolumeFace& face = volume->getVolumeFace(i);
for (U32 v = 0; v < face.mNumVertices; v++)
{
- LLVector4 vec = LLVector4(face.mPositions+v*4) * mat;
+ const LLVector4a& src_vec = face.mPositions[v];
+ LLVector4a vec;
+ mata.affineTransform(src_vec, vec);
if (drawablep->isActive())
{
- vec = vec * render_mat;
+ LLVector4a t = vec;
+ render_mata.affineTransform(t, vec);
}
- BOOL in_frustum = pointInFrustum(LLVector3(vec)) > 0;
+ BOOL in_frustum = pointInFrustum(LLVector3(vec.getF32())) > 0;
if (( !in_frustum && all_verts) ||
(in_frustum && !all_verts))
diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp
index b40e6af496..d2842fd62c 100644
--- a/indra/newview/llvograss.cpp
+++ b/indra/newview/llvograss.cpp
@@ -594,9 +594,9 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
LLVector2 tc[4];
LLVector3 v[4];
- // LLVector3 n[4]; // unused!
+ LLVector3 n[4];
- // F32 closest_t = 1.f; // unused!
+ F32 closest_t = 1.f;
for (S32 i = 0; i < mNumBlades; i++)
{
@@ -640,8 +640,6 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
position.mV[2] += blade_height;
v[3] = v1 = position + mRegionp->getOriginAgent();
-#if 0 //VECTORIZE THIS
-
F32 a,b,t;
BOOL hit = FALSE;
@@ -704,7 +702,6 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
}
}
}
-#endif
}
return ret;
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index d57a535050..8022f81f19 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -1868,7 +1868,7 @@ LLVector3 LLVOVolume::getApproximateFaceNormal(U8 face_id)
const LLVolumeFace& face = volume->getVolumeFace(face_id);
for (S32 i = 0; i < (S32)face.mNumVertices; ++i)
{
- result.add(*((LLVector4a*) face.mNormals+i*4));
+ result.add(face.mNormals[i]);
}
LLVector3 ret((F32*) &result.mQ);