summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--indra/cmake/00-Common.cmake3
-rw-r--r--indra/llcommon/lldefs.h8
-rw-r--r--indra/llcommon/llmemory.cpp19
-rw-r--r--indra/llcommon/llmemory.h35
-rw-r--r--indra/llmath/CMakeLists.txt2
-rw-r--r--indra/llmath/llvolume.cpp1234
-rw-r--r--indra/llmath/llvolume.h152
-rw-r--r--indra/llmath/v3math.h15
-rw-r--r--indra/llrender/llimagegl.cpp4
-rw-r--r--indra/llrender/llvertexbuffer.cpp287
-rw-r--r--indra/llrender/llvertexbuffer.h15
-rw-r--r--indra/newview/llappviewer.cpp4
-rw-r--r--indra/newview/lldrawpoolavatar.cpp21
-rw-r--r--indra/newview/lldrawpoolbump.cpp6
-rw-r--r--indra/newview/llface.cpp63
-rw-r--r--indra/newview/llface.h2
-rw-r--r--indra/newview/llfloaterimagepreview.cpp18
-rw-r--r--indra/newview/llhudicon.cpp41
-rw-r--r--indra/newview/llinventorybridge.cpp20
-rw-r--r--indra/newview/llpanelobject.cpp17
-rw-r--r--indra/newview/llpolymesh.cpp34
-rw-r--r--indra/newview/llspatialpartition.cpp66
-rw-r--r--indra/newview/llspatialpartition.h4
-rw-r--r--indra/newview/llviewercamera.cpp17
-rw-r--r--indra/newview/llviewerjointmesh.cpp169
-rw-r--r--indra/newview/llviewerjointmesh_sse.cpp4
-rw-r--r--indra/newview/llviewerjointmesh_sse2.cpp4
-rw-r--r--indra/newview/llviewerregion.cpp3
-rw-r--r--indra/newview/llvograss.cpp3
-rw-r--r--indra/newview/llvosurfacepatch.cpp74
-rw-r--r--indra/newview/llvotextbubble.cpp33
-rw-r--r--indra/newview/llvovolume.cpp34
32 files changed, 1494 insertions, 917 deletions
diff --git a/indra/cmake/00-Common.cmake b/indra/cmake/00-Common.cmake
index d9e1dd9afb..f0e2890448 100644
--- a/indra/cmake/00-Common.cmake
+++ b/indra/cmake/00-Common.cmake
@@ -172,7 +172,8 @@ if (LINUX)
if (WORD_SIZE EQUAL 32)
add_definitions(-march=pentium3)
endif (WORD_SIZE EQUAL 32)
- add_definitions(-mfpmath=sse -ftree-vectorize)
+ add_definitions(-mfpmath=sse)
+ #add_definitions(-ftree-vectorize) # THIS CRASHES GCC 3.1-3.2
if (NOT STANDALONE)
# this stops us requiring a really recent glibc at runtime
add_definitions(-fno-stack-protector)
diff --git a/indra/llcommon/lldefs.h b/indra/llcommon/lldefs.h
index f3b5ca361f..10e6cb34bf 100644
--- a/indra/llcommon/lldefs.h
+++ b/indra/llcommon/lldefs.h
@@ -242,5 +242,13 @@ inline LLDATATYPE llclampb(const LLDATATYPE& a)
return llmin(llmax(a, (LLDATATYPE)0), (LLDATATYPE)255);
}
+template <class LLDATATYPE>
+inline void llswap(LLDATATYPE& lhs, LLDATATYPE& rhs)
+{
+ LLDATATYPE tmp = lhs;
+ lhs = rhs;
+ rhs = tmp;
+}
+
#endif // LL_LLDEFS_H
diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp
index 2a8015e26d..5c6ca30cdd 100644
--- a/indra/llcommon/llmemory.cpp
+++ b/indra/llcommon/llmemory.cpp
@@ -73,25 +73,6 @@ void LLMemory::freeReserve()
reserveMem = NULL;
}
-void* ll_allocate (size_t size)
-{
- if (size == 0)
- {
- llwarns << "Null allocation" << llendl;
- }
- void *p = malloc(size);
- if (p == NULL)
- {
- LLMemory::freeReserve();
- llerrs << "Out of memory Error" << llendl;
- }
- return p;
-}
-
-void ll_release (void *p)
-{
- free(p);
-}
//----------------------------------------------------------------------------
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index 1c6f64dd8b..117268cfe7 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -32,14 +32,37 @@
#ifndef LLMEMORY_H
#define LLMEMORY_H
+#include <stdlib.h>
+inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free().
+{
+#if defined(LL_WINDOWS)
+ return _mm_malloc(size, 16);
+#elif defined(LL_DARWIN)
+ return malloc(size); // default osx malloc is 16 byte aligned.
+#else
+ void *rtn;
+ if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size)))
+ {
+ return rtn;
+ }
+ else // bad alignment requested, or out of memory
+ {
+ return NULL;
+ }
+#endif
+}
-extern S32 gTotalDAlloc;
-extern S32 gTotalDAUse;
-extern S32 gDACount;
-
-extern void* ll_allocate (size_t size);
-extern void ll_release (void *p);
+inline void ll_aligned_free_16(void *p)
+{
+#if defined(LL_WINDOWS)
+ _mm_free(p);
+#elif defined(LL_DARWIN)
+ return free(p);
+#else
+ free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
class LL_COMMON_API LLMemory
{
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt
index e93fe90650..367486eee7 100644
--- a/indra/llmath/CMakeLists.txt
+++ b/indra/llmath/CMakeLists.txt
@@ -62,6 +62,8 @@ set(llmath_HEADER_FILES
llv4matrix3.h
llv4matrix4.h
llv4vector3.h
+ llvector4a.h
+ llmatrix4a.h
llvolume.h
llvolumemgr.h
llsdutil_math.h
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 5ffc61ce9c..d8fbc081fa 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -1,4 +1,5 @@
/**
+
* @file llvolume.cpp
*
* $LicenseInfo:firstyear=2002&license=viewergpl$
@@ -43,10 +44,12 @@
#include "v4math.h"
#include "m4math.h"
#include "m3math.h"
+#include "llmatrix4a.h"
#include "lldarray.h"
#include "llvolume.h"
#include "llstl.h"
#include "llsdserialize.h"
+#include "llvector4a.h"
#define DEBUG_SILHOUETTE_BINORMALS 0
#define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette
@@ -87,8 +90,6 @@ const F32 SKEW_MAX = 0.95f;
const F32 SCULPT_MIN_AREA = 0.002f;
const S32 SCULPT_MIN_AREA_DETAIL = 1;
-#define GEN_TRI_STRIP 0
-
BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)
{
LLVector3 test = (pt2-pt1)%(pt3-pt2);
@@ -132,21 +133,25 @@ BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, con
// and returns the intersection point along dir in intersection_t.
// Moller-Trumbore algorithm
-BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
+BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,
F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided)
{
F32 u, v, t;
/* find vectors for two edges sharing vert0 */
- LLVector3 edge1 = vert1 - vert0;
+ LLVector4a edge1;
+ edge1.setSub(vert1, vert0);
- LLVector3 edge2 = vert2 - vert0;;
+
+ LLVector4a edge2;
+ edge2.setSub(vert2, vert0);
/* begin calculating determinant - also used to calculate U parameter */
- LLVector3 pvec = dir % edge2;
-
+ LLVector4a pvec;
+ pvec.setCross3(dir, edge2);
+
/* if determinant is near zero, ray lies in plane of triangle */
- F32 det = edge1 * pvec;
+ F32 det = edge1.dot3(pvec);
if (!two_sided)
{
@@ -156,10 +161,11 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons
}
/* calculate distance from vert0 to ray origin */
- LLVector3 tvec = orig - vert0;
+ LLVector4a tvec;
+ tvec.setSub(orig, vert0);
/* calculate U parameter and test bounds */
- u = tvec * pvec;
+ u = tvec.dot3(pvec);
if (u < 0.f || u > det)
{
@@ -167,17 +173,18 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons
}
/* prepare to test V parameter */
- LLVector3 qvec = tvec % edge1;
+ LLVector4a qvec;
+ qvec.setCross3(tvec, edge1);
/* calculate V parameter and test bounds */
- v = dir * qvec;
+ v = dir.dot3(qvec);
if (v < 0.f || u + v > det)
{
return FALSE;
}
/* calculate t, scale parameters, ray intersects triangle */
- t = edge2 * qvec;
+ t = edge2.dot3(qvec);
F32 inv_det = 1.0 / det;
t *= inv_det;
u *= inv_det;
@@ -193,20 +200,22 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons
F32 inv_det = 1.0 / det;
/* calculate distance from vert0 to ray origin */
- LLVector3 tvec = orig - vert0;
+ LLVector4a tvec;
+ tvec.setSub(orig, vert0);
/* calculate U parameter and test bounds */
- u = (tvec * pvec) * inv_det;
+ u = (tvec.dot3(pvec)) * inv_det;
if (u < 0.f || u > 1.f)
{
return FALSE;
}
/* prepare to test V parameter */
- LLVector3 qvec = tvec - edge1;
+ LLVector4a qvec;
+ qvec.setSub(tvec, edge1);
/* calculate V parameter and test bounds */
- v = (dir * qvec) * inv_det;
+ v = (dir.dot3(qvec)) * inv_det;
if (v < 0.f || u + v > 1.f)
{
@@ -214,7 +223,7 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons
}
/* calculate t, ray intersects triangle */
- t = (edge2 * qvec) * inv_det;
+ t = (edge2.dot3(qvec)) * inv_det;
}
if (intersection_a != NULL)
@@ -228,6 +237,21 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons
return TRUE;
}
+//helper for non-aligned vectors
+BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
+ F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided)
+{
+ LLVector4a vert0a, vert1a, vert2a, origa, dira;
+ vert0a.load3(vert0.mV);
+ vert1a.load3(vert1.mV);
+ vert2a.load3(vert2.mV);
+ origa.load3(orig.mV);
+ dira.load3(dir.mV);
+
+ return LLTriangleRayIntersect(vert0a, vert1a, vert2a, origa, dira,
+ intersection_a, intersection_b, intersection_t, two_sided);
+}
+
//-------------------------------------------------------------------
// statics
@@ -1880,15 +1904,15 @@ bool LLVolumeFace::VertexData::operator==(const LLVolumeFace::VertexData& rhs)co
bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs, F32 angle_cutoff) const
{
bool retval = false;
- if (rhs.mPosition == mPosition && rhs.mTexCoord == mTexCoord)
+ if (rhs.mData[POSITION].equal3(mData[POSITION]) && rhs.mTexCoord == mTexCoord)
{
if (angle_cutoff > 1.f)
{
- retval = (mNormal == rhs.mNormal);
+ retval = (mData[NORMAL].equal3(rhs.mData[NORMAL]));
}
else
{
- F32 cur_angle = rhs.mNormal*mNormal;
+ F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]);
retval = cur_angle > angle_cutoff;
}
}
@@ -1992,11 +2016,10 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
LLVolumeFace& face = mVolumeFaces[i];
- face.mHasBinormals = false;
-
//copy out indices
- face.mIndices.resize(idx.size()/2);
- if (idx.empty() || face.mIndices.size() < 3)
+ face.resizeIndices(idx.size()/2);
+
+ if (idx.empty() || face.mNumIndices < 3)
{ //why is there an empty index list?
llerrs <<"WTF?" << llendl;
continue;
@@ -2010,7 +2033,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
//copy out vertices
U32 num_verts = pos.size()/(3*2);
- face.mVertices.resize(num_verts);
+ face.resizeVertices(num_verts);
if (mdl[i].has("Weights"))
{
@@ -2059,7 +2082,6 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
LLVector3 max_pos;
LLVector2 min_tc;
LLVector2 max_tc;
-
min_pos.setValue(mdl[i]["PositionDomain"]["Min"]);
max_pos.setValue(mdl[i]["PositionDomain"]["Max"]);
@@ -2074,36 +2096,44 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
min = max = LLVector3(0,0,0);
+ F32* pos_out = (F32*) face.mPositions;
+ F32* norm_out = (F32*) face.mNormals;
+ F32* tc_out = (F32*) face.mTexCoords;
+
for (U32 j = 0; j < num_verts; ++j)
{
U16* v = (U16*) &(pos[j*3*2]);
- face.mVertices[j].mPosition.setVec(
- (F32) v[0] / 65535.f * pos_range.mV[0] + min_pos.mV[0],
- (F32) v[1] / 65535.f * pos_range.mV[1] + min_pos.mV[1],
- (F32) v[2] / 65535.f * pos_range.mV[2] + min_pos.mV[2]);
+ pos_out[0] = (F32) v[0] / 65535.f * pos_range.mV[0] + min_pos.mV[0];
+ pos_out[1] = (F32) v[1] / 65535.f * pos_range.mV[1] + min_pos.mV[1];
+ pos_out[2] = (F32) v[2] / 65535.f * pos_range.mV[2] + min_pos.mV[2];
+
if (j == 0)
{
- min = max = face.mVertices[j].mPosition;
+ min = max = LLVector3(pos_out);
}
else
{
- update_min_max(min,max,face.mVertices[j].mPosition);
+ update_min_max(min,max,pos_out);
}
+ pos_out += 4;
+
U16* n = (U16*) &(norm[j*3*2]);
- face.mVertices[j].mNormal.setVec(
- (F32) n[0] / 65535.f * 2.f - 1.f,
- (F32) n[1] / 65535.f * 2.f - 1.f,
- (F32) n[2] / 65535.f * 2.f - 1.f);
+
+ norm_out[0] = (F32) n[0] / 65535.f * 2.f - 1.f;
+ norm_out[1] = (F32) n[1] / 65535.f * 2.f - 1.f;
+ norm_out[2] = (F32) n[2] / 65535.f * 2.f - 1.f;
+ norm_out += 4;
U16* t = (U16*) &(tc[j*2*2]);
- face.mVertices[j].mTexCoord.setVec(
- (F32) t[0] / 65535.f * tc_range.mV[0] + min_tc.mV[0],
- (F32) t[1] / 65535.f * tc_range.mV[1] + min_tc.mV[1]);
+ tc_out[0] = (F32) t[0] / 65535.f * tc_range.mV[0] + min_tc.mV[0];
+ tc_out[1] = (F32) t[1] / 65535.f * tc_range.mV[1] + min_tc.mV[1];
+
+ tc_out += 8;
}
@@ -2133,24 +2163,29 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
if (do_reflect_x)
{
- for (S32 i = 0; i < face.mVertices.size(); i++)
+ LLVector4a* p = (LLVector4a*) face.mPositions;
+ LLVector4a* n = (LLVector4a*) face.mNormals;
+
+ for (S32 i = 0; i < face.mNumVertices; i++)
{
- face.mVertices[i].mPosition.mV[VX] *= -1.0f;
- face.mVertices[i].mNormal.mV[VX] *= -1.0f;
+ p[i].mul(-1.0f);
+ n[i].mul(-1.0f);
}
}
if (do_invert_normals)
{
- for (S32 i = 0; i < face.mVertices.size(); i++)
+ LLVector4a* n = (LLVector4a*) face.mNormals;
+
+ for (S32 i = 0; i < face.mNumVertices; i++)
{
- face.mVertices[i].mNormal *= -1.0f;
+ n[i].mul(-1.0f);
}
}
if (do_reverse_triangles)
{
- for (U32 j = 0; j < face.mIndices.size(); j += 3)
+ for (U32 j = 0; j < face.mNumIndices; j += 3)
{
// swap the 2nd and 3rd index
S32 swap = face.mIndices[j+1];
@@ -2168,13 +2203,15 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
void tetrahedron_set_normal(LLVolumeFace::VertexData* cv)
{
- LLVector3 nrm = (cv[1].mPosition-cv[0].mPosition)%(cv[2].mPosition-cv[0].mPosition);
-
- nrm.normVec();
-
- cv[0].mNormal = nrm;
- cv[1].mNormal = nrm;
- cv[2].mNormal = nrm;
+ LLVector4a v0;
+ v0.setSub(cv[1].getPosition(), cv[0].getNormal());
+ LLVector4a v1;
+ v1.setSub(cv[2].getNormal(), cv[0].getPosition());
+
+ cv[0].getNormal().setCross3(v0,v1);
+ cv[0].getNormal().normalize3fast();
+ cv[1].setNormal(cv[0].getNormal());
+ cv[2].setNormal(cv[1].getNormal());
}
BOOL LLVolume::isTetrahedron()
@@ -2189,12 +2226,12 @@ void LLVolume::makeTetrahedron()
LLVolumeFace face;
F32 x = 0.25f;
- LLVector3 p[] =
+ LLVector4a p[] =
{ //unit tetrahedron corners
- LLVector3(x,x,x),
- LLVector3(-x,-x,x),
- LLVector3(-x,x,-x),
- LLVector3(x,-x,-x)
+ LLVector4a(x,x,x),
+ LLVector4a(-x,-x,x),
+ LLVector4a(-x,x,-x),
+ LLVector4a(x,-x,-x)
};
face.mExtents[0].setVec(-x,-x,-x);
@@ -2209,53 +2246,105 @@ void LLVolume::makeTetrahedron()
//side 1
- cv[0].mPosition = p[1];
- cv[1].mPosition = p[0];
- cv[2].mPosition = p[2];
+ cv[0].setPosition(p[1]);
+ cv[1].setPosition(p[0]);
+ cv[2].setPosition(p[2]);
tetrahedron_set_normal(cv);
- face.mVertices.push_back(cv[0]);
- face.mVertices.push_back(cv[1]);
- face.mVertices.push_back(cv[2]);
+ face.resizeVertices(12);
+ face.resizeIndices(12);
+
+ LLVector4a* v = (LLVector4a*) face.mPositions;
+ LLVector4a* n = (LLVector4a*) face.mNormals;
+ LLVector2* tc = (LLVector2*) face.mTexCoords;
+
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
+ v += 3;
+
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
+ n += 3;
+
+ tc[0] = cv[0].mTexCoord;
+ tc[1] = cv[1].mTexCoord;
+ tc[2] = cv[2].mTexCoord;
+ tc += 3;
+
//side 2
- cv[0].mPosition = p[3];
- cv[1].mPosition = p[0];
- cv[2].mPosition = p[1];
+ cv[0].setPosition(p[3]);
+ cv[1].setPosition(p[0]);
+ cv[2].setPosition(p[1]);
tetrahedron_set_normal(cv);
- face.mVertices.push_back(cv[0]);
- face.mVertices.push_back(cv[1]);
- face.mVertices.push_back(cv[2]);
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
+ v += 3;
+
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
+ n += 3;
+
+ tc[0] = cv[0].mTexCoord;
+ tc[1] = cv[1].mTexCoord;
+ tc[2] = cv[2].mTexCoord;
+ tc += 3;
//side 3
- cv[0].mPosition = p[3];
- cv[1].mPosition = p[1];
- cv[2].mPosition = p[2];
+ cv[0].setPosition(p[3]);
+ cv[1].setPosition(p[1]);
+ cv[2].setPosition(p[2]);
tetrahedron_set_normal(cv);
- face.mVertices.push_back(cv[0]);
- face.mVertices.push_back(cv[1]);
- face.mVertices.push_back(cv[2]);
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
+ v += 3;
+
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
+ n += 3;
+
+ tc[0] = cv[0].mTexCoord;
+ tc[1] = cv[1].mTexCoord;
+ tc[2] = cv[2].mTexCoord;
+ tc += 3;
//side 4
- cv[0].mPosition = p[2];
- cv[1].mPosition = p[0];
- cv[2].mPosition = p[3];
+ cv[0].setPosition(p[2]);
+ cv[1].setPosition(p[0]);
+ cv[2].setPosition(p[3]);
tetrahedron_set_normal(cv);
- face.mVertices.push_back(cv[0]);
- face.mVertices.push_back(cv[1]);
- face.mVertices.push_back(cv[2]);
+ v[0] = cv[0].getPosition();
+ v[1] = cv[1].getPosition();
+ v[2] = cv[2].getPosition();
+ v += 3;
+
+ n[0] = cv[0].getNormal();
+ n[1] = cv[1].getNormal();
+ n[2] = cv[2].getNormal();
+ n += 3;
+
+ tc[0] = cv[0].mTexCoord;
+ tc[1] = cv[1].mTexCoord;
+ tc[2] = cv[2].mTexCoord;
+ tc += 3;
//set index buffer
- for (U32 i = 0; i < 12; i++)
+ for (U16 i = 0; i < 12; i++)
{
- face.mIndices.push_back(i);
+ face.mIndices[i] = i;
}
mVolumeFaces.push_back(face);
@@ -3831,7 +3920,7 @@ S32 LLVolume::getNumTriangles() const
for (S32 i = 0; i < getNumVolumeFaces(); ++i)
{
- triangle_count += getVolumeFace(i).mIndices.size()/3;
+ triangle_count += getVolumeFace(i).mNumIndices/3;
}
return triangle_count;
@@ -3844,13 +3933,22 @@ S32 LLVolume::getNumTriangles() const
void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
std::vector<LLVector3> &normals,
std::vector<S32> &segments,
- const LLVector3& obj_cam_vec,
- const LLMatrix4& mat,
- const LLMatrix3& norm_mat,
+ const LLVector3& obj_cam_vec_in,
+ const LLMatrix4& mat_in,
+ const LLMatrix3& norm_mat_in,
S32 face_mask)
{
LLMemType m1(LLMemType::MTYPE_VOLUME);
+ LLMatrix4a mat;
+ mat.loadu(mat_in);
+
+ LLMatrix4a norm_mat;
+ norm_mat.loadu(norm_mat_in);
+
+ LLVector4a obj_cam_vec;
+ obj_cam_vec.load3(obj_cam_vec_in.mV);
+
vertices.clear();
normals.clear();
segments.clear();
@@ -3868,7 +3966,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
LLVolumeFace& face = *iter;
if (!(face_mask & (0x1 << cur_index++)) ||
- face.mIndices.empty() || face.mEdge.empty())
+ face.mNumIndices == 0 || face.mEdge.empty())
{
continue;
}
@@ -3885,7 +3983,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
#if DEBUG_SILHOUETTE_EDGE_MAP
//for each triangle
- U32 count = face.mIndices.size();
+ U32 count = face.mNumIndices;
for (U32 j = 0; j < count/3; j++) {
//get vertices
S32 v1 = face.mIndices[j*3+0];
@@ -3893,9 +3991,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
S32 v3 = face.mIndices[j*3+2];
//get current face center
- LLVector3 cCenter = (face.mVertices[v1].mPosition +
- face.mVertices[v2].mPosition +
- face.mVertices[v3].mPosition) / 3.0f;
+ LLVector3 cCenter = (face.mVertices[v1].getPosition() +
+ face.mVertices[v2].getPosition() +
+ face.mVertices[v3].getPosition()) / 3.0f;
//for each edge
for (S32 k = 0; k < 3; k++) {
@@ -3913,9 +4011,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
v3 = face.mIndices[nIndex*3+2];
//get neighbor face center
- LLVector3 nCenter = (face.mVertices[v1].mPosition +
- face.mVertices[v2].mPosition +
- face.mVertices[v3].mPosition) / 3.0f;
+ LLVector3 nCenter = (face.mVertices[v1].getPosition() +
+ face.mVertices[v2].getPosition() +
+ face.mVertices[v3].getPosition()) / 3.0f;
//draw line
vertices.push_back(cCenter);
@@ -3938,15 +4036,15 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
#elif DEBUG_SILHOUETTE_NORMALS
//for each vertex
- for (U32 j = 0; j < face.mVertices.size(); j++) {
- vertices.push_back(face.mVertices[j].mPosition);
- vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mNormal*0.1f);
+ for (U32 j = 0; j < face.mNumVertices; j++) {
+ vertices.push_back(face.mVertices[j].getPosition());
+ vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].getNormal()*0.1f);
normals.push_back(LLVector3(0,0,1));
normals.push_back(LLVector3(0,0,1));
segments.push_back(vertices.size());
#if DEBUG_SILHOUETTE_BINORMALS
- vertices.push_back(face.mVertices[j].mPosition);
- vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mBinormal*0.1f);
+ vertices.push_back(face.mVertices[j].getPosition());
+ vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f);
normals.push_back(LLVector3(0,0,1));
normals.push_back(LLVector3(0,0,1));
segments.push_back(vertices.size());
@@ -3964,26 +4062,36 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
//for each triangle
std::vector<U8> fFacing;
- vector_append(fFacing, face.mIndices.size()/3);
- for (U32 j = 0; j < face.mIndices.size()/3; j++)
+ vector_append(fFacing, face.mNumIndices/3);
+
+ LLVector4a* v = (LLVector4a*) face.mPositions;
+ LLVector4a* n = (LLVector4a*) face.mNormals;
+
+ for (U32 j = 0; j < face.mNumIndices/3; j++)
{
//approximate normal
S32 v1 = face.mIndices[j*3+0];
S32 v2 = face.mIndices[j*3+1];
S32 v3 = face.mIndices[j*3+2];
- LLVector3 norm = (face.mVertices[v1].mPosition - face.mVertices[v2].mPosition) %
- (face.mVertices[v2].mPosition - face.mVertices[v3].mPosition);
-
- if (norm.magVecSquared() < 0.00000001f)
+ LLVector4a c1,c2;
+ c1.setSub(v[v1], v[v2]);
+ c2.setSub(v[v2], v[v3]);
+
+ LLVector4a norm;
+
+ norm.setCross3(c1, c2);
+
+ if (norm.dot3(norm) < 0.00000001f)
{
fFacing[j] = AWAY | TOWARDS;
}
else
{
//get view vector
- LLVector3 view = (obj_cam_vec-face.mVertices[v1].mPosition);
- bool away = view * norm > 0.0f;
+ LLVector4a view;
+ view.setSub(obj_cam_vec, v[v1]);
+ bool away = view.dot3(norm) > 0.0f;
if (away)
{
fFacing[j] = AWAY;
@@ -3996,7 +4104,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
}
//for each triangle
- for (U32 j = 0; j < face.mIndices.size()/3; j++)
+ for (U32 j = 0; j < face.mNumIndices/3; j++)
{
if (fFacing[j] == (AWAY | TOWARDS))
{ //this is a degenerate triangle
@@ -4029,15 +4137,21 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
S32 v1 = face.mIndices[j*3+k];
S32 v2 = face.mIndices[j*3+((k+1)%3)];
- vertices.push_back(face.mVertices[v1].mPosition*mat);
- LLVector3 norm1 = face.mVertices[v1].mNormal * norm_mat;
- norm1.normVec();
- normals.push_back(norm1);
+ LLVector4a t;
+ mat.affineTransform(v[v1], t);
+ vertices.push_back(LLVector3(t[0], t[1], t[2]));
- vertices.push_back(face.mVertices[v2].mPosition*mat);
- LLVector3 norm2 = face.mVertices[v2].mNormal * norm_mat;
- norm2.normVec();
- normals.push_back(norm2);
+ norm_mat.rotate(n[v1], t);
+
+ t.normalize3fast();
+ normals.push_back(LLVector3(t[0], t[1], t[2]));
+
+ mat.affineTransform(v[v2], t);
+ vertices.push_back(LLVector3(t[0], t[1], t[2]));
+
+ norm_mat.rotate(n[v2], t);
+ t.normalize3fast();
+ normals.push_back(LLVector3(t[0], t[1], t[2]));
segments.push_back(vertices.size());
}
@@ -4076,7 +4190,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,
for (S32 i = start_face; i <= end_face; i++)
{
- const LLVolumeFace &face = getVolumeFace((U32)i);
+ LLVolumeFace &face = mVolumeFaces[i];
LLVector3 box_center = (face.mExtents[0] + face.mExtents[1]) / 2.f;
LLVector3 box_size = face.mExtents[1] - face.mExtents[0];
@@ -4088,7 +4202,14 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,
genBinormals(i);
}
- for (U32 tri = 0; tri < face.mIndices.size()/3; tri++)
+ LLVector4a starta, dira;
+
+ starta.load3(start.mV);
+ dira.load3(dir.mV);
+
+ LLVector4a* p = (LLVector4a*) face.mPositions;
+
+ for (U32 tri = 0; tri < face.mNumIndices/3; tri++)
{
S32 index1 = face.mIndices[tri*3+0];
S32 index2 = face.mIndices[tri*3+1];
@@ -4096,15 +4217,15 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,
F32 a, b, t;
- if (LLTriangleRayIntersect(face.mVertices[index1].mPosition,
- face.mVertices[index2].mPosition,
- face.mVertices[index3].mPosition,
- start, dir, &a, &b, &t, FALSE))
+ if (LLTriangleRayIntersect(p[index1],
+ p[index2],
+ p[index3],
+ starta, dira, &a, &b, &t, FALSE))
{
if ((t >= 0.f) && // if hit is after start
(t <= 1.f) && // and before end
(t < closest_t)) // and this hit is closer
- {
+ {
closest_t = t;
hit_face = i;
@@ -4112,27 +4233,32 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,
{
*intersection = start + dir * closest_t;
}
-
+
+
if (tex_coord != NULL)
- {
- *tex_coord = ((1.f - a - b) * face.mVertices[index1].mTexCoord +
- a * face.mVertices[index2].mTexCoord +
- b * face.mVertices[index3].mTexCoord);
+ {
+ LLVector2* tc = (LLVector2*) face.mTexCoords;
+ *tex_coord = ((1.f - a - b) * tc[index1] +
+ a * tc[index2] +
+ b * tc[index3]);
}
if (normal != NULL)
- {
- *normal = ((1.f - a - b) * face.mVertices[index1].mNormal +
- a * face.mVertices[index2].mNormal +
- b * face.mVertices[index3].mNormal);
+ {
+ LLVector4* norm = (LLVector4*) face.mNormals;
+
+ *normal = ((1.f - a - b) * LLVector3(norm[index1]) +
+ a * LLVector3(norm[index2]) +
+ b * LLVector3(norm[index3]));
}
if (bi_normal != NULL)
- {
- *bi_normal = ((1.f - a - b) * face.mVertices[index1].mBinormal +
- a * face.mVertices[index2].mBinormal +
- b * face.mVertices[index3].mBinormal);
+ {
+ LLVector4* binormal = (LLVector4*) face.mBinormals;
+ *bi_normal = ((1.f - a - b) * LLVector3(binormal[index1]) +
+ a * LLVector3(binormal[index2]) +
+ b * LLVector3(binormal[index3]));
}
}
@@ -4903,6 +5029,14 @@ std::ostream& operator<<(std::ostream &s, const LLVolume *volumep)
return s;
}
+LLVolumeFace::~LLVolumeFace()
+{
+ _mm_free(mPositions);
+ _mm_free(mNormals);
+ _mm_free(mTexCoords);
+ _mm_free(mIndices);
+ _mm_free(mBinormals);
+}
BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)
{
@@ -4921,6 +5055,13 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)
}
}
+void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv)
+{
+ cv.setPosition(mPositions[index]);
+ cv.setNormal(mNormals[index]);
+ cv.mTexCoord = mTexCoords[index];
+}
+
void LLVolumeFace::optimize(F32 angle_cutoff)
{
LLVolumeFace new_face;
@@ -4928,14 +5069,15 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
VertexMapData::PointMap point_map;
//remove redundant vertices
- for (U32 i = 0; i < mIndices.size(); ++i)
+ for (U32 i = 0; i < mNumIndices; ++i)
{
U16 index = mIndices[i];
- LLVolumeFace::VertexData cv = mVertices[index];
-
+ LLVolumeFace::VertexData cv;
+ getVertexData(index, cv);
+
BOOL found = FALSE;
- VertexMapData::PointMap::iterator point_iter = point_map.find(cv.mPosition);
+ VertexMapData::PointMap::iterator point_iter = point_map.find(cv.getPosition());
if (point_iter != point_map.end())
{ //duplicate point might exist
for (U32 j = 0; j < point_iter->second.size(); ++j)
@@ -4944,7 +5086,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
if (tv.compareNormal(cv, angle_cutoff))
{
found = TRUE;
- new_face.mIndices.push_back((point_iter->second)[j].mIndex);
+ new_face.pushIndex((point_iter->second)[j].mIndex);
break;
}
}
@@ -4952,14 +5094,14 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
if (!found)
{
- new_face.mVertices.push_back(cv);
- U16 index = (U16) new_face.mVertices.size()-1;
- new_face.mIndices.push_back(index);
+ new_face.pushVertex(cv);
+ U16 index = (U16) new_face.mNumVertices-1;
+ new_face.pushIndex(index);
VertexMapData d;
- d.mPosition = cv.mPosition;
+ d.setPosition(cv.getPosition());
d.mTexCoord = cv.mTexCoord;
- d.mNormal = cv.mNormal;
+ d.setNormal(cv.getNormal());
d.mIndex = index;
if (point_iter != point_map.end())
{
@@ -4967,13 +5109,23 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
}
else
{
- point_map[d.mPosition].push_back(d);
+ point_map[d.getPosition()].push_back(d);
}
}
}
- mVertices = new_face.mVertices;
- mIndices = new_face.mIndices;
+ swapData(new_face);
+}
+
+void LLVolumeFace::swapData(LLVolumeFace& rhs)
+{
+ llswap(rhs.mPositions, mPositions);
+ llswap(rhs.mNormals, mNormals);
+ llswap(rhs.mBinormals, mBinormals);
+ llswap(rhs.mTexCoords, mTexCoords);
+ llswap(rhs.mIndices,mIndices);
+ llswap(rhs.mNumVertices, mNumVertices);
+ llswap(rhs.mNumIndices, mNumIndices);
}
void LerpPlanarVertex(LLVolumeFace::VertexData& v0,
@@ -4983,10 +5135,21 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0,
F32 coef01,
F32 coef02)
{
- vout.mPosition = v0.mPosition + ((v1.mPosition-v0.mPosition)*coef01)+((v2.mPosition-v0.mPosition)*coef02);
+
+ LLVector4a lhs;
+ lhs.setSub(v1.getPosition(), v0.getPosition());
+ lhs.mul(coef01);
+ LLVector4a rhs;
+ rhs.setSub(v2.getPosition(), v0.getPosition());
+ rhs.mul(coef02);
+
+ rhs.add(lhs);
+ rhs.add(v0.getPosition());
+
+ vout.setPosition(rhs);
+
vout.mTexCoord = v0.mTexCoord + ((v1.mTexCoord-v0.mTexCoord)*coef01)+((v2.mTexCoord-v0.mTexCoord)*coef02);
- vout.mNormal = v0.mNormal;
- vout.mBinormal = v0.mBinormal;
+ vout.setNormal(v0.getNormal());
}
BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
@@ -5018,16 +5181,22 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
VertexData corners[4];
VertexData baseVert;
for(int t = 0; t < 4; t++){
- corners[t].mPosition = mesh[offset + (grid_size*t)].mPos;
+ corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV);
corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f;
corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1];
}
- baseVert.mNormal =
- ((corners[1].mPosition-corners[0].mPosition) %
- (corners[2].mPosition-corners[1].mPosition));
- baseVert.mNormal.normVec();
+
+ {
+ LLVector4a lhs;
+ lhs.setSub(corners[1].getPosition(), corners[0].getPosition());
+ LLVector4a rhs;
+ rhs.setSub(corners[2].getPosition(), corners[1].getPosition());
+ baseVert.getNormal().setCross3(lhs, rhs);
+ baseVert.getNormal().normalize3fast();
+ }
+
if(!(mTypeMask & TOP_MASK)){
- baseVert.mNormal *= -1.0f;
+ baseVert.getNormal().mul(-1.0f);
}else{
//Swap the UVs on the U(X) axis for top face
LLVector2 swap;
@@ -5038,22 +5207,23 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
corners[1].mTexCoord=corners[2].mTexCoord;
corners[2].mTexCoord=swap;
}
- baseVert.mBinormal = calc_binormal_from_triangle(
- corners[0].mPosition, corners[0].mTexCoord,
- corners[1].mPosition, corners[1].mTexCoord,
- corners[2].mPosition, corners[2].mTexCoord);
- for(int t = 0; t < 4; t++){
- corners[t].mBinormal = baseVert.mBinormal;
- corners[t].mNormal = baseVert.mNormal;
- }
- mHasBinormals = TRUE;
- if (partial_build)
- {
- mVertices.clear();
- }
+ LLVector4a binormal;
+
+ calc_binormal_from_triangle( binormal,
+ corners[0].getPosition(), corners[0].mTexCoord,
+ corners[1].getPosition(), corners[1].mTexCoord,
+ corners[2].getPosition(), corners[2].mTexCoord);
+
+ S32 size = (grid_size+1)*(grid_size+1);
+ resizeVertices(size);
+ allocateBinormals(size);
+
+ LLVector4a* pos = (LLVector4a*) mPositions;
+ LLVector4a* norm = (LLVector4a*) mNormals;
+ LLVector4a* binorm = (LLVector4a*) mBinormals;
+ LLVector2* tc = (LLVector2*) mTexCoords;
- S32 vtop = mVertices.size();
for(int gx = 0;gx<grid_size+1;gx++){
for(int gy = 0;gy<grid_size+1;gy++){
VertexData newVert;
@@ -5064,15 +5234,19 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
newVert,
(F32)gx/(F32)grid_size,
(F32)gy/(F32)grid_size);
- mVertices.push_back(newVert);
+
+ *pos++ = newVert.getPosition();
+ *norm++ = baseVert.getNormal();
+ *tc++ = newVert.mTexCoord;
+ *binorm++ = binormal;
if (gx == 0 && gy == 0)
{
- min = max = newVert.mPosition;
+ min = max = LLVector3(newVert.getPosition().getF32());
}
else
{
- update_min_max(min,max,newVert.mPosition);
+ update_min_max(min,max,newVert.getPosition().getF32());
}
}
}
@@ -5081,9 +5255,10 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
if (!partial_build)
{
-#if GEN_TRI_STRIP
- mTriStrip.clear();
-#endif
+ resizeIndices(grid_size*grid_size*6);
+
+ U16* out = mIndices;
+
S32 idxs[] = {0,1,(grid_size+1)+1,(grid_size+1)+1,(grid_size+1),0};
for(S32 gx = 0;gx<grid_size;gx++)
{
@@ -5094,61 +5269,19 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
{
for(S32 i=5;i>=0;i--)
{
- mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]);
- }
-
-#if GEN_TRI_STRIP
- if (gy == 0)
- {
- mTriStrip.push_back((gx+1)*(grid_size+1));
- mTriStrip.push_back((gx+1)*(grid_size+1));
- mTriStrip.push_back(gx*(grid_size+1));
- }
-
- mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1));
- mTriStrip.push_back(gy+1+gx*(grid_size+1));
-
-
- if (gy == grid_size-1)
- {
- mTriStrip.push_back(gy+1+gx*(grid_size+1));
- }
-#endif
+ *out++ = ((gy*(grid_size+1))+gx+idxs[i]);
+ }
}
else
{
for(S32 i=0;i<6;i++)
{
- mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]);
+ *out++ = ((gy*(grid_size+1))+gx+idxs[i]);
}
-
-#if GEN_TRI_STRIP
- if (gy == 0)
- {
- mTriStrip.push_back(gx*(grid_size+1));
- mTriStrip.push_back(gx*(grid_size+1));
- mTriStrip.push_back((gx+1)*(grid_size+1));
- }
-
- mTriStrip.push_back(gy+1+gx*(grid_size+1));
- mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1));
-
- if (gy == grid_size-1)
- {
- mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1));
- }
-#endif
}
}
}
-
-#if GEN_TRI_STRIP
- if (mTriStrip.size()%2 == 1)
- {
- mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]);
- }
-#endif
}
return TRUE;
@@ -5178,11 +5311,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
num_vertices = profile.size();
num_indices = (profile.size() - 2)*3;
- mVertices.resize(num_vertices);
+ if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))
+ {
+ resizeVertices(num_vertices+1);
+ allocateBinormals(num_vertices+1);
- if (!partial_build)
+ if (!partial_build)
+ {
+ resizeIndices(num_indices+3);
+ }
+ }
+ else
{
- mIndices.resize(num_indices);
+ resizeVertices(num_vertices);
+ allocateBinormals(num_vertices);
+
+ if (!partial_build)
+ {
+ resizeIndices(num_indices);
+ }
}
S32 max_s = volume->getProfile().getTotal();
@@ -5209,79 +5356,88 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
LLVector3& min = mExtents[0];
LLVector3& max = mExtents[1];
+ LLVector2* tc = (LLVector2*) mTexCoords;
+ LLVector4a* pos = (LLVector4a*) mPositions;
+ LLVector4a* norm = (LLVector4a*) mNormals;
+ LLVector4a* binorm = (LLVector4a*) mBinormals;
+
// Copy the vertices into the array
for (S32 i = 0; i < num_vertices; i++)
{
if (mTypeMask & TOP_MASK)
{
- mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f;
- mVertices[i].mTexCoord.mV[1] = profile[i].mV[1]+0.5f;
+ tc[i].mV[0] = profile[i].mV[0]+0.5f;
+ tc[i].mV[1] = profile[i].mV[1]+0.5f;
}
else
{
// Mirror for underside.
- mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f;
- mVertices[i].mTexCoord.mV[1] = 0.5f - profile[i].mV[1];
+ tc[i].mV[0] = profile[i].mV[0]+0.5f;
+ tc[i].mV[1] = 0.5f - profile[i].mV[1];
}
- mVertices[i].mPosition = mesh[i + offset].mPos;
+ pos[i].load3(mesh[i + offset].mPos.mV);
if (i == 0)
{
- min = max = mVertices[i].mPosition;
- min_uv = max_uv = mVertices[i].mTexCoord;
+ min = max = mesh[i+offset].mPos;
+ min_uv = max_uv = tc[i];
}
else
{
- update_min_max(min,max, mVertices[i].mPosition);
- update_min_max(min_uv, max_uv, mVertices[i].mTexCoord);
+ update_min_max(min,max, mesh[i+offset].mPos);
+ update_min_max(min_uv, max_uv, tc[i]);
}
}
mCenter = (min+max)*0.5f;
cuv = (min_uv + max_uv)*0.5f;
- LLVector3 binormal = calc_binormal_from_triangle(
- mCenter, cuv,
- mVertices[0].mPosition, mVertices[0].mTexCoord,
- mVertices[1].mPosition, mVertices[1].mTexCoord);
- binormal.normVec();
+ LLVector4a center;
+ center.load3(mCenter.mV);
+
+ LLVector4a binormal;
+ calc_binormal_from_triangle(binormal,
+ center, cuv,
+ pos[0], tc[0],
+ pos[1], tc[1]);
+ binormal.normalize3fast();
+
+ LLVector4a normal;
+ LLVector4a d0, d1;
+
- LLVector3 d0;
- LLVector3 d1;
- LLVector3 normal;
+ d0.setSub(center, pos[0]);
+ d1.setSub(center, pos[1]);
- d0 = mCenter-mVertices[0].mPosition;
- d1 = mCenter-mVertices[1].mPosition;
+ if (mTypeMask & TOP_MASK)
+ {
+ normal.setCross3(d0, d1);
+ }
+ else
+ {
+ normal.setCross3(d1, d0);
+ }
- normal = (mTypeMask & TOP_MASK) ? (d0%d1) : (d1%d0);
- normal.normVec();
+ normal.normalize3fast();
VertexData vd;
- vd.mPosition = mCenter;
- vd.mNormal = normal;
- vd.mBinormal = binormal;
+ vd.getPosition().load3(mCenter.mV);
vd.mTexCoord = cuv;
if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))
{
- mVertices.push_back(vd);
+ pos[num_vertices].load4a((F32*) &center.mQ);
+ tc[num_vertices] = cuv;
num_vertices++;
- if (!partial_build)
- {
- vector_append(mIndices, 3);
- }
}
-
for (S32 i = 0; i < num_vertices; i++)
{
- mVertices[i].mBinormal = binormal;
- mVertices[i].mNormal = normal;
+ binorm[i].load4a((F32*) &binormal.mQ);
+ norm[i].load4a((F32*) &normal.mQ);
}
- mHasBinormals = TRUE;
-
if (partial_build)
{
return TRUE;
@@ -5389,8 +5545,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
pt2--;
}
}
-
- makeTriStrip();
}
else
{
@@ -5495,8 +5649,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
pt2--;
}
}
-
- makeTriStrip();
}
}
else
@@ -5518,167 +5670,276 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
mIndices[3*i+v2] = i + 1;
}
-#if GEN_TRI_STRIP
- //make tri strip
- if (mTypeMask & OPEN_MASK)
- {
- makeTriStrip();
- }
- else
- {
- S32 j = num_vertices-2;
- if (mTypeMask & TOP_MASK)
- {
- mTriStrip.push_back(0);
- for (S32 i = 0; i <= j; ++i)
- {
- mTriStrip.push_back(i);
- if (i != j)
- {
- mTriStrip.push_back(j);
- }
- --j;
- }
- }
- else
- {
- mTriStrip.push_back(j);
- for (S32 i = 0; i <= j; ++i)
- {
- if (i != j)
- {
- mTriStrip.push_back(j);
- }
- mTriStrip.push_back(i);
- --j;
- }
- }
-
- mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]);
- if (mTriStrip.size()%2 == 1)
- {
- mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]);
- }
- }
-#endif
}
return TRUE;
}
-void LLVolumeFace::makeTriStrip()
-{
-#if GEN_TRI_STRIP
- for (U32 i = 0; i < mIndices.size(); i+=3)
- {
- U16 i0 = mIndices[i];
- U16 i1 = mIndices[i+1];
- U16 i2 = mIndices[i+2];
-
- if ((i/3)%2 == 1)
- {
- mTriStrip.push_back(i0);
- mTriStrip.push_back(i0);
- mTriStrip.push_back(i1);
- mTriStrip.push_back(i2);
- mTriStrip.push_back(i2);
- }
- else
- {
- mTriStrip.push_back(i2);
- mTriStrip.push_back(i2);
- mTriStrip.push_back(i1);
- mTriStrip.push_back(i0);
- mTriStrip.push_back(i0);
- }
- }
-
- if (mTriStrip.size()%2 == 1)
- {
- mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]);
- }
-#endif
-}
-
void LLVolumeFace::createBinormals()
{
LLMemType m1(LLMemType::MTYPE_VOLUME);
- if (!mHasBinormals)
+ if (!mBinormals)
{
+ allocateBinormals(mNumVertices);
+
//generate binormals
- for (U32 i = 0; i < mIndices.size()/3; i++)
+ LLVector4a* pos = mPositions;
+ LLVector2* tc = (LLVector2*) mTexCoords;
+ LLVector4a* binorm = (LLVector4a*) mBinormals;
+
+ for (U32 i = 0; i < mNumIndices/3; i++)
{ //for each triangle
- const VertexData& v0 = mVertices[mIndices[i*3+0]];
- const VertexData& v1 = mVertices[mIndices[i*3+1]];
- const VertexData& v2 = mVertices[mIndices[i*3+2]];
+ const U16& i0 = mIndices[i*3+0];
+ const U16& i1 = mIndices[i*3+1];
+ const U16& i2 = mIndices[i*3+2];
//calculate binormal
- LLVector3 binorm = calc_binormal_from_triangle(v0.mPosition, v0.mTexCoord,
- v1.mPosition, v1.mTexCoord,
- v2.mPosition, v2.mTexCoord);
+ LLVector4a binormal;
+ calc_binormal_from_triangle(binormal,
+ pos[i0], tc[i0],
+ pos[i1], tc[i1],
+ pos[i2], tc[i2]);
- for (U32 j = 0; j < 3; j++)
- { //add triangle normal to vertices
- mVertices[mIndices[i*3+j]].mBinormal += binorm; // * (weight_sum - d[j])/weight_sum;
- }
+
+ //add triangle normal to vertices
+ binorm[i0].add(binormal);
+ binorm[i1].add(binormal);
+ binorm[i2].add(binormal);
//even out quad contributions
if (i % 2 == 0)
{
- mVertices[mIndices[i*3+2]].mBinormal += binorm;
+ binorm[i2].add(binormal);
}
else
{
- mVertices[mIndices[i*3+1]].mBinormal += binorm;
+ binorm[i1].add(binormal);
}
}
//normalize binormals
- for (U32 i = 0; i < mVertices.size(); i++)
+ for (U32 i = 0; i < mNumVertices; i++)
{
- mVertices[i].mBinormal.normVec();
- mVertices[i].mNormal.normVec();
+ binorm[i].normalize3fast();
}
+ }
+}
+
+void LLVolumeFace::resizeVertices(S32 num_verts)
+{
+ _mm_free(mPositions);
+ _mm_free(mNormals);
+ _mm_free(mBinormals);
+ _mm_free(mTexCoords);
- mHasBinormals = TRUE;
+ mBinormals = NULL;
+
+ if (num_verts)
+ {
+ mPositions = (LLVector4a*) _mm_malloc(num_verts*16, 16);
+ mNormals = (LLVector4a*) _mm_malloc(num_verts*16, 16);
+
+ //pad texture coordinate block end to allow for QWORD reads
+ S32 size = ((num_verts*8) + 0xF) & ~0xF;
+ mTexCoords = (LLVector2*) _mm_malloc(size, 16);
}
+ else
+ {
+ mPositions = NULL;
+ mNormals = NULL;
+ mTexCoords = NULL;
+ }
+
+ mNumVertices = num_verts;
+}
+
+void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv)
+{
+ pushVertex(cv.getPosition(), cv.getNormal(), cv.mTexCoord);
}
-void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat, LLMatrix4& norm_mat)
+void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc)
{
- U16 offset = mVertices.size();
+ S32 new_verts = mNumVertices+1;
+ S32 new_size = new_verts*16;
+
+ //positions
+ LLVector4a* dst = (LLVector4a*) _mm_malloc(new_size, 16);
+ if (mPositions)
+ {
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, new_size/4);
+ _mm_free(mPositions);
+ }
+ mPositions = dst;
- if (face.mVertices.size() + mVertices.size() > 65536)
+ //normals
+ dst = (LLVector4a*) _mm_malloc(new_size, 16);
+ if (mNormals)
{
- llerrs << "Cannot append face -- 16-bit overflow will occur." << llendl;
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, new_size/4);
+ _mm_free(mNormals);
+ }
+ mNormals = dst;
+
+ //tex coords
+ new_size = ((new_verts*8)+0xF) & ~0xF;
+
+ {
+ LLVector2* dst = (LLVector2*) _mm_malloc(new_size, 16);
+ if (mTexCoords)
+ {
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, new_size/4);
+ _mm_free(mTexCoords);
+ }
+ }
+
+ //just clear binormals
+ _mm_free(mBinormals);
+ mBinormals = NULL;
+
+ mPositions[mNumVertices] = pos;
+ mNormals[mNumVertices] = norm;
+ mTexCoords[mNumVertices] = tc;
+
+ mNumVertices++;
+}
+
+void LLVolumeFace::allocateBinormals(S32 num_verts)
+{
+ _mm_free(mBinormals);
+ mBinormals = (LLVector4a*) _mm_malloc(num_verts*16, 16);
+}
+
+
+void LLVolumeFace::resizeIndices(S32 num_indices)
+{
+ _mm_free(mIndices);
+
+ if (num_indices)
+ {
+ //pad index block end to allow for QWORD reads
+ S32 size = ((num_indices*2) + 0xF) & ~0xF;
+
+ mIndices = (U16*) _mm_malloc(size,16);
+ }
+ else
+ {
+ mIndices = NULL;
+ }
+
+ mNumIndices = num_indices;
+}
+
+void LLVolumeFace::pushIndex(const U16& idx)
+{
+ S32 new_count = mNumIndices + 1;
+ S32 new_size = ((new_count*2)+0xF) & ~0xF;
+
+ S32 old_size = (mNumIndices+0xF) & ~0xF;
+ if (new_size != old_size)
+ {
+ U16* dst = (U16*) _mm_malloc(new_size, 16);
+ LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, new_size/4);
+ _mm_free(mIndices);
+ mIndices = dst;
}
- for (U32 i = 0; i < face.mVertices.size(); ++i)
+ mIndices[mNumIndices++] = idx;
+}
+
+void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx)
+{
+ resizeVertices(v.size());
+ resizeIndices(idx.size());
+
+ for (U32 i = 0; i < v.size(); ++i)
{
- VertexData v = face.mVertices[i];
- v.mPosition = v.mPosition*mat;
- v.mNormal = v.mNormal * norm_mat;
+ mPositions[i] = v[i].getPosition();
+ mNormals[i] = v[i].getNormal();
+ mTexCoords[i] = v[i].mTexCoord;
+ }
+
+ for (U32 i = 0; i < idx.size(); ++i)
+ {
+ mIndices[i] = idx[i];
+ }
+}
+
+void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMatrix4& norm_mat_in)
+{
+ U16 offset = mNumVertices;
+
+ S32 new_count = face.mNumVertices + mNumVertices;
+
+ if (new_count > 65536)
+ {
+ llerrs << "Cannot append face -- 16-bit overflow will occur." << llendl;
+ }
+
+
+ LLVector4a* new_pos = (LLVector4a*) _mm_malloc(new_count*16, 16);
+ LLVector4a* new_norm = (LLVector4a*) _mm_malloc(new_count*16, 16);
+ LLVector2* new_tc = (LLVector2*) _mm_malloc((new_count*8+0xF) & ~0xF, 16);
+
+ LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, new_count*4);
+ LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, new_count*4);
+ LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, new_count*2);
+
+ _mm_free(mPositions);
+ _mm_free(mNormals);
+ _mm_free(mTexCoords);
+
+ mPositions = new_pos;
+ mNormals = new_norm;
+ mTexCoords = new_tc;
+
+ mNumVertices = new_count;
- v.mNormal.normalize();
+ LLVector4a* dst_pos = (LLVector4a*) mPositions+offset;
+ LLVector2* dst_tc = (LLVector2*) mTexCoords+offset;
+ LLVector4a* dst_norm = (LLVector4a*) mNormals+offset;
- mVertices.push_back(v);
+ LLVector4a* src_pos = (LLVector4a*) face.mPositions;
+ LLVector2* src_tc = (LLVector2*) face.mTexCoords;
+ LLVector4a* src_norm = (LLVector4a*) face.mNormals;
+
+ LLMatrix4a mat, norm_mat;
+ mat.loadu(mat_in);
+ norm_mat.loadu(norm_mat_in);
+
+ for (U32 i = 0; i < face.mNumVertices; ++i)
+ {
+ mat.affineTransform(src_pos[i], dst_pos[i]);
+ norm_mat.rotate(src_norm[i], dst_norm[i]);
+ dst_norm[i].normalize3fast();
+
+ dst_tc[i] = src_tc[i];
if (offset == 0 && i == 0)
{
- mExtents[0] = mExtents[1] = v.mPosition;
+ mExtents[0] = mExtents[1] = LLVector3((F32*) &(dst_pos[i].mQ));
}
else
{
- update_min_max(mExtents[0], mExtents[1], v.mPosition);
+ update_min_max(mExtents[0], mExtents[1], (F32*) &(dst_pos[i].mQ));
}
}
-
- for (U32 i = 0; i < face.mIndices.size(); ++i)
+
+ new_count = mNumIndices + face.mNumIndices;
+ U16* new_indices = (U16*) _mm_malloc((new_count*2+0xF) & ~0xF, 16);
+ LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, new_count/2);
+ _mm_free(mIndices);
+ mIndices = new_indices;
+ mNumIndices = new_count;
+
+ U16* dst_idx = mIndices+offset;
+
+ for (U32 i = 0; i < face.mNumIndices; ++i)
{
- mIndices.push_back(face.mIndices[i]+offset);
+ dst_idx[i] = face.mIndices[i]+offset;
}
}
@@ -5708,21 +5969,20 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
num_vertices = mNumS*mNumT;
num_indices = (mNumS-1)*(mNumT-1)*6;
- mVertices.resize(num_vertices);
-
if (!partial_build)
{
- mIndices.resize(num_indices);
+ resizeVertices(num_vertices);
+ resizeIndices(num_indices);
if ((volume->getParams().getSculptType() & LL_SCULPT_TYPE_MASK) != LL_SCULPT_TYPE_MESH)
{
mEdge.resize(num_indices);
}
}
- else
- {
- mHasBinormals = FALSE;
- }
+
+ LLVector4a* pos = (LLVector4a*) mPositions;
+ LLVector4a* norm = (LLVector4a*) mNormals;
+ LLVector2* tc = (LLVector2*) mTexCoords;
S32 begin_stex = llfloor( profile[mBeginS].mV[2] );
S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS;
@@ -5774,21 +6034,21 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
i = mBeginS + s + max_s*t;
}
- mVertices[cur_vertex].mPosition = mesh[i].mPos;
- mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt);
+ pos[cur_vertex].load3(mesh[i].mPos.mV);
+ tc[cur_vertex] = LLVector2(ss,tt);
- mVertices[cur_vertex].mNormal = LLVector3(0,0,0);
- mVertices[cur_vertex].mBinormal = LLVector3(0,0,0);
+ norm[cur_vertex].clear();
cur_vertex++;
if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0)
{
- mVertices[cur_vertex].mPosition = mesh[i].mPos;
- mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt);
+
+ pos[cur_vertex].load3(mesh[i].mPos.mV);
+ tc[cur_vertex] = LLVector2(ss,tt);
- mVertices[cur_vertex].mNormal = LLVector3(0,0,0);
- mVertices[cur_vertex].mBinormal = LLVector3(0,0,0);
+ norm[cur_vertex].clear();
+
cur_vertex++;
}
}
@@ -5806,12 +6066,10 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
i = mBeginS + s + max_s*t;
ss = profile[mBeginS + s].mV[2] - begin_stex;
- mVertices[cur_vertex].mPosition = mesh[i].mPos;
- mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt);
-
- mVertices[cur_vertex].mNormal = LLVector3(0,0,0);
- mVertices[cur_vertex].mBinormal = LLVector3(0,0,0);
-
+ pos[cur_vertex].load3(mesh[i].mPos.mV);
+ tc[cur_vertex] = LLVector2(ss,tt);
+ norm[cur_vertex].clear();
+
cur_vertex++;
}
}
@@ -5822,10 +6080,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
LLVector3& face_max = mExtents[1];
mCenter.clearVec();
- face_min = face_max = mVertices[0].mPosition;
- for (U32 i = 1; i < mVertices.size(); ++i)
+ face_min = face_max = LLVector3((F32*) &(pos[0].mQ));
+
+ for (U32 i = 1; i < mNumVertices; ++i)
{
- update_min_max(face_min, face_max, mVertices[i].mPosition);
+ update_min_max(face_min, face_max, (F32*) &(pos[i].mQ));
}
mCenter = (face_min + face_max) * 0.5f;
@@ -5836,18 +6095,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
if (!partial_build)
{
-#if GEN_TRI_STRIP
- mTriStrip.clear();
-#endif
-
// Now we generate the indices.
for (t = 0; t < (mNumT-1); t++)
{
-#if GEN_TRI_STRIP
- //prepend terminating index to strip
- mTriStrip.push_back(mNumS*t);
-#endif
-
for (s = 0; s < (mNumS-1); s++)
{
mIndices[cur_index++] = s + mNumS*t; //bottom left
@@ -5857,16 +6107,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
mIndices[cur_index++] = s+1 + mNumS*t; //bottom right
mIndices[cur_index++] = s+1 + mNumS*(t+1); //top right
-#if GEN_TRI_STRIP
- if (s == 0)
- {
- mTriStrip.push_back(s+mNumS*t);
- mTriStrip.push_back(s+mNumS*(t+1));
- }
- mTriStrip.push_back(s+1+mNumS*t);
- mTriStrip.push_back(s+1+mNumS*(t+1));
-#endif
-
mEdge[cur_edge++] = (mNumS-1)*2*t+s*2+1; //bottom left/top right neighbor face
if (t < mNumT-2) { //top right/top left neighbor face
mEdge[cur_edge++] = (mNumS-1)*2*(t+1)+s*2+1;
@@ -5907,52 +6147,55 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
}
mEdge[cur_edge++] = (mNumS-1)*2*t+s*2; //top right/bottom left neighbor face
}
-#if GEN_TRI_STRIP
- //append terminating vertex to strip
- mTriStrip.push_back(mNumS-1+mNumS*(t+1));
-#endif
}
-
-#if GEN_TRI_STRIP
- if (mTriStrip.size()%2 == 1)
- {
- mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]);
- }
-#endif
}
//generate normals
- for (U32 i = 0; i < mIndices.size()/3; i++) //for each triangle
+ for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle
{
const U16* idx = &(mIndices[i*3]);
-
- VertexData* v[] =
- { &mVertices[idx[0]], &mVertices[idx[1]], &mVertices[idx[2]] };
-
- //calculate triangle normal
- LLVector3 norm = (v[0]->mPosition-v[1]->mPosition) % (v[0]->mPosition-v[2]->mPosition);
+
- v[0]->mNormal += norm;
- v[1]->mNormal += norm;
- v[2]->mNormal += norm;
+ LLVector4a* v[] =
+ { pos+idx[0], pos+idx[1], pos+idx[2] };
+
+ LLVector4a* n[] =
+ { norm+idx[0], norm+idx[1], norm+idx[2] };
+
+ //calculate triangle normal
+ LLVector4a a, b, c;
+
+ a.setSub(*v[0], *v[1]);
+ b.setSub(*v[0], *v[2]);
+ c.setCross3(a,b);
+ n[0]->add(c);
+ n[1]->add(c);
+ n[2]->add(c);
+
//even out quad contributions
- v[i%2+1]->mNormal += norm;
+ n[i%2+1]->add(c);
}
// adjust normals based on wrapping and stitching
- BOOL s_bottom_converges = ((mVertices[0].mPosition - mVertices[mNumS*(mNumT-2)].mPosition).magVecSquared() < 0.000001f);
- BOOL s_top_converges = ((mVertices[mNumS-1].mPosition - mVertices[mNumS*(mNumT-2)+mNumS-1].mPosition).magVecSquared() < 0.000001f);
+ LLVector4a top;
+ top.setSub(pos[0], pos[mNumS*(mNumT-2)]);
+ BOOL s_bottom_converges = (top.dot3(top) < 0.000001f);
+
+ top.setSub(pos[mNumS-1], pos[mNumS*(mNumT-2)+mNumS-1]);
+ BOOL s_top_converges = (top.dot3(top) < 0.000001f);
+
if (sculpt_stitching == LL_SCULPT_TYPE_NONE) // logic for non-sculpt volumes
{
if (volume->getPath().isOpen() == FALSE)
{ //wrap normals on T
for (S32 i = 0; i < mNumS; i++)
{
- LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal;
- mVertices[i].mNormal = norm;
- mVertices[mNumS*(mNumT-1)+i].mNormal = norm;
+ LLVector4a n;
+ n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]);
+ norm[i] = n;
+ norm[mNumS*(mNumT-1)+i] = n;
}
}
@@ -5960,9 +6203,10 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
{ //wrap normals on S
for (S32 i = 0; i < mNumT; i++)
{
- LLVector3 norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal;
- mVertices[mNumS * i].mNormal = norm;
- mVertices[mNumS * i+mNumS-1].mNormal = norm;
+ LLVector4a n;
+ n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]);
+ norm[mNumS * i] = n;
+ norm[mNumS * i+mNumS-1] = n;
}
}
@@ -5973,7 +6217,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
{ //all lower S have same normal
for (S32 i = 0; i < mNumT; i++)
{
- mVertices[mNumS*i].mNormal = LLVector3(1,0,0);
+ norm[mNumS*i].set(1,0,0);
}
}
@@ -5981,7 +6225,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
{ //all upper S have same normal
for (S32 i = 0; i < mNumT; i++)
{
- mVertices[mNumS*i+mNumS-1].mNormal = LLVector3(-1,0,0);
+ norm[mNumS*i+mNumS-1].set(-1,0,0);
}
}
}
@@ -6009,30 +6253,33 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
{
// average normals for north pole
- LLVector3 average(0.0, 0.0, 0.0);
+ LLVector4a average;
+ average.clear();
+
for (S32 i = 0; i < mNumS; i++)
{
- average += mVertices[i].mNormal;
+ average.add(norm[i]);
}
// set average
for (S32 i = 0; i < mNumS; i++)
{
- mVertices[i].mNormal = average;
+ norm[i] = average;
}
// average normals for south pole
- average = LLVector3(0.0, 0.0, 0.0);
+ average.clear();
+
for (S32 i = 0; i < mNumS; i++)
{
- average += mVertices[i + mNumS * (mNumT - 1)].mNormal;
+ average.add(norm[i + mNumS * (mNumT - 1)]);
}
// set average
for (S32 i = 0; i < mNumS; i++)
{
- mVertices[i + mNumS * (mNumT - 1)].mNormal = average;
+ norm[i + mNumS * (mNumT - 1)] = average;
}
}
@@ -6042,23 +6289,22 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
{
for (S32 i = 0; i < mNumT; i++)
{
- LLVector3 norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal;
- mVertices[mNumS * i].mNormal = norm;
- mVertices[mNumS * i+mNumS-1].mNormal = norm;
+ LLVector4a n;
+ n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]);
+ norm[mNumS * i] = n;
+ norm[mNumS * i+mNumS-1] = n;
}
}
-
-
if (wrap_t)
{
for (S32 i = 0; i < mNumS; i++)
{
- LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal;
- mVertices[i].mNormal = norm;
- mVertices[mNumS*(mNumT-1)+i].mNormal = norm;
+ LLVector4a n;
+ n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]);
+ norm[i] = n;
+ norm[mNumS*(mNumT-1)+i] = n;
}
-
}
}
@@ -6068,41 +6314,51 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
// Finds binormal based on three vertices with texture coordinates.
// Fills in dummy values if the triangle has degenerate texture coordinates.
-LLVector3 calc_binormal_from_triangle(
- const LLVector3& pos0,
+void calc_binormal_from_triangle(LLVector4a& binormal,
+
+ const LLVector4a& pos0,
const LLVector2& tex0,
- const LLVector3& pos1,
+ const LLVector4a& pos1,
const LLVector2& tex1,
- const LLVector3& pos2,
+ const LLVector4a& pos2,
const LLVector2& tex2)
{
- LLVector3 rx0( pos0.mV[VX], tex0.mV[VX], tex0.mV[VY] );
- LLVector3 rx1( pos1.mV[VX], tex1.mV[VX], tex1.mV[VY] );
- LLVector3 rx2( pos2.mV[VX], tex2.mV[VX], tex2.mV[VY] );
+ LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] );
+ LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] );
+ LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] );
- LLVector3 ry0( pos0.mV[VY], tex0.mV[VX], tex0.mV[VY] );
- LLVector3 ry1( pos1.mV[VY], tex1.mV[VX], tex1.mV[VY] );
- LLVector3 ry2( pos2.mV[VY], tex2.mV[VX], tex2.mV[VY] );
+ LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] );
+ LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] );
+ LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] );
- LLVector3 rz0( pos0.mV[VZ], tex0.mV[VX], tex0.mV[VY] );
- LLVector3 rz1( pos1.mV[VZ], tex1.mV[VX], tex1.mV[VY] );
- LLVector3 rz2( pos2.mV[VZ], tex2.mV[VX], tex2.mV[VY] );
+ LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] );
+ LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] );
+ LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] );
- LLVector3 r0 = (rx0 - rx1) % (rx0 - rx2);
- LLVector3 r1 = (ry0 - ry1) % (ry0 - ry2);
- LLVector3 r2 = (rz0 - rz1) % (rz0 - rz2);
+ LLVector4a lhs, rhs;
+
+ LLVector4a r0;
+ lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2);
+ r0.setCross3(lhs, rhs);
+
+ LLVector4a r1;
+ lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2);
+ r1.setCross3(lhs, rhs);
+
+ LLVector4a r2;
+ lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2);
+ r2.setCross3(lhs, rhs);
- if( r0.mV[VX] && r1.mV[VX] && r2.mV[VX] )
+ if( r0[VX] && r1[VX] && r2[VX] )
{
- LLVector3 binormal(
- -r0.mV[VZ] / r0.mV[VX],
- -r1.mV[VZ] / r1.mV[VX],
- -r2.mV[VZ] / r2.mV[VX]);
+ binormal.set(
+ -r0[VZ] / r0[VX],
+ -r1[VZ] / r1[VX],
+ -r2[VZ] / r2[VX]);
// binormal.normVec();
- return binormal;
}
else
{
- return LLVector3( 0, 1 , 0 );
+ binormal.set( 0, 1 , 0 );
}
}
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index c6a156ae37..aa58d6d114 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -55,6 +55,7 @@ class LLVolume;
#include "v4coloru.h"
#include "llrefcount.h"
#include "llfile.h"
+#include "llvector4a.h"
//============================================================================
@@ -791,35 +792,114 @@ public:
class LLVolumeFace
{
public:
+ class VertexData
+ {
+ enum
+ {
+ POSITION = 0,
+ NORMAL = 1
+ };
+
+ private:
+ void init()
+ {
+ mData = (LLVector4a*) _mm_malloc(32, 16);
+ }
+ public:
+ VertexData()
+ {
+ init();
+ }
+
+ VertexData(const VertexData& rhs)
+ {
+ init();
+ LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8);
+ mTexCoord = rhs.mTexCoord;
+ }
+
+ ~VertexData()
+ {
+ _mm_free(mData);
+ }
+
+ LLVector4a& getPosition()
+ {
+ return mData[POSITION];
+ }
+
+ LLVector4a& getNormal()
+ {
+ return mData[NORMAL];
+ }
+
+ const LLVector4a& getPosition() const
+ {
+ return mData[POSITION];
+ }
+
+ const LLVector4a& getNormal() const
+ {
+ return mData[NORMAL];
+ }
+
+
+ void setPosition(const LLVector4a& pos)
+ {
+ mData[POSITION] = pos;
+ }
+
+ void setNormal(const LLVector4a& norm)
+ {
+ mData[NORMAL] = norm;
+ }
+
+ LLVector2 mTexCoord;
+
+ bool operator<(const VertexData& rhs) const;
+ bool operator==(const VertexData& rhs) const;
+ bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const;
+
+ private:
+ LLVector4a* mData;
+ };
+
LLVolumeFace() :
mID(0),
mTypeMask(0),
- mHasBinormals(FALSE),
mBeginS(0),
mBeginT(0),
mNumS(0),
- mNumT(0)
+ mNumT(0),
+ mNumVertices(0),
+ mNumIndices(0),
+ mPositions(NULL),
+ mNormals(NULL),
+ mBinormals(NULL),
+ mTexCoords(NULL),
+ mIndices(NULL)
{
}
+ ~LLVolumeFace();
+
BOOL create(LLVolume* volume, BOOL partial_build = FALSE);
void createBinormals();
- void makeTriStrip();
void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform);
- class VertexData
- {
- public:
- LLVector3 mPosition;
- LLVector3 mNormal;
- LLVector3 mBinormal;
- LLVector2 mTexCoord;
+ void resizeVertices(S32 num_verts);
+ void allocateBinormals(S32 num_verts);
+ void resizeIndices(S32 num_indices);
+ void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx);
- bool operator<(const VertexData& rhs) const;
- bool operator==(const VertexData& rhs) const;
- bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const;
- };
+ void pushVertex(const VertexData& cv);
+ void pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc);
+ void pushIndex(const U16& idx);
+
+ void swapData(LLVolumeFace& rhs);
+
+ void getVertexData(U16 indx, LLVolumeFace::VertexData& cv);
class VertexMapData : public LLVolumeFace::VertexData
{
@@ -828,28 +908,20 @@ public:
bool operator==(const LLVolumeFace::VertexData& rhs) const
{
- return mPosition == rhs.mPosition &&
+ return getPosition().equal3(rhs.getPosition()) &&
mTexCoord == rhs.mTexCoord &&
- mNormal == rhs.mNormal;
+ getNormal().equal3(rhs.getNormal());
}
struct ComparePosition
{
- bool operator()(const LLVector3& a, const LLVector3& b) const
+ bool operator()(const LLVector4a& a, const LLVector4a& b) const
{
- if (a.mV[0] != b.mV[0])
- {
- return a.mV[0] < b.mV[0];
- }
- if (a.mV[1] != b.mV[1])
- {
- return a.mV[1] < b.mV[1];
- }
- return a.mV[2] < b.mV[2];
+ return a.less3(b);
}
};
- typedef std::map<LLVector3, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap;
+ typedef std::map<LLVector4a, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap;
};
void optimize(F32 angle_cutoff = 2.f);
@@ -873,7 +945,6 @@ public:
S32 mID;
U32 mTypeMask;
LLVector3 mCenter;
- BOOL mHasBinormals;
// Only used for INNER/OUTER faces
S32 mBeginS;
@@ -883,9 +954,15 @@ public:
LLVector3 mExtents[2]; //minimum and maximum point of face
- std::vector<VertexData> mVertices;
- std::vector<U16> mIndices;
- std::vector<U16> mTriStrip;
+ S32 mNumVertices;
+ S32 mNumIndices;
+
+ LLVector4a* mPositions;
+ LLVector4a* mNormals;
+ LLVector4a* mBinormals;
+ LLVector2* mTexCoords;
+ U16* mIndices;
+
std::vector<S32> mEdge;
//list of skin weights for rigged volumes
@@ -1038,17 +1115,22 @@ public:
std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params);
-LLVector3 calc_binormal_from_triangle(
- const LLVector3& pos0,
+void calc_binormal_from_triangle(
+ LLVector4a& binormal,
+ const LLVector4a& pos0,
const LLVector2& tex0,
- const LLVector3& pos1,
+ const LLVector4a& pos1,
const LLVector2& tex1,
- const LLVector3& pos2,
+ const LLVector4a& pos2,
const LLVector2& tex2);
BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size);
+
BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided);
+
+BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,
+ F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided);
diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h
index 76dd938887..0e7d72e958 100644
--- a/indra/llmath/v3math.h
+++ b/indra/llmath/v3math.h
@@ -532,6 +532,21 @@ inline void update_min_max(LLVector3& min, LLVector3& max, const LLVector3& pos)
}
}
+inline void update_min_max(LLVector3& min, LLVector3& max, const F32* pos)
+{
+ for (U32 i = 0; i < 3; i++)
+ {
+ if (min.mV[i] > pos[i])
+ {
+ min.mV[i] = pos[i];
+ }
+ if (max.mV[i] < pos[i])
+ {
+ max.mV[i] = pos[i];
+ }
+ }
+}
+
inline F32 angle_between(const LLVector3& a, const LLVector3& b)
{
LLVector3 an = a;
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 2f02ccf30b..2579bad0b6 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -1813,7 +1813,7 @@ BOOL LLImageGL::getMask(const LLVector2 &tc)
{
LL_WARNS_ONCE("render") << "Ugh, non-finite u/v in mask pick" << LL_ENDL;
u = v = 0.f;
- llassert(false);
+ //llassert(false);
}
if (LL_UNLIKELY(u < 0.f || u > 1.f ||
@@ -1821,7 +1821,7 @@ BOOL LLImageGL::getMask(const LLVector2 &tc)
{
LL_WARNS_ONCE("render") << "Ugh, u/v out of range in image mask pick" << LL_ENDL;
u = v = 0.f;
- llassert(false);
+ //llassert(false);
}
S32 x = llfloor(u * mPickMaskWidth);
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 7fa47cd171..7f14a8d5ac 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -39,6 +39,7 @@
#include "llglheaders.h"
#include "llmemtype.h"
#include "llrender.h"
+#include "llvector4a.h"
//============================================================================
@@ -66,6 +67,27 @@ S32 LLVertexBuffer::sWeight4Loc = -1;
std::vector<U32> LLVertexBuffer::sDeleteList;
+#define LL_ALIGNED_VB 1
+
+#if LL_ALIGNED_VB
+
+S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] =
+{
+ sizeof(LLVector4), // TYPE_VERTEX,
+ sizeof(LLVector4), // TYPE_NORMAL,
+ sizeof(LLVector2), // TYPE_TEXCOORD0,
+ sizeof(LLVector2), // TYPE_TEXCOORD1,
+ sizeof(LLVector2), // TYPE_TEXCOORD2,
+ sizeof(LLVector2), // TYPE_TEXCOORD3,
+ sizeof(LLColor4U), // TYPE_COLOR,
+ sizeof(LLVector4), // TYPE_BINORMAL,
+ sizeof(F32), // TYPE_WEIGHT,
+ sizeof(LLVector4), // TYPE_WEIGHT4,
+ sizeof(LLVector4), // TYPE_CLOTHWEIGHT,
+};
+
+#else
+
S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] =
{
sizeof(LLVector3), // TYPE_VERTEX,
@@ -81,6 +103,8 @@ S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] =
sizeof(LLVector4), // TYPE_CLOTHWEIGHT,
};
+#endif
+
U32 LLVertexBuffer::sGLMode[LLRender::NUM_MODES] =
{
GL_TRIANGLES,
@@ -237,10 +261,8 @@ void LLVertexBuffer::setupClientArrays(U32 data_mask)
}
}
-void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const
+void LLVertexBuffer::validateRange(U32 start, U32 end, U32 count, U32 indices_offset) const
{
- llassert(mRequestedNumVerts >= 0);
-
if (start >= (U32) mRequestedNumVerts ||
end >= (U32) mRequestedNumVerts)
{
@@ -255,6 +277,25 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi
llerrs << "Bad index buffer draw range: [" << indices_offset << ", " << indices_offset+count << "]" << llendl;
}
+ if (gDebugGL && !useVBOs())
+ {
+ U16* idx = ((U16*) getIndicesPointer())+indices_offset;
+ for (U32 i = 0; i < count; ++i)
+ {
+ if (idx[i] < start || idx[i] > end)
+ {
+ llerrs << "Index out of range: " << idx[i] << " not in [" << start << ", " << end << "]" << llendl;
+ }
+ }
+ }
+}
+
+void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const
+{
+ validateRange(start, end, count, indices_offset);
+
+ llassert(mRequestedNumVerts >= 0);
+
if (mGLIndices != sGLRenderIndices)
{
llerrs << "Wrong index buffer bound." << llendl;
@@ -273,17 +314,6 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi
U16* idx = ((U16*) getIndicesPointer())+indices_offset;
- if (gDebugGL && !useVBOs())
- {
- for (U32 i = 0; i < count; ++i)
- {
- if (idx[i] < start || idx[i] > end)
- {
- llerrs << "Index out of range: " << idx[i] << " not in [" << start << ", " << end << "]" << llendl;
- }
- }
- }
-
stop_glerror();
glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT,
idx);
@@ -428,11 +458,42 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask, S32 usage) :
mTypeMask = typemask;
mStride = stride;
+ mAlignedOffset = 0;
+ mAlignedIndexOffset = 0;
+
sCount++;
}
+#if LL_ALIGNED_VB
+//static
+S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets, S32 num_vertices)
+{
+ S32 offset = 0;
+ for (S32 i=0; i<TYPE_MAX; i++)
+ {
+ U32 mask = 1<<i;
+ if (typemask & mask)
+ {
+ if (offsets)
+ {
+ offsets[i] = offset;
+ offset += LLVertexBuffer::sTypeOffsets[i]*num_vertices;
+ offset = (offset + 0xF) & ~0xF;
+ }
+ }
+ }
+
+ return offset+16;
+}
+
+S32 LLVertexBuffer::getSize() const
+{
+ return mStride;
+}
+
+#else
//static
-S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets)
+S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets, S32 num_vertices)
{
S32 stride = 0;
for (S32 i=0; i<TYPE_MAX; i++)
@@ -451,6 +512,12 @@ S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets)
return stride;
}
+S32 LLVertexBuffer::getSize() const
+{
+ return mNumVerts*mStride;
+}
+
+#endif
// protected, use unref()
//virtual
LLVertexBuffer::~LLVertexBuffer()
@@ -560,7 +627,7 @@ void LLVertexBuffer::createGLBuffer()
{
static int gl_buffer_idx = 0;
mGLBuffer = ++gl_buffer_idx;
- mMappedData = new U8[size];
+ mMappedData = (U8*) _mm_malloc(size, 16);
memset(mMappedData, 0, size);
}
}
@@ -582,16 +649,20 @@ void LLVertexBuffer::createGLIndices()
mEmpty = TRUE;
+ //pad by 16 bytes for aligned copies
+ size += 16;
+
if (useVBOs())
{
+ //pad by another 16 bytes for VBO pointer adjustment
+ size += 16;
mMappedIndexData = NULL;
genIndices();
mResized = TRUE;
}
else
{
- mMappedIndexData = new U8[size];
- memset(mMappedIndexData, 0, size);
+ mMappedIndexData = (U8*) _mm_malloc(size, 16);
static int gl_buffer_idx = 0;
mGLIndices = ++gl_buffer_idx;
}
@@ -612,7 +683,7 @@ void LLVertexBuffer::destroyGLBuffer()
}
else
{
- delete [] mMappedData;
+ _mm_free(mMappedData);
mMappedData = NULL;
mEmpty = TRUE;
}
@@ -639,7 +710,7 @@ void LLVertexBuffer::destroyGLIndices()
}
else
{
- delete [] mMappedIndexData;
+ _mm_free(mMappedIndexData);
mMappedIndexData = NULL;
mEmpty = TRUE;
}
@@ -664,7 +735,7 @@ void LLVertexBuffer::updateNumVerts(S32 nverts)
}
mRequestedNumVerts = nverts;
-
+
if (!mDynamicSize)
{
mNumVerts = nverts;
@@ -679,6 +750,9 @@ void LLVertexBuffer::updateNumVerts(S32 nverts)
}
mNumVerts = nverts;
}
+#if LL_ALIGNED_VB
+ mStride = calcStride(mTypeMask, mOffsets, mNumVerts);
+#endif
}
@@ -754,9 +828,6 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices)
{
sAllocatedBytes -= getSize() + getIndicesSize();
- S32 oldsize = getSize();
- S32 old_index_size = getIndicesSize();
-
updateNumVerts(newnverts);
updateNumIndices(newnindices);
@@ -773,26 +844,10 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices)
}
else
{
- //delete old buffer, keep GL buffer for now
if (!useVBOs())
{
- U8* old = mMappedData;
- mMappedData = new U8[newsize];
- if (old)
- {
- memcpy(mMappedData, old, llmin(newsize, oldsize));
- if (newsize > oldsize)
- {
- memset(mMappedData+oldsize, 0, newsize-oldsize);
- }
-
- delete [] old;
- }
- else
- {
- memset(mMappedData, 0, newsize);
- mEmpty = TRUE;
- }
+ _mm_free(mMappedData);
+ mMappedData = (U8*) _mm_malloc(newsize, 16);
}
mResized = TRUE;
}
@@ -812,24 +867,8 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices)
{
if (!useVBOs())
{
- //delete old buffer, keep GL buffer for now
- U8* old = mMappedIndexData;
- mMappedIndexData = new U8[new_index_size];
-
- if (old)
- {
- memcpy(mMappedIndexData, old, llmin(new_index_size, old_index_size));
- if (new_index_size > old_index_size)
- {
- memset(mMappedIndexData+old_index_size, 0, new_index_size - old_index_size);
- }
- delete [] old;
- }
- else
- {
- memset(mMappedIndexData, 0, new_index_size);
- mEmpty = TRUE;
- }
+ _mm_free(mMappedIndexData);
+ mMappedIndexData = (U8*) _mm_malloc(new_index_size, 16);
}
mResized = TRUE;
}
@@ -886,12 +925,19 @@ U8* LLVertexBuffer::mapBuffer(S32 access)
setBuffer(0);
mLocked = TRUE;
stop_glerror();
- mMappedData = (U8*) glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
+
+ U8* src = (U8*) glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
+ mMappedData = LL_NEXT_ALIGNED_ADDRESS<U8>(src);
+ mAlignedOffset = mMappedData - src;
+
stop_glerror();
}
{
LLMemType mt_v(LLMemType::MTYPE_VERTEX_MAP_BUFFER_INDICES);
- mMappedIndexData = (U8*) glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
+ U8* src = (U8*) glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);
+ mMappedIndexData = LL_NEXT_ALIGNED_ADDRESS<U8>(src);
+ mAlignedIndexOffset = mMappedIndexData - src;
+
stop_glerror();
}
@@ -975,6 +1021,45 @@ void LLVertexBuffer::unmapBuffer()
//----------------------------------------------------------------------------
+#if LL_ALIGNED_VB
+
+template <class T,S32 type> struct VertexBufferStrider
+{
+ typedef LLStrider<T> strider_t;
+ static bool get(LLVertexBuffer& vbo,
+ strider_t& strider,
+ S32 index)
+ {
+ if (vbo.mapBuffer() == NULL)
+ {
+ llwarns << "mapBuffer failed!" << llendl;
+ return FALSE;
+ }
+
+ if (type == LLVertexBuffer::TYPE_INDEX)
+ {
+ S32 stride = sizeof(T);
+ strider = (T*)(vbo.getMappedIndices() + index*stride);
+ strider.setStride(0);
+ return TRUE;
+ }
+ else if (vbo.hasDataType(type))
+ {
+ S32 stride = LLVertexBuffer::sTypeOffsets[type];
+ strider = (T*)(vbo.getMappedData() + vbo.getOffset(type)+index*stride);
+ strider.setStride(stride);
+ return TRUE;
+ }
+ else
+ {
+ llerrs << "VertexBufferStrider could not find valid vertex data." << llendl;
+ }
+ return FALSE;
+ }
+};
+
+#else
+
template <class T,S32 type> struct VertexBufferStrider
{
typedef LLStrider<T> strider_t;
@@ -1010,6 +1095,7 @@ template <class T,S32 type> struct VertexBufferStrider
}
};
+#endif
bool LLVertexBuffer::getVertexStrider(LLStrider<LLVector3>& strider, S32 index)
{
@@ -1272,6 +1358,81 @@ void LLVertexBuffer::setBuffer(U32 data_mask)
}
}
+#if LL_ALIGNED_VB
+
+// virtual (default)
+void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const
+{
+ LLMemType mt2(LLMemType::MTYPE_VERTEX_SETUP_VERTEX_BUFFER);
+ stop_glerror();
+ U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
+
+ if ((data_mask & mTypeMask) != data_mask)
+ {
+ llerrs << "LLVertexBuffer::setupVertexBuffer missing required components for supplied data mask." << llendl;
+ }
+
+ if (data_mask & MAP_NORMAL)
+ {
+ glNormalPointer(GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_NORMAL], (void*)(base + mOffsets[TYPE_NORMAL]));
+ }
+ if (data_mask & MAP_TEXCOORD3)
+ {
+ glClientActiveTextureARB(GL_TEXTURE3_ARB);
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD3], (void*)(base + mOffsets[TYPE_TEXCOORD3]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
+ }
+ if (data_mask & MAP_TEXCOORD2)
+ {
+ glClientActiveTextureARB(GL_TEXTURE2_ARB);
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD2], (void*)(base + mOffsets[TYPE_TEXCOORD2]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
+ }
+ if (data_mask & MAP_TEXCOORD1)
+ {
+ glClientActiveTextureARB(GL_TEXTURE1_ARB);
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD1], (void*)(base + mOffsets[TYPE_TEXCOORD1]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
+ }
+ if (data_mask & MAP_BINORMAL)
+ {
+ glClientActiveTextureARB(GL_TEXTURE2_ARB);
+ glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_BINORMAL], (void*)(base + mOffsets[TYPE_BINORMAL]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
+ }
+ if (data_mask & MAP_TEXCOORD0)
+ {
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0]));
+ }
+ if (data_mask & MAP_COLOR)
+ {
+ glColorPointer(4, GL_UNSIGNED_BYTE, LLVertexBuffer::sTypeOffsets[TYPE_COLOR], (void*)(base + mOffsets[TYPE_COLOR]));
+ }
+
+ if (data_mask & MAP_WEIGHT)
+ {
+ glVertexAttribPointerARB(1, 1, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT], (void*)(base + mOffsets[TYPE_WEIGHT]));
+ }
+
+ if (data_mask & MAP_WEIGHT4 && sWeight4Loc != -1)
+ {
+ glVertexAttribPointerARB(sWeight4Loc, 4, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT4], (void*)(base+mOffsets[TYPE_WEIGHT4]));
+ }
+
+ if (data_mask & MAP_CLOTHWEIGHT)
+ {
+ glVertexAttribPointerARB(4, 4, GL_FLOAT, TRUE, LLVertexBuffer::sTypeOffsets[TYPE_CLOTHWEIGHT], (void*)(base + mOffsets[TYPE_CLOTHWEIGHT]));
+ }
+ if (data_mask & MAP_VERTEX)
+ {
+ glVertexPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_VERTEX], (void*)(base + 0));
+ }
+
+ llglassertok();
+}
+
+#else
+
// virtual (default)
void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const
{
@@ -1344,6 +1505,8 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const
llglassertok();
}
+#endif
+
void LLVertexBuffer::markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count)
{
// TODO: use GL_APPLE_flush_buffer_range here
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index d1700aa54a..47146a5ec4 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -98,7 +98,7 @@ public:
//if offsets is not NULL, its contents will be filled
//with the offset of each vertex component in the buffer,
// indexed by the following enum
- static S32 calcStride(const U32& typemask, S32* offsets = NULL);
+ static S32 calcStride(const U32& typemask, S32* offsets = NULL, S32 num_vertices = 0);
enum {
TYPE_VERTEX,
@@ -187,12 +187,12 @@ public:
S32 getRequestedVerts() const { return mRequestedNumVerts; }
S32 getRequestedIndices() const { return mRequestedNumIndices; }
- U8* getIndicesPointer() const { return useVBOs() ? NULL : mMappedIndexData; }
+ U8* getIndicesPointer() const { return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; }
U8* getVerticesPointer() const { return useVBOs() ? NULL : mMappedData; }
S32 getStride() const { return mStride; }
U32 getTypeMask() const { return mTypeMask; }
BOOL hasDataType(S32 type) const { return ((1 << type) & getTypeMask()) ? TRUE : FALSE; }
- S32 getSize() const { return mNumVerts*mStride; }
+ S32 getSize() const;
S32 getIndicesSize() const { return mNumIndices * sizeof(U16); }
U8* getMappedData() const { return mMappedData; }
U8* getMappedIndices() const { return mMappedIndexData; }
@@ -207,12 +207,19 @@ public:
void drawArrays(U32 mode, U32 offset, U32 count) const;
void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;
+ //for debugging, validate data in given range is valid
+ void validateRange(U32 start, U32 end, U32 count, U32 offset) const;
+
+
+
protected:
S32 mNumVerts; // Number of vertices allocated
S32 mNumIndices; // Number of indices allocated
S32 mRequestedNumVerts; // Number of vertices requested
S32 mRequestedNumIndices; // Number of indices requested
+ ptrdiff_t mAlignedOffset;
+ ptrdiff_t mAlignedIndexOffset;
S32 mStride;
U32 mTypeMask;
S32 mUsage; // GL usage
@@ -227,7 +234,7 @@ protected:
S32 mOffsets[TYPE_MAX];
BOOL mResized; // if TRUE, client buffer has been resized and GL buffer has not
BOOL mDynamicSize; // if TRUE, buffer has been resized at least once (and should be padded)
-
+
class DirtyRegion
{
public:
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 6de34d9458..600e3294eb 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -79,6 +79,7 @@
#include "llteleporthistory.h"
#include "lllocationhistory.h"
#include "llfasttimerview.h"
+#include "llvector4a.h"
#include "llviewermenufile.h"
#include "llvoicechannel.h"
#include "llvoavatarself.h"
@@ -612,6 +613,9 @@ bool LLAppViewer::init()
//
LLFastTimer::reset();
+ // initialize SSE options
+ LLVector4a::initClass();
+
// Need to do this initialization before we do anything else, since anything
// that touches files should really go through the lldir API
gDirUtilp->initAppDirs("SecondLife");
diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index d1f4be71f5..0fa0e80cb7 100644
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -1201,14 +1201,14 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLFace* face, const LLMeshSk
if (!buff ||
buff->getTypeMask() != data_mask ||
- buff->getRequestedVerts() != vol_face.mVertices.size())
+ buff->getRequestedVerts() != vol_face.mNumVertices)
{
face->setGeomIndex(0);
face->setIndicesIndex(0);
- face->setSize(vol_face.mVertices.size(), vol_face.mIndices.size());
+ face->setSize(vol_face.mNumVertices, vol_face.mNumIndices);
face->mVertexBuffer = new LLVertexBuffer(data_mask, 0);
- face->mVertexBuffer->allocateBuffer(vol_face.mVertices.size(), vol_face.mIndices.size(), true);
+ face->mVertexBuffer->allocateBuffer(vol_face.mNumVertices, vol_face.mNumIndices, true);
U16 offset = 0;
@@ -1546,20 +1546,23 @@ void LLVertexBufferAvatar::setupVertexBuffer(U32 data_mask) const
{
U8* base = useVBOs() ? NULL : mMappedData;
- glVertexPointer(3,GL_FLOAT, mStride, (void*)(base + 0));
- glNormalPointer(GL_FLOAT, mStride, (void*)(base + mOffsets[TYPE_NORMAL]));
- glTexCoordPointer(2,GL_FLOAT, mStride, (void*)(base + mOffsets[TYPE_TEXCOORD0]));
+ glVertexPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_VERTEX], (void*)(base + 0));
+ glNormalPointer(GL_FLOAT, LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_NORMAL], (void*)(base + mOffsets[TYPE_NORMAL]));
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0]));
- set_vertex_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_WEIGHT], mStride, (F32*)(base + mOffsets[TYPE_WEIGHT]));
+ set_vertex_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_WEIGHT],
+ LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_WEIGHT], (F32*)(base + mOffsets[TYPE_WEIGHT]));
if (sShaderLevel >= LLDrawPoolAvatar::SHADER_LEVEL_BUMP)
{
- set_binormals(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::BINORMAL], mStride, (LLVector3*)(base + mOffsets[TYPE_BINORMAL]));
+ set_binormals(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::BINORMAL],
+ LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_BINORMAL], (LLVector3*)(base + mOffsets[TYPE_BINORMAL]));
}
if (sShaderLevel >= LLDrawPoolAvatar::SHADER_LEVEL_CLOTH)
{
- set_vertex_clothing_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_CLOTHING], mStride, (LLVector4*)(base + mOffsets[TYPE_CLOTHWEIGHT]));
+ set_vertex_clothing_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_CLOTHING],
+ LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_CLOTHWEIGHT], (LLVector4*)(base + mOffsets[TYPE_CLOTHWEIGHT]));
}
}
else
diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp
index 1fae8e518c..b7092f32a2 100644
--- a/indra/newview/lldrawpoolbump.cpp
+++ b/indra/newview/lldrawpoolbump.cpp
@@ -82,7 +82,7 @@ static S32 bump_channel = -1;
// static
void LLStandardBumpmap::init()
{
- LLStandardBumpmap::restoreGL();
+ // do nothing
}
// static
@@ -876,6 +876,8 @@ void LLBumpImageList::clear()
// these will be re-populated on-demand
mBrightnessEntries.clear();
mDarknessEntries.clear();
+
+ LLStandardBumpmap::clear();
}
void LLBumpImageList::shutdown()
@@ -892,8 +894,8 @@ void LLBumpImageList::destroyGL()
void LLBumpImageList::restoreGL()
{
- // Images will be recreated as they are needed.
LLStandardBumpmap::restoreGL();
+ // Images will be recreated as they are needed.
}
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 53dc335c16..1571bab7de 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -366,8 +366,14 @@ void LLFace::setDrawable(LLDrawable *drawable)
mXform = &drawable->mXform;
}
-void LLFace::setSize(const S32 num_vertices, const S32 num_indices)
+void LLFace::setSize(S32 num_vertices, S32 num_indices, bool align)
{
+ if (align)
+ {
+ //allocate vertices in blocks of 4 for alignment
+ num_vertices = (num_vertices + 0x3) & ~0x3;
+ }
+
if (mGeomCount != num_vertices ||
mIndicesCount != num_indices)
{
@@ -928,8 +934,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
{
LLFastTimer t(FTM_FACE_GET_GEOM);
const LLVolumeFace &vf = volume.getVolumeFace(f);
- S32 num_vertices = (S32)vf.mVertices.size();
- S32 num_indices = LLPipeline::sUseTriStrips ? (S32)vf.mTriStrip.size() : (S32) vf.mIndices.size();
+ S32 num_vertices = (S32)vf.mNumVertices;
+ S32 num_indices = (S32) vf.mNumIndices;
if (mVertexBuffer.notNull())
{
@@ -1128,19 +1134,9 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
if (full_rebuild)
{
mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex);
- if (LLPipeline::sUseTriStrips)
+ for (U32 i = 0; i < (U32) num_indices; i++)
{
- for (U32 i = 0; i < (U32) num_indices; i++)
- {
- *indicesp++ = vf.mTriStrip[i] + index_offset;
- }
- }
- else
- {
- for (U32 i = 0; i < (U32) num_indices; i++)
- {
- *indicesp++ = vf.mIndices[i] + index_offset;
- }
+ *indicesp++ = vf.mIndices[i] + index_offset;
}
}
@@ -1214,28 +1210,41 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
bool bake_sunlight = !getTextureEntry()->getFullbright() &&
!mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL);
+ //VECTORIZE THIS
for (S32 i = 0; i < num_vertices; i++)
{
+ LLVector3 vf_binormal;
+ if (vf.mBinormals)
+ {
+ vf_binormal.setVec(vf.mBinormals[i].getF32());
+ }
+
+ LLVector3 vf_normal;
+ vf_normal.set(vf.mNormals[i].getF32());
+
+ LLVector3 vf_position;
+ vf_position.set(vf.mPositions[i].getF32());
+
if (rebuild_tcoord)
{
- LLVector2 tc = vf.mVertices[i].mTexCoord;
+ LLVector2 tc(vf.mTexCoords[i]);
if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
{
- LLVector3 vec = vf.mVertices[i].mPosition;
+ LLVector3 vec = vf_position;
vec.scaleVec(scale);
switch (texgen)
{
case LLTextureEntry::TEX_GEN_PLANAR:
- planarProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec);
+ planarProjection(tc, vf_normal, vf.mCenter, vec);
break;
case LLTextureEntry::TEX_GEN_SPHERICAL:
- sphericalProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec);
+ sphericalProjection(tc, vf_normal, vf.mCenter, vec);
break;
case LLTextureEntry::TEX_GEN_CYLINDRICAL:
- cylindricalProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec);
+ cylindricalProjection(tc, vf_normal, vf.mCenter, vec);
break;
default:
break;
@@ -1345,10 +1354,10 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
if (bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1))
{
- LLVector3 tangent = vf.mVertices[i].mBinormal % vf.mVertices[i].mNormal;
+ LLVector3 tangent = vf_binormal % vf_normal;
LLMatrix3 tangent_to_object;
- tangent_to_object.setRows(tangent, vf.mVertices[i].mBinormal, vf.mVertices[i].mNormal);
+ tangent_to_object.setRows(tangent, vf_binormal, vf_normal);
LLVector3 binormal = binormal_dir * tangent_to_object;
binormal = binormal * mat_normal;
@@ -1366,12 +1375,12 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
if (rebuild_pos)
{
- *vertices++ = vf.mVertices[i].mPosition * mat_vert;
+ *vertices++ = vf_position * mat_vert;
}
if (rebuild_normal)
{
- LLVector3 normal = vf.mVertices[i].mNormal * mat_normal;
+ LLVector3 normal = vf_normal * mat_normal;
normal.normVec();
*normals++ = normal;
@@ -1379,21 +1388,21 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
if (rebuild_binormal)
{
- LLVector3 binormal = vf.mVertices[i].mBinormal * mat_normal;
+ LLVector3 binormal = vf_binormal * mat_normal;
binormal.normVec();
*binormals++ = binormal;
}
if (rebuild_weights && vf.mWeights.size() > i)
{
- *weights++ = vf.mWeights[i];
+ (*weights++) = vf.mWeights[i];
}
if (rebuild_color)
{
if (bake_sunlight)
{
- LLVector3 normal = vf.mVertices[i].mNormal * mat_normal;
+ LLVector3 normal = vf_normal * mat_normal;
normal.normVec();
F32 da = normal * gPipeline.mSunDir;
diff --git a/indra/newview/llface.h b/indra/newview/llface.h
index f9e9c3e078..48909d7895 100644
--- a/indra/newview/llface.h
+++ b/indra/newview/llface.h
@@ -166,7 +166,7 @@ public:
S32 getColors(LLStrider<LLColor4U> &colors);
S32 getIndices(LLStrider<U16> &indices);
- void setSize(const S32 numVertices, const S32 num_indices = 0);
+ void setSize(S32 numVertices, S32 num_indices = 0, bool align = false);
BOOL genVolumeBBoxes(const LLVolume &volume, S32 f,
const LLMatrix4& mat, const LLMatrix3& inv_trans_mat, BOOL global_volume = FALSE);
diff --git a/indra/newview/llfloaterimagepreview.cpp b/indra/newview/llfloaterimagepreview.cpp
index 159e4b41ca..28fe2a14b7 100644
--- a/indra/newview/llfloaterimagepreview.cpp
+++ b/indra/newview/llfloaterimagepreview.cpp
@@ -864,8 +864,8 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
}
const LLVolumeFace &vf = mVolume->getVolumeFace(0);
- U32 num_indices = vf.mIndices.size();
- U32 num_vertices = vf.mVertices.size();
+ U32 num_indices = vf.mNumIndices;
+ U32 num_vertices = vf.mNumVertices;
mVertexBuffer = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL, 0);
mVertexBuffer->allocateBuffer(num_vertices, num_indices, TRUE);
@@ -879,10 +879,16 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
mVertexBuffer->getIndexStrider(index_strider);
// build vertices and normals
+ LLStrider<LLVector3> pos;
+ pos = (LLVector3*) vf.mPositions; pos.setStride(16);
+ LLStrider<LLVector3> norm;
+ norm = (LLVector3*) vf.mNormals; norm.setStride(16);
+
+
for (U32 i = 0; i < num_vertices; i++)
{
- *(vertex_strider++) = vf.mVertices[i].mPosition;
- LLVector3 normal = vf.mVertices[i].mNormal;
+ *(vertex_strider++) = *pos++;
+ LLVector3 normal = *norm++;
normal.normalize();
*(normal_strider++) = normal;
}
@@ -901,7 +907,6 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)
BOOL LLImagePreviewSculpted::render()
{
mNeedsUpdate = FALSE;
-
LLGLSUIDefault def;
LLGLDisable no_blend(GL_BLEND);
LLGLEnable cull(GL_CULL_FACE);
@@ -946,7 +951,7 @@ BOOL LLImagePreviewSculpted::render()
LLViewerCamera::getInstance()->setPerspective(FALSE, mOrigin.mX, mOrigin.mY, mFullWidth, mFullHeight, FALSE);
const LLVolumeFace &vf = mVolume->getVolumeFace(0);
- U32 num_indices = vf.mIndices.size();
+ U32 num_indices = vf.mNumIndices;
mVertexBuffer->setBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL);
@@ -959,7 +964,6 @@ BOOL LLImagePreviewSculpted::render()
mVertexBuffer->draw(LLRender::TRIANGLES, num_indices, 0);
gGL.popMatrix();
-
return TRUE;
}
diff --git a/indra/newview/llhudicon.cpp b/indra/newview/llhudicon.cpp
index 28b0e7356a..3c5a4de7f8 100644
--- a/indra/newview/llhudicon.cpp
+++ b/indra/newview/llhudicon.cpp
@@ -39,6 +39,7 @@
#include "llviewerobject.h"
#include "lldrawable.h"
+#include "llvector4a.h"
#include "llviewercamera.h"
#include "llviewertexture.h"
#include "llviewerwindow.h"
@@ -266,21 +267,41 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
LLVector3 x_scale = image_aspect * (F32)gViewerWindow->getWindowHeightScaled() * mScale * scale_factor * x_pixel_vec;
LLVector3 y_scale = (F32)gViewerWindow->getWindowHeightScaled() * mScale * scale_factor * y_pixel_vec;
- LLVector3 lower_left = icon_position - (x_scale * 0.5f);
- LLVector3 lower_right = icon_position + (x_scale * 0.5f);
- LLVector3 upper_left = icon_position - (x_scale * 0.5f) + y_scale;
- LLVector3 upper_right = icon_position + (x_scale * 0.5f) + y_scale;
+ LLVector4a x_scalea;
+ LLVector4a icon_positiona;
+ LLVector4a y_scalea;
-
- F32 t = 0.f;
- LLVector3 dir = end-start;
+ x_scalea.load3(x_scale.mV);
+ x_scalea.mul(0.5f);
+ y_scalea.load3(y_scale.mV);
+
+ icon_positiona.load3(icon_position.mV);
- if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, start, dir, NULL, NULL, &t, FALSE) ||
- LLTriangleRayIntersect(upper_left, lower_left, lower_right, start, dir, NULL, NULL, &t, FALSE))
+ LLVector4a lower_left;
+ lower_left.setSub(icon_positiona, x_scalea);
+ LLVector4a lower_right;
+ lower_right.setAdd(icon_positiona, x_scalea);
+ LLVector4a upper_left;
+ upper_left.setAdd(lower_left, y_scalea);
+ LLVector4a upper_right;
+ upper_right.setAdd(lower_right, y_scalea);
+
+ F32 t = 0.f;
+ LLVector4a enda;
+ enda.load3(end.mV);
+ LLVector4a starta;
+ starta.load3(start.mV);
+ LLVector4a dir;
+ dir.setSub(enda, starta);
+
+ if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, starta, dir, NULL, NULL, &t, FALSE) ||
+ LLTriangleRayIntersect(upper_left, lower_left, lower_right, starta, dir, NULL, NULL, &t, FALSE))
{
if (intersection)
{
- *intersection = start + dir*t;
+ dir.mul(t);
+ starta.add(dir);
+ *intersection = LLVector3((F32*) &starta.mQ);
}
return TRUE;
}
diff --git a/indra/newview/llinventorybridge.cpp b/indra/newview/llinventorybridge.cpp
index ea43670da0..10cc6fae32 100644
--- a/indra/newview/llinventorybridge.cpp
+++ b/indra/newview/llinventorybridge.cpp
@@ -3400,7 +3400,7 @@ openSoundPreview((void*)this);
//send_uuid_sound_trigger(item->getAssetUUID(), 1.0);
}
*/
- }
+}
void LLSoundBridge::previewItem()
{
@@ -4187,21 +4187,9 @@ void LLObjectBridge::performAction(LLInventoryModel* model, std::string action)
void LLObjectBridge::openItem()
{
- LLViewerInventoryItem* item = getItem();
-
- if (item)
- {
- LLInvFVBridgeAction::doAction(item->getType(),mUUID,getInventoryModel());
- }
-
- LLSD key;
- key["id"] = mUUID;
- LLSideTray::getInstance()->showPanel("sidepanel_inventory", key);
-
- // Disable old properties floater; this is replaced by the sidepanel.
- /*
- LLFloaterReg::showInstance("properties", mUUID);
- */
+ // object double-click action is to wear/unwear object
+ performAction(getInventoryModel(),
+ get_is_item_worn(mUUID) ? "detach" : "attach");
}
LLFontGL::StyleFlags LLObjectBridge::getLabelStyle() const
diff --git a/indra/newview/llpanelobject.cpp b/indra/newview/llpanelobject.cpp
index 669ff3ffd6..77f3984ecb 100644
--- a/indra/newview/llpanelobject.cpp
+++ b/indra/newview/llpanelobject.cpp
@@ -1241,6 +1241,16 @@ void LLPanelObject::sendIsPhantom()
}
}
+#include "llsdutil.h"
+class CostResponder : public LLHTTPClient::Responder
+{
+public:
+ CostResponder(U32 id) { mID = id; }
+ virtual void result(const LLSD& content) { llinfos << ll_pretty_print_sd(content) << llendl; }
+
+ U32 mID;
+};
+
void LLPanelObject::sendPhysicsShapeType()
{
U8 value = (U8)mComboPhysicsShapeType->getCurrentIndex();
@@ -1255,6 +1265,13 @@ void LLPanelObject::sendPhysicsShapeType()
{
llinfos << "update physics shape type not changed" << llendl;
}
+
+ std::string url = gAgent.getRegion()->getCapability("GetObjectCost");
+ LLSD body = LLSD::emptyArray();
+
+ body.append(LLSelectMgr::getInstance()->getSelection()->getFirstObject()->getID());
+
+ LLHTTPClient::post( url, body, new CostResponder(body[0].asInteger()) );
}
void LLPanelObject::sendCastShadows()
diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp
index b8bdbfb2f8..d10e4fee3a 100644
--- a/indra/newview/llpolymesh.cpp
+++ b/indra/newview/llpolymesh.cpp
@@ -35,7 +35,8 @@
//-----------------------------------------------------------------------------
#include "llviewerprecompiledheaders.h"
-#include "llpolymesh.h"
+#include "llfasttimer.h"
+#include "llmemory.h"
#include "llviewercontrol.h"
#include "llxmltree.h"
@@ -45,7 +46,7 @@
#include "llvolume.h"
#include "llendianswizzle.h"
-#include "llfasttimer.h"
+#include "llpolymesh.h"
#define HEADER_ASCII "Linden Mesh 1.0"
#define HEADER_BINARY "Linden Binary Mesh 1.0"
@@ -140,7 +141,7 @@ void LLPolyMeshSharedData::freeMeshData()
delete [] mDetailTexCoords;
mDetailTexCoords = NULL;
- delete [] mWeights;
+ ll_aligned_free_16(mWeights);
mWeights = NULL;
}
@@ -230,7 +231,7 @@ BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices )
mBaseBinormals = new LLVector3[ numVertices ];
mTexCoords = new LLVector2[ numVertices ];
mDetailTexCoords = new LLVector2[ numVertices ];
- mWeights = new F32[ numVertices ];
+ mWeights = (F32*) ll_aligned_malloc_16((numVertices*sizeof(F32)+0xF) & ~0xF);
for (i = 0; i < numVertices; i++)
{
mWeights[i] = 0.f;
@@ -715,15 +716,22 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_
int nfloats = nverts * (2*4 + 3*3 + 2 + 4);
//use aligned vertex data to make LLPolyMesh SSE friendly
- mVertexData = (F32*) _mm_malloc(nfloats*4, 16);
+ mVertexData = (F32*) ll_aligned_malloc_16(nfloats*4);
int offset = 0;
- mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
- mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
- mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
- mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
- mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
- mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts;
- mClothingWeights = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
+
+ //all members must be 16-byte aligned except the last 3
+ mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
+ mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
+ mClothingWeights = (LLVector4*)(mVertexData + offset); offset += 4*nverts;
+ mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts;
+
+ // these members don't need to be 16-byte aligned, but the first one might be
+ // read during an aligned memcpy of mTexCoords
+ mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
+ mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
+ mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts;
+
+
#else
mCoords = new LLVector3[mSharedData->mNumVertices];
mNormals = new LLVector3[mSharedData->mNumVertices];
@@ -759,7 +767,7 @@ LLPolyMesh::~LLPolyMesh()
delete [] mClothingWeights;
delete [] mTexCoords;
#else
- _mm_free(mVertexData);
+ ll_aligned_free_16(mVertexData);
#endif
}
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 77c38798d1..470c332b42 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -512,50 +512,6 @@ void LLSpatialGroup::checkStates()
#endif
}
-void validate_draw_info(LLDrawInfo& params)
-{
-#if LL_OCTREE_PARANOIA_CHECK
- if (params.mVertexBuffer.isNull())
- {
- llerrs << "Draw batch has no vertex buffer." << llendl;
- }
-
- //bad range
- if (params.mStart >= params.mEnd)
- {
- llerrs << "Draw batch has invalid range." << llendl;
- }
-
- if (params.mEnd >= (U32) params.mVertexBuffer->getNumVerts())
- {
- llerrs << "Draw batch has buffer overrun error." << llendl;
- }
-
- if (params.mOffset + params.mCount > (U32) params.mVertexBuffer->getNumIndices())
- {
- llerrs << "Draw batch has index buffer ovverrun error." << llendl;
- }
-
- //bad indices
- U16* indicesp = (U16*) params.mVertexBuffer->getIndicesPointer();
- if (indicesp)
- {
- for (U32 i = params.mOffset; i < params.mOffset+params.mCount; i++)
- {
- if (indicesp[i] < (U16)params.mStart)
- {
- llerrs << "Draw batch has vertex buffer index out of range error (index too low)." << llendl;
- }
-
- if (indicesp[i] > (U16)params.mEnd)
- {
- llerrs << "Draw batch has vertex buffer index out of range error (index too high)." << llendl;
- }
- }
- }
-#endif
-}
-
void LLSpatialGroup::validateDrawMap()
{
#if LL_OCTREE_PARANOIA_CHECK
@@ -565,8 +521,8 @@ void LLSpatialGroup::validateDrawMap()
for (drawmap_elem_t::iterator j = draw_vec.begin(); j != draw_vec.end(); ++j)
{
LLDrawInfo& params = **j;
-
- validate_draw_info(params);
+
+ params.validate();
}
}
#endif
@@ -3379,18 +3335,9 @@ LLDrawInfo::LLDrawInfo(U16 start, U16 end, U32 count, U32 offset,
mDistance(0.f),
mDrawMode(LLRender::TRIANGLES)
{
- mDebugColor = (rand() << 16) + rand();
- if (mStart >= mVertexBuffer->getRequestedVerts() ||
- mEnd >= mVertexBuffer->getRequestedVerts())
- {
- llerrs << "Invalid draw info vertex range." << llendl;
- }
+ mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset);
- if (mOffset >= (U32) mVertexBuffer->getRequestedIndices() ||
- mOffset + mCount > (U32) mVertexBuffer->getRequestedIndices())
- {
- llerrs << "Invalid draw info index range." << llendl;
- }
+ mDebugColor = (rand() << 16) + rand();
}
LLDrawInfo::~LLDrawInfo()
@@ -3406,6 +3353,11 @@ LLDrawInfo::~LLDrawInfo()
}
}
+void LLDrawInfo::validate()
+{
+ mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset);
+}
+
LLVertexBuffer* LLGeometryManager::createVertexBuffer(U32 type_mask, U32 usage)
{
return new LLVertexBuffer(type_mask, usage);
diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h
index 67c33d5b0f..9b252d1035 100644
--- a/indra/newview/llspatialpartition.h
+++ b/indra/newview/llspatialpartition.h
@@ -75,6 +75,8 @@ public:
BOOL fullbright = FALSE, U8 bump = 0, BOOL particle = FALSE, F32 part_size = 0);
+ void validate();
+
LLPointer<LLVertexBuffer> mVertexBuffer;
LLPointer<LLViewerTexture> mTexture;
LLColor4U mGlowColor;
@@ -676,8 +678,6 @@ public:
virtual void shift(const LLVector3 &offset);
};
-void validate_draw_info(LLDrawInfo& params);
-
extern const F32 SG_BOX_SIDE;
extern const F32 SG_BOX_OFFSET;
extern const F32 SG_BOX_RAD;
diff --git a/indra/newview/llviewercamera.cpp b/indra/newview/llviewercamera.cpp
index aa82c216d9..cef7c4abbb 100644
--- a/indra/newview/llviewercamera.cpp
+++ b/indra/newview/llviewercamera.cpp
@@ -38,6 +38,7 @@
// Viewer includes
#include "llagent.h"
#include "llagentcamera.h"
+#include "llmatrix4a.h"
#include "llviewercontrol.h"
#include "llviewerobjectlist.h"
#include "llviewerregion.h"
@@ -787,21 +788,29 @@ BOOL LLViewerCamera::areVertsVisible(LLViewerObject* volumep, BOOL all_verts)
LLMatrix4 render_mat(vo_volume->getRenderRotation(), LLVector4(vo_volume->getRenderPosition()));
+ LLMatrix4a render_mata;
+ render_mata.loadu(render_mat);
+ LLMatrix4a mata;
+ mata.loadu(mat);
+
num_faces = volume->getNumVolumeFaces();
for (i = 0; i < num_faces; i++)
{
const LLVolumeFace& face = volume->getVolumeFace(i);
- for (U32 v = 0; v < face.mVertices.size(); v++)
+ for (U32 v = 0; v < face.mNumVertices; v++)
{
- LLVector4 vec = LLVector4(face.mVertices[v].mPosition) * mat;
+ const LLVector4a& src_vec = face.mPositions[v];
+ LLVector4a vec;
+ mata.affineTransform(src_vec, vec);
if (drawablep->isActive())
{
- vec = vec * render_mat;
+ LLVector4a t = vec;
+ render_mata.affineTransform(t, vec);
}
- BOOL in_frustum = pointInFrustum(LLVector3(vec)) > 0;
+ BOOL in_frustum = pointInFrustum(LLVector3(vec.getF32())) > 0;
if (( !in_frustum && all_verts) ||
(in_frustum && !all_verts))
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 236ad98d68..91605005e3 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -655,6 +655,9 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
//-----------------------------------------------------------------------------
void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area)
{
+ //bump num_vertices to next multiple of 4
+ num_vertices = (num_vertices + 0x3) & ~0x3;
+
// Do a pre-alloc pass to determine sizes of data.
if (mMesh && mValid)
{
@@ -677,6 +680,8 @@ static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face");
void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update)
{
+ //IF THIS FUNCTION BREAKS, SEE LLPOLYMESH CONSTRUCTOR AND CHECK ALIGNMENT OF INPUT ARRAYS
+
mFace = face;
if (mFace->mVertexBuffer.isNull())
@@ -684,6 +689,16 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
return;
}
+ LLDrawPool *poolp = mFace->getPool();
+ BOOL hardware_skinning = (poolp && poolp->getVertexShaderLevel() > 0) ? TRUE : FALSE;
+
+ if (!hardware_skinning && terse_update)
+ { //no need to do terse updates if we're doing software vertex skinning
+ // since mMesh is being copied into mVertexBuffer every frame
+ return;
+ }
+
+
LLFastTimer t(FTM_AVATAR_FACE);
LLStrider<LLVector3> verticesp;
@@ -696,108 +711,52 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
// Copy data into the faces from the polymesh data.
if (mMesh && mValid)
{
- if (mMesh->getNumVertices())
+ const U32 num_verts = mMesh->getNumVertices();
+
+ if (num_verts)
{
- stop_glerror();
face->getGeometryAvatar(verticesp, normalsp, tex_coordsp, vertex_weightsp, clothing_weightsp);
- stop_glerror();
face->mVertexBuffer->getIndexStrider(indicesp);
- stop_glerror();
verticesp += mMesh->mFaceVertexOffset;
- tex_coordsp += mMesh->mFaceVertexOffset;
normalsp += mMesh->mFaceVertexOffset;
- vertex_weightsp += mMesh->mFaceVertexOffset;
- clothing_weightsp += mMesh->mFaceVertexOffset;
-
- const U32* __restrict coords = (U32*) mMesh->getCoords();
- const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords();
- const U32* __restrict normals = (U32*) mMesh->getNormals();
- const U32* __restrict weights = (U32*) mMesh->getWeights();
- const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights();
-
- const U32 num_verts = mMesh->getNumVertices();
-
- U32 i = 0;
-
- const U32 skip = verticesp.getSkip()/sizeof(U32);
+
+ F32* v = (F32*) verticesp.get();
+ F32* n = (F32*) normalsp.get();
+
+ U32 words = num_verts*4;
- U32* __restrict v = (U32*) verticesp.get();
- U32* __restrict n = (U32*) normalsp.get();
+ LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words);
+ LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words);
+
- if (terse_update)
+ if (!terse_update)
{
- for (S32 i = num_verts; i > 0; --i)
- {
- //morph target application only, only update positions and normals
- v[0] = coords[0];
- v[1] = coords[1];
- v[2] = coords[2];
- coords += 4;
- v += skip;
- }
+ vertex_weightsp += mMesh->mFaceVertexOffset;
+ clothing_weightsp += mMesh->mFaceVertexOffset;
+ tex_coordsp += mMesh->mFaceVertexOffset;
+
+ F32* tc = (F32*) tex_coordsp.get();
+ F32* vw = (F32*) vertex_weightsp.get();
+ F32* cw = (F32*) clothing_weightsp.get();
- for (S32 i = num_verts; i > 0; --i)
- {
- n[0] = normals[0];
- n[1] = normals[1];
- n[2] = normals[2];
- normals += 4;
- n += skip;
- }
+ LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2);
+ LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts);
+ LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4);
}
- else
- {
-
- U32* __restrict tc = (U32*) tex_coordsp.get();
- U32* __restrict vw = (U32*) vertex_weightsp.get();
- U32* __restrict cw = (U32*) clothing_weightsp.get();
-
- do
- {
- v[0] = coords[0];
- v[1] = coords[1];
- v[2] = coords[2];
- coords += 4;
- v += skip;
-
- tc[0] = *(tex_coords++);
- tc[1] = *(tex_coords++);
- tc += skip;
-
- n[0] = normals[0];
- n[1] = normals[1];
- n[2] = normals[2];
- normals += 4;
- n += skip;
-
- vw[0] = *(weights++);
- vw += skip;
-
- cw[0] = *(cloth_weights++);
- cw[1] = *(cloth_weights++);
- cw[2] = *(cloth_weights++);
- cw[3] = *(cloth_weights++);
- cw += skip;
- }
- while (++i < num_verts);
-
- const U32 idx_count = mMesh->getNumFaces()*3;
- indicesp += mMesh->mFaceIndexOffset;
+ const U32 idx_count = mMesh->getNumFaces()*3;
- U16* __restrict idx = indicesp.get();
- S32* __restrict src_idx = (S32*) mMesh->getFaces();
+ indicesp += mMesh->mFaceIndexOffset;
- i = 0;
+ U16* __restrict idx = indicesp.get();
+ S32* __restrict src_idx = (S32*) mMesh->getFaces();
- const S32 offset = (S32) mMesh->mFaceVertexOffset;
+ const S32 offset = (S32) mMesh->mFaceVertexOffset;
- do
- {
- *(idx++) = *(src_idx++)+offset;
- }
- while (++i < idx_count);
+ for (S32 i = 0; i < idx_count; ++i)
+ {
+ *(idx++) = *(src_idx++)+offset;
}
}
}
@@ -824,50 +783,44 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh)
buffer->getVertexStrider(o_vertices, 0);
buffer->getNormalStrider(o_normals, 0);
- //F32 last_weight = F32_MAX;
- LLMatrix4a gBlendMat;
+ F32* __restrict vert = o_vertices[0].mV;
+ F32* __restrict norm = o_normals[0].mV;
const F32* __restrict weights = mMesh->getWeights();
const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords();
const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals();
+ U32 offset = mMesh->mFaceVertexOffset*4;
+ vert += offset;
+ norm += offset;
+
for (U32 index = 0; index < mMesh->getNumVertices(); index++)
{
- U32 bidx = index + mMesh->mFaceVertexOffset;
-
- // blend by first matrix
- F32 w = weights[index];
-
- //LLVector4a coord;
- //coord.load4a(coords[index].mV);
+ // equivalent to joint = floorf(weights[index]);
+ S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index));
+ F32 w = weights[index] - joint;
- //LLVector4a norm;
- //norm.load4a(normals[index].mV);
+ LLMatrix4a gBlendMat;
- S32 joint = llfloor(w);
- w -= joint;
-
- if (w > 0.f)
+ if (w != 0.f)
{
- // Try to keep all the accesses to the matrix data as close
- // together as possible. This function is a hot spot on the
- // Mac. JC
+ // blend between matrices and apply
gBlendMat.setLerp(gJointMatAligned[joint+0],
gJointMatAligned[joint+1], w);
LLVector4a res;
gBlendMat.affineTransform(coords[index], res);
- o_vertices[bidx].setVec(res[0], res[1], res[2]);
+ res.store4a(vert+index*4);
gBlendMat.rotate(normals[index], res);
- o_normals[bidx].setVec(res[0], res[1], res[2]);
+ res.store4a(norm+index*4);
}
else
{ // No lerp required in this case.
LLVector4a res;
gJointMatAligned[joint].affineTransform(coords[index], res);
- o_vertices[bidx].setVec(res[0], res[1], res[2]);
+ res.store4a(vert+index*4);
gJointMatAligned[joint].rotate(normals[index], res);
- o_normals[bidx].setVec(res[0], res[1], res[2]);
+ res.store4a(norm+index*4);
}
}
diff --git a/indra/newview/llviewerjointmesh_sse.cpp b/indra/newview/llviewerjointmesh_sse.cpp
index e586b910cd..4fb5fafad1 100644
--- a/indra/newview/llviewerjointmesh_sse.cpp
+++ b/indra/newview/llviewerjointmesh_sse.cpp
@@ -94,8 +94,8 @@ void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh)
buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset);
const F32* weights = mesh->getWeights();
- const LLVector3* coords = mesh->getCoords();
- const LLVector3* normals = mesh->getNormals();
+ const LLVector3* coords = (const LLVector3*)mesh->getCoords();
+ const LLVector3* normals = (const LLVector3*)mesh->getNormals();
for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index)
{
if( weight != weights[index])
diff --git a/indra/newview/llviewerjointmesh_sse2.cpp b/indra/newview/llviewerjointmesh_sse2.cpp
index 550044c321..58cd75871d 100644
--- a/indra/newview/llviewerjointmesh_sse2.cpp
+++ b/indra/newview/llviewerjointmesh_sse2.cpp
@@ -101,8 +101,8 @@ void LLViewerJointMesh::updateGeometrySSE2(LLFace *face, LLPolyMesh *mesh)
buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset);
const F32* weights = mesh->getWeights();
- const LLVector3* coords = mesh->getCoords();
- const LLVector3* normals = mesh->getNormals();
+ const LLVector3* coords = (const LLVector3*)mesh->getCoords();
+ const LLVector3* normals = (const LLVector3*)mesh->getNormals();
for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index)
{
if( weight != weights[index])
diff --git a/indra/newview/llviewerregion.cpp b/indra/newview/llviewerregion.cpp
index 4fdabd7ff0..268e14674e 100644
--- a/indra/newview/llviewerregion.cpp
+++ b/indra/newview/llviewerregion.cpp
@@ -1497,6 +1497,7 @@ void LLViewerRegion::setSeedCapability(const std::string& url)
capabilityNames.append("FetchLibDescendents");
capabilityNames.append("GetTexture");
capabilityNames.append("GetMesh");
+ capabilityNames.append("GetObjectCost");
capabilityNames.append("GroupProposalBallot");
capabilityNames.append("HomeLocation");
capabilityNames.append("LandResources");
@@ -1513,13 +1514,13 @@ void LLViewerRegion::setSeedCapability(const std::string& url)
capabilityNames.append("ProvisionVoiceAccountRequest");
capabilityNames.append("RemoteParcelRequest");
capabilityNames.append("RequestTextureDownload");
- capabilityNames.append("SimulatorFeatures");
capabilityNames.append("SearchStatRequest");
capabilityNames.append("SearchStatTracking");
capabilityNames.append("SendPostcard");
capabilityNames.append("SendUserReport");
capabilityNames.append("SendUserReportWithScreenshot");
capabilityNames.append("ServerReleaseNotes");
+ capabilityNames.append("SimulatorFeatures");
capabilityNames.append("StartGroupProposal");
capabilityNames.append("TextureStats");
capabilityNames.append("UntrustedSimulatorMessage");
diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp
index a82afbeb76..91c9b762c5 100644
--- a/indra/newview/llvograss.cpp
+++ b/indra/newview/llvograss.cpp
@@ -594,7 +594,7 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
LLVector2 tc[4];
LLVector3 v[4];
- // LLVector3 n[4]; // unused!
+ //LLVector3 n[4];
F32 closest_t = 1.f;
@@ -640,7 +640,6 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
position.mV[2] += blade_height;
v[3] = v1 = position + mRegionp->getOriginAgent();
-
F32 a,b,t;
BOOL hit = FALSE;
diff --git a/indra/newview/llvosurfacepatch.cpp b/indra/newview/llvosurfacepatch.cpp
index ef7b161003..eef62ddf1a 100644
--- a/indra/newview/llvosurfacepatch.cpp
+++ b/indra/newview/llvosurfacepatch.cpp
@@ -60,27 +60,77 @@ public:
LLVertexBuffer(MAP_VERTEX | MAP_NORMAL | MAP_TEXCOORD0 | MAP_TEXCOORD1 | MAP_COLOR, GL_DYNAMIC_DRAW_ARB)
{
//texture coordinates 2 and 3 exist, but use the same data as texture coordinate 1
- mOffsets[TYPE_TEXCOORD3] = mOffsets[TYPE_TEXCOORD2] = mOffsets[TYPE_TEXCOORD1];
- mTypeMask |= MAP_TEXCOORD2 | MAP_TEXCOORD3;
};
- /*// virtual
+ // virtual
void setupVertexBuffer(U32 data_mask) const
- {
- if (LLDrawPoolTerrain::getDetailMode() == 0 || LLPipeline::sShadowRender)
+ {
+ U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;
+
+ //assume tex coords 2 and 3 are present
+ U32 type_mask = mTypeMask | MAP_TEXCOORD2 | MAP_TEXCOORD3;
+
+ if ((data_mask & type_mask) != data_mask)
+ {
+ llerrs << "LLVertexBuffer::setupVertexBuffer missing required components for supplied data mask." << llendl;
+ }
+
+ if (data_mask & MAP_NORMAL)
+ {
+ glNormalPointer(GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_NORMAL], (void*)(base + mOffsets[TYPE_NORMAL]));
+ }
+ if (data_mask & MAP_TEXCOORD3)
+ { //substitute tex coord 0 for tex coord 3
+ glClientActiveTextureARB(GL_TEXTURE3_ARB);
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
+ }
+ if (data_mask & MAP_TEXCOORD2)
+ { //substitute tex coord 0 for tex coord 2
+ glClientActiveTextureARB(GL_TEXTURE2_ARB);
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
+ }
+ if (data_mask & MAP_TEXCOORD1)
{
- LLVertexBuffer::setupVertexBuffer(data_mask);
+ glClientActiveTextureARB(GL_TEXTURE1_ARB);
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD1], (void*)(base + mOffsets[TYPE_TEXCOORD1]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
}
- else if (data_mask & LLVertexBuffer::MAP_TEXCOORD1)
+ if (data_mask & MAP_BINORMAL)
{
- LLVertexBuffer::setupVertexBuffer(data_mask);
+ glClientActiveTextureARB(GL_TEXTURE2_ARB);
+ glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_BINORMAL], (void*)(base + mOffsets[TYPE_BINORMAL]));
+ glClientActiveTextureARB(GL_TEXTURE0_ARB);
}
- else
+ if (data_mask & MAP_TEXCOORD0)
{
- LLVertexBuffer::setupVertexBuffer(data_mask);
+ glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0]));
}
- llglassertok();
- }*/
+ if (data_mask & MAP_COLOR)
+ {
+ glColorPointer(4, GL_UNSIGNED_BYTE, LLVertexBuffer::sTypeOffsets[TYPE_COLOR], (void*)(base + mOffsets[TYPE_COLOR]));
+ }
+
+ if (data_mask & MAP_WEIGHT)
+ {
+ glVertexAttribPointerARB(1, 1, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT], (void*)(base + mOffsets[TYPE_WEIGHT]));
+ }
+
+ if (data_mask & MAP_WEIGHT4 && sWeight4Loc != -1)
+ {
+ glVertexAttribPointerARB(sWeight4Loc, 4, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT4], (void*)(base+mOffsets[TYPE_WEIGHT4]));
+ }
+
+ if (data_mask & MAP_CLOTHWEIGHT)
+ {
+ glVertexAttribPointerARB(4, 4, GL_FLOAT, TRUE, LLVertexBuffer::sTypeOffsets[TYPE_CLOTHWEIGHT], (void*)(base + mOffsets[TYPE_CLOTHWEIGHT]));
+ }
+ if (data_mask & MAP_VERTEX)
+ {
+ glVertexPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_VERTEX], (void*)(base + 0));
+ }
+ }
};
//============================================================================
diff --git a/indra/newview/llvotextbubble.cpp b/indra/newview/llvotextbubble.cpp
index 428ef20006..339da3c0bf 100644
--- a/indra/newview/llvotextbubble.cpp
+++ b/indra/newview/llvotextbubble.cpp
@@ -45,6 +45,7 @@
#include "llviewertexturelist.h"
#include "llvolume.h"
#include "pipeline.h"
+#include "llvector4a.h"
#include "llviewerregion.h"
LLVOTextBubble::LLVOTextBubble(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp)
@@ -217,7 +218,7 @@ void LLVOTextBubble::updateFaceSize(S32 idx)
else
{
const LLVolumeFace& vol_face = getVolume()->getVolumeFace(idx);
- face->setSize(vol_face.mVertices.size(), vol_face.mIndices.size());
+ face->setSize(vol_face.mNumVertices, vol_face.mNumIndices);
}
}
@@ -235,19 +236,37 @@ void LLVOTextBubble::getGeometry(S32 idx,
const LLVolumeFace& face = getVolume()->getVolumeFace(idx);
- LLVector3 pos = getPositionAgent();
+ LLVector4a pos;
+ pos.load3(getPositionAgent().mV);
+
+ LLVector4a scale;
+ scale.load3(getScale().mV);
+
LLColor4U color = LLColor4U(getTE(idx)->getColor());
U32 offset = mDrawable->getFace(idx)->getGeomIndex();
- for (U32 i = 0; i < face.mVertices.size(); i++)
+ LLVector4a* dst_pos = (LLVector4a*) verticesp.get();
+ LLVector4a* src_pos = (LLVector4a*) face.mPositions;
+
+ LLVector4a* dst_norm = (LLVector4a*) normalsp.get();
+ LLVector4a* src_norm = (LLVector4a*) face.mNormals;
+
+ LLVector2* dst_tc = (LLVector2*) texcoordsp.get();
+ LLVector2* src_tc = (LLVector2*) face.mTexCoords;
+
+ LLVector4a::memcpyNonAliased16((F32*) dst_norm, (F32*) src_norm, face.mNumVertices*4);
+ LLVector4a::memcpyNonAliased16((F32*) dst_tc, (F32*) src_tc, face.mNumVertices*2);
+
+
+ for (U32 i = 0; i < face.mNumVertices; i++)
{
- *verticesp++ = face.mVertices[i].mPosition.scaledVec(getScale()) + pos;
- *normalsp++ = face.mVertices[i].mNormal;
- *texcoordsp++ = face.mVertices[i].mTexCoord;
+ LLVector4a t;
+ t.setMul(src_pos[i], scale);
+ dst_pos[i].setAdd(t, pos);
*colorsp++ = color;
}
- for (U32 i = 0; i < face.mIndices.size(); i++)
+ for (U32 i = 0; i < face.mNumIndices; i++)
{
*indicesp++ = face.mIndices[i] + offset;
}
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index cea964a100..8022f81f19 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -60,6 +60,7 @@
#include "llflexibleobject.h"
#include "llsky.h"
#include "lltexturefetch.h"
+#include "llvector4a.h"
#include "llviewercamera.h"
#include "llviewertexturelist.h"
#include "llviewerobjectlist.h"
@@ -1601,14 +1602,8 @@ void LLVOVolume::updateFaceSize(S32 idx)
else
{
const LLVolumeFace& vol_face = getVolume()->getVolumeFace(idx);
- if (LLPipeline::sUseTriStrips)
- {
- facep->setSize(vol_face.mVertices.size(), vol_face.mTriStrip.size());
- }
- else
- {
- facep->setSize(vol_face.mVertices.size(), vol_face.mIndices.size());
- }
+ facep->setSize(vol_face.mNumVertices, vol_face.mNumIndices);
+
}
}
@@ -1863,21 +1858,25 @@ bool LLVOVolume::hasMedia() const
LLVector3 LLVOVolume::getApproximateFaceNormal(U8 face_id)
{
LLVolume* volume = getVolume();
- LLVector3 result;
+ LLVector4a result;
+ result.clear();
+
+ LLVector3 ret;
if (volume && face_id < volume->getNumVolumeFaces())
{
const LLVolumeFace& face = volume->getVolumeFace(face_id);
- for (S32 i = 0; i < (S32)face.mVertices.size(); ++i)
+ for (S32 i = 0; i < (S32)face.mNumVertices; ++i)
{
- result += face.mVertices[i].mNormal;
+ result.add(face.mNormals[i]);
}
- result = volumeDirectionToAgent(result);
- result.normVec();
+ LLVector3 ret((F32*) &result.mQ);
+ ret = volumeDirectionToAgent(ret);
+ ret.normVec();
}
- return result;
+ return ret;
}
void LLVOVolume::requestMediaDataUpdate(bool isNew)
@@ -3032,7 +3031,7 @@ F32 LLVOVolume::getBinRadius()
for (S32 i = 0; i < volume->getNumVolumeFaces(); ++i)
{
const LLVolumeFace& face = volume->getVolumeFace(i);
- vert_count += face.mVertices.size();
+ vert_count += face.mNumVertices;
}
scale = 1.f/llmax(vert_count/1024.f, 1.f);
@@ -3383,7 +3382,7 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
draw_vec[idx]->mCount += facep->getIndicesCount();
draw_vec[idx]->mEnd += facep->getGeomCount();
draw_vec[idx]->mVSize = llmax(draw_vec[idx]->mVSize, facep->getVirtualSize());
- validate_draw_info(*draw_vec[idx]);
+ draw_vec[idx]->validate();
update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[0]);
update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[1]);
}
@@ -3407,12 +3406,13 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
}
draw_info->mExtents[0] = facep->mExtents[0];
draw_info->mExtents[1] = facep->mExtents[1];
- validate_draw_info(*draw_info);
if (LLPipeline::sUseTriStrips)
{
draw_info->mDrawMode = LLRender::TRIANGLE_STRIP;
}
+
+ draw_info->validate();
}
}