diff options
| author | Dave Parks <davep@lindenlab.com> | 2010-05-22 04:35:02 -0500 | 
|---|---|---|
| committer | Dave Parks <davep@lindenlab.com> | 2010-05-22 04:35:02 -0500 | 
| commit | 4d57cb3c0975ff0bcea0d6fb3498f2d90962ff16 (patch) | |
| tree | 09f22534b7ae5b066ae7fa1b2b9332df791c8ae0 /indra | |
| parent | 8919f4811a7dcaf47dc58159e0ba4ba042183325 (diff) | |
Vectorize/memory align buffers in llvolumeface WIP
Diffstat (limited to 'indra')
| -rw-r--r-- | indra/llmath/llvolume.cpp | 720 | ||||
| -rw-r--r-- | indra/llmath/llvolume.h | 35 | ||||
| -rw-r--r-- | indra/newview/lldrawpoolavatar.cpp | 6 | ||||
| -rw-r--r-- | indra/newview/llface.cpp | 55 | ||||
| -rw-r--r-- | indra/newview/llfloaterimagepreview.cpp | 6 | ||||
| -rw-r--r-- | indra/newview/llhudicon.cpp | 2 | ||||
| -rw-r--r-- | indra/newview/llhudtext.cpp | 5 | ||||
| -rw-r--r-- | indra/newview/llviewercamera.cpp | 5 | ||||
| -rw-r--r-- | indra/newview/llvograss.cpp | 2 | ||||
| -rw-r--r-- | indra/newview/llvotextbubble.cpp | 33 | ||||
| -rw-r--r-- | indra/newview/llvovolume.cpp | 29 | 
11 files changed, 474 insertions, 424 deletions
| diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 4e342b0b48..01fe2be371 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -1,4 +1,5 @@  /**  +   * @file llvolume.cpp   *   * $LicenseInfo:firstyear=2002&license=viewergpl$ @@ -89,8 +90,6 @@ const F32 SKEW_MAX	=  0.95f;  const F32 SCULPT_MIN_AREA = 0.002f;  const S32 SCULPT_MIN_AREA_DETAIL = 1; -#define GEN_TRI_STRIP 0 -  BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm)  {      	LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -134,21 +133,25 @@ BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, con  // and returns the intersection point along dir in intersection_t.  // Moller-Trumbore algorithm -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, +BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,  							F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided)  {  	F32 u, v, t;  	/* find vectors for two edges sharing vert0 */ -	LLVector3 edge1 = vert1 - vert0; +	LLVector4a edge1; +	edge1.setSub(vert1, vert0); -	LLVector3 edge2 = vert2 - vert0;; +	 +	LLVector4a edge2; +	edge2.setSub(vert2, vert0);  	/* begin calculating determinant - also used to calculate U parameter */ -	LLVector3 pvec = dir % edge2; -	 +	LLVector4a pvec; +	pvec.setCross3(dir, edge2); +  	/* if determinant is near zero, ray lies in plane of triangle */ -	F32 det = edge1 * pvec; +	F32 det = edge1.dot3(pvec);  	if (!two_sided)  	{ @@ -158,10 +161,11 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons  		}  		/* calculate distance from vert0 to ray origin */ -		LLVector3 tvec = orig - vert0; +		LLVector4a tvec; +		tvec.setSub(orig, vert0);  		/* calculate U parameter and test bounds */ -		u = tvec * pvec;	 +		u = tvec.dot3(pvec);	  		if (u < 0.f || u > det)  		{ @@ -169,17 +173,18 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons  		}  		/* prepare to test V parameter */ -		LLVector3 qvec = tvec % edge1; +		LLVector4a qvec; +		qvec.setCross3(tvec, edge1);  		/* calculate V parameter and test bounds */ -		v = dir * qvec; +		v = dir.dot3(qvec);  		if (v < 0.f || u + v > det)  		{  			return FALSE;  		}  		/* calculate t, scale parameters, ray intersects triangle */ -		t = edge2 * qvec; +		t = edge2.dot3(qvec);  		F32 inv_det = 1.0 / det;  		t *= inv_det;  		u *= inv_det; @@ -195,20 +200,22 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons  		F32 inv_det = 1.0 / det;  		/* calculate distance from vert0 to ray origin */ -		LLVector3 tvec = orig - vert0; +		LLVector4a tvec; +		tvec.setSub(orig, vert0);  		/* calculate U parameter and test bounds */ -		u = (tvec * pvec) * inv_det; +		u = (tvec.dot3(pvec)) * inv_det;  		if (u < 0.f || u > 1.f)  		{  			return FALSE;  			}  		/* prepare to test V parameter */ -		LLVector3 qvec = tvec - edge1; +		LLVector4a qvec; +		qvec.setSub(tvec, edge1);  		/* calculate V parameter and test bounds */ -		v = (dir * qvec) * inv_det; +		v = (dir.dot3(qvec)) * inv_det;  		if (v < 0.f || u + v > 1.f)  		{ @@ -216,7 +223,7 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons  		}  		/* calculate t, ray intersects triangle */ -		t = (edge2 * qvec) * inv_det; +		t = (edge2.dot3(qvec)) * inv_det;  	}  	if (intersection_a != NULL) @@ -4120,13 +4127,14 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,  						norm_mat.rotate(n[v1], t);  						t.normalize3Fast(); -						LLVector3 norm1 = LLVector3(t[0], t[1], t[2]); -						normals.push_back(norm1); +						normals.push_back(LLVector3(t[0], t[1], t[2])); -						vertices.push_back(face.mVertices[v2].mPosition*mat); -						LLVector3 norm2 = face.mVertices[v2].mNormal * norm_mat; -						norm2.normVec(); -						normals.push_back(norm2); +						mat.affineTransform(v[v2], t); +						vertices.push_back(LLVector3(t[0], t[1], t[2])); +						 +						norm_mat.rotate(n[v2], t); +						t.normalize3Fast(); +						normals.push_back(LLVector3(t[0], t[1], t[2]));  						segments.push_back(vertices.size());  					} @@ -4177,6 +4185,10 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,  				genBinormals(i);  			} +			LLVector4a starta, dira; + +			LLVector4a* p = (LLVector4a*) face.mPositions; +  			for (U32 tri = 0; tri < face.mNumIndices/3; tri++)   			{  				S32 index1 = face.mIndices[tri*3+0]; @@ -4185,15 +4197,15 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,  				F32 a, b, t; -				if (LLTriangleRayIntersect(face.mVertices[index1].mPosition, -										   face.mVertices[index2].mPosition, -										   face.mVertices[index3].mPosition, -										   start, dir, &a, &b, &t, FALSE)) +				if (LLTriangleRayIntersect(p[index1], +					p[index2], +					p[index3], +					starta, dira, &a, &b, &t, FALSE))  				{  					if ((t >= 0.f) &&      // if hit is after start  						(t <= 1.f) &&      // and before end  						(t < closest_t))   // and this hit is closer -		{ +					{  						closest_t = t;  						hit_face = i; @@ -4201,27 +4213,35 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end,  						{  							*intersection = start + dir * closest_t;  						} -			 + +  						if (tex_coord != NULL) -			{ -							*tex_coord = ((1.f - a - b)  * face.mVertices[index1].mTexCoord + -										  a              * face.mVertices[index2].mTexCoord + -										  b              * face.mVertices[index3].mTexCoord); +						{ +							LLVector2* tc = (LLVector2*) face.mTexCoords; +							*tex_coord = ((1.f - a - b)  * tc[index1] + +								a              * tc[index2] + +								b              * tc[index3]);  						}  						if (normal != NULL) -				{ -							*normal    = ((1.f - a - b)  * face.mVertices[index1].mNormal +  -										  a              * face.mVertices[index2].mNormal + -										  b              * face.mVertices[index3].mNormal); +						{ +							LLVector4* norm = (LLVector4*) face.mNormals; + +							*normal    = ((1.f - a - b)  * LLVector3(norm[index1]) +  +								a              * LLVector3(norm[index2]) + +								b              * LLVector3(norm[index3]));  						}  						if (bi_normal != NULL) -					{ -							*bi_normal = ((1.f - a - b)  * face.mVertices[index1].mBinormal +  -										  a              * face.mVertices[index2].mBinormal + -										  b              * face.mVertices[index3].mBinormal); +						{ +							LLVector4* binormal = (LLVector4*) face.mBinormals; +							if (binormal) +							{ +								*bi_normal = ((1.f - a - b)  * LLVector3(binormal[index1]) +  +									a              * LLVector3(binormal[index2]) + +									b              * LLVector3(binormal[index3])); +							}  						}  					} @@ -4992,6 +5012,14 @@ std::ostream& operator<<(std::ostream &s, const LLVolume *volumep)  	return s;  } +LLVolumeFace::~LLVolumeFace() +{ +	_mm_free(mPositions); +	_mm_free(mNormals); +	_mm_free(mTexCoords); +	_mm_free(mIndices); +	_mm_free(mBinormals); +}  BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)  { @@ -5012,6 +5040,7 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)  void LLVolumeFace::optimize(F32 angle_cutoff)  { +#if 0 //disabling until a vectorized version is available  	LLVolumeFace new_face;  	VertexMapData::PointMap point_map; @@ -5063,6 +5092,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff)  	mVertices = new_face.mVertices;  	mIndices = new_face.mIndices; +#endif   }  void	LerpPlanarVertex(LLVolumeFace::VertexData& v0, @@ -5127,20 +5157,22 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  		corners[1].mTexCoord=corners[2].mTexCoord;  		corners[2].mTexCoord=swap;  	} -	baseVert.mBinormal = calc_binormal_from_triangle(  + +	LLVector4a binormal; +	 +	calc_binormal_from_triangle( binormal,  		corners[0].mPosition, corners[0].mTexCoord,  		corners[1].mPosition, corners[1].mTexCoord,  		corners[2].mPosition, corners[2].mTexCoord); -	for(int t = 0; t < 4; t++){ -		corners[t].mBinormal = baseVert.mBinormal; -		corners[t].mNormal = baseVert.mNormal; -	} -	mHasBinormals = TRUE; -	if (partial_build) -	{ -		mVertices.clear(); -	} +	S32 size = (grid_size+1)*(grid_size+1); +	resizeVertices(size); +	allocateBinormals(size); + +	LLVector4a* pos = (LLVector4a*) mPositions; +	LLVector4a* norm = (LLVector4a*) mNormals; +	LLVector4a* binorm = (LLVector4a*) mBinormals; +	LLVector2* tc = (LLVector2*) mTexCoords;  	S32	vtop = mNumVertices;  	for(int gx = 0;gx<grid_size+1;gx++){ @@ -5153,7 +5185,11 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  				newVert,  				(F32)gx/(F32)grid_size,  				(F32)gy/(F32)grid_size); -			mVertices.push_back(newVert); + +			(*pos++).load3(newVert.mPosition.mV); +			(*norm++).load3(baseVert.mNormal.mV); +			(*tc++) = newVert.mTexCoord; +			(*binorm++).load4a((F32*) &binormal.mQ);  			if (gx == 0 && gy == 0)  			{ @@ -5170,9 +5206,10 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  	if (!partial_build)  	{ -#if GEN_TRI_STRIP -		mTriStrip.clear(); -#endif +		resizeIndices(grid_size*6); + +		U16* out = mIndices; +  		S32 idxs[] = {0,1,(grid_size+1)+1,(grid_size+1)+1,(grid_size+1),0};  		for(S32 gx = 0;gx<grid_size;gx++)  		{ @@ -5183,61 +5220,19 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)  				{  					for(S32 i=5;i>=0;i--)  					{ -						mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); -					} -					 -#if GEN_TRI_STRIP -					if (gy == 0) -					{ -						mTriStrip.push_back((gx+1)*(grid_size+1)); -						mTriStrip.push_back((gx+1)*(grid_size+1)); -						mTriStrip.push_back(gx*(grid_size+1)); -					} - -					mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); -					mTriStrip.push_back(gy+1+gx*(grid_size+1)); -					 -					 -					if (gy == grid_size-1) -					{ -						mTriStrip.push_back(gy+1+gx*(grid_size+1)); -					} -#endif +						*out++ = (vtop+(gy*(grid_size+1))+gx+idxs[i]); +					}		  				}  				else  				{  					for(S32 i=0;i<6;i++)  					{ -						mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); -					} - -#if GEN_TRI_STRIP -					if (gy == 0) -					{ -						mTriStrip.push_back(gx*(grid_size+1)); -						mTriStrip.push_back(gx*(grid_size+1)); -						mTriStrip.push_back((gx+1)*(grid_size+1)); +						*out++ = (vtop+(gy*(grid_size+1))+gx+idxs[i]);  					} - -					mTriStrip.push_back(gy+1+gx*(grid_size+1)); -					mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); -					 -					if (gy == grid_size-1) -					{ -						mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); -					} -#endif  				}  			}  		} - -#if GEN_TRI_STRIP -		if (mTriStrip.size()%2 == 1) -		{ -			mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -		} -#endif  	}  	return TRUE; @@ -5267,11 +5262,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  	num_vertices = profile.size();  	num_indices = (profile.size() - 2)*3; -	mVertices.resize(num_vertices); +	if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) +	{ +		resizeVertices(num_vertices+1); +		allocateBinormals(num_vertices+1);	 -	if (!partial_build) +		if (!partial_build) +		{ +			resizeIndices(num_indices+3); +		} +	} +	else  	{ -		mIndices.resize(num_indices); +		resizeVertices(num_vertices); +		allocateBinormals(num_vertices); + +		if (!partial_build) +		{ +			resizeIndices(num_indices); +		}  	}  	S32 max_s = volume->getProfile().getTotal(); @@ -5298,79 +5307,87 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  	LLVector3& min = mExtents[0];  	LLVector3& max = mExtents[1]; +	LLVector2* tc = (LLVector2*) mTexCoords; +	LLVector4a* pos = (LLVector4a*) mPositions; +	LLVector4a* norm = (LLVector4a*) mNormals; +	LLVector4a* binorm = (LLVector4a*) mBinormals; +  	// Copy the vertices into the array  	for (S32 i = 0; i < num_vertices; i++)  	{  		if (mTypeMask & TOP_MASK)  		{ -			mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; -			mVertices[i].mTexCoord.mV[1] = profile[i].mV[1]+0.5f; +			tc[i].mV[0] = profile[i].mV[0]+0.5f; +			tc[i].mV[1] = profile[i].mV[1]+0.5f;  		}  		else  		{  			// Mirror for underside. -			mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; -			mVertices[i].mTexCoord.mV[1] = 0.5f - profile[i].mV[1]; +			tc[i].mV[0] = profile[i].mV[0]+0.5f; +			tc[i].mV[1] = 0.5f - profile[i].mV[1];  		} -		mVertices[i].mPosition = mesh[i + offset].mPos; +		pos[i].load3(mesh[i + offset].mPos.mV);  		if (i == 0)  		{ -			min = max = mVertices[i].mPosition; -			min_uv = max_uv = mVertices[i].mTexCoord; +			min = max = mesh[i+offset].mPos; +			min_uv = max_uv = tc[i];  		}  		else  		{ -			update_min_max(min,max, mVertices[i].mPosition); -			update_min_max(min_uv, max_uv, mVertices[i].mTexCoord); +			update_min_max(min,max, mesh[i+offset].mPos); +			update_min_max(min_uv, max_uv, tc[i]);  		}  	}  	mCenter = (min+max)*0.5f;  	cuv = (min_uv + max_uv)*0.5f; -	LLVector3 binormal = calc_binormal_from_triangle(  +	LLVector4a binormal; +	calc_binormal_from_triangle(binormal,  		mCenter, cuv, -		mVertices[0].mPosition, mVertices[0].mTexCoord, -		mVertices[1].mPosition, mVertices[1].mTexCoord); -	binormal.normVec(); +		mesh[0+offset].mPos, tc[0], +		mesh[1+offset].mPos, tc[1]); +	binormal.normalize3Fast(); + +	LLVector4a normal; +	LLVector4a d0, d1; +	LLVector4a center; + +	center.load3(mCenter.mV); -	LLVector3 d0; -	LLVector3 d1; -	LLVector3 normal; +	d0.setSub(center, pos[0]); +	d1.setSub(center, pos[1]); -	d0 = mCenter-mVertices[0].mPosition; -	d1 = mCenter-mVertices[1].mPosition; +	if (mTypeMask & TOP_MASK) +	{ +		normal.setCross3(d0, d1); +	} +	else +	{ +		normal.setCross3(d1, d0); +	} -	normal = (mTypeMask & TOP_MASK) ? (d0%d1) : (d1%d0); -	normal.normVec(); +	normal.normalize3Fast();  	VertexData vd;  	vd.mPosition = mCenter; -	vd.mNormal = normal; -	vd.mBinormal = binormal;  	vd.mTexCoord = cuv;  	if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))  	{ -		mVertices.push_back(vd); +		pos[num_vertices].load4a((F32*) ¢er.mQ); +		tc[num_vertices] = cuv;  		num_vertices++; -		if (!partial_build) -		{ -			vector_append(mIndices, 3); -		}  	} -	  	for (S32 i = 0; i < num_vertices; i++)  	{ -		mVertices[i].mBinormal = binormal; -		mVertices[i].mNormal = normal; +		binorm[i].load4a((F32*) &binormal.mQ); +		norm[i].load4a((F32*) &normal.mQ);  	} -	mHasBinormals = TRUE; -  	if (partial_build)  	{  		return TRUE; @@ -5478,8 +5495,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  					pt2--;  				}  			} - -			makeTriStrip();  		}  		else  		{ @@ -5584,8 +5599,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  					pt2--;  				}  			} - -			makeTriStrip();  		}  	}  	else @@ -5607,131 +5620,63 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)  			mIndices[3*i+v2] = i + 1;  		} -#if GEN_TRI_STRIP -		//make tri strip -		if (mTypeMask & OPEN_MASK) -		{ -			makeTriStrip(); -		} -		else -		{ -			S32 j = num_vertices-2; -			if (mTypeMask & TOP_MASK) -			{ -				mTriStrip.push_back(0); -				for (S32 i = 0; i <= j; ++i) -				{ -					mTriStrip.push_back(i); -					if (i != j) -					{ -						mTriStrip.push_back(j); -					} -					--j; -				} -			} -			else -			{ -				mTriStrip.push_back(j); -				for (S32 i = 0; i <= j; ++i) -				{ -					if (i != j) -					{ -						mTriStrip.push_back(j); -					} -					mTriStrip.push_back(i); -					--j; -				} -			} -			 -			mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -			if (mTriStrip.size()%2 == 1) -			{ -				mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -			} -		} -#endif  	}  	return TRUE;  } -void LLVolumeFace::makeTriStrip() -{ -#if GEN_TRI_STRIP -	for (U32 i = 0; i < mNumIndices; i+=3) -	{ -		U16 i0 = mIndices[i]; -		U16 i1 = mIndices[i+1]; -		U16 i2 = mIndices[i+2]; - -		if ((i/3)%2 == 1) -		{ -			mTriStrip.push_back(i0); -			mTriStrip.push_back(i0); -			mTriStrip.push_back(i1); -			mTriStrip.push_back(i2); -			mTriStrip.push_back(i2); -		} -		else -		{ -			mTriStrip.push_back(i2); -			mTriStrip.push_back(i2); -			mTriStrip.push_back(i1); -			mTriStrip.push_back(i0); -			mTriStrip.push_back(i0); -		} -	} - -	if (mTriStrip.size()%2 == 1) -	{ -		mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -	} -#endif -} -  void LLVolumeFace::createBinormals()  {  	LLMemType m1(LLMemType::MTYPE_VOLUME); -	if (!mHasBinormals) +	if (!mBinormals)  	{ +		allocateBinormals(mNumVertices); +  		//generate binormals +		LLStrider<LLVector3> pos; +		pos = (LLVector3*) mPositions; +		pos.setStride(16); + +		LLVector2* tc = (LLVector2*) mTexCoords; +		LLVector4a* binorm = (LLVector4a*) mBinormals; +  		for (U32 i = 0; i < mNumIndices/3; i++)   		{	//for each triangle -			const VertexData& v0 = mVertices[mIndices[i*3+0]]; -			const VertexData& v1 = mVertices[mIndices[i*3+1]]; -			const VertexData& v2 = mVertices[mIndices[i*3+2]]; +			const U16& i0 = mIndices[i*3+0]; +			const U16& i1 = mIndices[i*3+1]; +			const U16& i2 = mIndices[i*3+2];  			//calculate binormal -			LLVector3 binorm = calc_binormal_from_triangle(v0.mPosition, v0.mTexCoord, -															v1.mPosition, v1.mTexCoord, -															v2.mPosition, v2.mTexCoord); +			LLVector4a binormal; +			calc_binormal_from_triangle(binormal, +										pos[i0], tc[i0], +										pos[i1], tc[i1], +										pos[i2], tc[i2]); -			for (U32 j = 0; j < 3; j++)  -			{ //add triangle normal to vertices -				mVertices[mIndices[i*3+j]].mBinormal += binorm; // * (weight_sum - d[j])/weight_sum; -			} + +			//add triangle normal to vertices +			binorm[i0].add(binormal); +			binorm[i1].add(binormal); +			binorm[i2].add(binormal);  			//even out quad contributions  			if (i % 2 == 0)   			{ -				mVertices[mIndices[i*3+2]].mBinormal += binorm; +				binorm[i2].add(binormal);  			}  			else   			{ -				mVertices[mIndices[i*3+1]].mBinormal += binorm; +				binorm[i1].add(binormal);  			}  		}  		//normalize binormals  		for (U32 i = 0; i < mNumVertices; i++)   		{ -			mVertices[i].mBinormal.normVec(); -			mVertices[i].mNormal.normVec(); +			binorm[i].normalize3Fast();  		} - -		mHasBinormals = TRUE;  	}  } @@ -5754,6 +5699,13 @@ void LLVolumeFace::resizeVertices(S32 num_verts)  	mNumVertices = num_verts;  } +void LLVolumeFace::allocateBinormals(S32 num_verts) +{ +	_mm_free(mBinormals); +	mBinormals = (F32*) _mm_malloc(num_verts*16, 16); +} + +  void LLVolumeFace::resizeIndices(S32 num_indices)  {  	_mm_free(mIndices); @@ -5761,44 +5713,107 @@ void LLVolumeFace::resizeIndices(S32 num_indices)  	//pad index block end to allow for QWORD reads  	S32 size = ((num_indices*2) + 0xF) & ~0xF; -	mIndices = (U16*) _mm_malloc(size);	 +	mIndices = (U16*) _mm_malloc(size,16);	  	mNumIndices = num_indices;  } -void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat, LLMatrix4& norm_mat) +void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx) +{ +	resizeVertices(v.size()); +	resizeIndices(idx.size()); + +	for (U32 i = 0; i < v.size(); ++i) +	{ +		for (U32 j = 0; j < 3; ++j) +		{ +			mPositions[i*4+j] = v[i].mPosition[j]; +			mNormals[i*4+j] = v[i].mNormal[j]; +		} + +		mTexCoords[i*2+0] = v[i].mTexCoord.mV[0]; +		mTexCoords[i*2+1] = v[i].mTexCoord.mV[1]; +	} + +	for (U32 i = 0; i < idx.size(); ++i) +	{ +		mIndices[i] = idx[i]; +	} +} + +void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMatrix4& norm_mat_in)  {  	U16 offset = mNumVertices; -	if (face.mNumVertices + mNumVertices > 65536) +	S32 new_count = face.mNumVertices + mNumVertices; + +	if (new_count > 65536)  	{  		llerrs << "Cannot append face -- 16-bit overflow will occur." << llendl;  	} +	 +	F32* new_pos = (F32*) _mm_malloc(new_count*16, 16); +	F32* new_norm = (F32*) _mm_malloc(new_count*16, 16); +	F32* new_tc = (F32*) _mm_malloc((new_count*8+0xF) & ~0xF, 16); + +	LLVector4a::memcpyNonAliased16(new_pos, mPositions, new_count*4); +	LLVector4a::memcpyNonAliased16(new_norm, mNormals, new_count*4); +	LLVector4a::memcpyNonAliased16(new_tc, mTexCoords, new_count*2); + +	_mm_free(mPositions); +	_mm_free(mNormals); +	_mm_free(mTexCoords); + +	mPositions = new_pos; +	mNormals = new_norm; +	mTexCoords = new_tc; + +	mNumVertices = new_count; + +	LLVector4a* dst_pos = (LLVector4a*) mPositions+offset; +	LLVector2* dst_tc = (LLVector2*) mTexCoords+offset; +	LLVector4a* dst_norm = (LLVector4a*) mNormals+offset; + +	LLVector4a* src_pos = (LLVector4a*) face.mPositions; +	LLVector2* src_tc = (LLVector2*) face.mTexCoords; +	LLVector4a* src_norm = (LLVector4a*) face.mNormals; + +	LLMatrix4a mat, norm_mat; +	mat.loadu(mat_in); +	norm_mat.loadu(norm_mat_in); +  	for (U32 i = 0; i < face.mNumVertices; ++i)  	{ -		VertexData v = face.mVertices[i]; -		v.mPosition = v.mPosition*mat; -		v.mNormal = v.mNormal * norm_mat; +		mat.affineTransform(src_pos[i], dst_pos[i]); +		norm_mat.rotate(src_norm[i], dst_norm[i]); +		dst_norm[i].normalize3Fast(); -		v.mNormal.normalize(); - -		mVertices.push_back(v); +		dst_tc[i] = src_tc[i];  		if (offset == 0 && i == 0)  		{ -			mExtents[0] = mExtents[1] = v.mPosition; +			mExtents[0] = mExtents[1] = LLVector3((F32*) &(dst_pos[i].mQ));  		}  		else  		{ -			update_min_max(mExtents[0], mExtents[1], v.mPosition); +			update_min_max(mExtents[0], mExtents[1], (F32*) &(dst_pos[i].mQ));  		}  	} -	 + +	new_count = mNumIndices + face.mNumIndices; +	U16* new_indices = (U16*) _mm_malloc((new_count*2+0xF) & ~0xF, 16); +	LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, new_count/2); +	_mm_free(mIndices); +	mIndices = new_indices; +	mNumIndices = new_count; + +	U16* dst_idx = mIndices+offset; +  	for (U32 i = 0; i < face.mNumIndices; ++i)  	{ -		mIndices.push_back(face.mIndices[i]+offset); +		dst_idx[i] = face.mIndices[i]+offset;  	}  } @@ -5828,21 +5843,20 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  	num_vertices = mNumS*mNumT;  	num_indices = (mNumS-1)*(mNumT-1)*6; -	mVertices.resize(num_vertices); -  	if (!partial_build)  	{ -		mIndices.resize(num_indices); +		resizeVertices(num_vertices); +		resizeIndices(num_indices);  		if ((volume->getParams().getSculptType() & LL_SCULPT_TYPE_MASK) != LL_SCULPT_TYPE_MESH)  		{  			mEdge.resize(num_indices);  		}  	} -	else -	{ -		mHasBinormals = FALSE; -	} + +	LLVector4a* pos = (LLVector4a*) mPositions; +	LLVector4a* norm = (LLVector4a*) mNormals; +	LLVector2* tc = (LLVector2*) mTexCoords;  	S32 begin_stex = llfloor( profile[mBeginS].mV[2] );  	S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; @@ -5894,21 +5908,21 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  				i = mBeginS + s + max_s*t;  			} -			mVertices[cur_vertex].mPosition = mesh[i].mPos; -			mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); +			pos[cur_vertex].load3(mesh[i].mPos.mV); +			tc[cur_vertex] = LLVector2(ss,tt); -			mVertices[cur_vertex].mNormal = LLVector3(0,0,0); -			mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); +			norm[cur_vertex].clear();  			cur_vertex++;  			if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0)  			{ -				mVertices[cur_vertex].mPosition = mesh[i].mPos; -				mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); + +				pos[cur_vertex].load3(mesh[i].mPos.mV); +				tc[cur_vertex] = LLVector2(ss,tt); -				mVertices[cur_vertex].mNormal = LLVector3(0,0,0); -				mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); +				norm[cur_vertex].clear(); +				  				cur_vertex++;  			}  		} @@ -5926,12 +5940,10 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  			i = mBeginS + s + max_s*t;  			ss = profile[mBeginS + s].mV[2] - begin_stex; -			mVertices[cur_vertex].mPosition = mesh[i].mPos; -			mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); -		 -			mVertices[cur_vertex].mNormal = LLVector3(0,0,0); -			mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); - +			pos[cur_vertex].load3(mesh[i].mPos.mV); +			tc[cur_vertex] = LLVector2(ss,tt); +			norm[cur_vertex].clear();  +			  			cur_vertex++;  		}  	} @@ -5942,10 +5954,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  	LLVector3& face_max = mExtents[1];  	mCenter.clearVec(); -	face_min = face_max = mVertices[0].mPosition; +	face_min = face_max = LLVector3((F32*) &(pos[i].mQ)); +  	for (U32 i = 1; i < mNumVertices; ++i)  	{ -		update_min_max(face_min, face_max, mVertices[i].mPosition); +		update_min_max(face_min, face_max, (F32*) &(pos[i].mQ));  	}  	mCenter = (face_min + face_max) * 0.5f; @@ -5956,18 +5969,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  	if (!partial_build)  	{ -#if GEN_TRI_STRIP -		mTriStrip.clear(); -#endif -  		// Now we generate the indices.  		for (t = 0; t < (mNumT-1); t++)  		{ -#if GEN_TRI_STRIP -			//prepend terminating index to strip -			mTriStrip.push_back(mNumS*t); -#endif -  			for (s = 0; s < (mNumS-1); s++)  			{	  				mIndices[cur_index++] = s   + mNumS*t;			//bottom left @@ -5977,16 +5981,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  				mIndices[cur_index++] = s+1 + mNumS*t;			//bottom right  				mIndices[cur_index++] = s+1 + mNumS*(t+1);		//top right -#if GEN_TRI_STRIP -				if (s == 0) -				{ -					mTriStrip.push_back(s+mNumS*t); -					mTriStrip.push_back(s+mNumS*(t+1)); -				} -				mTriStrip.push_back(s+1+mNumS*t); -				mTriStrip.push_back(s+1+mNumS*(t+1)); -#endif -				  				mEdge[cur_edge++] = (mNumS-1)*2*t+s*2+1;						//bottom left/top right neighbor face   				if (t < mNumT-2) {												//top right/top left neighbor face   					mEdge[cur_edge++] = (mNumS-1)*2*(t+1)+s*2+1; @@ -6027,52 +6021,55 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  				}  				mEdge[cur_edge++] = (mNumS-1)*2*t+s*2;							//top right/bottom left neighbor face	  			} -#if GEN_TRI_STRIP -			//append terminating vertex to strip -			mTriStrip.push_back(mNumS-1+mNumS*(t+1)); -#endif  		} - -#if GEN_TRI_STRIP -		if (mTriStrip.size()%2 == 1) -		{ -			mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); -		} -#endif  	}  	//generate normals   	for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle  	{  		const U16* idx = &(mIndices[i*3]); -			 -		VertexData* v[] =  -		{	&mVertices[idx[0]], &mVertices[idx[1]], &mVertices[idx[2]] }; -					 -		//calculate triangle normal -		LLVector3 norm = (v[0]->mPosition-v[1]->mPosition) % (v[0]->mPosition-v[2]->mPosition); +		 -		v[0]->mNormal += norm; -		v[1]->mNormal += norm; -		v[2]->mNormal += norm; +		LLVector4a* v[] =  +		{	pos+idx[0], pos+idx[1], pos+idx[2] }; +		 +		LLVector4a* n[] =  +		{	norm+idx[0], norm+idx[1], norm+idx[2] }; +		 +		//calculate triangle normal +		LLVector4a a, b, c; +		 +		a.setSub(*v[0], *v[1]); +		b.setSub(*v[0], *v[2]); +		c.setCross3(a,b); +		n[0]->add(c); +		n[1]->add(c); +		n[2]->add(c); +		  		//even out quad contributions -		v[i%2+1]->mNormal += norm; +		n[i%2+1]->add(c);  	}  	// adjust normals based on wrapping and stitching -	BOOL s_bottom_converges = ((mVertices[0].mPosition - mVertices[mNumS*(mNumT-2)].mPosition).magVecSquared() < 0.000001f); -	BOOL s_top_converges = ((mVertices[mNumS-1].mPosition - mVertices[mNumS*(mNumT-2)+mNumS-1].mPosition).magVecSquared() < 0.000001f); +	LLVector4a top; +	top.setSub(pos[0], pos[mNumS*(mNumT-2)]); +	BOOL s_bottom_converges = (top.dot3(top) < 0.000001f); + +	top.setSub(pos[mNumS-1], pos[mNumS*(mNumT-2)+mNumS-1]); +	BOOL s_top_converges = (top.dot3(top) < 0.000001f); +  	if (sculpt_stitching == LL_SCULPT_TYPE_NONE)  // logic for non-sculpt volumes  	{  		if (volume->getPath().isOpen() == FALSE)  		{ //wrap normals on T  			for (S32 i = 0; i < mNumS; i++)  			{ -				LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal; -				mVertices[i].mNormal = norm; -				mVertices[mNumS*(mNumT-1)+i].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]); +				norm[i] = n; +				norm[mNumS*(mNumT-1)+i] = n;  			}  		} @@ -6080,9 +6077,10 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  		{ //wrap normals on S  			for (S32 i = 0; i < mNumT; i++)  			{ -				LLVector3 norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal; -				mVertices[mNumS * i].mNormal = norm; -				mVertices[mNumS * i+mNumS-1].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]); +				norm[mNumS * i] = n; +				norm[mNumS * i+mNumS-1] = n;  			}  		} @@ -6093,7 +6091,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  			{ //all lower S have same normal  				for (S32 i = 0; i < mNumT; i++)  				{ -					mVertices[mNumS*i].mNormal = LLVector3(1,0,0); +					norm[mNumS*i].set(1,0,0);  				}  			} @@ -6101,7 +6099,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  			{ //all upper S have same normal  				for (S32 i = 0; i < mNumT; i++)  				{ -					mVertices[mNumS*i+mNumS-1].mNormal = LLVector3(-1,0,0); +					norm[mNumS*i+mNumS-1].set(-1,0,0);  				}  			}  		} @@ -6129,30 +6127,33 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  		{  			// average normals for north pole -			LLVector3 average(0.0, 0.0, 0.0); +			LLVector4a average; +			average.clear(); +  			for (S32 i = 0; i < mNumS; i++)  			{ -				average += mVertices[i].mNormal; +				average.add(norm[i]);  			}  			// set average  			for (S32 i = 0; i < mNumS; i++)  			{ -				mVertices[i].mNormal = average; +				norm[i] = average;  			}  			// average normals for south pole -			average = LLVector3(0.0, 0.0, 0.0); +			average.clear(); +  			for (S32 i = 0; i < mNumS; i++)  			{ -				average += mVertices[i + mNumS * (mNumT - 1)].mNormal; +				average.add(norm[i + mNumS * (mNumT - 1)]);  			}  			// set average  			for (S32 i = 0; i < mNumS; i++)  			{ -				mVertices[i + mNumS * (mNumT - 1)].mNormal = average; +				norm[i + mNumS * (mNumT - 1)] = average;  			}  		} @@ -6162,23 +6163,22 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  		{  			for (S32 i = 0; i < mNumT; i++)  			{ -				LLVector3 norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal; -				mVertices[mNumS * i].mNormal = norm; -				mVertices[mNumS * i+mNumS-1].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]); +				norm[mNumS * i] = n; +				norm[mNumS * i+mNumS-1] = n;  			}  		} - -		  		if (wrap_t)  		{  			for (S32 i = 0; i < mNumS; i++)  			{ -				LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal; -				mVertices[i].mNormal = norm; -				mVertices[mNumS*(mNumT-1)+i].mNormal = norm; +				LLVector4a n; +				n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]); +				norm[i] = n; +				norm[mNumS*(mNumT-1)+i] = n;  			} -			  		}  	} @@ -6188,7 +6188,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)  // Finds binormal based on three vertices with texture coordinates.  // Fills in dummy values if the triangle has degenerate texture coordinates. -LLVector3 calc_binormal_from_triangle(  +void calc_binormal_from_triangle(LLVector4a& binormal, +  	const LLVector3& pos0,  	const LLVector2& tex0,  	const LLVector3& pos1, @@ -6196,33 +6197,42 @@ LLVector3 calc_binormal_from_triangle(  	const LLVector3& pos2,  	const LLVector2& tex2)  { -	LLVector3 rx0( pos0.mV[VX], tex0.mV[VX], tex0.mV[VY] ); -	LLVector3 rx1( pos1.mV[VX], tex1.mV[VX], tex1.mV[VY] ); -	LLVector3 rx2( pos2.mV[VX], tex2.mV[VX], tex2.mV[VY] ); +	LLVector4a rx0; rx0.set( pos0.mV[VX], tex0.mV[VX], tex0.mV[VY] ); +	LLVector4a rx1; rx1.set( pos1.mV[VX], tex1.mV[VX], tex1.mV[VY] ); +	LLVector4a rx2; rx2.set( pos2.mV[VX], tex2.mV[VX], tex2.mV[VY] ); -	LLVector3 ry0( pos0.mV[VY], tex0.mV[VX], tex0.mV[VY] ); -	LLVector3 ry1( pos1.mV[VY], tex1.mV[VX], tex1.mV[VY] ); -	LLVector3 ry2( pos2.mV[VY], tex2.mV[VX], tex2.mV[VY] ); +	LLVector4a ry0; ry0.set( pos0.mV[VY], tex0.mV[VX], tex0.mV[VY] ); +	LLVector4a ry1; ry1.set( pos1.mV[VY], tex1.mV[VX], tex1.mV[VY] ); +	LLVector4a ry2; ry2.set( pos2.mV[VY], tex2.mV[VX], tex2.mV[VY] ); -	LLVector3 rz0( pos0.mV[VZ], tex0.mV[VX], tex0.mV[VY] ); -	LLVector3 rz1( pos1.mV[VZ], tex1.mV[VX], tex1.mV[VY] ); -	LLVector3 rz2( pos2.mV[VZ], tex2.mV[VX], tex2.mV[VY] ); +	LLVector4a rz0; rz0.set( pos0.mV[VZ], tex0.mV[VX], tex0.mV[VY] ); +	LLVector4a rz1; rz1.set( pos1.mV[VZ], tex1.mV[VX], tex1.mV[VY] ); +	LLVector4a rz2; rz2.set( pos2.mV[VZ], tex2.mV[VX], tex2.mV[VY] ); -	LLVector3 r0 = (rx0 - rx1) % (rx0 - rx2); -	LLVector3 r1 = (ry0 - ry1) % (ry0 - ry2); -	LLVector3 r2 = (rz0 - rz1) % (rz0 - rz2); +	LLVector4a lhs, rhs; + +	LLVector4a r0;  +	lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); +	r0.setCross3(lhs, rhs); +		 +	LLVector4a r1; +	lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); +	r1.setCross3(lhs, rhs); + +	LLVector4a r2; +	lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); +	r2.setCross3(lhs, rhs); -	if( r0.mV[VX] && r1.mV[VX] && r2.mV[VX] ) +	if( r0[VX] && r1[VX] && r2[VX] )  	{ -		LLVector3 binormal( -				-r0.mV[VZ] / r0.mV[VX], -				-r1.mV[VZ] / r1.mV[VX], -				-r2.mV[VZ] / r2.mV[VX]); +		binormal.set( +				-r0[VZ] / r0[VX], +				-r1[VZ] / r1[VX], +				-r2[VZ] / r2[VX]);  		// binormal.normVec(); -		return binormal;  	}  	else  	{ -		return LLVector3( 0, 1 , 0 ); +		binormal.set( 0, 1 , 0 );  	}  } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 17be642d4a..911db6f94b 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -40,6 +40,7 @@ class LLPathParams;  class LLVolumeParams;  class LLProfile;  class LLPath; +class LLVector4a;  class LLVolumeFace;  class LLVolume; @@ -791,6 +792,19 @@ public:  class LLVolumeFace  {  public: +	class VertexData +	{ +	public: +		LLVector3 mPosition; +		LLVector3 mNormal; +		LLVector3 mBinormal; +		LLVector2 mTexCoord; + +		bool operator<(const VertexData& rhs) const; +		bool operator==(const VertexData& rhs) const; +		bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const; +	}; +  	LLVolumeFace() :   		mID(0),  		mTypeMask(0), @@ -808,26 +822,18 @@ public:  	{  	} +	~LLVolumeFace(); +  	BOOL create(LLVolume* volume, BOOL partial_build = FALSE);  	void createBinormals();  	void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform);  	void resizeVertices(S32 num_verts); +	void allocateBinormals(S32 num_verts);  	void resizeIndices(S32 num_indices); +	void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx); -	class VertexData -	{ -	public: -		LLVector3 mPosition; -		LLVector3 mNormal; -		LLVector3 mBinormal; -		LLVector2 mTexCoord; - -		bool operator<(const VertexData& rhs) const; -		bool operator==(const VertexData& rhs) const; -		bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const; -	};  	class VertexMapData : public LLVolumeFace::VertexData  	{ @@ -1051,7 +1057,8 @@ public:  std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params); -LLVector3 calc_binormal_from_triangle( +void calc_binormal_from_triangle( +		LLVector4a& binormal,  		const LLVector3& pos0,  		const LLVector2& tex0,  		const LLVector3& pos1, @@ -1060,7 +1067,7 @@ LLVector3 calc_binormal_from_triangle(  		const LLVector2& tex2);  BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, +BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,  							F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided); diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index 4fb8f5266e..0fa0e80cb7 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -1201,14 +1201,14 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLFace* face, const LLMeshSk  	if (!buff ||   		buff->getTypeMask() != data_mask || -		buff->getRequestedVerts() != vol_face.mVertices.size()) +		buff->getRequestedVerts() != vol_face.mNumVertices)  	{  		face->setGeomIndex(0);  		face->setIndicesIndex(0); -		face->setSize(vol_face.mVertices.size(), vol_face.mIndices.size()); +		face->setSize(vol_face.mNumVertices, vol_face.mNumIndices);  		face->mVertexBuffer = new LLVertexBuffer(data_mask, 0); -		face->mVertexBuffer->allocateBuffer(vol_face.mVertices.size(), vol_face.mIndices.size(), true); +		face->mVertexBuffer->allocateBuffer(vol_face.mNumVertices, vol_face.mNumIndices, true);  		U16 offset = 0; diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 53dc335c16..77e8a6fdf9 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -928,8 +928,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  {  	LLFastTimer t(FTM_FACE_GET_GEOM);  	const LLVolumeFace &vf = volume.getVolumeFace(f); -	S32 num_vertices = (S32)vf.mVertices.size(); -	S32 num_indices = LLPipeline::sUseTriStrips ? (S32)vf.mTriStrip.size() : (S32) vf.mIndices.size(); +	S32 num_vertices = (S32)vf.mNumVertices; +	S32 num_indices = (S32) vf.mNumIndices;  	if (mVertexBuffer.notNull())  	{ @@ -1128,19 +1128,9 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  	if (full_rebuild)  	{  		mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex); -		if (LLPipeline::sUseTriStrips) +		for (U32 i = 0; i < (U32) num_indices; i++)  		{ -			for (U32 i = 0; i < (U32) num_indices; i++) -			{ -				*indicesp++ = vf.mTriStrip[i] + index_offset; -			} -		} -		else -		{ -			for (U32 i = 0; i < (U32) num_indices; i++) -			{ -				*indicesp++ = vf.mIndices[i] + index_offset; -			} +			*indicesp++ = vf.mIndices[i] + index_offset;  		}  	} @@ -1214,28 +1204,41 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  	bool bake_sunlight = !getTextureEntry()->getFullbright() &&  		!mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL); +	//VECTORIZE THIS  	for (S32 i = 0; i < num_vertices; i++)  	{ +		LLVector3 vf_binormal; +		if (vf.mBinormals) +		{ +			vf_binormal.set(vf.mBinormals+i*4); +		} + +		LLVector3 vf_normal; +		vf_normal.set(vf.mNormals+i*4); + +		LLVector3 vf_position; +		vf_position.set(vf.mPositions+i*4); +  		if (rebuild_tcoord)  		{ -			LLVector2 tc = vf.mVertices[i].mTexCoord; +			LLVector2 tc(vf.mTexCoords+i*2);  			if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)  			{ -				LLVector3 vec = vf.mVertices[i].mPosition;  +				LLVector3 vec = vf_position;  				vec.scaleVec(scale);  				switch (texgen)  				{  					case LLTextureEntry::TEX_GEN_PLANAR: -						planarProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec); +						planarProjection(tc, vf_normal, vf.mCenter, vec);  						break;  					case LLTextureEntry::TEX_GEN_SPHERICAL: -						sphericalProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec); +						sphericalProjection(tc, vf_normal, vf.mCenter, vec);  						break;  					case LLTextureEntry::TEX_GEN_CYLINDRICAL: -						cylindricalProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec); +						cylindricalProjection(tc, vf_normal, vf.mCenter, vec);  						break;  					default:  						break; @@ -1345,10 +1348,10 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  			if (bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1))  			{ -				LLVector3 tangent = vf.mVertices[i].mBinormal % vf.mVertices[i].mNormal; +				LLVector3 tangent = vf_binormal % vf_normal;  				LLMatrix3 tangent_to_object; -				tangent_to_object.setRows(tangent, vf.mVertices[i].mBinormal, vf.mVertices[i].mNormal); +				tangent_to_object.setRows(tangent, vf_binormal, vf_normal);  				LLVector3 binormal = binormal_dir * tangent_to_object;  				binormal = binormal * mat_normal; @@ -1366,12 +1369,12 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		if (rebuild_pos)  		{ -			*vertices++ = vf.mVertices[i].mPosition * mat_vert; +			*vertices++ = vf_position * mat_vert;  		}  		if (rebuild_normal)  		{ -			LLVector3 normal = vf.mVertices[i].mNormal * mat_normal; +			LLVector3 normal = vf_normal * mat_normal;  			normal.normVec();  			*normals++ = normal; @@ -1379,21 +1382,21 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		if (rebuild_binormal)  		{ -			LLVector3 binormal = vf.mVertices[i].mBinormal * mat_normal; +			LLVector3 binormal = vf_binormal * mat_normal;  			binormal.normVec();  			*binormals++ = binormal;  		}  		if (rebuild_weights && vf.mWeights.size() > i)  		{ -			*weights++ = vf.mWeights[i]; +			(*weights++) = vf.mWeights[i];  		}  		if (rebuild_color)  		{  			if (bake_sunlight)  			{ -				LLVector3 normal = vf.mVertices[i].mNormal * mat_normal; +				LLVector3 normal = vf_normal * mat_normal;  				normal.normVec();  				F32 da = normal * gPipeline.mSunDir; diff --git a/indra/newview/llfloaterimagepreview.cpp b/indra/newview/llfloaterimagepreview.cpp index 159e4b41ca..dae301ae29 100644 --- a/indra/newview/llfloaterimagepreview.cpp +++ b/indra/newview/llfloaterimagepreview.cpp @@ -852,6 +852,7 @@ S8 LLImagePreviewSculpted::getType() const  void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)  {  +#if 0 //VECTORIZE THIS  	mCameraDistance = distance;  	mCameraZoom = 1.f;  	mCameraPitch = 0.f; @@ -892,6 +893,7 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)  	{  		*(index_strider++) = vf.mIndices[i];  	} +#endif  } @@ -901,7 +903,7 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance)  BOOL LLImagePreviewSculpted::render()  {  	mNeedsUpdate = FALSE; - +#if 0 //VECTORIZE THIS  	LLGLSUIDefault def;  	LLGLDisable no_blend(GL_BLEND);  	LLGLEnable cull(GL_CULL_FACE); @@ -959,7 +961,7 @@ BOOL LLImagePreviewSculpted::render()  	mVertexBuffer->draw(LLRender::TRIANGLES, num_indices, 0);  	gGL.popMatrix(); -		 +#endif  	return TRUE;  } diff --git a/indra/newview/llhudicon.cpp b/indra/newview/llhudicon.cpp index 28b0e7356a..c7ad0bde7e 100644 --- a/indra/newview/llhudicon.cpp +++ b/indra/newview/llhudicon.cpp @@ -271,6 +271,7 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en  	LLVector3 upper_left = icon_position - (x_scale * 0.5f) + y_scale;  	LLVector3 upper_right = icon_position + (x_scale * 0.5f) + y_scale; +#if 0 //VECTORIZE THIS  	F32 t = 0.f;  	LLVector3 dir = end-start; @@ -284,6 +285,7 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en  		}  		return TRUE;  	} +#endif  	return FALSE;  } diff --git a/indra/newview/llhudtext.cpp b/indra/newview/llhudtext.cpp index 9ed5d13831..64c01d0937 100644 --- a/indra/newview/llhudtext.cpp +++ b/indra/newview/llhudtext.cpp @@ -206,6 +206,8 @@ BOOL LLHUDText::lineSegmentIntersect(const LLVector3& start, const LLVector3& en  			gGL.end();  		} +#if 0 //VECTORIZE THIS +  		LLVector3 dir = end-start;  		F32 t = 0.f; @@ -218,6 +220,9 @@ BOOL LLHUDText::lineSegmentIntersect(const LLVector3& start, const LLVector3& en  				return TRUE;  			}  		} + +#endif +  	}  	return FALSE; diff --git a/indra/newview/llviewercamera.cpp b/indra/newview/llviewercamera.cpp index aa82c216d9..7eca276358 100644 --- a/indra/newview/llviewercamera.cpp +++ b/indra/newview/llviewercamera.cpp @@ -788,13 +788,14 @@ BOOL LLViewerCamera::areVertsVisible(LLViewerObject* volumep, BOOL all_verts)  	LLMatrix4 render_mat(vo_volume->getRenderRotation(), LLVector4(vo_volume->getRenderPosition()));  	num_faces = volume->getNumVolumeFaces(); +	//VECTORIZE THIS  	for (i = 0; i < num_faces; i++)  	{  		const LLVolumeFace& face = volume->getVolumeFace(i); -		for (U32 v = 0; v < face.mVertices.size(); v++) +		for (U32 v = 0; v < face.mNumVertices; v++)  		{ -			LLVector4 vec = LLVector4(face.mVertices[v].mPosition) * mat; +			LLVector4 vec = LLVector4(face.mPositions+v*4) * mat;  			if (drawablep->isActive())  			{ diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp index a82afbeb76..edb97eef24 100644 --- a/indra/newview/llvograss.cpp +++ b/indra/newview/llvograss.cpp @@ -640,6 +640,7 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en  		position.mV[2] += blade_height;  		v[3]    = v1 = position + mRegionp->getOriginAgent(); +#if 0 //VECTORIZE THIS  		F32 a,b,t; @@ -703,6 +704,7 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en  				}  			}  		} +#endif  	}  	return ret; diff --git a/indra/newview/llvotextbubble.cpp b/indra/newview/llvotextbubble.cpp index 428ef20006..339da3c0bf 100644 --- a/indra/newview/llvotextbubble.cpp +++ b/indra/newview/llvotextbubble.cpp @@ -45,6 +45,7 @@  #include "llviewertexturelist.h"  #include "llvolume.h"  #include "pipeline.h" +#include "llvector4a.h"  #include "llviewerregion.h"  LLVOTextBubble::LLVOTextBubble(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp) @@ -217,7 +218,7 @@ void LLVOTextBubble::updateFaceSize(S32 idx)  	else  	{  		const LLVolumeFace& vol_face = getVolume()->getVolumeFace(idx); -		face->setSize(vol_face.mVertices.size(), vol_face.mIndices.size()); +		face->setSize(vol_face.mNumVertices, vol_face.mNumIndices);  	}  } @@ -235,19 +236,37 @@ void LLVOTextBubble::getGeometry(S32 idx,  	const LLVolumeFace& face = getVolume()->getVolumeFace(idx); -	LLVector3 pos = getPositionAgent(); +	LLVector4a pos; +	pos.load3(getPositionAgent().mV); + +	LLVector4a scale; +	scale.load3(getScale().mV); +  	LLColor4U color = LLColor4U(getTE(idx)->getColor());  	U32 offset = mDrawable->getFace(idx)->getGeomIndex(); -	for (U32 i = 0; i < face.mVertices.size(); i++) +	LLVector4a* dst_pos = (LLVector4a*) verticesp.get(); +	LLVector4a* src_pos = (LLVector4a*) face.mPositions; +	 +	LLVector4a* dst_norm = (LLVector4a*) normalsp.get(); +	LLVector4a* src_norm  = (LLVector4a*) face.mNormals; +	 +	LLVector2* dst_tc = (LLVector2*) texcoordsp.get(); +	LLVector2* src_tc = (LLVector2*) face.mTexCoords; + +	LLVector4a::memcpyNonAliased16((F32*) dst_norm, (F32*) src_norm, face.mNumVertices*4); +	LLVector4a::memcpyNonAliased16((F32*) dst_tc, (F32*) src_tc, face.mNumVertices*2); +	 +	 +	for (U32 i = 0; i < face.mNumVertices; i++)  	{ -		*verticesp++ = face.mVertices[i].mPosition.scaledVec(getScale()) + pos; -		*normalsp++ = face.mVertices[i].mNormal; -		*texcoordsp++ = face.mVertices[i].mTexCoord; +		LLVector4a t; +		t.setMul(src_pos[i], scale); +		dst_pos[i].setAdd(t, pos);  		*colorsp++ = color;  	} -	for (U32 i = 0; i < face.mIndices.size(); i++) +	for (U32 i = 0; i < face.mNumIndices; i++)  	{  		*indicesp++ = face.mIndices[i] + offset;  	} diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index cea964a100..7d80a66041 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -60,6 +60,7 @@  #include "llflexibleobject.h"  #include "llsky.h"  #include "lltexturefetch.h" +#include "llvector4a.h"  #include "llviewercamera.h"  #include "llviewertexturelist.h"  #include "llviewerobjectlist.h" @@ -1601,14 +1602,8 @@ void LLVOVolume::updateFaceSize(S32 idx)  	else  	{  		const LLVolumeFace& vol_face = getVolume()->getVolumeFace(idx); -		if (LLPipeline::sUseTriStrips) -		{ -			facep->setSize(vol_face.mVertices.size(), vol_face.mTriStrip.size()); -		} -		else -		{ -			facep->setSize(vol_face.mVertices.size(), vol_face.mIndices.size()); -		} +		facep->setSize(vol_face.mNumVertices, vol_face.mNumIndices); +		  	}  } @@ -1863,21 +1858,25 @@ bool LLVOVolume::hasMedia() const  LLVector3 LLVOVolume::getApproximateFaceNormal(U8 face_id)  {  	LLVolume* volume = getVolume(); -	LLVector3 result; +	LLVector4a result; +	result.clear(); + +	LLVector3 ret;  	if (volume && face_id < volume->getNumVolumeFaces())  	{  		const LLVolumeFace& face = volume->getVolumeFace(face_id); -		for (S32 i = 0; i < (S32)face.mVertices.size(); ++i) +		for (S32 i = 0; i < (S32)face.mNumVertices; ++i)  		{ -			result += face.mVertices[i].mNormal; +			result.add(*((LLVector4a*) face.mNormals+i*4));  		} -		result = volumeDirectionToAgent(result); -		result.normVec(); +		LLVector3 ret((F32*) &result.mQ); +		ret = volumeDirectionToAgent(ret); +		ret.normVec();  	} -	return result; +	return ret;  }  void LLVOVolume::requestMediaDataUpdate(bool isNew) @@ -3032,7 +3031,7 @@ F32 LLVOVolume::getBinRadius()  			for (S32 i = 0; i < volume->getNumVolumeFaces(); ++i)  			{  				const LLVolumeFace& face = volume->getVolumeFace(i); -				vert_count += face.mVertices.size(); +				vert_count += face.mNumVertices;  			}  			scale = 1.f/llmax(vert_count/1024.f, 1.f); | 
