diff options
| author | Dave Parks <davep@lindenlab.com> | 2011-12-21 15:55:14 -0600 | 
|---|---|---|
| committer | Dave Parks <davep@lindenlab.com> | 2011-12-21 15:55:14 -0600 | 
| commit | 5546041301a67da6e471a433b29374bc75a4f0d0 (patch) | |
| tree | 346e708da08c1d469a89adef6764851e99ea861c | |
| parent | 5affaf264da937cdf825bf174c3905a6063b5336 (diff) | |
SH-2794 WIP -- work on getting glMapBuffer and friends to behave
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 52 | ||||
| -rw-r--r-- | indra/llrender/llvertexbuffer.h | 22 | ||||
| -rw-r--r-- | indra/newview/llface.cpp | 194 | ||||
| -rwxr-xr-x | indra/newview/llfloatermodelpreview.cpp | 12 | ||||
| -rw-r--r-- | indra/newview/llvosurfacepatch.cpp | 2 | 
5 files changed, 199 insertions, 83 deletions
| diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 20a450fbfb..f8c2a55820 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -148,7 +148,7 @@ U32 wpo2(U32 i)  	return r;  } -U8* LLVBOPool::allocate(U32& name, U32 size) +volatile U8* LLVBOPool::allocate(U32& name, U32 size)  {  	llassert(nhpo2(size) == size); @@ -159,7 +159,7 @@ U8* LLVBOPool::allocate(U32& name, U32 size)  		mFreeList.resize(i+1);  	} -	U8* ret = NULL; +	volatile U8* ret = NULL;  	if (mFreeList[i].empty())  	{ @@ -188,7 +188,7 @@ U8* LLVBOPool::allocate(U32& name, U32 size)  	return ret;  } -void LLVBOPool::release(U32 name, U8* buffer, U32 size) +void LLVBOPool::release(U32 name, volatile U8* buffer, U32 size)  {  	llassert(nhpo2(size) == size); @@ -221,7 +221,7 @@ void LLVBOPool::cleanup()  			if (r.mClientData)  			{ -				ll_aligned_free_16(r.mClientData); +				ll_aligned_free_16((void*) r.mClientData);  			}  			l.pop_front(); @@ -1042,7 +1042,7 @@ void LLVertexBuffer::destroyGLBuffer()  		}  		else  		{ -			FREE_MEM(sPrivatePoolp, mMappedData) ; +			FREE_MEM(sPrivatePoolp, (void*) mMappedData) ;  			mMappedData = NULL;  			mEmpty = TRUE;  		} @@ -1063,7 +1063,7 @@ void LLVertexBuffer::destroyGLIndices()  		}  		else  		{ -			FREE_MEM(sPrivatePoolp, mMappedIndexData) ; +			FREE_MEM(sPrivatePoolp, (void*) mMappedIndexData) ;  			mMappedIndexData = NULL;  			mEmpty = TRUE;  		} @@ -1282,8 +1282,11 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, S32 index, S32 count)  	return true;  } +static LLFastTimer::DeclareTimer FTM_VBO_MAP_BUFFER_RANGE("VBO Map Range"); +static LLFastTimer::DeclareTimer FTM_VBO_MAP_BUFFER("VBO Map"); +  // Map for data access -U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range) +volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range)  {  	bindGLBuffer(true);  	LLMemType mt2(LLMemType::MTYPE_VERTEX_MAP_BUFFER); @@ -1346,13 +1349,14 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran  			}  			else  			{ -				U8* src = NULL; +				volatile U8* src = NULL;  				waitFence();  				if (gGLManager.mHasMapBufferRange)  				{  					if (map_range)  					{  #ifdef GL_ARB_map_buffer_range +						LLFastTimer t(FTM_VBO_MAP_BUFFER_RANGE);  						S32 offset = mOffsets[type] + sTypeSize[type]*index;  						S32 length = (sTypeSize[type]*count+0xF) & ~0xF;  						src = (U8*) glMapBufferRange(GL_ARRAY_BUFFER_ARB, offset, length,  @@ -1376,6 +1380,7 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran  							}  						} +						LLFastTimer t(FTM_VBO_MAP_BUFFER);  						src = (U8*) glMapBufferRange(GL_ARRAY_BUFFER_ARB, 0, mSize,   							GL_MAP_WRITE_BIT |   							GL_MAP_FLUSH_EXPLICIT_BIT); @@ -1403,7 +1408,7 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran  				llassert(src != NULL); -				mMappedData = LL_NEXT_ALIGNED_ADDRESS<U8>(src); +				mMappedData = LL_NEXT_ALIGNED_ADDRESS<volatile U8>(src);  				mAlignedOffset = mMappedData - src;  				stop_glerror(); @@ -1458,7 +1463,11 @@ U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_ran  	}  } -U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range) + +static LLFastTimer::DeclareTimer FTM_VBO_MAP_INDEX_RANGE("IBO Map Range"); +static LLFastTimer::DeclareTimer FTM_VBO_MAP_INDEX("IBO Map"); + +volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)  {  	LLMemType mt2(LLMemType::MTYPE_VERTEX_MAP_BUFFER);  	bindGLIndices(true); @@ -1530,13 +1539,14 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)  			}  			else  			{ -				U8* src = NULL; +				volatile U8* src = NULL;  				waitFence();  				if (gGLManager.mHasMapBufferRange)  				{  					if (map_range)  					{  #ifdef GL_ARB_map_buffer_range +						LLFastTimer t(FTM_VBO_MAP_INDEX_RANGE);  						S32 offset = sizeof(U16)*index;  						S32 length = sizeof(U16)*count;  						src = (U8*) glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER_ARB, offset, length,  @@ -1548,6 +1558,7 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)  					else  					{  #ifdef GL_ARB_map_buffer_range +						LLFastTimer t(FTM_VBO_MAP_INDEX);  						src = (U8*) glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER_ARB, 0, sizeof(U16)*mNumIndices,   							GL_MAP_WRITE_BIT |   							GL_MAP_FLUSH_EXPLICIT_BIT); @@ -1569,6 +1580,7 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)  				}  				else  				{ +					LLFastTimer t(FTM_VBO_MAP_INDEX);  					map_range = false;  					src = (U8*) glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB);  				} @@ -1619,6 +1631,8 @@ U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range)  	}  } +static LLFastTimer::DeclareTimer FTM_VBO_UNMAP("VBO Unmap"); +  void LLVertexBuffer::unmapBuffer()  {  	LLMemType mt2(LLMemType::MTYPE_VERTEX_UNMAP_BUFFER); @@ -1627,6 +1641,8 @@ void LLVertexBuffer::unmapBuffer()  		return ; //nothing to unmap  	} +	LLFastTimer t(FTM_VBO_UNMAP); +  	bool updated_all = false ;  	if (mMappedData && mVertexLocked) @@ -1644,7 +1660,7 @@ void LLVertexBuffer::unmapBuffer()  					const MappedRegion& region = mMappedVertexRegions[i];  					S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0;  					S32 length = sTypeSize[region.mType]*region.mCount; -					glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, offset, length, mMappedData+offset); +					glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, offset, length, (U8*) mMappedData+offset);  					stop_glerror();  				} @@ -1653,7 +1669,7 @@ void LLVertexBuffer::unmapBuffer()  			else  			{  				stop_glerror(); -				glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, getSize(), mMappedData); +				glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, getSize(), (U8*) mMappedData);  				stop_glerror();  			}  		} @@ -1708,7 +1724,7 @@ void LLVertexBuffer::unmapBuffer()  					const MappedRegion& region = mMappedIndexRegions[i];  					S32 offset = region.mIndex >= 0 ? sizeof(U16)*region.mIndex : 0;  					S32 length = sizeof(U16)*region.mCount; -					glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, offset, length, mMappedIndexData+offset); +					glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, offset, length, (U8*) mMappedIndexData+offset);  					stop_glerror();  				} @@ -1717,7 +1733,7 @@ void LLVertexBuffer::unmapBuffer()  			else  			{  				stop_glerror(); -				glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0, getIndicesSize(), mMappedIndexData); +				glBufferSubDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0, getIndicesSize(), (U8*) mMappedIndexData);  				stop_glerror();  			}  		} @@ -1778,7 +1794,7 @@ template <class T,S32 type> struct VertexBufferStrider  	{  		if (type == LLVertexBuffer::TYPE_INDEX)  		{ -			U8* ptr = vbo.mapIndexBuffer(index, count, map_range); +			volatile U8* ptr = vbo.mapIndexBuffer(index, count, map_range);  			if (ptr == NULL)  			{ @@ -1794,7 +1810,7 @@ template <class T,S32 type> struct VertexBufferStrider  		{  			S32 stride = LLVertexBuffer::sTypeSize[type]; -			U8* ptr = vbo.mapVertexBuffer(type, index, count, map_range); +			volatile U8* ptr = vbo.mapVertexBuffer(type, index, count, map_range);  			if (ptr == NULL)  			{ @@ -2109,7 +2125,7 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask)  {  	LLMemType mt2(LLMemType::MTYPE_VERTEX_SETUP_VERTEX_BUFFER);  	stop_glerror(); -	U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData; +	volatile U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;  	/*if ((data_mask & mTypeMask) != data_mask)  	{ diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index 3e6f6a959a..5b93a0389f 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -60,10 +60,10 @@ public:  	U32 mType;  	//size MUST be a power of 2 -	U8* allocate(U32& name, U32 size); +	volatile U8* allocate(U32& name, U32 size);  	//size MUST be the size provided to allocate that returned the given name -	void release(U32 name, U8* buffer, U32 size); +	void release(U32 name, volatile U8* buffer, U32 size);  	//destroy all records in mFreeList  	void cleanup(); @@ -72,7 +72,7 @@ public:  	{  	public:  		U32 mGLName; -		U8* mClientData; +		volatile U8* mClientData;  	};  	typedef std::list<Record> record_list_t; @@ -208,8 +208,8 @@ public:  	LLVertexBuffer(U32 typemask, S32 usage);  	// map for data access -	U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range); -	U8*		mapIndexBuffer(S32 index, S32 count, bool map_range); +	volatile U8*		mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range); +	volatile U8*		mapIndexBuffer(S32 index, S32 count, bool map_range);  	// set for rendering  	virtual void	setBuffer(U32 data_mask); 	// calls  setupVertexBuffer() if data_mask is not 0 @@ -244,14 +244,14 @@ public:  	S32 getNumVerts() const					{ return mNumVerts; }  	S32 getNumIndices() const				{ return mNumIndices; } -	U8* getIndicesPointer() const			{ return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; } -	U8* getVerticesPointer() const			{ return useVBOs() ? (U8*) mAlignedOffset : mMappedData; } +	volatile U8* getIndicesPointer() const			{ return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; } +	volatile U8* getVerticesPointer() const			{ return useVBOs() ? (U8*) mAlignedOffset : mMappedData; }  	U32 getTypeMask() const					{ return mTypeMask; }  	bool hasDataType(S32 type) const		{ return ((1 << type) & getTypeMask()); }  	S32 getSize() const;  	S32 getIndicesSize() const				{ return mIndicesSize; } -	U8* getMappedData() const				{ return mMappedData; } -	U8* getMappedIndices() const			{ return mMappedIndexData; } +	volatile U8* getMappedData() const				{ return mMappedData; } +	volatile U8* getMappedIndices() const			{ return mMappedIndexData; }  	S32 getOffset(S32 type) const			{ return mOffsets[type]; }  	S32 getUsage() const					{ return mUsage; } @@ -278,8 +278,8 @@ protected:  	U32		mGLIndices;		// GL IBO handle  	U32		mGLArray;		// GL VAO handle -	U8*		mMappedData;	// pointer to currently mapped data (NULL if unmapped) -	U8*		mMappedIndexData;	// pointer to currently mapped indices (NULL if unmapped) +	volatile U8* mMappedData;	// pointer to currently mapped data (NULL if unmapped) +	volatile U8* mMappedIndexData;	// pointer to currently mapped indices (NULL if unmapped)  	BOOL	mVertexLocked;			// if TRUE, vertex buffer is being or has been written to in client memory  	BOOL	mIndexLocked;			// if TRUE, index buffer is being or has been written to in client memory  	BOOL	mFinal;			// if TRUE, buffer can not be mapped again diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 6dbeae6677..941b8db2cf 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -693,6 +693,49 @@ static void xform(LLVector2 &tex_coord, F32 cosAng, F32 sinAng, F32 offS, F32 of  	tex_coord.mV[1] = t;  } +// Transform the texture coordinates for this face. +static void xform4a(LLVector4a &tex_coord, const LLVector4a& trans, const LLVector4Logical& mask, const LLVector4a& rot0, const LLVector4a& rot1, const LLVector4a& offset, const LLVector4a& scale)  +{ +	//tex coord is two coords, <s0, t0, s1, t1> +	LLVector4a st; + +	// Texture transforms are done about the center of the face. +	st.setAdd(tex_coord, trans); +	 +	// Handle rotation +	LLVector4a rot_st; +		 +	// <s0 * cosAng, s0*-sinAng, s1*cosAng, s1*-sinAng> +	LLVector4a s0; +	s0.splat(st, 0); +	LLVector4a s1; +	s1.splat(st, 2); +	LLVector4a ss; +	ss.setSelectWithMask(mask, s1, s0); + +	LLVector4a a;  +	a.setMul(rot0, ss); +	 +	// <t0*sinAng, t0*cosAng, t1*sinAng, t1*cosAng> +	LLVector4a t0; +	t0.splat(st, 1); +	LLVector4a t1; +	t1.splat(st, 3); +	LLVector4a tt; +	tt.setSelectWithMask(mask, t1, t0); + +	LLVector4a b; +	b.setMul(rot1, tt); +		 +	st.setAdd(a,b); + +	// Then scale +	st.mul(scale); + +	// Then offset +	tex_coord.setAdd(st, offset); +} +  bool less_than_max_mag(const LLVector4a& vec)  { @@ -1060,6 +1103,16 @@ static LLFastTimer::DeclareTimer FTM_FACE_GEOM_EMISSIVE("Emissive");  static LLFastTimer::DeclareTimer FTM_FACE_GEOM_WEIGHTS("Weights");  static LLFastTimer::DeclareTimer FTM_FACE_GEOM_BINORMAL("Binormal");  static LLFastTimer::DeclareTimer FTM_FACE_GEOM_INDEX("Index"); +static LLFastTimer::DeclareTimer FTM_FACE_GEOM_INDEX_TAIL("Tail"); +static LLFastTimer::DeclareTimer FTM_FACE_POSITION_STORE("Pos"); +static LLFastTimer::DeclareTimer FTM_FACE_TEXTURE_INDEX_STORE("TexIdx"); +static LLFastTimer::DeclareTimer FTM_FACE_POSITION_PAD("Pad"); +static LLFastTimer::DeclareTimer FTM_FACE_TEX_DEFAULT("Default"); +static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK("Quick"); +static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_NO_XFORM("No Xform"); +static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_XFORM("Xform"); + +static LLFastTimer::DeclareTimer FTM_FACE_TEX_QUICK_PLANAR("Quick Planar");  BOOL LLFace::getGeometryVolume(const LLVolume& volume,  							   const S32 &f, @@ -1104,16 +1157,12 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  	}  	LLStrider<LLVector3> vert; -	LLVector4a* vertices = NULL;  	LLStrider<LLVector2> tex_coords;  	LLStrider<LLVector2> tex_coords2; -	LLVector4a* normals = NULL;  	LLStrider<LLVector3> norm;  	LLStrider<LLColor4U> colors; -	LLVector4a* binormals = NULL;  	LLStrider<LLVector3> binorm;  	LLStrider<U16> indicesp; -	LLVector4a* weights = NULL;  	LLStrider<LLVector4> wght;  	BOOL full_rebuild = force_rebuild || mDrawablep->isState(LLDrawable::REBUILD_VOLUME); @@ -1202,7 +1251,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		LLFastTimer t(FTM_FACE_GEOM_INDEX);  		mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex, mIndicesCount, map_range); -		__m128i* dst = (__m128i*) indicesp.get(); +		volatile __m128i* dst = (__m128i*) indicesp.get();  		__m128i* src = (__m128i*) vf.mIndices;  		__m128i offset = _mm_set1_epi16(index_offset); @@ -1211,12 +1260,17 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		for (S32 i = 0; i < end; i++)  		{  			__m128i res = _mm_add_epi16(src[i], offset); -			_mm_storeu_si128(dst+i, res); +			_mm_storeu_si128((__m128i*) dst++, res);  		} -		for (S32 i = end*8; i < num_indices; ++i)  		{ -			indicesp[i] = vf.mIndices[i]+index_offset; +			LLFastTimer t(FTM_FACE_GEOM_INDEX_TAIL); +			U16* idx = (U16*) dst; + +			for (S32 i = end*8; i < num_indices; ++i) +			{ +				*idx++ = vf.mIndices[i]+index_offset; +			}  		}  		if (map_range) @@ -1373,19 +1427,48 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  			if (texgen != LLTextureEntry::TEX_GEN_PLANAR)  			{ +				LLFastTimer t(FTM_FACE_TEX_QUICK);  				if (!do_tex_mat)  				{  					if (!do_xform)  					{ +						LLFastTimer t(FTM_FACE_TEX_QUICK_NO_XFORM);  						LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));  					}  					else  					{ -						for (S32 i = 0; i < num_vertices; i++) +						LLFastTimer t(FTM_FACE_TEX_QUICK_XFORM); +						F32* dst = (F32*) tex_coords.get(); +						LLVector4a* src = (LLVector4a*) vf.mTexCoords; + +						LLVector4a trans; +						trans.splat(-0.5f); + +						LLVector4a rot0; +						rot0.set(cos_ang, -sin_ang, cos_ang, -sin_ang); + +						LLVector4a rot1; +						rot1.set(sin_ang, cos_ang, sin_ang, cos_ang); + +						LLVector4a scale; +						scale.set(ms, mt, ms, mt); + +						LLVector4a offset; +						offset.set(os+0.5f, ot+0.5f, os+0.5f, ot+0.5f); + +						LLVector4Logical mask; +						mask.clear(); +						mask.setElement<2>(); +						mask.setElement<3>(); + +						U32 count = num_vertices/2 + num_vertices%2; + +						for (S32 i = 0; i < count; i++)  						{	 -							LLVector2 tc(vf.mTexCoords[i]); -							xform(tc, cos_ang, sin_ang, os, ot, ms, mt); -							*tex_coords++ = tc;	 +							LLVector4a res = *src++; +							xform4a(res, trans, mask, rot0, rot1, offset, scale); +							res.store4a(dst); +							dst += 4;  						}  					}  				} @@ -1407,6 +1490,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  			}  			else  			{ //no bump, no atlas, tex gen planar +				LLFastTimer t(FTM_FACE_TEX_QUICK_PLANAR);  				if (do_tex_mat)  				{  					for (S32 i = 0; i < num_vertices; i++) @@ -1451,6 +1535,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		}  		else  		{ //either bump mapped or in atlas, just do the whole expensive loop +			LLFastTimer t(FTM_FACE_TEX_DEFAULT);  			mVertexBuffer->getTexCoord0Strider(tex_coords, mGeomIndex, mGeomCount, map_range);  			std::vector<LLVector2> bump_tc; @@ -1642,44 +1727,55 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		llassert(num_vertices > 0);  		mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range); -		vertices = (LLVector4a*) vert.get(); -	 +			 +  		LLMatrix4a mat_vert;  		mat_vert.loadu(mat_vert_in);  		LLVector4a* src = vf.mPositions; -		LLVector4a* dst = vertices; +		volatile F32* dst = (volatile F32*) vert.get(); -		LLVector4a* end = dst+num_vertices; -		do -		{	 -			mat_vert.affineTransform(*src++, *dst++); -		} -		while(dst < end); +		volatile F32* end = dst+num_vertices*4; +		LLVector4a res; -		F32 index = (F32) (mTextureIndex < 255 ? mTextureIndex : 0); +		LLVector4a texIdx; +		F32 index = (F32) (mTextureIndex < 255 ? mTextureIndex : 0);  		llassert(index <= LLGLSLShader::sIndexedTextureChannels-1); -		F32 *index_dst = (F32*) vertices; -		F32 *index_end = (F32*) end; -		index_dst += 3; -		index_end += 3; -		do +		LLVector4Logical mask; +		mask.clear(); +		mask.setElement<3>(); +		 +		texIdx.set(0,0,0,index); +  		{ -			*index_dst = index; -			index_dst += 4; +			LLFastTimer t(FTM_FACE_POSITION_STORE); +			LLVector4a tmp; + +			do +			{	 +				mat_vert.affineTransform(*src++, res); +				tmp.setSelectWithMask(mask, texIdx, res); +				tmp.store4a((F32*) dst); +				dst += 4; +			} +			while(dst < end);  		} -		while (index_dst < index_end); -		 -		S32 aligned_pad_vertices = mGeomCount - num_vertices; -		LLVector4a* last_vec = end - 1; -		while (aligned_pad_vertices > 0) +  		{ -			--aligned_pad_vertices; -			*dst++ = *last_vec; +			LLFastTimer t(FTM_FACE_POSITION_PAD); +			S32 aligned_pad_vertices = mGeomCount - num_vertices; +			res.set(res[0], res[1], res[2], 0.f); + +			while (aligned_pad_vertices > 0) +			{ +				--aligned_pad_vertices; +				res.store4a((F32*) dst); +				dst += 4; +			}  		} -		 +  		if (map_range)  		{  			mVertexBuffer->flush(); @@ -1690,14 +1786,15 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  	{  		LLFastTimer t(FTM_FACE_GEOM_NORMAL);  		mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range); -		normals = (LLVector4a*) norm.get(); +		F32* normals = (F32*) norm.get();  		for (S32 i = 0; i < num_vertices; i++)  		{	  			LLVector4a normal;  			mat_normal.rotate(vf.mNormals[i], normal);  			normal.normalize3fast(); -			normals[i] = normal; +			normal.store4a(normals); +			normals += 4;  		}  		if (map_range) @@ -1710,14 +1807,15 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  	{  		LLFastTimer t(FTM_FACE_GEOM_BINORMAL);  		mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range); -		binormals = (LLVector4a*) binorm.get(); +		F32* binormals = (F32*) binorm.get();  		for (S32 i = 0; i < num_vertices; i++)  		{	  			LLVector4a binormal;  			mat_normal.rotate(vf.mBinormals[i], binormal);  			binormal.normalize3fast(); -			binormals[i] = binormal; +			binormal.store4a(binormals); +			binormals += 4;  		}  		if (map_range) @@ -1730,8 +1828,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  	{  		LLFastTimer t(FTM_FACE_GEOM_WEIGHTS);  		mVertexBuffer->getWeight4Strider(wght, mGeomIndex, mGeomCount, map_range); -		weights = (LLVector4a*) wght.get(); -		LLVector4a::memcpyNonAliased16((F32*) weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32)); +		F32* weights = (F32*) wght.get(); +		LLVector4a::memcpyNonAliased16(weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));  		if (map_range)  		{  			mVertexBuffer->flush(); @@ -1750,7 +1848,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		src.loadua((F32*) vec); -		LLVector4a* dst = (LLVector4a*) colors.get(); +		F32* dst = (F32*) colors.get();  		S32 num_vecs = num_vertices/4;  		if (num_vertices%4 > 0)  		{ @@ -1759,7 +1857,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		for (S32 i = 0; i < num_vecs; i++)  		{	 -			dst[i] = src; +			src.store4a(dst); +			dst += 4;  		}  		if (map_range) @@ -1789,7 +1888,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		src.loadua((F32*) vec); -		LLVector4a* dst = (LLVector4a*) emissive.get(); +		F32* dst = (F32*) emissive.get();  		S32 num_vecs = num_vertices/4;  		if (num_vertices%4 > 0)  		{ @@ -1798,7 +1897,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,  		for (S32 i = 0; i < num_vecs; i++)  		{	 -			dst[i] = src; +			src.store4a(dst); +			dst += 4;  		}  		if (map_range) diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp index 64bdcccd9f..9122e5a8f5 100755 --- a/indra/newview/llfloatermodelpreview.cpp +++ b/indra/newview/llfloatermodelpreview.cpp @@ -3969,7 +3969,7 @@ void LLModelPreview::genLODs(S32 which_lod, U32 decimation, bool enforce_tri_lim  				U32 num_indices = mVertexBuffer[5][mdl][i]->getNumIndices();  				if (num_indices > 2)  				{ -					glodInsertElements(mObject[mdl], i, GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, mVertexBuffer[5][mdl][i]->getIndicesPointer(), 0, 0.f); +					glodInsertElements(mObject[mdl], i, GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, (U8*) mVertexBuffer[5][mdl][i]->getIndicesPointer(), 0, 0.f);  				}  				tri_count += num_indices/3;  				stop_gloderror(); @@ -4083,14 +4083,14 @@ void LLModelPreview::genLODs(S32 which_lod, U32 decimation, bool enforce_tri_lim  				{  					buff->allocateBuffer(sizes[i*2+1], sizes[i*2], true);  					buff->setBuffer(type_mask); -					glodFillElements(mObject[base], names[i], GL_UNSIGNED_SHORT, buff->getIndicesPointer()); +					glodFillElements(mObject[base], names[i], GL_UNSIGNED_SHORT, (U8*) buff->getIndicesPointer());  					stop_gloderror();  				}  				else  				{ //this face was eliminated, create a dummy triangle (one vertex, 3 indices, all 0)  					buff->allocateBuffer(1, 3, true); -					memset(buff->getMappedData(), 0, buff->getSize()); -					memset(buff->getIndicesPointer(), 0, buff->getIndicesSize()); +					memset((U8*) buff->getMappedData(), 0, buff->getSize()); +					memset((U8*) buff->getIndicesPointer(), 0, buff->getIndicesSize());  				}  				buff->validateRange(0, buff->getNumVerts()-1, buff->getNumIndices(), 0); @@ -4880,8 +4880,8 @@ void LLModelPreview::addEmptyFace( LLModel* pTarget )  	LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(type_mask, 0);  	buff->allocateBuffer(1, 3, true); -	memset( buff->getMappedData(), 0, buff->getSize() ); -	memset( buff->getIndicesPointer(), 0, buff->getIndicesSize() ); +	memset( (U8*) buff->getMappedData(), 0, buff->getSize() ); +	memset( (U8*) buff->getIndicesPointer(), 0, buff->getIndicesSize() );  	buff->validateRange( 0, buff->getNumVerts()-1, buff->getNumIndices(), 0 ); diff --git a/indra/newview/llvosurfacepatch.cpp b/indra/newview/llvosurfacepatch.cpp index c3a2e6a712..bf6158eeaf 100644 --- a/indra/newview/llvosurfacepatch.cpp +++ b/indra/newview/llvosurfacepatch.cpp @@ -65,7 +65,7 @@ public:  			return;  		} -		U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData; +		volatile U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData;  		//assume tex coords 2 and 3 are present  		U32 type_mask = mTypeMask | MAP_TEXCOORD2 | MAP_TEXCOORD3; | 
