summaryrefslogtreecommitdiff
path: root/indra/newview/llface.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/newview/llface.cpp')
-rwxr-xr-xindra/newview/llface.cpp99
1 files changed, 69 insertions, 30 deletions
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 6b3127decf..ef91a459e7 100755
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -1407,6 +1407,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
{ //use transform feedback to pack vertex buffer
//gGLDebugLoggingEnabled = TRUE;
LLFastTimer t(FTM_FACE_GEOM_FEEDBACK);
+ LLGLEnable discard(GL_RASTERIZER_DISCARD);
LLVertexBuffer* buff = (LLVertexBuffer*) vf.mVertexBuffer.get();
if (vf.mVertexBuffer.isNull() || buff->getNumVerts() != vf.mNumVertices)
@@ -1955,21 +1956,31 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
if (rebuild_pos)
{
- LLFastTimer t(FTM_FACE_GEOM_POSITION);
+ LLVector4a* src = vf.mPositions;
+
+ //_mm_prefetch((char*)src, _MM_HINT_T0);
+
+ LLVector4a* end = src+num_vertices;
+ //LLVector4a* end_64 = end-4;
+
+ //LLFastTimer t(FTM_FACE_GEOM_POSITION);
llassert(num_vertices > 0);
mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range);
-
LLMatrix4a mat_vert;
mat_vert.loadu(mat_vert_in);
+
+ F32* dst = (F32*) vert.get();
+ F32* end_f32 = dst+mGeomCount*4;
- LLVector4a* src = vf.mPositions;
- volatile F32* dst = (volatile F32*) vert.get();
-
- volatile F32* end = dst+num_vertices*4;
- LLVector4a res;
+ //_mm_prefetch((char*)dst, _MM_HINT_NTA);
+ //_mm_prefetch((char*)src, _MM_HINT_NTA);
+
+ //_mm_prefetch((char*)dst, _MM_HINT_NTA);
+ LLVector4a res0; //,res1,res2,res3;
+
LLVector4a texIdx;
S32 index = mTextureIndex < 255 ? mTextureIndex : 0;
@@ -1986,29 +1997,53 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
texIdx.set(0,0,0,val);
+ LLVector4a tmp;
+
{
- LLFastTimer t(FTM_FACE_POSITION_STORE);
- LLVector4a tmp;
+ //LLFastTimer t2(FTM_FACE_POSITION_STORE);
- do
- {
- mat_vert.affineTransform(*src++, res);
- tmp.setSelectWithMask(mask, texIdx, res);
+ /*if (num_vertices > 4)
+ { //more than 64 bytes
+ while (src < end_64)
+ {
+ _mm_prefetch((char*)src + 64, _MM_HINT_T0);
+ _mm_prefetch((char*)dst + 64, _MM_HINT_T0);
+
+ mat_vert.affineTransform(*src, res0);
+ tmp.setSelectWithMask(mask, texIdx, res0);
+ tmp.store4a((F32*) dst);
+
+ mat_vert.affineTransform(*(src+1), res1);
+ tmp.setSelectWithMask(mask, texIdx, res1);
+ tmp.store4a((F32*) dst+4);
+
+ mat_vert.affineTransform(*(src+2), res2);
+ tmp.setSelectWithMask(mask, texIdx, res2);
+ tmp.store4a((F32*) dst+8);
+
+ mat_vert.affineTransform(*(src+3), res3);
+ tmp.setSelectWithMask(mask, texIdx, res3);
+ tmp.store4a((F32*) dst+12);
+
+ dst += 16;
+ src += 4;
+ }
+ }*/
+
+ while (src < end)
+ {
+ mat_vert.affineTransform(*src++, res0);
+ tmp.setSelectWithMask(mask, texIdx, res0);
tmp.store4a((F32*) dst);
dst += 4;
}
- while(dst < end);
}
-
+
{
- LLFastTimer t(FTM_FACE_POSITION_PAD);
- S32 aligned_pad_vertices = mGeomCount - num_vertices;
- res.set(res[0], res[1], res[2], 0.f);
-
- while (aligned_pad_vertices > 0)
+ //LLFastTimer t(FTM_FACE_POSITION_PAD);
+ while (dst < end_f32)
{
- --aligned_pad_vertices;
- res.store4a((F32*) dst);
+ res0.store4a((F32*) dst);
dst += 4;
}
}
@@ -2022,15 +2057,17 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
if (rebuild_normal)
{
- LLFastTimer t(FTM_FACE_GEOM_NORMAL);
+ //LLFastTimer t(FTM_FACE_GEOM_NORMAL);
mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range);
F32* normals = (F32*) norm.get();
- for (S32 i = 0; i < num_vertices; i++)
- {
+ LLVector4a* src = vf.mNormals;
+ LLVector4a* end = src+num_vertices;
+
+ while (src < end)
+ {
LLVector4a normal;
- mat_normal.rotate(vf.mNormals[i], normal);
- normal.normalize3fast();
+ mat_normal.rotate(*src++, normal);
normal.store4a(normals);
normals += 4;
}
@@ -2047,11 +2084,13 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range);
F32* binormals = (F32*) binorm.get();
- for (S32 i = 0; i < num_vertices; i++)
+ LLVector4a* src = vf.mBinormals;
+ LLVector4a* end = vf.mBinormals+num_vertices;
+
+ while (src < end)
{
LLVector4a binormal;
- mat_normal.rotate(vf.mBinormals[i], binormal);
- binormal.normalize3fast();
+ mat_normal.rotate(*src++, binormal);
binormal.store4a(binormals);
binormals += 4;
}