From 1655256c0c1f536593292d81b89f3580ad1dee73 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Sat, 29 May 2010 04:18:44 -0500 Subject: UI rendering optimization. --- indra/llrender/llrender.cpp | 97 ++++++++++++++++++++++++++------------------- indra/llrender/llrender.h | 13 +++--- indra/llui/llview.cpp | 22 ++++++---- 3 files changed, 80 insertions(+), 52 deletions(-) diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index 2472339ec4..3f70ccacb1 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -39,6 +39,7 @@ #include "llimagegl.h" #include "llrendertarget.h" #include "lltexture.h" +#include "llvector4a.h" LLRender gGL; @@ -53,6 +54,7 @@ U32 LLRender::sUICalls = 0; U32 LLRender::sUIVerts = 0; static const U32 LL_NUM_TEXTURE_LAYERS = 16; +static const U32 LL_MAX_UI_STACK_DEPTH = 32; static GLenum sGLTextureType[] = { @@ -757,14 +759,27 @@ LLRender::LLRender() mCount(0), mMode(LLRender::TRIANGLES), mCurrTextureUnitIndex(0), - mMaxAnisotropy(0.f) + mMaxAnisotropy(0.f), + mUIStackDepth(0) { mBuffer = new LLVertexBuffer(immediate_mask, 0); mBuffer->allocateBuffer(4096, 0, TRUE); - mBuffer->getVertexStrider(mVerticesp); - mBuffer->getTexCoord0Strider(mTexcoordsp); - mBuffer->getColorStrider(mColorsp); - + + LLStrider vert; + LLStrider tc; + LLStrider color; + + mBuffer->getVertexStrider(vert); + mBuffer->getTexCoord0Strider(tc); + mBuffer->getColorStrider(color); + + mVerticesp = (LLVector4a*) vert.get(); + mTexcoordsp = tc.get(); + mColorsp = color.get(); + + mUIOffset = (LLVector4a*) _mm_malloc(LL_MAX_UI_STACK_DEPTH*sizeof(LLVector4a), 16); + mUIScale = (LLVector4a*) _mm_malloc(LL_MAX_UI_STACK_DEPTH*sizeof(LLVector4a), 16); + mTexUnits.reserve(LL_NUM_TEXTURE_LAYERS); for (U32 i = 0; i < LL_NUM_TEXTURE_LAYERS; i++) { @@ -800,6 +815,11 @@ void LLRender::shutdown() mTexUnits.clear(); delete mDummyTexUnit; mDummyTexUnit = NULL; + + _mm_free(mUIOffset); + mUIOffset = NULL; + _mm_free(mUIScale); + mUIScale = NULL; } void 
LLRender::refreshState(void) @@ -848,84 +868,83 @@ void LLRender::popMatrix() void LLRender::translateUI(F32 x, F32 y, F32 z) { - if (mUIOffset.empty()) + if (mUIStackDepth == 0) { llerrs << "Need to push a UI translation frame before offsetting" << llendl; } - mUIOffset.front().mV[0] += x; - mUIOffset.front().mV[1] += y; - mUIOffset.front().mV[2] += z; + LLVector4a trans(x,y,z); + mUIOffset[mUIStackDepth-1].add(trans); } void LLRender::scaleUI(F32 x, F32 y, F32 z) { - if (mUIScale.empty()) + if (mUIStackDepth == 0) { llerrs << "Need to push a UI transformation frame before scaling." << llendl; } - mUIScale.front().scaleVec(LLVector3(x,y,z)); + LLVector4a scale(x,y,z); + mUIScale[mUIStackDepth-1].mul(scale); } void LLRender::pushUIMatrix() { - if (mUIOffset.empty()) + if (mUIStackDepth == 0) { - mUIOffset.push_front(LLVector3(0,0,0)); + mUIOffset[0].clear(); + mUIScale[0].splat(1.f); } - else + else if (mUIStackDepth < LL_MAX_UI_STACK_DEPTH) { - mUIOffset.push_front(mUIOffset.front()); - } - - if (mUIScale.empty()) - { - mUIScale.push_front(LLVector3(1,1,1)); + mUIOffset[mUIStackDepth] = mUIOffset[mUIStackDepth-1]; + mUIScale[mUIStackDepth] = mUIScale[mUIStackDepth-1]; } else { - mUIScale.push_front(mUIScale.front()); + llerrs << "Blown UI matrix stack." << llendl; } + + ++mUIStackDepth; + } void LLRender::popUIMatrix() { - if (mUIOffset.empty()) + if (mUIStackDepth == 0) { llerrs << "UI offset stack blown." << llendl; } - mUIOffset.pop_front(); - mUIScale.pop_front(); + --mUIStackDepth; } LLVector3 LLRender::getUITranslation() { - if (mUIOffset.empty()) + if (mUIStackDepth == 0) { llerrs << "UI offset stack empty." << llendl; } - return mUIOffset.front(); + return LLVector3(mUIOffset[mUIStackDepth-1].getF32()); } LLVector3 LLRender::getUIScale() { - if (mUIScale.empty()) + if (mUIStackDepth == 0) { llerrs << "UI scale stack empty." 
<< llendl; } - return mUIScale.front(); + return LLVector3(mUIScale[mUIStackDepth-1].getF32()); } void LLRender::loadUIIdentity() { - if (mUIOffset.empty()) + if (mUIStackDepth == 0) { llerrs << "Need to push UI translation frame before clearing offset." << llendl; } - mUIOffset.front().setVec(0,0,0); - mUIScale.front().setVec(1,1,1); + mUIOffset[mUIStackDepth-1].clear(); + mUIScale[mUIStackDepth-1].splat(1.f); } void LLRender::setColorMask(bool writeColor, bool writeAlpha) @@ -1154,7 +1173,7 @@ void LLRender::flush() } #endif - if (!mUIOffset.empty()) + if (mUIStackDepth > 0) { sUICalls++; sUIVerts += mCount; @@ -1206,24 +1225,22 @@ void LLRender::vertex3f(const GLfloat& x, const GLfloat& y, const GLfloat& z) return; } - if (mUIOffset.empty()) + LLVector3& v = reinterpret_cast(mVerticesp[mCount]); + v.set(x,y,z); + if (mUIStackDepth != 0) { - mVerticesp[mCount] = LLVector3(x,y,z); - } - else - { - LLVector3 vert = (LLVector3(x,y,z)+mUIOffset.front()).scaledVec(mUIScale.front()); - mVerticesp[mCount] = vert; + v += reinterpret_cast(mUIOffset[mUIStackDepth-1]); + v.scaleVec(reinterpret_cast(mUIScale[mUIStackDepth-1])); } mCount++; if (mCount < 4096) { - mVerticesp[mCount] = mVerticesp[mCount-1]; mColorsp[mCount] = mColorsp[mCount-1]; mTexcoordsp[mCount] = mTexcoordsp[mCount-1]; } } + void LLRender::vertex2i(const GLint& x, const GLint& y) { vertex3f((GLfloat) x, (GLfloat) y, 0); diff --git a/indra/llrender/llrender.h b/indra/llrender/llrender.h index 3cda4b5770..2bacf16dc6 100644 --- a/indra/llrender/llrender.h +++ b/indra/llrender/llrender.h @@ -52,6 +52,7 @@ class LLCubeMap; class LLImageGL; class LLRenderTarget; class LLTexture ; +class LLVector4a; class LLTexUnit { @@ -360,9 +361,10 @@ private: F32 mCurrAlphaFuncVal; LLPointer mBuffer; - LLStrider mVerticesp; - LLStrider mTexcoordsp; - LLStrider mColorsp; + LLVector4a* mVerticesp; + LLVector2* mTexcoordsp; + LLColor4U* mColorsp; + std::vector mTexUnits; LLTexUnit* mDummyTexUnit; @@ -372,9 +374,10 @@ private: 
eBlendFactor mCurrBlendAlphaDFactor; F32 mMaxAnisotropy; - std::list mUIOffset; - std::list mUIScale; + LLVector4a* mUIOffset; + LLVector4a* mUIScale; + U32 mUIStackDepth; }; extern F64 gGLModelView[16]; diff --git a/indra/llui/llview.cpp b/indra/llui/llview.cpp index e67f0ec3fc..459041f67d 100644 --- a/indra/llui/llview.cpp +++ b/indra/llui/llview.cpp @@ -123,7 +123,8 @@ LLView::Params::Params() } LLView::LLView(const LLView::Params& p) -: mName(p.name), +: mVisible(p.visible), + mName(p.name), mParentView(NULL), mReshapeFlags(FOLLOWS_NONE), mFromXUI(p.from_xui), @@ -132,7 +133,6 @@ LLView::LLView(const LLView::Params& p) mNextInsertionOrdinal(0), mHoverCursor(getCursorFromString(p.hover_cursor)), mEnabled(p.enabled), - mVisible(p.visible), mMouseOpaque(p.mouse_opaque), mSoundFlags(p.sound_flags), mUseBoundingRect(p.use_bounding_rect), @@ -1309,7 +1309,13 @@ void LLView::drawChildren() { if (!mChildList.empty()) { - LLRect rootRect = getRootView()->getRect(); + static const LLRect* rootRect = NULL; + + if (!mParentView) + { + rootRect = &mRect; + } + LLRect screenRect; ++sDepth; @@ -1319,13 +1325,15 @@ void LLView::drawChildren() child_list_reverse_iter_t child = child_iter++; LLView *viewp = *child; - if (viewp->getVisible() && viewp->getRect().isValid()) + if (!viewp->getVisible() || !viewp->getRect().isValid()) + { + continue; + } + { - // check for bad data - llassert_always(viewp->getVisible() == TRUE); // Only draw views that are within the root view localRectToScreen(viewp->getRect(),&screenRect); - if ( rootRect.overlaps(screenRect) && LLUI::sDirtyRect.overlaps(screenRect)) + if ( rootRect->overlaps(screenRect) && LLUI::sDirtyRect.overlaps(screenRect)) { LLUI::pushMatrix(); { -- cgit v1.2.3 From 0a54fb6b24790263c45f096415fee0e2d03323e0 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Sat, 29 May 2010 04:19:25 -0500 Subject: Faster texture coordinate updates. 
--- indra/newview/llface.cpp | 581 +++++++++++++++++++++++++++-------------------- 1 file changed, 334 insertions(+), 247 deletions(-) diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 7a0aa9244d..98a50ca4e7 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -1073,8 +1073,6 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, mVertexBuffer->getColorStrider(colors, mGeomIndex); } - F32 r = 0, os = 0, ot = 0, ms = 0, mt = 0, cos_ang = 0, sin_ang = 0; - BOOL is_static = mDrawablep->isStatic(); BOOL is_global = is_static; @@ -1089,57 +1087,6 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, clearState(GLOBAL); } - LLVector2 tmin, tmax; - - if (rebuild_tcoord) - { - if (tep) - { - r = tep->getRotation(); - os = tep->mOffsetS; - ot = tep->mOffsetT; - ms = tep->mScaleS; - mt = tep->mScaleT; - cos_ang = cos(r); - sin_ang = sin(r); - } - else - { - cos_ang = 1.0f; - sin_ang = 0.0f; - os = 0.0f; - ot = 0.0f; - ms = 1.0f; - mt = 1.0f; - } - } - - U8 tex_mode = 0; - - if (isState(TEXTURE_ANIM)) - { - LLVOVolume* vobj = (LLVOVolume*) (LLViewerObject*) mVObjp; - tex_mode = vobj->mTexAnimMode; - - if (!tex_mode) - { - clearState(TEXTURE_ANIM); - } - else - { - os = ot = 0.f; - r = 0.f; - cos_ang = 1.f; - sin_ang = 0.f; - ms = mt = 1.f; - } - - if (getVirtualSize() >= MIN_TEX_ANIM_SIZE) - { //don't override texture transform during tc bake - tex_mode = 0; - } - } - LLColor4U color = tep->getColor(); if (rebuild_color) @@ -1183,242 +1130,382 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, } } + LLMatrix4a mat_normal; + mat_normal.loadu(mat_norm_in); - //bump setup - LLVector4a binormal_dir( -sin_ang, cos_ang, 0.f ); - LLVector4a bump_s_primary_light_ray(0.f, 0.f, 0.f); - LLVector4a bump_t_primary_light_ray(0.f, 0.f, 0.f); + //if it's not fullbright and has no normals, bake sunlight based on face normal + //bool bake_sunlight = !getTextureEntry()->getFullbright() && + // 
!mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL); - LLQuaternion bump_quat; - if (mDrawablep->isActive()) - { - bump_quat = LLQuaternion(mDrawablep->getRenderMatrix()); - } - - if (bump_code) + F32 r = 0, os = 0, ot = 0, ms = 0, mt = 0, cos_ang = 0, sin_ang = 0; + + if (rebuild_tcoord) { - mVObjp->getVolume()->genBinormals(f); - F32 offset_multiple; - switch( bump_code ) + bool do_xform; + + if (tep) { - case BE_NO_BUMP: - offset_multiple = 0.f; - break; - case BE_BRIGHTNESS: - case BE_DARKNESS: - if( mTexture.notNull() && mTexture->hasGLTexture()) + r = tep->getRotation(); + os = tep->mOffsetS; + ot = tep->mOffsetT; + ms = tep->mScaleS; + mt = tep->mScaleT; + cos_ang = cos(r); + sin_ang = sin(r); + + if (cos_ang != 1.f || + sin_ang != 0.f || + os != 0.f || + ot != 0.f || + ms != 1.f || + mt != 1.f) { - // Offset by approximately one texel - S32 cur_discard = mTexture->getDiscardLevel(); - S32 max_size = llmax( mTexture->getWidth(), mTexture->getHeight() ); - max_size <<= cur_discard; - const F32 ARTIFICIAL_OFFSET = 2.f; - offset_multiple = ARTIFICIAL_OFFSET / (F32)max_size; + do_xform = true; } else { - offset_multiple = 1.f/256; - } - break; - - default: // Standard bumpmap textures. Assumed to be 256x256 - offset_multiple = 1.f / 256; - break; + do_xform = false; + } } - - F32 s_scale = 1.f; - F32 t_scale = 1.f; - if( tep ) + else { - tep->getScale( &s_scale, &t_scale ); + do_xform = false; } - // Use the nudged south when coming from above sun angle, such - // that emboss mapping always shows up on the upward faces of cubes when - // it's noon (since a lot of builders build with the sun forced to noon). - LLVector3 sun_ray = gSky.mVOSkyp->mBumpSunDir; - LLVector3 moon_ray = gSky.getMoonDirection(); - LLVector3& primary_light_ray = (sun_ray.mV[VZ] > 0) ? 
sun_ray : moon_ray; + + //bump setup + LLVector4a binormal_dir( -sin_ang, cos_ang, 0.f ); + LLVector4a bump_s_primary_light_ray(0.f, 0.f, 0.f); + LLVector4a bump_t_primary_light_ray(0.f, 0.f, 0.f); - bump_s_primary_light_ray.load3((offset_multiple * s_scale * primary_light_ray).mV); - bump_t_primary_light_ray.load3((offset_multiple * t_scale * primary_light_ray).mV); - } + LLQuaternion bump_quat; + if (mDrawablep->isActive()) + { + bump_quat = LLQuaternion(mDrawablep->getRenderMatrix()); + } - U8 texgen = getTextureEntry()->getTexGen(); - if (rebuild_tcoord && texgen != LLTextureEntry::TEX_GEN_DEFAULT) - { //planar texgen needs binormals - mVObjp->getVolume()->genBinormals(f); - } - - LLMatrix4a mat_normal; - mat_normal.loadu(mat_norm_in); - - //if it's not fullbright and has no normals, bake sunlight based on face normal - //bool bake_sunlight = !getTextureEntry()->getFullbright() && - // !mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL); - - if (rebuild_tcoord) - { - LLVector4a scalea; - scalea.load3(scale.mV); - + if (bump_code) + { + mVObjp->getVolume()->genBinormals(f); + F32 offset_multiple; + switch( bump_code ) + { + case BE_NO_BUMP: + offset_multiple = 0.f; + break; + case BE_BRIGHTNESS: + case BE_DARKNESS: + if( mTexture.notNull() && mTexture->hasGLTexture()) + { + // Offset by approximately one texel + S32 cur_discard = mTexture->getDiscardLevel(); + S32 max_size = llmax( mTexture->getWidth(), mTexture->getHeight() ); + max_size <<= cur_discard; + const F32 ARTIFICIAL_OFFSET = 2.f; + offset_multiple = ARTIFICIAL_OFFSET / (F32)max_size; + } + else + { + offset_multiple = 1.f/256; + } + break; - for (S32 i = 0; i < num_vertices; i++) - { - LLVector2 tc(vf.mTexCoords[i]); - - LLVector4a& norm = vf.mNormals[i]; - - LLVector4a& center = *(vf.mCenter); + default: // Standard bumpmap textures. 
Assumed to be 256x256 + offset_multiple = 1.f / 256; + break; + } - if (texgen != LLTextureEntry::TEX_GEN_DEFAULT) + F32 s_scale = 1.f; + F32 t_scale = 1.f; + if( tep ) { - LLVector4a vec = vf.mPositions[i]; - - vec.mul(scalea); - - switch (texgen) - { - case LLTextureEntry::TEX_GEN_PLANAR: - planarProjection(tc, norm, center, vec); - break; - case LLTextureEntry::TEX_GEN_SPHERICAL: - sphericalProjection(tc, norm, center, vec); - break; - case LLTextureEntry::TEX_GEN_CYLINDRICAL: - cylindricalProjection(tc, norm, center, vec); - break; - default: - break; - } + tep->getScale( &s_scale, &t_scale ); } + // Use the nudged south when coming from above sun angle, such + // that emboss mapping always shows up on the upward faces of cubes when + // it's noon (since a lot of builders build with the sun forced to noon). + LLVector3 sun_ray = gSky.mVOSkyp->mBumpSunDir; + LLVector3 moon_ray = gSky.getMoonDirection(); + LLVector3& primary_light_ray = (sun_ray.mV[VZ] > 0) ? sun_ray : moon_ray; + + bump_s_primary_light_ray.load3((offset_multiple * s_scale * primary_light_ray).mV); + bump_t_primary_light_ray.load3((offset_multiple * t_scale * primary_light_ray).mV); + } - if (tex_mode && mTextureMatrix) + U8 texgen = getTextureEntry()->getTexGen(); + if (rebuild_tcoord && texgen != LLTextureEntry::TEX_GEN_DEFAULT) + { //planar texgen needs binormals + mVObjp->getVolume()->genBinormals(f); + } + + U8 tex_mode = 0; + + if (isState(TEXTURE_ANIM)) + { + LLVOVolume* vobj = (LLVOVolume*) (LLViewerObject*) mVObjp; + tex_mode = vobj->mTexAnimMode; + + if (!tex_mode) { - LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); - tmp = tmp * *mTextureMatrix; - tc.mV[0] = tmp.mV[0]; - tc.mV[1] = tmp.mV[1]; + clearState(TEXTURE_ANIM); } else { - xform(tc, cos_ang, sin_ang, os, ot, ms, mt); + os = ot = 0.f; + r = 0.f; + cos_ang = 1.f; + sin_ang = 0.f; + ms = mt = 1.f; + + do_xform = false; } - if(in_atlas) - { - // - //manually calculate tex-coord per vertex for varying address modes. 
- //should be removed if shader can handle this. - // + if (getVirtualSize() >= MIN_TEX_ANIM_SIZE) + { //don't override texture transform during tc bake + tex_mode = 0; + } + } - S32 int_part = 0 ; - switch(mTexture->getAddressMode()) - { - case LLTexUnit::TAM_CLAMP: - if(tc.mV[0] < 0.f) - { - tc.mV[0] = 0.f ; - } - else if(tc.mV[0] > 1.f) - { - tc.mV[0] = 1.f; - } + LLVector4a scalea; + scalea.load3(scale.mV); - if(tc.mV[1] < 0.f) - { - tc.mV[1] = 0.f ; - } - else if(tc.mV[1] > 1.f) - { - tc.mV[1] = 1.f; - } - break; - case LLTexUnit::TAM_MIRROR: - if(tc.mV[0] < 0.f) - { - tc.mV[0] = -tc.mV[0] ; - } - int_part = (S32)tc.mV[0] ; - if(int_part & 1) //odd number - { - tc.mV[0] = int_part + 1 - tc.mV[0] ; - } - else //even number - { - tc.mV[0] -= int_part ; - } + bool do_bump = bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1); + bool do_tex_mat = tex_mode && mTextureMatrix; - if(tc.mV[1] < 0.f) + if (!in_atlas && !do_bump) + { //not in atlas or not bump mapped, might be able to do a cheap update + if (texgen != LLTextureEntry::TEX_GEN_PLANAR) + { + if (!do_tex_mat) + { + if (!do_xform) { - tc.mV[1] = -tc.mV[1] ; + LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2); } - int_part = (S32)tc.mV[1] ; - if(int_part & 1) //odd number + else { - tc.mV[1] = int_part + 1 - tc.mV[1] ; + for (S32 i = 0; i < num_vertices; i++) + { + LLVector2 tc(vf.mTexCoords[i]); + xform(tc, cos_ang, sin_ang, os, ot, ms, mt); + *tex_coords++ = tc; + } } - else //even number - { - tc.mV[1] -= int_part ; + } + else + { //do tex mat, no texgen, no atlas, no bump + for (S32 i = 0; i < num_vertices; i++) + { + LLVector2 tc(vf.mTexCoords[i]); + LLVector4a& norm = vf.mNormals[i]; + LLVector4a& center = *(vf.mCenter); + + LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); + tmp = tmp * *mTextureMatrix; + tc.mV[0] = tmp.mV[0]; + tc.mV[1] = tmp.mV[1]; + *tex_coords++ = tc; } - break; - case LLTexUnit::TAM_WRAP: - if(tc.mV[0] > 1.f) - tc.mV[0] -= 
(S32)(tc.mV[0] - 0.00001f) ; - else if(tc.mV[0] < -1.f) - tc.mV[0] -= (S32)(tc.mV[0] + 0.00001f) ; - - if(tc.mV[1] > 1.f) - tc.mV[1] -= (S32)(tc.mV[1] - 0.00001f) ; - else if(tc.mV[1] < -1.f) - tc.mV[1] -= (S32)(tc.mV[1] + 0.00001f) ; - - if(tc.mV[0] < 0.f) - { - tc.mV[0] = 1.0f + tc.mV[0] ; + } + } + else + { //no bump, no atlas, tex gen planar + if (do_tex_mat) + { + for (S32 i = 0; i < num_vertices; i++) + { + LLVector2 tc(vf.mTexCoords[i]); + LLVector4a& norm = vf.mNormals[i]; + LLVector4a& center = *(vf.mCenter); + LLVector4a vec = vf.mPositions[i]; + vec.mul(scalea); + planarProjection(tc, norm, center, vec); + + LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); + tmp = tmp * *mTextureMatrix; + tc.mV[0] = tmp.mV[0]; + tc.mV[1] = tmp.mV[1]; + + *tex_coords++ = tc; } - if(tc.mV[1] < 0.f) - { - tc.mV[1] = 1.0f + tc.mV[1] ; + } + else + { + for (S32 i = 0; i < num_vertices; i++) + { + LLVector2 tc(vf.mTexCoords[i]); + LLVector4a& norm = vf.mNormals[i]; + LLVector4a& center = *(vf.mCenter); + LLVector4a vec = vf.mPositions[i]; + vec.mul(scalea); + planarProjection(tc, norm, center, vec); + + xform(tc, cos_ang, sin_ang, os, ot, ms, mt); + + *tex_coords++ = tc; } - break; - default: - break; } - - tc.mV[0] = tcoord_xoffset + tcoord_xscale * tc.mV[0] ; - tc.mV[1] = tcoord_yoffset + tcoord_yscale * tc.mV[1] ; } + } + else + { //either bump mapped or in atlas, just do the whole expensive loop + for (S32 i = 0; i < num_vertices; i++) + { + LLVector2 tc(vf.mTexCoords[i]); + LLVector4a& norm = vf.mNormals[i]; + + LLVector4a& center = *(vf.mCenter); - *tex_coords++ = tc; - - if (bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1)) - { - LLVector4a tangent; - tangent.setCross3(vf.mBinormals[i], vf.mNormals[i]); - - LLMatrix4a tangent_to_object; - tangent_to_object.setRows(tangent, vf.mBinormals[i], vf.mNormals[i]); - LLVector4a t; - tangent_to_object.rotate(binormal_dir, t); - LLVector4a binormal; - mat_normal.rotate(t, binormal); - - //VECTORIZE THIS - if 
(mDrawablep->isActive()) + if (texgen != LLTextureEntry::TEX_GEN_DEFAULT) + { + LLVector4a vec = vf.mPositions[i]; + + vec.mul(scalea); + + switch (texgen) + { + case LLTextureEntry::TEX_GEN_PLANAR: + planarProjection(tc, norm, center, vec); + break; + case LLTextureEntry::TEX_GEN_SPHERICAL: + sphericalProjection(tc, norm, center, vec); + break; + case LLTextureEntry::TEX_GEN_CYLINDRICAL: + cylindricalProjection(tc, norm, center, vec); + break; + default: + break; + } + } + + if (tex_mode && mTextureMatrix) { - LLVector3 t; - t.set(binormal.getF32()); - t *= bump_quat; - binormal.load3(t.mV); + LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f); + tmp = tmp * *mTextureMatrix; + tc.mV[0] = tmp.mV[0]; + tc.mV[1] = tmp.mV[1]; + } + else + { + xform(tc, cos_ang, sin_ang, os, ot, ms, mt); } - binormal.normalize3fast(); - tc += LLVector2( bump_s_primary_light_ray.dot3(tangent), bump_t_primary_light_ray.dot3(binormal) ); + if(in_atlas) + { + // + //manually calculate tex-coord per vertex for varying address modes. + //should be removed if shader can handle this. 
+ // + + S32 int_part = 0 ; + switch(mTexture->getAddressMode()) + { + case LLTexUnit::TAM_CLAMP: + if(tc.mV[0] < 0.f) + { + tc.mV[0] = 0.f ; + } + else if(tc.mV[0] > 1.f) + { + tc.mV[0] = 1.f; + } + + if(tc.mV[1] < 0.f) + { + tc.mV[1] = 0.f ; + } + else if(tc.mV[1] > 1.f) + { + tc.mV[1] = 1.f; + } + break; + case LLTexUnit::TAM_MIRROR: + if(tc.mV[0] < 0.f) + { + tc.mV[0] = -tc.mV[0] ; + } + int_part = (S32)tc.mV[0] ; + if(int_part & 1) //odd number + { + tc.mV[0] = int_part + 1 - tc.mV[0] ; + } + else //even number + { + tc.mV[0] -= int_part ; + } + + if(tc.mV[1] < 0.f) + { + tc.mV[1] = -tc.mV[1] ; + } + int_part = (S32)tc.mV[1] ; + if(int_part & 1) //odd number + { + tc.mV[1] = int_part + 1 - tc.mV[1] ; + } + else //even number + { + tc.mV[1] -= int_part ; + } + break; + case LLTexUnit::TAM_WRAP: + if(tc.mV[0] > 1.f) + tc.mV[0] -= (S32)(tc.mV[0] - 0.00001f) ; + else if(tc.mV[0] < -1.f) + tc.mV[0] -= (S32)(tc.mV[0] + 0.00001f) ; + + if(tc.mV[1] > 1.f) + tc.mV[1] -= (S32)(tc.mV[1] - 0.00001f) ; + else if(tc.mV[1] < -1.f) + tc.mV[1] -= (S32)(tc.mV[1] + 0.00001f) ; + + if(tc.mV[0] < 0.f) + { + tc.mV[0] = 1.0f + tc.mV[0] ; + } + if(tc.mV[1] < 0.f) + { + tc.mV[1] = 1.0f + tc.mV[1] ; + } + break; + default: + break; + } - *tex_coords2++ = tc; - } + tc.mV[0] = tcoord_xoffset + tcoord_xscale * tc.mV[0] ; + tc.mV[1] = tcoord_yoffset + tcoord_yscale * tc.mV[1] ; + } + + + *tex_coords++ = tc; + + if (bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1)) + { + LLVector4a tangent; + tangent.setCross3(vf.mBinormals[i], vf.mNormals[i]); + + LLMatrix4a tangent_to_object; + tangent_to_object.setRows(tangent, vf.mBinormals[i], vf.mNormals[i]); + LLVector4a t; + tangent_to_object.rotate(binormal_dir, t); + LLVector4a binormal; + mat_normal.rotate(t, binormal); + + //VECTORIZE THIS + if (mDrawablep->isActive()) + { + LLVector3 t; + t.set(binormal.getF32()); + t *= bump_quat; + binormal.load3(t.mV); + } + + binormal.normalize3fast(); + tc += LLVector2( 
bump_s_primary_light_ray.dot3(tangent), bump_t_primary_light_ray.dot3(binormal) ); + + *tex_coords2++ = tc; + } + } } } -- cgit v1.2.3 From 0e7f4dc5cef8a97cb1dd08aa2f79538ced267888 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Sat, 29 May 2010 05:37:38 -0500 Subject: Octree per LLVolumeFace WIP --- indra/llmath/lloctree.h | 18 + indra/llmath/llvolume.cpp | 625 ++++++++++++++++++++++------- indra/llmath/llvolume.h | 45 ++- indra/newview/llhudicon.cpp | 7 +- indra/newview/llhudtext.cpp | 7 +- indra/newview/llpanelprimmediacontrols.cpp | 1 + indra/newview/llspatialpartition.cpp | 30 ++ indra/newview/llvograss.cpp | 8 +- 8 files changed, 582 insertions(+), 159 deletions(-) diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 2f34fb1bb0..8bba12783f 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -72,6 +72,13 @@ public: virtual void visit(const LLOctreeNode* branch) = 0; }; +template +class LLOctreeTravelerDepthFirst : public LLOctreeTraveler +{ +public: + virtual void traverse(const LLOctreeNode* node); +}; + template class LLOctreeNode : public LLTreeNode { @@ -710,4 +717,15 @@ void LLOctreeTraveler::traverse(const LLOctreeNode* node) traverse(node->getChild(i)); } } + +template +void LLOctreeTravelerDepthFirst::traverse(const LLOctreeNode* node) +{ + for (U32 i = 0; i < node->getChildCount(); i++) + { + traverse(node->getChild(i)); + } + node->accept(this); +} + #endif diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 9b6e2488e6..d261811aa2 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -45,6 +45,7 @@ #include "m4math.h" #include "m3math.h" #include "llmatrix4a.h" +#include "lloctree.h" #include "lldarray.h" #include "llvolume.h" #include "llstl.h" @@ -132,6 +133,51 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent return true; } +BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const 
LLVector4a& size) +{ + LLVector4a fAWdU; + LLVector4a dir; + LLVector4a diff; + + dir.setSub(end, start); + dir.mul(0.5f); + + diff.setAdd(end,start); + diff.mul(0.5f); + diff.sub(center); + fAWdU.setAbs(dir); + + LLVector4a rhs; + rhs.setAdd(size, fAWdU); + + LLVector4a lhs; + lhs.setAbs(diff); + + S32 grt = lhs.greaterThan4(rhs).getComparisonMask(); + + if (grt & 0x7) + { + return false; + } + + LLVector4a f; + f.setCross3(dir, diff); + f.setAbs(f); + + LLVector4a v0; v0.mQ = _mm_shuffle_ps(size.mQ, size.mQ, _MM_SHUFFLE(3,1,0,0)); + LLVector4a v1; v1.mQ = _mm_shuffle_ps(fAWdU.mQ, fAWdU.mQ, _MM_SHUFFLE(3,2,2,1)); + lhs.setMul(v0, v1); + + v0.mQ = _mm_shuffle_ps(size.mQ, size.mQ, _MM_SHUFFLE(3,2,2,1)); + v1.mQ = _mm_shuffle_ps(fAWdU.mQ, fAWdU.mQ, _MM_SHUFFLE(3,1,0,0)); + rhs.setMul(v0, v1); + rhs.add(lhs); + + grt = f.greaterThan4(rhs).getComparisonMask(); + + return (grt & 0x7) ? false : true; +} + // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir. 
// returns TRUE if intersecting and returns barycentric coordinates in intersection_a, intersection_b, @@ -139,15 +185,13 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent // Moller-Trumbore algorithm BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, - F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided) + F32& intersection_a, F32& intersection_b, F32& intersection_t) { - F32 u, v, t; /* find vectors for two edges sharing vert0 */ LLVector4a edge1; edge1.setSub(vert1, vert0); - LLVector4a edge2; edge2.setSub(vert2, vert0); @@ -156,87 +200,116 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co pvec.setCross3(dir, edge2); /* if determinant is near zero, ray lies in plane of triangle */ - F32 det = edge1.dot3(pvec); - - if (!two_sided) + LLVector4a det; + det.setAllDot3(edge1, pvec); + + if (det.greaterEqual4(LLVector4a::getApproximatelyZero()).getComparisonMask()) { - if (det < F_APPROXIMATELY_ZERO) - { - return FALSE; - } - /* calculate distance from vert0 to ray origin */ LLVector4a tvec; tvec.setSub(orig, vert0); /* calculate U parameter and test bounds */ - u = tvec.dot3(pvec); + LLVector4a u; + u.setAllDot3(tvec,pvec); - if (u < 0.f || u > det) + if (u.greaterEqual4(LLVector4a::getZero()).getComparisonMask() && + u.lessEqual4(det).getComparisonMask()) { - return FALSE; + /* prepare to test V parameter */ + LLVector4a qvec; + qvec.setCross3(tvec, edge1); + + /* calculate V parameter and test bounds */ + LLVector4a v; + v.setAllDot3(dir, qvec); + + + //if (!(v < 0.f || u + v > det)) + + LLVector4a sum_uv; + sum_uv.setAdd(u, v); + + S32 v_gequal = v.greaterEqual4(LLVector4a::getZero()).getComparisonMask(); + S32 sum_lequal = sum_uv.lessEqual4(det).getComparisonMask(); + + if (v_gequal && sum_lequal) + { + /* calculate t, scale parameters, ray intersects triangle */ + 
LLVector4a t; + t.setAllDot3(edge2,qvec); + + t.div(det); + u.div(det); + v.div(det); + + intersection_a = u[0]; + intersection_b = v[0]; + intersection_t = t[0]; + return TRUE; + } } - - /* prepare to test V parameter */ - LLVector4a qvec; - qvec.setCross3(tvec, edge1); + } - /* calculate V parameter and test bounds */ - v = dir.dot3(qvec); - if (v < 0.f || u + v > det) - { - return FALSE; - } + return FALSE; +} - /* calculate t, scale parameters, ray intersects triangle */ - t = edge2.dot3(qvec); - F32 inv_det = 1.0 / det; - t *= inv_det; - u *= inv_det; - v *= inv_det; - } +BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, + F32& intersection_a, F32& intersection_b, F32& intersection_t) +{ + F32 u, v, t; - else // two sided - { - if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO) - { - return FALSE; - } - F32 inv_det = 1.0 / det; + /* find vectors for two edges sharing vert0 */ + LLVector4a edge1; + edge1.setSub(vert1, vert0); + + + LLVector4a edge2; + edge2.setSub(vert2, vert0); - /* calculate distance from vert0 to ray origin */ - LLVector4a tvec; - tvec.setSub(orig, vert0); - - /* calculate U parameter and test bounds */ - u = (tvec.dot3(pvec)) * inv_det; - if (u < 0.f || u > 1.f) - { - return FALSE; - } + /* begin calculating determinant - also used to calculate U parameter */ + LLVector4a pvec; + pvec.setCross3(dir, edge2); - /* prepare to test V parameter */ - LLVector4a qvec; - qvec.setSub(tvec, edge1); - - /* calculate V parameter and test bounds */ - v = (dir.dot3(qvec)) * inv_det; - - if (v < 0.f || u + v > 1.f) - { - return FALSE; - } + /* if determinant is near zero, ray lies in plane of triangle */ + F32 det = edge1.dot3(pvec); + + + if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO) + { + return FALSE; + } + + F32 inv_det = 1.f / det; + + /* calculate distance from vert0 to ray origin */ + LLVector4a tvec; + 
tvec.setSub(orig, vert0); + + /* calculate U parameter and test bounds */ + u = (tvec.dot3(pvec)) * inv_det; + if (u < 0.f || u > 1.f) + { + return FALSE; + } - /* calculate t, ray intersects triangle */ - t = (edge2.dot3(qvec)) * inv_det; + /* prepare to test V parameter */ + tvec.sub(edge1); + + /* calculate V parameter and test bounds */ + v = (dir.dot3(tvec)) * inv_det; + + if (v < 0.f || u + v > 1.f) + { + return FALSE; } + + /* calculate t, ray intersects triangle */ + t = (edge2.dot3(tvec)) * inv_det; - if (intersection_a != NULL) - *intersection_a = u; - if (intersection_b != NULL) - *intersection_b = v; - if (intersection_t != NULL) - *intersection_t = t; + intersection_a = u; + intersection_b = v; + intersection_t = t; return TRUE; @@ -244,7 +317,7 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co //helper for non-aligned vectors BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, - F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided) + F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided) { LLVector4a vert0a, vert1a, vert2a, origa, dira; vert0a.load3(vert0.mV); @@ -253,11 +326,130 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons origa.load3(orig.mV); dira.load3(dir.mV); - return LLTriangleRayIntersect(vert0a, vert1a, vert2a, origa, dira, - intersection_a, intersection_b, intersection_t, two_sided); + if (two_sided) + { + return LLTriangleRayIntersectTwoSided(vert0a, vert1a, vert2a, origa, dira, + intersection_a, intersection_b, intersection_t); + } + else + { + return LLTriangleRayIntersect(vert0a, vert1a, vert2a, origa, dira, + intersection_a, intersection_b, intersection_t); + } } +class LLVolumeOctreeListener : public LLOctreeListener +{ +public: + + LLVolumeOctreeListener(LLOctreeNode* node) + { + node->addListener(this); + + mBounds = 
(LLVector4a*) _mm_malloc(sizeof(LLVector4a)*4, 16); + mExtents = mBounds+2; + } + + ~LLVolumeOctreeListener() + { + _mm_free(mBounds); + } + + //LISTENER FUNCTIONS + virtual void handleChildAddition(const LLOctreeNode* parent, + LLOctreeNode* child) + { + new LLVolumeOctreeListener(child); + } + + virtual void handleStateChange(const LLTreeNode* node) { } + virtual void handleChildRemoval(const LLOctreeNode* parent, + const LLOctreeNode* child) { } + virtual void handleInsertion(const LLTreeNode* node, LLVolumeFace::Triangle* tri) { } + virtual void handleRemoval(const LLTreeNode* node, LLVolumeFace::Triangle* tri) { } + virtual void handleDestruction(const LLTreeNode* node) { } + + +public: + LLVector4a* mBounds; // bounding box (center, size) of this node and all its children (tight fit to objects) + LLVector4a* mExtents; // extents (min, max) of this node and all its children +}; + +class LLVolumeOctreeRebound : public LLOctreeTravelerDepthFirst +{ +public: + const LLVolumeFace* mFace; + + LLVolumeOctreeRebound(const LLVolumeFace* face) + { + mFace = face; + } + + virtual void visit(const LLOctreeNode* branch) + { + LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0); + + LLVector4a& min = node->mExtents[0]; + LLVector4a& max = node->mExtents[1]; + + if (branch->getElementCount() != 0) + { + const LLVolumeFace::Triangle* tri = *(branch->getData().begin()); + + min = *(tri->mV[0]); + max = *(tri->mV[0]); + + for (LLOctreeNode::const_element_iter iter = + branch->getData().begin(); iter != branch->getData().end(); ++iter) + { + //stretch by triangles in node + tri = *iter; + + min.setMin(*tri->mV[0]); + min.setMin(*tri->mV[1]); + min.setMin(*tri->mV[2]); + + max.setMax(*tri->mV[0]); + max.setMax(*tri->mV[1]); + max.setMax(*tri->mV[2]); + } + + for (S32 i = 0; i < branch->getChildCount(); ++i) + { //stretch by child extents + LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0); + 
min.setMin(child->mExtents[0]); + max.setMax(child->mExtents[1]); + } + } + else if (branch->getChildCount() != 0) + { + LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); + + min = child->mExtents[0]; + max = child->mExtents[1]; + + for (S32 i = 1; i < branch->getChildCount(); ++i) + { //stretch by child extents + child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0); + min.setMin(child->mExtents[0]); + max.setMax(child->mExtents[1]); + } + } + else + { + llerrs << "WTF? Empty leaf" << llendl; + } + + node->mBounds[0].setAdd(min, max); + node->mBounds[0].mul(0.5f); + + node->mBounds[1].setSub(max,min); + node->mBounds[1].mul(0.5f); + } +}; + + //------------------------------------------------------------------- // statics //------------------------------------------------------------------- @@ -4244,6 +4436,114 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, } +class LLOctreeTriangleRayIntersect : public LLOctreeTraveler +{ +public: + const LLVolumeFace* mFace; + LLVector4a mStart; + LLVector4a mDir; + LLVector4a mEnd; + LLVector3* mIntersection; + LLVector2* mTexCoord; + LLVector3* mNormal; + LLVector3* mBinormal; + F32* mClosestT; + bool mHitFace; + + LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, + const LLVolumeFace* face, F32* closest_t, + LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) + : mFace(face), + mStart(start), + mDir(dir), + mIntersection(intersection), + mTexCoord(tex_coord), + mNormal(normal), + mBinormal(bi_normal), + mClosestT(closest_t), + mHitFace(false) + { + mEnd.setAdd(mStart, mDir); + } + + void traverse(const LLOctreeNode* node) + { + LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) node->getListener(0); + + /*const F32* start = mStart.getF32(); + const F32* end = mEnd.getF32(); + const F32* center = vl->mBounds[0].getF32(); + const F32* size = 
vl->mBounds[1].getF32();*/ + + if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) + { + node->accept(this); + for (S32 i = 0; i < node->getChildCount(); ++i) + { + traverse(node->getChild(i)); + } + } + } + + void visit(const LLOctreeNode* node) + { + for (LLOctreeNode::const_element_iter iter = + node->getData().begin(); iter != node->getData().end(); ++iter) + { + const LLVolumeFace::Triangle* tri = *iter; + + F32 a, b, t; + + if (LLTriangleRayIntersect(*tri->mV[0], *tri->mV[1], *tri->mV[2], + mStart, mDir, a, b, t)) + { + if ((t >= 0.f) && // if hit is after start + (t <= 1.f) && // and before end + (t < *mClosestT)) // and this hit is closer + { + *mClosestT = t; + mHitFace = true; + + if (mIntersection != NULL) + { + LLVector4a intersect = mDir; + intersect.mul(*mClosestT); + intersect.add(mStart); + mIntersection->set(intersect.getF32()); + } + + + if (mTexCoord != NULL) + { + LLVector2* tc = (LLVector2*) mFace->mTexCoords; + *mTexCoord = ((1.f - a - b) * tc[tri->mIndex[0]] + + a * tc[tri->mIndex[1]] + + b * tc[tri->mIndex[2]]); + + } + + if (mNormal != NULL) + { + LLVector4* norm = (LLVector4*) mFace->mNormals; + + *mNormal = ((1.f - a - b) * LLVector3(norm[tri->mIndex[0]]) + + a * LLVector3(norm[tri->mIndex[1]]) + + b * LLVector3(norm[tri->mIndex[2]])); + } + + if (mBinormal != NULL) + { + LLVector4* binormal = (LLVector4*) mFace->mBinormals; + *mBinormal = ((1.f - a - b) * LLVector3(binormal[tri->mIndex[0]]) + + a * LLVector3(binormal[tri->mIndex[1]]) + + b * LLVector3(binormal[tri->mIndex[2]])); + } + } + } + } + } +}; + S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) @@ -4288,66 +4588,19 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en { genBinormals(i); } - - LLVector4a* p = (LLVector4a*) face.mPositions; - for (U32 tri = 0; tri < face.mNumIndices/3; tri++) + if 
(!face.mOctree) { - S32 index1 = face.mIndices[tri*3+0]; - S32 index2 = face.mIndices[tri*3+1]; - S32 index3 = face.mIndices[tri*3+2]; - - F32 a, b, t; + face.createOctree(); + } - if (LLTriangleRayIntersect(p[index1], - p[index2], - p[index3], - start, dir, &a, &b, &t, FALSE)) - { - if ((t >= 0.f) && // if hit is after start - (t <= 1.f) && // and before end - (t < closest_t)) // and this hit is closer - { - closest_t = t; - hit_face = i; - - if (intersection != NULL) - { - LLVector4a intersect = dir; - intersect.mul(closest_t); - intersect.add(start); - intersection->set(intersect.getF32()); - } - - - if (tex_coord != NULL) - { - LLVector2* tc = (LLVector2*) face.mTexCoords; - *tex_coord = ((1.f - a - b) * tc[index1] + - a * tc[index2] + - b * tc[index3]); - - } - - if (normal != NULL) - { - LLVector4* norm = (LLVector4*) face.mNormals; - - *normal = ((1.f - a - b) * LLVector3(norm[index1]) + - a * LLVector3(norm[index2]) + - b * LLVector3(norm[index3])); - } - - if (bi_normal != NULL) - { - LLVector4* binormal = (LLVector4*) face.mBinormals; - *bi_normal = ((1.f - a - b) * LLVector3(binormal[index1]) + - a * LLVector3(binormal[index2]) + - b * LLVector3(binormal[index3])); - } + LLVector4a* p = (LLVector4a*) face.mPositions; - } - } + LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); + intersect.traverse(face.mOctree); + if (intersect.mHitFace) + { + hit_face = i; } } } @@ -5128,13 +5381,29 @@ LLVolumeFace::LLVolumeFace() : mBinormals(NULL), mTexCoords(NULL), mIndices(NULL), - mWeights(NULL) + mWeights(NULL), + mOctree(NULL) { mExtents = (LLVector4a*) _mm_malloc(48, 16); mCenter = mExtents+2; } LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) +: mID(0), + mTypeMask(0), + mBeginS(0), + mBeginT(0), + mNumS(0), + mNumT(0), + mNumVertices(0), + mNumIndices(0), + mPositions(NULL), + mNormals(NULL), + mBinormals(NULL), + mTexCoords(NULL), + mIndices(NULL), + mWeights(NULL), + mOctree(NULL) { mExtents 
= (LLVector4a*) _mm_malloc(48, 16); mCenter = mExtents+2; @@ -5157,13 +5426,9 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) mNumVertices = 0; mNumIndices = 0; - mPositions = NULL; - mNormals = NULL; - mBinormals = NULL; - mTexCoords = NULL; - mWeights = NULL; - mIndices = NULL; + freeData(); + LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 12); resizeVertices(src.mNumVertices); @@ -5179,6 +5444,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, vert_size); + if (src.mBinormals) { allocateBinormals(src.mNumVertices); @@ -5216,18 +5482,38 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) } LLVolumeFace::~LLVolumeFace() +{ + _mm_free(mExtents); + mExtents = NULL; + + freeData(); +} + +void LLVolumeFace::freeData() { _mm_free(mPositions); + mPositions = NULL; _mm_free(mNormals); + mNormals = NULL; _mm_free(mTexCoords); + mTexCoords = NULL; _mm_free(mIndices); + mIndices = NULL; _mm_free(mBinormals); + mBinormals = NULL; _mm_free(mWeights); - _mm_free(mExtents); + mWeights = NULL; + + delete mOctree; + mOctree = NULL; } BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) { + //tree for this face is no longer valid + delete mOctree; + mOctree = NULL; + if (mTypeMask & CAP_MASK) { return createCap(volume, partial_build); @@ -5250,6 +5536,18 @@ void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv) cv.mTexCoord = mTexCoords[index]; } +bool LLVolumeFace::VertexMapData::operator==(const LLVolumeFace::VertexData& rhs) const +{ + return getPosition().equal3(rhs.getPosition()) && + mTexCoord == rhs.mTexCoord && + getNormal().equal3(rhs.getNormal()); +} + +bool LLVolumeFace::VertexMapData::ComparePosition::operator()(const LLVector4a& a, const LLVector4a& b) const +{ + return a.less3(b); +} + void LLVolumeFace::optimize(F32 
angle_cutoff) { LLVolumeFace new_face; @@ -5305,6 +5603,65 @@ void LLVolumeFace::optimize(F32 angle_cutoff) swapData(new_face); } + +void LLVolumeFace::createOctree() +{ + mOctree = new LLOctreeRoot(LLVector3d(0,0,0), LLVector3d(1,1,1), NULL); + new LLVolumeOctreeListener(mOctree); + + for (U32 i = 0; i < mNumIndices; i+= 3) + { + Triangle* tri = new Triangle(); + + const LLVector4a& v0 = mPositions[mIndices[i]]; + const LLVector4a& v1 = mPositions[mIndices[i+1]]; + const LLVector4a& v2 = mPositions[mIndices[i+2]]; + + tri->mV[0] = &v0; + tri->mV[1] = &v1; + tri->mV[2] = &v2; + + tri->mIndex[0] = mIndices[i]; + tri->mIndex[1] = mIndices[i+1]; + tri->mIndex[2] = mIndices[i+2]; + + LLVector4a min = v0; + min.setMin(v1); + min.setMin(v2); + + LLVector4a max = v0; + max.setMax(v1); + max.setMax(v2); + + LLVector4a center; + center.setAdd(min, max); + center.mul(0.5f); + + + tri->mPositionGroup.setVec(center[0], center[1], center[2]); + + LLVector4a size; + size.setSub(max,min); + + tri->mRadius = size.length3() * 0.5f; + + mOctree->insert(tri); + } + + LLVolumeOctreeRebound rebound(this); + rebound.traverse(mOctree); +} + +const LLVector3d& LLVolumeFace::Triangle::getPositionGroup() const +{ + return mPositionGroup; +} + +const F64& LLVolumeFace::Triangle::getBinRadius() const +{ + return mRadius; +} + void LLVolumeFace::swapData(LLVolumeFace& rhs) { llswap(rhs.mPositions, mPositions); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 7c63266aab..a40a21b405 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -40,6 +40,9 @@ class LLPathParams; class LLVolumeParams; class LLProfile; class LLPath; + +template class LLOctreeNode; +class LLVector4a; class LLVolumeFace; class LLVolume; @@ -49,15 +52,14 @@ class LLVolume; //#include "vmath.h" #include "v2math.h" #include "v3math.h" +#include "v3dmath.h" #include "v4math.h" -#include "llvector4a.h" #include "llquaternion.h" #include "llstrider.h" #include "v4coloru.h" #include 
"llrefcount.h" #include "llfile.h" - //============================================================================ const S32 MIN_DETAIL_FACES = 6; @@ -830,6 +832,9 @@ public: LLVolumeFace& operator=(const LLVolumeFace& rhs); ~LLVolumeFace(); +private: + void freeData(); +public: BOOL create(LLVolume* volume, BOOL partial_build = FALSE); void createBinormals(); @@ -855,26 +860,19 @@ public: public: U16 mIndex; - bool operator==(const LLVolumeFace::VertexData& rhs) const - { - return getPosition().equal3(rhs.getPosition()) && - mTexCoord == rhs.mTexCoord && - getNormal().equal3(rhs.getNormal()); - } + bool operator==(const LLVolumeFace::VertexData& rhs) const; struct ComparePosition { - bool operator()(const LLVector4a& a, const LLVector4a& b) const - { - return a.less3(b); - } + bool operator()(const LLVector4a& a, const LLVector4a& b) const; }; typedef std::map, VertexMapData::ComparePosition > PointMap; }; void optimize(F32 angle_cutoff = 2.f); - + void createOctree(); + enum { SINGLE_MASK = 0x0001, @@ -919,6 +917,21 @@ public: // mWeights.size() should be empty or match mVertices.size() LLVector4a* mWeights; + class Triangle : public LLRefCount + { + public: + const LLVector4a* mV[3]; + U16 mIndex[3]; + + LLVector3d mPositionGroup; + F64 mRadius; + + virtual const LLVector3d& getPositionGroup() const; + virtual const F64& getBinRadius() const; + }; + + LLOctreeNode* mOctree; + private: BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE); BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE); @@ -1084,10 +1097,12 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, - F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL 
two_sided); + F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, - F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided); + F32& intersection_a, F32& intersection_b, F32& intersection_t); +BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, + F32& intersection_a, F32& intersection_b, F32& intersection_t); diff --git a/indra/newview/llhudicon.cpp b/indra/newview/llhudicon.cpp index 3c5a4de7f8..63040904df 100644 --- a/indra/newview/llhudicon.cpp +++ b/indra/newview/llhudicon.cpp @@ -286,7 +286,6 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en LLVector4a upper_right; upper_right.setAdd(lower_right, y_scalea); - F32 t = 0.f; LLVector4a enda; enda.load3(end.mV); LLVector4a starta; @@ -294,8 +293,10 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en LLVector4a dir; dir.setSub(enda, starta); - if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, starta, dir, NULL, NULL, &t, FALSE) || - LLTriangleRayIntersect(upper_left, lower_left, lower_right, starta, dir, NULL, NULL, &t, FALSE)) + F32 a,b,t; + + if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, starta, dir, a,b,t) || + LLTriangleRayIntersect(upper_left, lower_left, lower_right, starta, dir, a,b,t)) { if (intersection) { diff --git a/indra/newview/llhudtext.cpp b/indra/newview/llhudtext.cpp index 9ed5d13831..7f9eddc837 100644 --- a/indra/newview/llhudtext.cpp +++ b/indra/newview/llhudtext.cpp @@ -207,10 +207,11 @@ BOOL LLHUDText::lineSegmentIntersect(const LLVector3& start, const LLVector3& en } LLVector3 dir = end-start; - F32 t = 0.f; + F32 a,b,t; - if (LLTriangleRayIntersect(v[0], v[1], v[2], start, dir, NULL, NULL, &t, 
FALSE) || - LLTriangleRayIntersect(v[2], v[3], v[0], start, dir, NULL, NULL, &t, FALSE) ) + + if (LLTriangleRayIntersect(v[0], v[1], v[2], start, dir, a, b, t, FALSE) || + LLTriangleRayIntersect(v[2], v[3], v[0], start, dir, a, b, t, FALSE) ) { if (t <= 1.f) { diff --git a/indra/newview/llpanelprimmediacontrols.cpp b/indra/newview/llpanelprimmediacontrols.cpp index a5804aa04e..98fbebbc5d 100644 --- a/indra/newview/llpanelprimmediacontrols.cpp +++ b/indra/newview/llpanelprimmediacontrols.cpp @@ -65,6 +65,7 @@ #include "llweb.h" #include "llwindow.h" #include "llfloatertools.h" // to enable hide if build tools are up +#include "llvector4a.h" // Functions pulled from pipeline.cpp glh::matrix4f glh_get_current_modelview(); diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 470c332b42..60e704d360 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -2769,6 +2769,19 @@ void renderLights(LLDrawable* drawablep) } } +class LLRenderOctree : public LLOctreeTraveler +{ +public: + void visit(const LLOctreeNode* branch) + { + const LLVector3d& c = branch->getCenter(); + const LLVector3d& s = branch->getSize(); + + LLVector3 pos((F32) c.mdV[0], (F32) c.mdV[1], (F32) c.mdV[2]); + LLVector3 size((F32) s.mdV[0], (F32) s.mdV[1], (F32) s.mdV[2]); + drawBoxOutline(pos, size); + } +}; void renderRaycast(LLDrawable* drawablep) { @@ -2787,6 +2800,23 @@ void renderRaycast(LLDrawable* drawablep) glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); pushVerts(drawablep->getFace(gDebugRaycastFaceHit), LLVertexBuffer::MAP_VERTEX); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + + LLVOVolume* vobj = drawablep->getVOVolume(); + LLVolume* volume = vobj->getVolume(); + if (volume && volume->getNumVolumeFaces() > gDebugRaycastFaceHit) + { + const LLVolumeFace& face = volume->getVolumeFace(gDebugRaycastFaceHit); + if (!face.mOctree) + { + ((LLVolumeFace*) &face)->createOctree(); + } + + gGL.pushMatrix(); + glMultMatrixf((F32*) 
vobj->getRelativeXform().mMatrix); + LLRenderOctree render; + render.traverse(face.mOctree); + gGL.popMatrix(); + } } else if (drawablep->isAvatar()) { diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp index 91c9b762c5..fe1e36cbe8 100644 --- a/indra/newview/llvograss.cpp +++ b/indra/newview/llvograss.cpp @@ -647,23 +647,23 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en U32 idx0 = 0,idx1 = 0,idx2 = 0; - if (LLTriangleRayIntersect(v[0], v[1], v[2], start, dir, &a, &b, &t, FALSE)) + if (LLTriangleRayIntersect(v[0], v[1], v[2], start, dir, a, b, t, FALSE)) { hit = TRUE; idx0 = 0; idx1 = 1; idx2 = 2; } - else if (LLTriangleRayIntersect(v[1], v[3], v[2], start, dir, &a, &b, &t, FALSE)) + else if (LLTriangleRayIntersect(v[1], v[3], v[2], start, dir, a, b, t, FALSE)) { hit = TRUE; idx0 = 1; idx1 = 3; idx2 = 2; } - else if (LLTriangleRayIntersect(v[2], v[1], v[0], start, dir, &a, &b, &t, FALSE)) + else if (LLTriangleRayIntersect(v[2], v[1], v[0], start, dir, a, b, t, FALSE)) { normal1 = -normal1; hit = TRUE; idx0 = 2; idx1 = 1; idx2 = 0; } - else if (LLTriangleRayIntersect(v[2], v[3], v[1], start, dir, &a, &b, &t, FALSE)) + else if (LLTriangleRayIntersect(v[2], v[3], v[1], start, dir, a, b, t, FALSE)) { normal1 = -normal1; hit = TRUE; -- cgit v1.2.3 From 9a869d630162292864e01fdd1707efc609fbd6b4 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Sat, 29 May 2010 19:55:13 -0500 Subject: Octree triven raycast works, time to profile. 
--- indra/llmath/CMakeLists.txt | 2 + indra/llmath/lltreenode.h | 3 + indra/llmath/llvolume.cpp | 202 ++--------------------------------- indra/llmath/llvolume.h | 2 + indra/newview/llspatialpartition.cpp | 28 +++-- indra/newview/llviewerwindow.cpp | 21 +++- indra/newview/llviewerwindow.h | 6 +- 7 files changed, 58 insertions(+), 206 deletions(-) diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index 367486eee7..dda07133d5 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -22,6 +22,7 @@ set(llmath_SOURCE_FILES llsphere.cpp llvolume.cpp llvolumemgr.cpp + llvolumeoctree.cpp llsdutil_math.cpp m3math.cpp m4math.cpp @@ -66,6 +67,7 @@ set(llmath_HEADER_FILES llmatrix4a.h llvolume.h llvolumemgr.h + llvolumeoctree.h llsdutil_math.h m3math.h m4math.h diff --git a/indra/llmath/lltreenode.h b/indra/llmath/lltreenode.h index ee9836241a..e6d2521b2a 100644 --- a/indra/llmath/lltreenode.h +++ b/indra/llmath/lltreenode.h @@ -34,6 +34,9 @@ #include "stdtypes.h" #include "xform.h" +#include "llpointer.h" +#include "llrefcount.h" + #include template class LLTreeNode; diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index d261811aa2..c4172de651 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -48,6 +48,7 @@ #include "lloctree.h" #include "lldarray.h" #include "llvolume.h" +#include "llvolumeoctree.h" #include "llstl.h" #include "llsdserialize.h" #include "llvector4a.h" @@ -133,50 +134,6 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent return true; } -BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size) -{ - LLVector4a fAWdU; - LLVector4a dir; - LLVector4a diff; - - dir.setSub(end, start); - dir.mul(0.5f); - - diff.setAdd(end,start); - diff.mul(0.5f); - diff.sub(center); - fAWdU.setAbs(dir); - - LLVector4a rhs; - rhs.setAdd(size, fAWdU); - - LLVector4a lhs; - lhs.setAbs(diff); - - S32 
grt = lhs.greaterThan4(rhs).getComparisonMask(); - - if (grt & 0x7) - { - return false; - } - - LLVector4a f; - f.setCross3(dir, diff); - f.setAbs(f); - - LLVector4a v0; v0.mQ = _mm_shuffle_ps(size.mQ, size.mQ, _MM_SHUFFLE(3,1,0,0)); - LLVector4a v1; v1.mQ = _mm_shuffle_ps(fAWdU.mQ, fAWdU.mQ, _MM_SHUFFLE(3,2,2,1)); - lhs.setMul(v0, v1); - - v0.mQ = _mm_shuffle_ps(size.mQ, size.mQ, _MM_SHUFFLE(3,2,2,1)); - v1.mQ = _mm_shuffle_ps(fAWdU.mQ, fAWdU.mQ, _MM_SHUFFLE(3,1,0,0)); - rhs.setMul(v0, v1); - rhs.add(lhs); - - grt = f.greaterThan4(rhs).getComparisonMask(); - - return (grt & 0x7) ? false : true; -} // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir. @@ -203,7 +160,7 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co LLVector4a det; det.setAllDot3(edge1, pvec); - if (det.greaterEqual4(LLVector4a::getApproximatelyZero()).getComparisonMask()) + if (det.greaterEqual4(LLVector4a::getApproximatelyZero()).getComparisonMask() & 0x7) { /* calculate distance from vert0 to ray origin */ LLVector4a tvec; @@ -213,8 +170,8 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co LLVector4a u; u.setAllDot3(tvec,pvec); - if (u.greaterEqual4(LLVector4a::getZero()).getComparisonMask() && - u.lessEqual4(det).getComparisonMask()) + if ((u.greaterEqual4(LLVector4a::getZero()).getComparisonMask() & 0x7) && + (u.lessEqual4(det).getComparisonMask() & 0x7)) { /* prepare to test V parameter */ LLVector4a qvec; @@ -230,10 +187,10 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co LLVector4a sum_uv; sum_uv.setAdd(u, v); - S32 v_gequal = v.greaterEqual4(LLVector4a::getZero()).getComparisonMask(); - S32 sum_lequal = sum_uv.lessEqual4(det).getComparisonMask(); + S32 v_gequal = v.greaterEqual4(LLVector4a::getZero()).getComparisonMask() & 0x7; + S32 sum_lequal = sum_uv.lessEqual4(det).getComparisonMask() & 0x7; - if (v_gequal && sum_lequal) + if (v_gequal && 
sum_lequal) { /* calculate t, scale parameters, ray intersects triangle */ LLVector4a t; @@ -338,44 +295,6 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons } } - -class LLVolumeOctreeListener : public LLOctreeListener -{ -public: - - LLVolumeOctreeListener(LLOctreeNode* node) - { - node->addListener(this); - - mBounds = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*4, 16); - mExtents = mBounds+2; - } - - ~LLVolumeOctreeListener() - { - _mm_free(mBounds); - } - - //LISTENER FUNCTIONS - virtual void handleChildAddition(const LLOctreeNode* parent, - LLOctreeNode* child) - { - new LLVolumeOctreeListener(child); - } - - virtual void handleStateChange(const LLTreeNode* node) { } - virtual void handleChildRemoval(const LLOctreeNode* parent, - const LLOctreeNode* child) { } - virtual void handleInsertion(const LLTreeNode* node, LLVolumeFace::Triangle* tri) { } - virtual void handleRemoval(const LLTreeNode* node, LLVolumeFace::Triangle* tri) { } - virtual void handleDestruction(const LLTreeNode* node) { } - - -public: - LLVector4a* mBounds; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector4a* mExtents; // extents (min, max) of this node and all its children -}; - class LLVolumeOctreeRebound : public LLOctreeTravelerDepthFirst { public: @@ -4436,113 +4355,6 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, } -class LLOctreeTriangleRayIntersect : public LLOctreeTraveler -{ -public: - const LLVolumeFace* mFace; - LLVector4a mStart; - LLVector4a mDir; - LLVector4a mEnd; - LLVector3* mIntersection; - LLVector2* mTexCoord; - LLVector3* mNormal; - LLVector3* mBinormal; - F32* mClosestT; - bool mHitFace; - - LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, - const LLVolumeFace* face, F32* closest_t, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) - : mFace(face), - mStart(start), - mDir(dir), - 
mIntersection(intersection), - mTexCoord(tex_coord), - mNormal(normal), - mBinormal(bi_normal), - mClosestT(closest_t), - mHitFace(false) - { - mEnd.setAdd(mStart, mDir); - } - - void traverse(const LLOctreeNode* node) - { - LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) node->getListener(0); - - /*const F32* start = mStart.getF32(); - const F32* end = mEnd.getF32(); - const F32* center = vl->mBounds[0].getF32(); - const F32* size = vl->mBounds[1].getF32();*/ - - if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) - { - node->accept(this); - for (S32 i = 0; i < node->getChildCount(); ++i) - { - traverse(node->getChild(i)); - } - } - } - - void visit(const LLOctreeNode* node) - { - for (LLOctreeNode::const_element_iter iter = - node->getData().begin(); iter != node->getData().end(); ++iter) - { - const LLVolumeFace::Triangle* tri = *iter; - - F32 a, b, t; - - if (LLTriangleRayIntersect(*tri->mV[0], *tri->mV[1], *tri->mV[2], - mStart, mDir, a, b, t)) - { - if ((t >= 0.f) && // if hit is after start - (t <= 1.f) && // and before end - (t < *mClosestT)) // and this hit is closer - { - *mClosestT = t; - mHitFace = true; - - if (mIntersection != NULL) - { - LLVector4a intersect = mDir; - intersect.mul(*mClosestT); - intersect.add(mStart); - mIntersection->set(intersect.getF32()); - } - - - if (mTexCoord != NULL) - { - LLVector2* tc = (LLVector2*) mFace->mTexCoords; - *mTexCoord = ((1.f - a - b) * tc[tri->mIndex[0]] + - a * tc[tri->mIndex[1]] + - b * tc[tri->mIndex[2]]); - - } - - if (mNormal != NULL) - { - LLVector4* norm = (LLVector4*) mFace->mNormals; - - *mNormal = ((1.f - a - b) * LLVector3(norm[tri->mIndex[0]]) + - a * LLVector3(norm[tri->mIndex[1]]) + - b * LLVector3(norm[tri->mIndex[2]])); - } - - if (mBinormal != NULL) - { - LLVector4* binormal = (LLVector4*) mFace->mBinormals; - *mBinormal = ((1.f - a - b) * LLVector3(binormal[tri->mIndex[0]]) + - a * LLVector3(binormal[tri->mIndex[1]]) + - b * 
LLVector3(binormal[tri->mIndex[2]])); - } - } - } - } - } -}; S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index a40a21b405..0ae8aa19ca 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -42,6 +42,7 @@ class LLProfile; class LLPath; template class LLOctreeNode; + class LLVector4a; class LLVolumeFace; class LLVolume; @@ -1095,6 +1096,7 @@ void calc_binormal_from_triangle( BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size); BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); +BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size); BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 60e704d360..355173772b 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -38,6 +38,7 @@ #include "llviewerobjectlist.h" #include "llvovolume.h" #include "llvolume.h" +#include "llvolumeoctree.h" #include "llviewercamera.h" #include "llface.h" #include "llviewercontrol.h" @@ -2769,17 +2770,26 @@ void renderLights(LLDrawable* drawablep) } } -class LLRenderOctree : public LLOctreeTraveler +class LLRenderOctreeRaycast : public LLOctreeTriangleRayIntersect { public: + + LLRenderOctreeRaycast(const LLVector3& start, const LLVector3& end) + { + mStart.load3(start.mV); + mEnd.load3(end.mV); + mDir.setSub(mEnd, mStart); + } + void visit(const LLOctreeNode* branch) { - const LLVector3d& c = branch->getCenter(); - const LLVector3d& s = branch->getSize(); + 
LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) branch->getListener(0); - LLVector3 pos((F32) c.mdV[0], (F32) c.mdV[1], (F32) c.mdV[2]); - LLVector3 size((F32) s.mdV[0], (F32) s.mdV[1], (F32) s.mdV[2]); - drawBoxOutline(pos, size); + LLVector3 center, size; + center.set(vl->mBounds[0].getF32()); + size.set(vl->mBounds[1].getF32()); + + drawBoxOutline(center, size); } }; @@ -2813,7 +2823,11 @@ void renderRaycast(LLDrawable* drawablep) gGL.pushMatrix(); glMultMatrixf((F32*) vobj->getRelativeXform().mMatrix); - LLRenderOctree render; + LLVector3 start, end; + start = vobj->agentPositionToVolume(gDebugRaycastStart); + end = vobj->agentPositionToVolume(gDebugRaycastEnd); + + LLRenderOctreeRaycast render(start, end); render.traverse(face.mOctree); gGL.popMatrix(); } diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp index 2422995288..0564f02ce5 100644 --- a/indra/newview/llviewerwindow.cpp +++ b/indra/newview/llviewerwindow.cpp @@ -231,6 +231,8 @@ LLVector2 gDebugRaycastTexCoord; LLVector3 gDebugRaycastNormal; LLVector3 gDebugRaycastBinormal; S32 gDebugRaycastFaceHit; +LLVector3 gDebugRaycastStart; +LLVector3 gDebugRaycastEnd; // HUD display lines in lower right BOOL gDisplayWindInfo = FALSE; @@ -2529,7 +2531,9 @@ void LLViewerWindow::updateUI() &gDebugRaycastIntersection, &gDebugRaycastTexCoord, &gDebugRaycastNormal, - &gDebugRaycastBinormal); + &gDebugRaycastBinormal, + &gDebugRaycastStart, + &gDebugRaycastEnd); } updateMouseDelta(); @@ -3445,7 +3449,9 @@ LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 de LLVector3 *intersection, LLVector2 *uv, LLVector3 *normal, - LLVector3 *binormal) + LLVector3 *binormal, + LLVector3* start, + LLVector3* end) { S32 x = mouse_x; S32 y = mouse_y; @@ -3477,7 +3483,16 @@ LLViewerObject* LLViewerWindow::cursorIntersect(S32 mouse_x, S32 mouse_y, F32 de LLVector3 mouse_world_start = mouse_point_global; LLVector3 mouse_world_end = mouse_point_global + mouse_direction_global 
* depth; - + if (start) + { + *start = mouse_world_start; + } + + if (end) + { + *end = mouse_world_end; + } + LLViewerObject* found = NULL; if (this_object) // check only this object diff --git a/indra/newview/llviewerwindow.h b/indra/newview/llviewerwindow.h index 410445d97f..156a1ff8ad 100644 --- a/indra/newview/llviewerwindow.h +++ b/indra/newview/llviewerwindow.h @@ -361,7 +361,9 @@ public: LLVector3 *intersection = NULL, LLVector2 *uv = NULL, LLVector3 *normal = NULL, - LLVector3 *binormal = NULL); + LLVector3 *binormal = NULL, + LLVector3* start = NULL, + LLVector3* end = NULL); // Returns a pointer to the last object hit @@ -507,6 +509,8 @@ extern LLVector2 gDebugRaycastTexCoord; extern LLVector3 gDebugRaycastNormal; extern LLVector3 gDebugRaycastBinormal; extern S32 gDebugRaycastFaceHit; +extern LLVector3 gDebugRaycastStart; +extern LLVector3 gDebugRaycastEnd; extern S32 CHAT_BAR_HEIGHT; -- cgit v1.2.3 From 26ba00b5554d20ee958693ced87b36fa7f6e3d99 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 3 Jun 2010 12:52:28 -0500 Subject: Vectorized octree and much of llspatialpartition and lldrawable. Octree driven raycast. 
--- indra/llmath/llcamera.cpp | 216 ++++++--------- indra/llmath/llcamera.h | 29 +- indra/llmath/lloctree.h | 239 ++++++++-------- indra/llmath/llvolume.cpp | 31 +-- indra/llmath/llvolume.h | 16 +- indra/llrender/llvertexbuffer.h | 11 + indra/llui/lllineeditor.cpp | 9 +- indra/newview/lldrawable.cpp | 163 ++++++----- indra/newview/lldrawable.h | 34 ++- indra/newview/llface.cpp | 164 +++++++---- indra/newview/llface.h | 15 +- indra/newview/llflexibleobject.cpp | 6 +- indra/newview/llflexibleobject.h | 2 +- indra/newview/llselectmgr.cpp | 16 +- indra/newview/llspatialpartition.cpp | 520 ++++++++++++++++++++++------------- indra/newview/llspatialpartition.h | 80 ++++-- indra/newview/llsurfacepatch.cpp | 6 +- indra/newview/llviewerdisplay.cpp | 7 +- indra/newview/llviewerobject.cpp | 40 ++- indra/newview/llviewerobject.h | 4 +- indra/newview/llviewerpartsim.cpp | 4 +- indra/newview/llvoavatar.cpp | 100 ++++--- indra/newview/llvoavatar.h | 10 +- indra/newview/llvopartgroup.cpp | 10 +- indra/newview/llvopartgroup.h | 2 +- indra/newview/llvosurfacepatch.cpp | 19 +- indra/newview/llvosurfacepatch.h | 2 +- indra/newview/llvotree.cpp | 17 +- indra/newview/llvotree.h | 2 +- indra/newview/llvovolume.cpp | 35 +-- indra/newview/llvovolume.h | 6 +- indra/newview/llvowater.cpp | 20 +- indra/newview/llvowater.h | 2 +- indra/newview/pipeline.cpp | 257 +++++++++++++++-- indra/newview/pipeline.h | 9 + 35 files changed, 1333 insertions(+), 770 deletions(-) diff --git a/indra/llmath/llcamera.cpp b/indra/llmath/llcamera.cpp index 487ed6451f..6b56e4870e 100644 --- a/indra/llmath/llcamera.cpp +++ b/indra/llmath/llcamera.cpp @@ -48,10 +48,10 @@ LLCamera::LLCamera() : mPlaneCount(6), mFrustumCornerDist(0.f) { + alignPlanes(); calculateFrustumPlanes(); } - LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane) : LLCoordFrame(), mViewHeightInPixels(view_height_in_pixels), @@ -59,6 +59,7 @@ LLCamera::LLCamera(F32 vertical_fov_rads, 
F32 aspect_ratio, S32 view_height_in_p mPlaneCount(6), mFrustumCornerDist(0.f) { + alignPlanes(); mAspect = llclamp(aspect_ratio, MIN_ASPECT_RATIO, MAX_ASPECT_RATIO); mNearPlane = llclamp(near_plane, MIN_NEAR_PLANE, MAX_NEAR_PLANE); if(far_plane < 0) far_plane = DEFAULT_FAR_PLANE; @@ -67,6 +68,23 @@ LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_p setView(vertical_fov_rads); } +LLCamera::~LLCamera() +{ + +} + +const LLCamera& LLCamera::operator=(const LLCamera& rhs) +{ + memcpy(this, &rhs, sizeof(LLCamera)); + alignPlanes(); + LLVector4a::memcpyNonAliased16((F32*) mAgentPlanes, (F32*) rhs.mAgentPlanes, 4*7); + return *this; +} + +void LLCamera::alignPlanes() +{ + mAgentPlanes = (LLPlane*) LL_NEXT_ALIGNED_ADDRESS(mAgentPlaneBuffer); +} // ---------------- LLCamera::getFoo() member functions ---------------- @@ -91,8 +109,8 @@ F32 LLCamera::getMaxView() const void LLCamera::setUserClipPlane(LLPlane plane) { mPlaneCount = 7; - mAgentPlanes[6].p = plane; - mAgentPlanes[6].mask = calcPlaneMask(plane); + mAgentPlanes[6] = plane; + mPlaneMask[6] = calcPlaneMask(plane); } void LLCamera::disableUserClipPlane() @@ -164,129 +182,66 @@ size_t LLCamera::readFrustumFromBuffer(const char *buffer) // ---------------- test methods ---------------- -S32 LLCamera::AABBInFrustum(const LLVector3 ¢er, const LLVector3& radius) -{ - static const LLVector3 scaler[] = { - LLVector3(-1,-1,-1), - LLVector3( 1,-1,-1), - LLVector3(-1, 1,-1), - LLVector3( 1, 1,-1), - LLVector3(-1,-1, 1), - LLVector3( 1,-1, 1), - LLVector3(-1, 1, 1), - LLVector3( 1, 1, 1) +S32 LLCamera::AABBInFrustum(const LLVector4a ¢er, const LLVector4a& radius) +{ + static const LLVector4a scaler[] = { + LLVector4a(-1,-1,-1), + LLVector4a( 1,-1,-1), + LLVector4a(-1, 1,-1), + LLVector4a( 1, 1,-1), + LLVector4a(-1,-1, 1), + LLVector4a( 1,-1, 1), + LLVector4a(-1, 1, 1), + LLVector4a( 1, 1, 1) }; U8 mask = 0; S32 result = 2; - /*if (mFrustumCornerDist > 0.f && radius.magVecSquared() > 
mFrustumCornerDist * mFrustumCornerDist) - { //box is larger than frustum, check frustum quads against box planes - - static const LLVector3 dir[] = - { - LLVector3(1, 0, 0), - LLVector3(-1, 0, 0), - LLVector3(0, 1, 0), - LLVector3(0, -1, 0), - LLVector3(0, 0, 1), - LLVector3(0, 0, -1) - }; - - U32 quads[] = + for (U32 i = 0; i < mPlaneCount; i++) + { + mask = mPlaneMask[i]; + if (mask == 0xff) { - 0, 1, 2, 3, - 0, 1, 5, 4, - 2, 3, 7, 6, - 3, 0, 7, 4, - 1, 2, 6, 4, - 4, 5, 6, 7 - }; - - result = 0; - - BOOL total_inside = TRUE; - for (U32 i = 0; i < 6; i++) - { - LLVector3 p = center + radius.scaledVec(dir[i]); - F32 d = -p*dir[i]; - - for (U32 j = 0; j < 6; j++) - { //for each quad - F32 dist = mAgentFrustum[quads[j*4+0]]*dir[i] + d; - if (dist > 0) - { //at least one frustum point is outside the AABB - total_inside = FALSE; - for (U32 k = 1; k < 4; k++) - { //for each other point on quad - if ( mAgentFrustum[quads[j*4+k]]*dir[i]+d <= 0.f) - { //quad is straddling some plane of AABB - return 1; - } - } - } - else - { - for (U32 k = 1; k < 4; k++) - { - if (mAgentFrustum[quads[j*4+k]]*dir[i]+d > 0.f) - { - return 1; - } - } - } - } + continue; } - if (total_inside) + const LLPlane& p = mAgentPlanes[i]; + const LLVector4a& n = reinterpret_cast(p); + float d = p.mV[3]; + LLVector4a rscale; + rscale.setMul(radius, scaler[mask]); + + LLVector4a minp, maxp; + minp.setSub(center, rscale); + maxp.setAdd(center, rscale); + + if (n.dot3(minp) > -d) { - result = 1; + return 0; } - } - else*/ - { - for (U32 i = 0; i < mPlaneCount; i++) + + if (n.dot3(maxp) > -d) { - mask = mAgentPlanes[i].mask; - if (mask == 0xff) - { - continue; - } - LLPlane p = mAgentPlanes[i].p; - LLVector3 n = LLVector3(p); - float d = p.mV[3]; - LLVector3 rscale = radius.scaledVec(scaler[mask]); - - LLVector3 minp = center - rscale; - LLVector3 maxp = center + rscale; - - if (n * minp > -d) - { - return 0; - } - - if (n * maxp > -d) - { - result = 1; - } + result = 1; } } - return result; } -S32 
LLCamera::AABBInFrustumNoFarClip(const LLVector3 ¢er, const LLVector3& radius) -{ - static const LLVector3 scaler[] = { - LLVector3(-1,-1,-1), - LLVector3( 1,-1,-1), - LLVector3(-1, 1,-1), - LLVector3( 1, 1,-1), - LLVector3(-1,-1, 1), - LLVector3( 1,-1, 1), - LLVector3(-1, 1, 1), - LLVector3( 1, 1, 1) + +S32 LLCamera::AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius) +{ + static const LLVector4a scaler[] = { + LLVector4a(-1,-1,-1), + LLVector4a( 1,-1,-1), + LLVector4a(-1, 1,-1), + LLVector4a( 1, 1,-1), + LLVector4a(-1,-1, 1), + LLVector4a( 1,-1, 1), + LLVector4a(-1, 1, 1), + LLVector4a( 1, 1, 1) }; U8 mask = 0; @@ -299,25 +254,28 @@ S32 LLCamera::AABBInFrustumNoFarClip(const LLVector3 ¢er, const LLVector3& r continue; } - mask = mAgentPlanes[i].mask; + mask = mPlaneMask[i]; if (mask == 0xff) { continue; } - LLPlane p = mAgentPlanes[i].p; - LLVector3 n = LLVector3(p); + + const LLPlane& p = mAgentPlanes[i]; + const LLVector4a& n = reinterpret_cast(p); float d = p.mV[3]; - LLVector3 rscale = radius.scaledVec(scaler[mask]); + LLVector4a rscale; + rscale.setMul(radius, scaler[mask]); - LLVector3 minp = center - rscale; - LLVector3 maxp = center + rscale; + LLVector4a minp, maxp; + minp.setSub(center, rscale); + maxp.setAdd(center, rscale); - if (n * minp > -d) + if (n.dot3(minp) > -d) { return 0; } - if (n * maxp > -d) + if (n.dot3(maxp) > -d) { result = 1; } @@ -447,12 +405,12 @@ int LLCamera::sphereInFrustum(const LLVector3 &sphere_center, const F32 radius) int res = 2; for (int i = 0; i < 6; i++) { - if (mAgentPlanes[i].mask == 0xff) + if (mPlaneMask[i] == 0xff) { continue; } - float d = mAgentPlanes[i].p.dist(sphere_center); + float d = mAgentPlanes[i].dist(sphere_center); if (d > radius) { @@ -644,12 +602,14 @@ void LLCamera::ignoreAgentFrustumPlane(S32 idx) return; } - mAgentPlanes[idx].mask = 0xff; - mAgentPlanes[idx].p.clearVec(); + mPlaneMask[idx] = 0xff; + mAgentPlanes[idx].clearVec(); } void 
LLCamera::calcAgentFrustumPlanes(LLVector3* frust) { + alignPlanes(); + for (int i = 0; i < 8; i++) { mAgentFrustum[i] = frust[i]; @@ -662,27 +622,27 @@ void LLCamera::calcAgentFrustumPlanes(LLVector3* frust) //order of planes is important, keep most likely to fail in the front of the list //near - frust[0], frust[1], frust[2] - mAgentPlanes[2].p = planeFromPoints(frust[0], frust[1], frust[2]); + mAgentPlanes[2] = planeFromPoints(frust[0], frust[1], frust[2]); //far - mAgentPlanes[5].p = planeFromPoints(frust[5], frust[4], frust[6]); + mAgentPlanes[5] = planeFromPoints(frust[5], frust[4], frust[6]); //left - mAgentPlanes[0].p = planeFromPoints(frust[4], frust[0], frust[7]); + mAgentPlanes[0] = planeFromPoints(frust[4], frust[0], frust[7]); //right - mAgentPlanes[1].p = planeFromPoints(frust[1], frust[5], frust[6]); + mAgentPlanes[1] = planeFromPoints(frust[1], frust[5], frust[6]); //top - mAgentPlanes[4].p = planeFromPoints(frust[3], frust[2], frust[6]); + mAgentPlanes[4] = planeFromPoints(frust[3], frust[2], frust[6]); //bottom - mAgentPlanes[3].p = planeFromPoints(frust[1], frust[0], frust[4]); + mAgentPlanes[3] = planeFromPoints(frust[1], frust[0], frust[4]); //cache plane octant facing mask for use in AABBInFrustum for (U32 i = 0; i < mPlaneCount; i++) { - mAgentPlanes[i].mask = calcPlaneMask(mAgentPlanes[i].p); + mPlaneMask[i] = calcPlaneMask(mAgentPlanes[i]); } } diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index d6c5f7bbb1..c40e819dcf 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -37,6 +37,7 @@ #include "llmath.h" #include "llcoordframe.h" #include "llplane.h" +#include "llvector4a.h" const F32 DEFAULT_FIELD_OF_VIEW = 60.f * DEG_TO_RAD; const F32 DEFAULT_ASPECT_RATIO = 640.f / 480.f; @@ -79,6 +80,14 @@ class LLCamera : public LLCoordFrame { public: + + LLCamera(const LLCamera& rhs) + { + *this = rhs; + } + + const LLCamera& operator=(const LLCamera& rhs); + enum { PLANE_LEFT = 0, PLANE_RIGHT = 1, @@ -129,13 +138,9 @@ 
private: LLPlane mWorldPlanes[PLANE_NUM]; LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; - struct frustum_plane - { - frustum_plane() : mask(0) {} - LLPlane p; - U8 mask; - }; - frustum_plane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + LLPlane* mAgentPlanes; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + U8 mAgentPlaneBuffer[sizeof(LLPlane)*8]; + U8 mPlaneMask[7]; U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in @@ -143,12 +148,14 @@ private: public: LLVector3 mAgentFrustum[8]; //8 corners of 6-plane frustum F32 mFrustumCornerDist; //distance to corner of frustum against far clip plane - LLPlane& getAgentPlane(U32 idx) { return mAgentPlanes[idx].p; } + LLPlane& getAgentPlane(U32 idx) { return mAgentPlanes[idx]; } public: LLCamera(); LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane); - virtual ~LLCamera(){} // no-op virtual destructor + virtual ~LLCamera(); + + void alignPlanes(); void setUserClipPlane(LLPlane plane); void disableUserClipPlane(); @@ -199,8 +206,8 @@ public: S32 sphereInFrustum(const LLVector3 ¢er, const F32 radius) const; S32 pointInFrustum(const LLVector3 &point) const { return sphereInFrustum(point, 0.0f); } S32 sphereInFrustumFull(const LLVector3 ¢er, const F32 radius) const { return sphereInFrustum(center, radius); } - S32 AABBInFrustum(const LLVector3 ¢er, const LLVector3& radius); - S32 AABBInFrustumNoFarClip(const LLVector3 ¢er, const LLVector3& radius); + S32 AABBInFrustum(const LLVector4a& center, const LLVector4a& radius); + S32 AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius); //does a quick 'n dirty sphere-sphere check S32 sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius); diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 8bba12783f..ae2259dba0 100644 --- a/indra/llmath/lloctree.h 
+++ b/indra/llmath/lloctree.h @@ -35,6 +35,7 @@ #include "lltreenode.h" #include "v3math.h" +#include "llvector4a.h" #include #include @@ -44,7 +45,7 @@ #define OCT_ERRS LL_WARNS("OctreeErrors") #endif -#define LL_OCTREE_PARANOIA_CHECK 0 +#define LL_OCTREE_PARANOIA_CHECK 1 #if LL_DARWIN #define LL_OCTREE_MAX_CAPACITY 32 #else @@ -94,23 +95,22 @@ public: typedef LLOctreeNode oct_node; typedef LLOctreeListener oct_listener; - static const U8 OCTANT_POSITIVE_X = 0x01; - static const U8 OCTANT_POSITIVE_Y = 0x02; - static const U8 OCTANT_POSITIVE_Z = 0x04; - - LLOctreeNode( LLVector3d center, - LLVector3d size, + LLOctreeNode( const LLVector4a& center, + const LLVector4a& size, BaseType* parent, - U8 octant = 255) + S32 octant = -1) : mParent((oct_node*)parent), - mCenter(center), - mSize(size), mOctant(octant) { + mD = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*4, 16); + + mD[CENTER] = center; + mD[SIZE] = size; + updateMinMax(); - if ((mOctant == 255) && mParent) + if ((mOctant == -1) && mParent) { - mOctant = ((oct_node*) mParent)->getOctant(mCenter.mdV); + mOctant = ((oct_node*) mParent)->getOctant(mD[CENTER]); } clearChildren(); @@ -124,43 +124,30 @@ public: { delete getChild(i); } + + _mm_free(mD); } inline const BaseType* getParent() const { return mParent; } - inline void setParent(BaseType* parent) { mParent = (oct_node*) parent; } - inline const LLVector3d& getCenter() const { return mCenter; } - inline const LLVector3d& getSize() const { return mSize; } - inline void setCenter(LLVector3d center) { mCenter = center; } - inline void setSize(LLVector3d size) { mSize = size; } - inline oct_node* getNodeAt(T* data) { return getNodeAt(data->getPositionGroup(), data->getBinRadius()); } - inline U8 getOctant() const { return mOctant; } - inline void setOctant(U8 octant) { mOctant = octant; } + inline void setParent(BaseType* parent) { mParent = (oct_node*) parent; } + inline const LLVector4a& getCenter() const { return mD[CENTER]; } + inline const LLVector4a& 
getSize() const { return mD[SIZE]; } + inline void setCenter(const LLVector4a& center) { mD[CENTER] = center; } + inline void setSize(const LLVector4a& size) { mD[SIZE] = size; } + inline oct_node* getNodeAt(T* data) { return getNodeAt(data->getPositionGroup(), data->getBinRadius()); } + inline S32 getOctant() const { return mOctant; } + inline void setOctant(S32 octant) { mOctant = octant; } inline const oct_node* getOctParent() const { return (const oct_node*) getParent(); } inline oct_node* getOctParent() { return (oct_node*) getParent(); } - U8 getOctant(const F64 pos[]) const //get the octant pos is in + S32 getOctant(const LLVector4a& pos) const //get the octant pos is in { - U8 ret = 0; - - if (pos[0] > mCenter.mdV[0]) - { - ret |= OCTANT_POSITIVE_X; - } - if (pos[1] > mCenter.mdV[1]) - { - ret |= OCTANT_POSITIVE_Y; - } - if (pos[2] > mCenter.mdV[2]) - { - ret |= OCTANT_POSITIVE_Z; - } - - return ret; + return pos.greaterThan4(mD[CENTER]).getComparisonMask() & 0x7; } - inline bool isInside(const LLVector3d& pos, const F64& rad) const + inline bool isInside(const LLVector4a& pos, const F32& rad) const { - return rad <= mSize.mdV[0]*2.0 && isInside(pos); + return rad <= mD[SIZE][0]*2.f && isInside(pos); } inline bool isInside(T* data) const @@ -168,29 +155,27 @@ public: return isInside(data->getPositionGroup(), data->getBinRadius()); } - bool isInside(const LLVector3d& pos) const + bool isInside(const LLVector4a& pos) const { - const F64& x = pos.mdV[0]; - const F64& y = pos.mdV[1]; - const F64& z = pos.mdV[2]; - - if (x > mMax.mdV[0] || x <= mMin.mdV[0] || - y > mMax.mdV[1] || y <= mMin.mdV[1] || - z > mMax.mdV[2] || z <= mMin.mdV[2]) + S32 gt = pos.greaterThan4(mD[MAX]).getComparisonMask() & 0x7; + if (gt) { return false; } - + + S32 lt = pos.lessEqual4(mD[MIN]).getComparisonMask() & 0x7; + if (lt) + { + return false; + } + return true; } void updateMinMax() { - for (U32 i = 0; i < 3; i++) - { - mMax.mdV[i] = mCenter.mdV[i] + mSize.mdV[i]; - mMin.mdV[i] = 
mCenter.mdV[i] - mSize.mdV[i]; - } + mD[MAX].setAdd(mD[CENTER], mD[SIZE]); + mD[MIN].setSub(mD[CENTER], mD[SIZE]); } inline oct_listener* getOctListener(U32 index) @@ -203,34 +188,34 @@ public: return contains(xform->getBinRadius()); } - bool contains(F64 radius) + bool contains(F32 radius) { if (mParent == NULL) { //root node contains nothing return false; } - F64 size = mSize.mdV[0]; - F64 p_size = size * 2.0; + F32 size = mD[SIZE][0]; + F32 p_size = size * 2.f; - return (radius <= 0.001 && size <= 0.001) || + return (radius <= 0.001f && size <= 0.001f) || (radius <= p_size && radius > size); } - static void pushCenter(LLVector3d ¢er, const LLVector3d &size, const T* data) + static void pushCenter(LLVector4a ¢er, const LLVector4a &size, const T* data) { - const LLVector3d& pos = data->getPositionGroup(); - for (U32 i = 0; i < 3; i++) - { - if (pos.mdV[i] > center.mdV[i]) - { - center.mdV[i] += size.mdV[i]; - } - else - { - center.mdV[i] -= size.mdV[i]; - } - } + const LLVector4a& pos = data->getPositionGroup(); + + LLVector4a gt = pos.greaterThan4(center); + + LLVector4a up; + up.mQ = _mm_and_ps(size.mQ, gt.mQ); + + LLVector4a down; + down.mQ = _mm_andnot_ps(gt.mQ, size.mQ); + + center.add(up); + center.sub(down); } void accept(oct_traveler* visitor) { visitor->visit(this); } @@ -249,21 +234,21 @@ public: void accept(tree_traveler* visitor) const { visitor->visit(this); } void accept(oct_traveler* visitor) const { visitor->visit(this); } - oct_node* getNodeAt(const LLVector3d& pos, const F64& rad) + oct_node* getNodeAt(const LLVector4a& pos, const F32& rad) { LLOctreeNode* node = this; if (node->isInside(pos, rad)) { //do a quick search by octant - U8 octant = node->getOctant(pos.mdV); + S32 octant = node->getOctant(pos); BOOL keep_going = TRUE; //traverse the tree until we find a node that has no node //at the appropriate octant or is smaller than the object. 
//by definition, that node is the smallest node that contains // the data - while (keep_going && node->getSize().mdV[0] >= rad) + while (keep_going && node->getSize()[0] >= rad) { keep_going = FALSE; for (U32 i = 0; i < node->getChildCount() && !keep_going; i++) @@ -271,7 +256,7 @@ public: if (node->getChild(i)->getOctant() == octant) { node = node->getChild(i); - octant = node->getOctant(pos.mdV); + octant = node->getOctant(pos); keep_going = TRUE; } } @@ -289,7 +274,7 @@ public: { if (data == NULL) { - //OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; + OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; return false; } LLOctreeNode* parent = getOctParent(); @@ -299,7 +284,7 @@ public: { if (getElementCount() < LL_OCTREE_MAX_CAPACITY && (contains(data->getBinRadius()) || - (data->getBinRadius() > getSize().mdV[0] && + (data->getBinRadius() > getSize()[0] && parent && parent->getElementCount() >= LL_OCTREE_MAX_CAPACITY))) { //it belongs here #if LL_OCTREE_PARANOIA_CHECK @@ -330,16 +315,22 @@ public: } //it's here, but no kids are in the right place, make a new kid - LLVector3d center(getCenter()); - LLVector3d size(getSize()*0.5); + LLVector4a center = getCenter(); + LLVector4a size = getSize(); + size.mul(0.5f); //push center in direction of data LLOctreeNode::pushCenter(center, size, data); // handle case where floating point number gets too small - if( llabs(center.mdV[0] - getCenter().mdV[0]) < F_APPROXIMATELY_ZERO && - llabs(center.mdV[1] - getCenter().mdV[1]) < F_APPROXIMATELY_ZERO && - llabs(center.mdV[2] - getCenter().mdV[2]) < F_APPROXIMATELY_ZERO) + LLVector4a val; + val.setSub(center, getCenter()); + val.setAbs(val); + LLVector4a app_zero; + app_zero.mQ = F_APPROXIMATELY_ZERO_4A; + S32 lt = val.lessThan4(app_zero).getComparisonMask() & 0x7; + + if( lt == 0x7 ) { mData.insert(data); BaseType::insert(data); @@ -357,7 +348,7 @@ public: //make sure no existing node matches this position for (U32 i = 0; i < 
getChildCount(); i++) { - if (mChild[i]->getCenter() == center) + if (mChild[i]->getCenter().equal3(center)) { OCT_ERRS << "Octree detected duplicate child center and gave up." << llendl; return false; @@ -375,7 +366,7 @@ public: else { //it's not in here, give it to the root - //OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl; + OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl; oct_node* node = this; @@ -482,13 +473,19 @@ public: void addChild(oct_node* child, BOOL silent = FALSE) { #if LL_OCTREE_PARANOIA_CHECK + + if (child->getSize().equal3(getSize())) + { + OCT_ERRS << "Child size is same as parent size!" << llendl; + } + for (U32 i = 0; i < getChildCount(); i++) { - if(mChild[i]->getSize() != child->getSize()) + if(!mChild[i]->getSize().equal3(child->getSize())) { OCT_ERRS <<"Invalid octree child size." << llendl; } - if (mChild[i]->getCenter() == child->getCenter()) + if (mChild[i]->getCenter().equal3(child->getCenter())) { OCT_ERRS <<"Duplicate octree child position." << llendl; } @@ -513,7 +510,7 @@ public: } } - void removeChild(U8 index, BOOL destroy = FALSE) + void removeChild(S32 index, BOOL destroy = FALSE) { for (U32 i = 0; i < this->getListenerCount(); i++) { @@ -554,18 +551,26 @@ public: } } - //OCT_ERRS << "Octree failed to delete requested child." << llendl; + OCT_ERRS << "Octree failed to delete requested child." 
<< llendl; } protected: + typedef enum + { + CENTER = 0, + SIZE = 1, + MAX = 2, + MIN = 3 + } eDName; + + LLVector4a* mD; + + oct_node* mParent; + S32 mOctant; + child_list mChild; element_list mData; - oct_node* mParent; - LLVector3d mCenter; - LLVector3d mSize; - LLVector3d mMax; - LLVector3d mMin; - U8 mOctant; + }; //just like a regular node, except it might expand on insert and compress on balance @@ -576,9 +581,9 @@ public: typedef LLOctreeNode BaseType; typedef LLOctreeNode oct_node; - LLOctreeRoot( LLVector3d center, - LLVector3d size, - BaseType* parent) + LLOctreeRoot(const LLVector4a& center, + const LLVector4a& size, + BaseType* parent) : BaseType(center, size, parent) { } @@ -619,28 +624,33 @@ public: { if (data == NULL) { - //OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl; + OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl; return false; } if (data->getBinRadius() > 4096.0) { - //OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl; + OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl; return false; } - const F64 MAX_MAG = 1024.0*1024.0; + LLVector4a MAX_MAG; + MAX_MAG.splat(1024.f*1024.f); + + const LLVector4a& v = data->getPositionGroup(); + + LLVector4a val; + val.setSub(v, mD[CENTER]); + val.setAbs(val); + S32 lt = val.lessThan4(MAX_MAG).getComparisonMask() & 0x7; - const LLVector3d& v = data->getPositionGroup(); - if (!(fabs(v.mdV[0]-this->mCenter.mdV[0]) < MAX_MAG && - fabs(v.mdV[1]-this->mCenter.mdV[1]) < MAX_MAG && - fabs(v.mdV[2]-this->mCenter.mdV[2]) < MAX_MAG)) + if (lt != 0x7) { - //OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" << llendl; + OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" 
<< llendl; return false; } - if (this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup())) + if (this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())) { //we got it, just act like a branch oct_node* node = getNodeAt(data); @@ -656,31 +666,34 @@ public: else if (this->getChildCount() == 0) { //first object being added, just wrap it up - while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) + while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) { - LLVector3d center, size; + LLVector4a center, size; center = this->getCenter(); size = this->getSize(); LLOctreeNode::pushCenter(center, size, data); this->setCenter(center); - this->setSize(size*2); + size.mul(2.f); + this->setSize(size); this->updateMinMax(); } LLOctreeNode::insert(data); } else { - while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) + while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) { //the data is outside the root node, we need to grow - LLVector3d center(this->getCenter()); - LLVector3d size(this->getSize()); + LLVector4a center(this->getCenter()); + LLVector4a size(this->getSize()); //expand this node - LLVector3d newcenter(center); + LLVector4a newcenter(center); LLOctreeNode::pushCenter(newcenter, size, data); this->setCenter(newcenter); - this->setSize(size*2); + LLVector4a size2 = size; + size2.mul(2.f); + this->setSize(size2); this->updateMinMax(); //copy our children to a new branch diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index c4172de651..72833c019f 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -295,7 +295,7 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons } } -class LLVolumeOctreeRebound : public LLOctreeTravelerDepthFirst +class LLVolumeOctreeRebound : public LLOctreeTravelerDepthFirst { 
public: const LLVolumeFace* mFace; @@ -305,7 +305,7 @@ public: mFace = face; } - virtual void visit(const LLOctreeNode* branch) + virtual void visit(const LLOctreeNode* branch) { LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0); @@ -314,12 +314,12 @@ public: if (branch->getElementCount() != 0) { - const LLVolumeFace::Triangle* tri = *(branch->getData().begin()); + const LLVolumeTriangle* tri = *(branch->getData().begin()); min = *(tri->mV[0]); max = *(tri->mV[0]); - for (LLOctreeNode::const_element_iter iter = + for (LLOctreeNode::const_element_iter iter = branch->getData().begin(); iter != branch->getData().end(); ++iter) { //stretch by triangles in node @@ -4394,7 +4394,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en LLVector4a box_size; box_size.setSub(face.mExtents[1], face.mExtents[0]); - if (LLLineSegmentBoxIntersect(start.getF32(), end.getF32(), box_center.getF32(), box_size.getF32())) + if (LLLineSegmentBoxIntersect(start, end, box_center, box_size)) { if (bi_normal != NULL) // if the caller wants binormals, we may need to generate them { @@ -5418,12 +5418,17 @@ void LLVolumeFace::optimize(F32 angle_cutoff) void LLVolumeFace::createOctree() { - mOctree = new LLOctreeRoot(LLVector3d(0,0,0), LLVector3d(1,1,1), NULL); + LLVector4a center; + LLVector4a size; + center.splat(0.f); + size.splat(1.f); + + mOctree = new LLOctreeRoot(center, size, NULL); new LLVolumeOctreeListener(mOctree); for (U32 i = 0; i < mNumIndices; i+= 3) { - Triangle* tri = new Triangle(); + LLPointer tri = new LLVolumeTriangle(); const LLVector4a& v0 = mPositions[mIndices[i]]; const LLVector4a& v1 = mPositions[mIndices[i+1]]; @@ -5449,8 +5454,7 @@ void LLVolumeFace::createOctree() center.setAdd(min, max); center.mul(0.5f); - - tri->mPositionGroup.setVec(center[0], center[1], center[2]); + *tri->mPositionGroup = center; LLVector4a size; size.setSub(max,min); @@ -5464,15 +5468,6 @@ void LLVolumeFace::createOctree() 
rebound.traverse(mOctree); } -const LLVector3d& LLVolumeFace::Triangle::getPositionGroup() const -{ - return mPositionGroup; -} - -const F64& LLVolumeFace::Triangle::getBinRadius() const -{ - return mRadius; -} void LLVolumeFace::swapData(LLVolumeFace& rhs) { diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 0ae8aa19ca..c49d1c650d 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -46,6 +46,7 @@ template class LLOctreeNode; class LLVector4a; class LLVolumeFace; class LLVolume; +class LLVolumeTriangle; #include "lldarray.h" #include "lluuid.h" @@ -918,20 +919,7 @@ public: // mWeights.size() should be empty or match mVertices.size() LLVector4a* mWeights; - class Triangle : public LLRefCount - { - public: - const LLVector4a* mV[3]; - U16 mIndex[3]; - - LLVector3d mPositionGroup; - F64 mRadius; - - virtual const LLVector3d& getPositionGroup() const; - virtual const F64& getBinRadius() const; - }; - - LLOctreeNode* mOctree; + LLOctreeNode* mOctree; private: BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE); diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index 47146a5ec4..715309b64a 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -79,6 +79,17 @@ protected: class LLVertexBuffer : public LLRefCount { public: + LLVertexBuffer(const LLVertexBuffer& rhs) + { + *this = rhs; + } + + const LLVertexBuffer& operator=(const LLVertexBuffer& rhs) + { + llerrs << "Illegal operation!" 
<< llendl; + return *this; + } + static LLVBOPool sStreamVBOPool; static LLVBOPool sDynamicVBOPool; static LLVBOPool sStreamIBOPool; diff --git a/indra/llui/lllineeditor.cpp b/indra/llui/lllineeditor.cpp index 45f9de8e8d..c0cc294c02 100644 --- a/indra/llui/lllineeditor.cpp +++ b/indra/llui/lllineeditor.cpp @@ -377,7 +377,14 @@ void LLLineEditor::setText(const LLStringExplicit &new_text) setCursor(llmin((S32)mText.length(), getCursor())); // Set current history line to end of history. - mCurrentHistoryLine = mLineHistory.end() - 1; + if (mLineHistory.empty()) + { + mCurrentHistoryLine = mLineHistory.end(); + } + else + { + mCurrentHistoryLine = mLineHistory.end() - 1; + } mPrevText = mText; } diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp index 03eee12707..04e433dcfd 100644 --- a/indra/newview/lldrawable.cpp +++ b/indra/newview/lldrawable.cpp @@ -41,6 +41,7 @@ #include "llcriticaldamp.h" #include "llface.h" #include "lllightconstants.h" +#include "llmatrix4a.h" #include "llsky.h" #include "llsurfacepatch.h" #include "llviewercamera.h" @@ -91,8 +92,12 @@ void LLDrawable::incrementVisible() sCurVisible++; sCurPixelAngle = (F32) gViewerWindow->getWindowHeightRaw()/LLViewerCamera::getInstance()->getView(); } + void LLDrawable::init() { + mExtents = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*3, 32); + mPositionGroup = mExtents + 2; + // mXform mParent = NULL; mRenderType = 0; @@ -121,6 +126,11 @@ void LLDrawable::initClass() void LLDrawable::destroy() { + if (gDebugGL) + { + gPipeline.checkReferences(this); + } + if (isDead()) { sNumZombieDrawables--; @@ -139,6 +149,9 @@ void LLDrawable::destroy() { llinfos << "- Zombie drawables: " << sNumZombieDrawables << llendl; }*/ + + _mm_free(mExtents); + mExtents = mPositionGroup = NULL; } void LLDrawable::markDead() @@ -714,12 +727,14 @@ void LLDrawable::updateDistance(LLCamera& camera, bool force_update) LLFace* facep = getFace(i); if (force_update || facep->getPoolType() == LLDrawPool::POOL_ALPHA) { 
- LLVector3 box = (facep->mExtents[1] - facep->mExtents[0]) * 0.25f; + LLVector4a box; + box.setSub(facep->mExtents[1], facep->mExtents[0]); + box.mul(0.25f); LLVector3 v = (facep->mCenterLocal-camera.getOrigin()); const LLVector3& at = camera.getAtAxis(); for (U32 j = 0; j < 3; j++) { - v.mV[j] -= box.mV[j] * at.mV[j]; + v.mV[j] -= box[j] * at.mV[j]; } facep->mDistance = v * camera.getAtAxis(); } @@ -728,7 +743,7 @@ void LLDrawable::updateDistance(LLCamera& camera, bool force_update) } else { - pos = LLVector3(getPositionGroup()); + pos = LLVector3(getPositionGroup().getF32()); } pos -= camera.getOrigin(); @@ -777,7 +792,7 @@ BOOL LLDrawable::updateGeometry(BOOL priority) return res; } -void LLDrawable::shiftPos(const LLVector3 &shift_vector) +void LLDrawable::shiftPos(const LLVector4a &shift_vector) { if (isDead()) { @@ -809,9 +824,9 @@ void LLDrawable::shiftPos(const LLVector3 &shift_vector) for (S32 i = 0; i < getNumFaces(); i++) { LLFace *facep = getFace(i); - facep->mCenterAgent += shift_vector; - facep->mExtents[0] += shift_vector; - facep->mExtents[1] += shift_vector; + facep->mCenterAgent += LLVector3(shift_vector.getF32()); + facep->mExtents[0].add(shift_vector); + facep->mExtents[1].add(shift_vector); if (!volume && facep->hasGeometry()) { @@ -820,9 +835,9 @@ void LLDrawable::shiftPos(const LLVector3 &shift_vector) } } - mExtents[0] += shift_vector; - mExtents[1] += shift_vector; - mPositionGroup += LLVector3d(shift_vector); + mExtents[0].add(shift_vector); + mExtents[1].add(shift_vector); + mPositionGroup->add(shift_vector); } else if (mSpatialBridge) { @@ -830,9 +845,9 @@ void LLDrawable::shiftPos(const LLVector3 &shift_vector) } else if (isAvatar()) { - mExtents[0] += shift_vector; - mExtents[1] += shift_vector; - mPositionGroup += LLVector3d(shift_vector); + mExtents[0].add(shift_vector); + mExtents[1].add(shift_vector); + mPositionGroup->add(shift_vector); } mVObjp->onShift(shift_vector); @@ -844,21 +859,26 @@ const LLVector3& 
LLDrawable::getBounds(LLVector3& min, LLVector3& max) const return mXform.getPositionW(); } -const LLVector3* LLDrawable::getSpatialExtents() const +const LLVector4a* LLDrawable::getSpatialExtents() const { return mExtents; } -void LLDrawable::setSpatialExtents(LLVector3 min, LLVector3 max) +void LLDrawable::setSpatialExtents(const LLVector3& min, const LLVector3& max) +{ + mExtents[0].load3(min.mV); + mExtents[1].load3(max.mV); +} + +void LLDrawable::setSpatialExtents(const LLVector4a& min, const LLVector4a& max) { - LLVector3 size = max - min; mExtents[0] = min; - mExtents[1] = max; + mExtents[1] = max; } -void LLDrawable::setPositionGroup(const LLVector3d& pos) +void LLDrawable::setPositionGroup(const LLVector4a& pos) { - mPositionGroup.setVec(pos); + *mPositionGroup = pos; } void LLDrawable::updateSpatialExtents() @@ -872,7 +892,7 @@ void LLDrawable::updateSpatialExtents() if (mSpatialBridge.notNull()) { - mPositionGroup.setVec(0,0,0); + mPositionGroup->splat(0.f); } } @@ -1083,59 +1103,72 @@ void LLSpatialBridge::updateSpatialExtents() root->rebound(); } - LLXformMatrix* mat = mDrawable->getXform(); - - LLVector3 offset = root->mBounds[0]; - LLVector3 size = root->mBounds[1]; + LLVector4a offset; + LLVector4a size = root->mBounds[1]; - LLVector3 center = LLVector3(0,0,0) * mat->getWorldMatrix(); - LLQuaternion rotation = LLQuaternion(mat->getWorldMatrix()); + //VECTORIZE THIS + LLMatrix4a mat; + mat.loadu(mDrawable->getXform()->getWorldMatrix()); + + LLVector4a t; + t.splat(0.f); + + LLVector4a center; + mat.affineTransform(t, center); - offset *= rotation; - center += offset; + mat.rotate(root->mBounds[0], offset); + center.add(offset); - LLVector3 v[4]; + LLVector4a v[4]; + //get 4 corners of bounding box - v[0] = (size * rotation); - v[1] = (LLVector3(-size.mV[0], -size.mV[1], size.mV[2]) * rotation); - v[2] = (LLVector3(size.mV[0], -size.mV[1], -size.mV[2]) * rotation); - v[3] = (LLVector3(-size.mV[0], size.mV[1], -size.mV[2]) * rotation); + 
mat.rotate(size,v[0]); - LLVector3& newMin = mExtents[0]; - LLVector3& newMax = mExtents[1]; + LLVector4a scale; + + scale.set(-1.f, -1.f, 1.f); + scale.mul(size); + mat.rotate(scale, v[1]); + + scale.set(1.f, -1.f, -1.f); + scale.mul(size); + mat.rotate(scale, v[2]); + + scale.set(-1.f, 1.f, -1.f); + scale.mul(size); + mat.rotate(scale, v[3]); + + + LLVector4a& newMin = mExtents[0]; + LLVector4a& newMax = mExtents[1]; newMin = newMax = center; for (U32 i = 0; i < 4; i++) { - for (U32 j = 0; j < 3; j++) - { - F32 delta = fabsf(v[i].mV[j]); - F32 min = center.mV[j] - delta; - F32 max = center.mV[j] + delta; - - if (min < newMin.mV[j]) - { - newMin.mV[j] = min; - } - - if (max > newMax.mV[j]) - { - newMax.mV[j] = max; - } - } - } + LLVector4a delta; + delta.setAbs(v[i]); + LLVector4a min; + min.setSub(center, delta); + LLVector4a max; + max.setAdd(center, delta); - LLVector3 diagonal = newMax - newMin; - mRadius = diagonal.magVec() * 0.5f; + newMin.setMin(min); + newMax.setMax(max); + } + + LLVector4a diagonal; + diagonal.setSub(newMax, newMin); + mRadius = diagonal.length3() * 0.5f; - mPositionGroup.setVec((newMin + newMax) * 0.5f); + mPositionGroup->setAdd(newMin,newMax); + mPositionGroup->mul(0.5f); updateBinRadius(); } void LLSpatialBridge::updateBinRadius() { - mBinRadius = llmin((F32) mOctree->getSize().mdV[0]*0.5f, 256.f); + mBinRadius = llmin( mOctree->getSize()[0]*0.5f, 256.f); } LLCamera LLSpatialBridge::transformCamera(LLCamera& camera) @@ -1276,8 +1309,12 @@ void LLSpatialBridge::setVisible(LLCamera& camera_in, std::vector* LLSpatialGroup* group = (LLSpatialGroup*) mOctree->getListener(0); group->rebound(); - LLVector3 center = (mExtents[0] + mExtents[1]) * 0.5f; - LLVector3 size = (mExtents[1]-mExtents[0]) * 0.5f; + LLVector4a center; + center.setAdd(mExtents[0], mExtents[1]); + center.mul(0.5f); + LLVector4a size; + size.setSub(mExtents[1], mExtents[0]); + size.mul(0.5f); if ((LLPipeline::sShadowRender && camera_in.AABBInFrustum(center, size)) || 
LLPipeline::sImpostorRender || @@ -1389,11 +1426,11 @@ BOOL LLSpatialBridge::updateMove() return TRUE; } -void LLSpatialBridge::shiftPos(const LLVector3& vec) +void LLSpatialBridge::shiftPos(const LLVector4a& vec) { - mExtents[0] += vec; - mExtents[1] += vec; - mPositionGroup += LLVector3d(vec); + mExtents[0].add(vec); + mExtents[1].add(vec); + mPositionGroup->add(vec); } void LLSpatialBridge::cleanupReferences() @@ -1511,7 +1548,7 @@ F32 LLHUDBridge::calcPixelArea(LLSpatialGroup* group, LLCamera& camera) } -void LLHUDBridge::shiftPos(const LLVector3& vec) +void LLHUDBridge::shiftPos(const LLVector4a& vec) { //don't shift hud bridges on region crossing } diff --git a/indra/newview/lldrawable.h b/indra/newview/lldrawable.h index c3c6cbe12f..811ff1801b 100644 --- a/indra/newview/lldrawable.h +++ b/indra/newview/lldrawable.h @@ -41,6 +41,7 @@ #include "v4math.h" #include "m4math.h" #include "v4coloru.h" +#include "llvector4a.h" #include "llquaternion.h" #include "xform.h" #include "llmemtype.h" @@ -66,6 +67,17 @@ const U32 SILHOUETTE_HIGHLIGHT = 0; class LLDrawable : public LLRefCount { public: + LLDrawable(const LLDrawable& rhs) + { + *this = rhs; + } + + const LLDrawable& operator=(const LLDrawable& rhs) + { + llerrs << "Illegal operation!" 
<< llendl; + return *this; + } + static void initClass(); LLDrawable() { init(); } @@ -94,14 +106,14 @@ public: const LLVector3& getPosition() const { return mXform.getPosition(); } const LLVector3& getWorldPosition() const { return mXform.getPositionW(); } const LLVector3 getPositionAgent() const; - const LLVector3d& getPositionGroup() const { return mPositionGroup; } + const LLVector4a& getPositionGroup() const { return *mPositionGroup; } const LLVector3& getScale() const { return mCurrentScale; } void setScale(const LLVector3& scale) { mCurrentScale = scale; } const LLQuaternion& getWorldRotation() const { return mXform.getWorldRotation(); } const LLQuaternion& getRotation() const { return mXform.getRotation(); } F32 getIntensity() const { return llmin(mXform.getScale().mV[0], 4.f); } S32 getLOD() const { return mVObjp ? mVObjp->getLOD() : 1; } - F64 getBinRadius() const { return mBinRadius; } + F32 getBinRadius() const { return mBinRadius; } void getMinMax(LLVector3& min,LLVector3& max) const { mXform.getMinMax(min,max); } LLXformMatrix* getXform() { return &mXform; } @@ -155,7 +167,7 @@ public: void updateSpecialHoverCursor(BOOL enabled); - virtual void shiftPos(const LLVector3 &shift_vector); + virtual void shiftPos(const LLVector4a &shift_vector); S32 getGeneration() const { return mGeneration; } @@ -173,11 +185,12 @@ public: const LLVector3& getBounds(LLVector3& min, LLVector3& max) const; virtual void updateSpatialExtents(); virtual void updateBinRadius(); - const LLVector3* getSpatialExtents() const; - void setSpatialExtents(LLVector3 min, LLVector3 max); - void setPositionGroup(const LLVector3d& pos); - void setPositionGroup(const LLVector3& pos) { setPositionGroup(LLVector3d(pos)); } + const LLVector4a* getSpatialExtents() const; + void setSpatialExtents(const LLVector3& min, const LLVector3& max); + void setSpatialExtents(const LLVector4a& min, const LLVector4a& max); + void setPositionGroup(const LLVector4a& pos); + void setRenderType(S32 type) { 
mRenderType = type; } BOOL isRenderType(S32 type) { return mRenderType == type; } S32 getRenderType() { return mRenderType; } @@ -288,6 +301,9 @@ public: private: typedef std::vector face_list_t; + LLVector4a* mExtents; + LLVector4a* mPositionGroup; + U32 mState; S32 mRenderType; LLPointer mVObjp; @@ -297,9 +313,7 @@ private: mutable U32 mVisible; F32 mRadius; - LLVector3 mExtents[2]; - LLVector3d mPositionGroup; - F64 mBinRadius; + F32 mBinRadius; S32 mGeneration; LLVector3 mCurrentScale; diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 98a50ca4e7..b8407a6f5b 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -152,6 +152,8 @@ void cylindricalProjection(LLVector2 &tc, const LLVector4a& normal, const LLVect void LLFace::init(LLDrawable* drawablep, LLViewerObject* objp) { + mExtents = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*2, 16); + mLastUpdateTime = gFrameTimeSeconds; mLastMoveTime = 0.f; mVSize = 0.f; @@ -206,6 +208,12 @@ static LLFastTimer::DeclareTimer FTM_FACE_DEREF("Deref"); void LLFace::destroy() { LLFastTimer t(FTM_DESTROY_FACE); + + if (gDebugGL) + { + gPipeline.checkReferences(this); + } + if(mTexture.notNull()) { LLFastTimer t(FTM_DESTROY_TEXTURE); @@ -260,6 +268,9 @@ void LLFace::destroy() mDrawablep = NULL; mVObjp = NULL; } + + _mm_free(mExtents); + mExtents = NULL; } @@ -725,13 +736,20 @@ static void xform(LLVector2 &tex_coord, F32 cosAng, F32 sinAng, F32 offS, F32 of BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat_vert, const LLMatrix3& mat_normal, BOOL global_volume) + const LLMatrix4& mat_vert_in, const LLMatrix3& mat_normal_in, BOOL global_volume) { LLMemType mt1(LLMemType::MTYPE_DRAWABLE); //get bounding box if (mDrawablep->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION)) { + //VECTORIZE THIS + LLMatrix4a mat_vert; + mat_vert.loadu(mat_vert_in); + + LLMatrix4a mat_normal; + mat_normal.loadu(mat_normal_in); + //if 
(mDrawablep->isState(LLDrawable::REBUILD_VOLUME)) //{ //vertex buffer no longer valid // mVertexBuffer = NULL; @@ -739,82 +757,96 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, //} //VECTORIZE THIS - LLVector3 min,max; + LLVector4a min,max; if (f >= volume.getNumVolumeFaces()) { - min = LLVector3(-1,-1,-1); - max = LLVector3(1,1,1); - } - else - { - const LLVolumeFace &face = volume.getVolumeFace(f); - min.set(face.mExtents[0].getF32()); - max.set(face.mExtents[1].getF32()); + llwarns << "Generating bounding box for invalid face index!" << llendl; + f = 0; } + const LLVolumeFace &face = volume.getVolumeFace(f); + min = face.mExtents[0]; + max = face.mExtents[1]; + + //min, max are in volume space, convert to drawable render space - LLVector3 center = ((min + max) * 0.5f)*mat_vert; - LLVector3 size = ((max-min) * 0.5f); + LLVector4a center; + LLVector4a t; + t.setAdd(min, max); + t.mul(0.5f); + mat_vert.affineTransform(t, center); + LLVector4a size; + size.setSub(max, min); + size.mul(0.5f); + if (!global_volume) { - size.scaleVec(mDrawablep->getVObj()->getScale()); + //VECTORIZE THIS + LLVector4a scale; + scale.load3(mDrawablep->getVObj()->getScale().mV); + size.mul(scale); } - LLMatrix3 mat = mat_normal; - LLVector3 x = mat.getFwdRow(); - LLVector3 y = mat.getLeftRow(); - LLVector3 z = mat.getUpRow(); - x.normVec(); - y.normVec(); - z.normVec(); + mat_normal.mMatrix[0].normalize3fast(); + mat_normal.mMatrix[1].normalize3fast(); + mat_normal.mMatrix[2].normalize3fast(); + + LLVector4a v[4]; - mat.setRows(x,y,z); + //get 4 corners of bounding box + mat_normal.rotate(size,v[0]); - LLQuaternion rotation = LLQuaternion(mat); + //VECTORIZE THIS + LLVector4a scale; - LLVector3 v[4]; - //get 4 corners of bounding box - v[0] = (size * rotation); - v[1] = (LLVector3(-size.mV[0], -size.mV[1], size.mV[2]) * rotation); - v[2] = (LLVector3(size.mV[0], -size.mV[1], -size.mV[2]) * rotation); - v[3] = (LLVector3(-size.mV[0], size.mV[1], -size.mV[2]) * rotation); + 
scale.set(-1.f, -1.f, 1.f); + scale.mul(size); + mat_normal.rotate(scale, v[1]); + + scale.set(1.f, -1.f, -1.f); + scale.mul(size); + mat_normal.rotate(scale, v[2]); + + scale.set(-1.f, 1.f, -1.f); + scale.mul(size); + mat_normal.rotate(scale, v[3]); - LLVector3& newMin = mExtents[0]; - LLVector3& newMax = mExtents[1]; + LLVector4a& newMin = mExtents[0]; + LLVector4a& newMax = mExtents[1]; newMin = newMax = center; for (U32 i = 0; i < 4; i++) { - for (U32 j = 0; j < 3; j++) - { - F32 delta = fabsf(v[i].mV[j]); - F32 min = center.mV[j] - delta; - F32 max = center.mV[j] + delta; - - if (min < newMin.mV[j]) - { - newMin.mV[j] = min; - } - - if (max > newMax.mV[j]) - { - newMax.mV[j] = max; - } - } + LLVector4a delta; + delta.setAbs(v[i]); + LLVector4a min; + min.setSub(center, delta); + LLVector4a max; + max.setAdd(center, delta); + + newMin.setMin(min); + newMax.setMax(max); } if (!mDrawablep->isActive()) { - LLVector3 offset = mDrawablep->getRegion()->getOriginAgent(); - newMin += offset; - newMax += offset; + LLVector4a offset; + offset.load3(mDrawablep->getRegion()->getOriginAgent().mV); + newMin.add(offset); + newMax.add(offset); } - mCenterLocal = (newMin+newMax)*0.5f; - LLVector3 tmp = (newMin - newMax) ; - mBoundingSphereRadius = tmp.length() * 0.5f ; + t.setAdd(newMin, newMax); + t.mul(0.5f); + + //VECTORIZE THIS + mCenterLocal.set(t.getF32()); + + t.setSub(newMax,newMin); + t.mul(0.5f); + mBoundingSphereRadius = t.length3(); updateCenterAgent(); } @@ -1647,20 +1679,31 @@ F32 LLFace::getTextureVirtualSize() BOOL LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius) { + //VECTORIZE THIS //get area of circle around face - LLVector3 center = getPositionAgent(); - LLVector3 size = (mExtents[1] - mExtents[0]) * 0.5f; + LLVector4a center; + center.load3(getPositionAgent().mV); + LLVector4a size; + size.setSub(mExtents[1], mExtents[0]); + size.mul(0.5f); + LLViewerCamera* camera = LLViewerCamera::getInstance(); - F32 size_squared = size.lengthSquared() ; - 
LLVector3 lookAt = center - camera->getOrigin(); - F32 dist = lookAt.normVec() ; + F32 size_squared = size.dot3(size); + LLVector4a lookAt; + LLVector4a t; + t.load3(camera->getOrigin().mV); + lookAt.setSub(center, t); + F32 dist = lookAt.length3(); + lookAt.normalize3fast() ; //get area of circle around node F32 app_angle = atanf(fsqrtf(size_squared) / dist); radius = app_angle*LLDrawable::sCurPixelAngle; mPixelArea = radius*radius * 3.14159f; - cos_angle_to_view_dir = lookAt * camera->getXAxis() ; + LLVector4a x_axis; + x_axis.load3(camera->getXAxis().mV); + cos_angle_to_view_dir = lookAt.dot3(x_axis); //if has media, check if the face is out of the view frustum. if(hasMedia()) @@ -1676,7 +1719,10 @@ BOOL LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius) } else { - if(dist * dist * (lookAt - camera->getXAxis()).lengthSquared() < size_squared) + LLVector4a d; + d.setSub(lookAt, x_axis); + + if(dist * dist * d.dot3(d) < size_squared) { cos_angle_to_view_dir = 1.0f ; } diff --git a/indra/newview/llface.h b/indra/newview/llface.h index 48909d7895..0cd472a2fd 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -65,6 +65,17 @@ class LLFace { public: + LLFace(const LLFace& rhs) + { + *this = rhs; + } + + const LLFace& operator=(const LLFace& rhs) + { + llerrs << "Illegal operation!" 
<< llendl; + return *this; + } + enum EMasks { LIGHT = 0x0001, @@ -221,7 +232,9 @@ public: LLVector3 mCenterLocal; LLVector3 mCenterAgent; - LLVector3 mExtents[2]; + + LLVector4a* mExtents; + LLVector2 mTexExtents[2]; F32 mDistance; LLPointer mVertexBuffer; diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp index 561965d021..8be4e34748 100644 --- a/indra/newview/llflexibleobject.cpp +++ b/indra/newview/llflexibleobject.cpp @@ -97,11 +97,13 @@ void LLVolumeImplFlexible::onParameterChanged(U16 param_type, LLNetworkData *dat } } -void LLVolumeImplFlexible::onShift(const LLVector3 &shift_vector) +void LLVolumeImplFlexible::onShift(const LLVector4a &shift_vector) { + //VECTORIZE THIS + LLVector3 shift(shift_vector.getF32()); for (int section = 0; section < (1<getRenderRotation(); LLVector3 first_grid_obj_pos = first_grid_object->getRenderPosition(); - LLVector3 min_extents(F32_MAX, F32_MAX, F32_MAX); - LLVector3 max_extents(-F32_MAX, -F32_MAX, -F32_MAX); + LLVector4a min_extents(F32_MAX); + LLVector4a max_extents(-F32_MAX); BOOL grid_changed = FALSE; for (LLObjectSelection::iterator iter = mGridObjects.begin(); iter != mGridObjects.end(); ++iter) @@ -1110,7 +1110,7 @@ void LLSelectMgr::getGrid(LLVector3& origin, LLQuaternion &rotation, LLVector3 & LLDrawable* drawable = object->mDrawable; if (drawable) { - const LLVector3* ext = drawable->getSpatialExtents(); + const LLVector4a* ext = drawable->getSpatialExtents(); update_min_max(min_extents, max_extents, ext[0]); update_min_max(min_extents, max_extents, ext[1]); grid_changed = TRUE; @@ -1118,13 +1118,19 @@ void LLSelectMgr::getGrid(LLVector3& origin, LLQuaternion &rotation, LLVector3 & } if (grid_changed) { - mGridOrigin = lerp(min_extents, max_extents, 0.5f); + LLVector4a center, size; + center.setAdd(min_extents, max_extents); + center.mul(0.5f); + size.setSub(max_extents, min_extents); + size.mul(0.5f); + + mGridOrigin.set(center.getF32()); LLDrawable* drawable = 
first_grid_object->mDrawable; if (drawable && drawable->isActive()) { mGridOrigin = mGridOrigin * first_grid_object->getRenderMatrix(); } - mGridScale = (max_extents - min_extents) * 0.5f; + mGridScale.set(size.getF32()); } } else // GRID_MODE_WORLD or just plain default diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 355173772b..3cf0138303 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -102,23 +102,6 @@ void sg_assert(BOOL expr) #endif } -#if LL_DEBUG -void validate_drawable(LLDrawable* drawablep) -{ - F64 rad = drawablep->getBinRadius(); - const LLVector3* ext = drawablep->getSpatialExtents(); - - if (rad < 0 || rad > 4096 || - (ext[1]-ext[0]).magVec() > 4096) - { - llwarns << "Invalid drawable found in octree." << llendl; - } -} -#else -#define validate_drawable(x) -#endif - - S32 AABBSphereIntersect(const LLVector3& min, const LLVector3& max, const LLVector3 &origin, const F32 &rad) { return AABBSphereIntersectR2(min, max, origin, rad*rad); @@ -158,6 +141,55 @@ S32 AABBSphereIntersectR2(const LLVector3& min, const LLVector3& max, const LLVe } +S32 AABBSphereIntersect(const LLVector4a& min, const LLVector4a& max, const LLVector3 &origin, const F32 &rad) +{ + return AABBSphereIntersectR2(min, max, origin, rad*rad); +} + +S32 AABBSphereIntersectR2(const LLVector4a& min, const LLVector4a& max, const LLVector3 &origin, const F32 &r) +{ + F32 d = 0.f; + F32 t; + + LLVector4a origina; + origina.load3(origin.mV); + + LLVector4a v; + v.setSub(min, origina); + + if (v.dot3(v) < r) + { + v.setSub(max, origina); + if (v.dot3(v) < r) + { + return 2; + } + } + + + for (U32 i = 0; i < 3; i++) + { + if (origin.mV[i] < min[i]) + { + t = min[i] - origin.mV[i]; + d += t*t; + } + else if (origin.mV[i] > max[i]) + { + t = origin.mV[i] - max[i]; + d += t*t; + } + + if (d > r) + { + return 0; + } + } + + return 1; +} + + typedef enum { b000 = 0x00, @@ -193,24 +225,13 @@ static U8 
sOcclusionIndices[] = b000, b110, b100, b101, b001, b011, b010, b110, }; -U8* get_box_fan_indices(LLCamera* camera, const LLVector3& center) +U8* get_box_fan_indices(LLCamera* camera, const LLVector4a& center) { - LLVector3 d = center - camera->getOrigin(); - - U8 cypher = 0; - if (d.mV[0] > 0) - { - cypher |= b100; - } - if (d.mV[1] > 0) - { - cypher |= b010; - } - if (d.mV[2] > 0) - { - cypher |= b001; - } + LLVector4a origin; + origin.load3(camera->getOrigin().mV); + S32 cypher = center.greaterThan4(origin).getComparisonMask() & 0x7; + return sOcclusionIndices+cypher*8; } @@ -218,33 +239,49 @@ void LLSpatialGroup::buildOcclusion() { if (!mOcclusionVerts) { - mOcclusionVerts = new F32[8*3]; + mOcclusionVerts = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*8, 16); } - LLVector3 r = mBounds[1] + LLVector3(SG_OCCLUSION_FUDGE, SG_OCCLUSION_FUDGE, SG_OCCLUSION_FUDGE); + LLVector4a fudge; + fudge.splat(SG_OCCLUSION_FUDGE); - for (U32 k = 0; k < 3; k++) - { - r.mV[k] = llmin(mBounds[1].mV[k]+0.25f, r.mV[k]); - } + LLVector4a r; + r.setAdd(mBounds[1], fudge); - F32* v = mOcclusionVerts; - F32* c = mBounds[0].mV; - F32* s = r.mV; + LLVector4a r2; + r2.splat(0.25f); + r2.add(mBounds[1]); + + r.setMin(r2); + + LLVector4a* v = mOcclusionVerts; + const LLVector4a& c = mBounds[0]; + const LLVector4a& s = r; + static const LLVector4a octant[] = + { + LLVector4a(-1.f, -1.f, -1.f), + LLVector4a(-1.f, -1.f, 1.f), + LLVector4a(-1.f, 1.f, -1.f), + LLVector4a(-1.f, 1.f, 1.f), + + LLVector4a(1.f, -1.f, -1.f), + LLVector4a(1.f, -1.f, 1.f), + LLVector4a(1.f, 1.f, -1.f), + LLVector4a(1.f, 1.f, 1.f), + }; + //vertex positions are encoded so the 3 bits of their vertex index //correspond to their axis facing, with bit position 3,2,1 matching //axis facing x,y,z, bit set meaning positive facing, bit clear //meaning negative facing - v[0] = c[0]-s[0]; v[1] = c[1]-s[1]; v[2] = c[2]-s[2]; // 0 - 0000 - v[3] = c[0]-s[0]; v[4] = c[1]-s[1]; v[5] = c[2]+s[2]; // 1 - 0001 - v[6] = c[0]-s[0]; v[7] = 
c[1]+s[1]; v[8] = c[2]-s[2]; // 2 - 0010 - v[9] = c[0]-s[0]; v[10] = c[1]+s[1]; v[11] = c[2]+s[2]; // 3 - 0011 - - v[12] = c[0]+s[0]; v[13] = c[1]-s[1]; v[14] = c[2]-s[2]; // 4 - 0100 - v[15] = c[0]+s[0]; v[16] = c[1]-s[1]; v[17] = c[2]+s[2]; // 5 - 0101 - v[18] = c[0]+s[0]; v[19] = c[1]+s[1]; v[20] = c[2]-s[2]; // 6 - 0110 - v[21] = c[0]+s[0]; v[22] = c[1]+s[1]; v[23] = c[2]+s[2]; // 7 - 0111 + + for (S32 i = 0; i < 8; ++i) + { + v[i] = s; + v[i].mul(octant[i]); + v[i].add(c); + } clearState(LLSpatialGroup::OCCLUSION_DIRTY); } @@ -288,6 +325,11 @@ LLSpatialGroup::~LLSpatialGroup() llerrs << "Illegal deletion of LLSpatialGroup!" << llendl; }*/ + if (gDebugGL) + { + gPipeline.checkReferences(this); + } + if (isState(DEAD)) { sZombieGroups--; @@ -300,11 +342,13 @@ LLSpatialGroup::~LLSpatialGroup() sQueryPool.release(mOcclusionQuery[LLViewerCamera::sCurCameraID]); } - delete [] mOcclusionVerts; + _mm_free(mOcclusionVerts); LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); clearDrawMap(); clearAtlasList() ; + + _mm_free(mBounds); } BOOL LLSpatialGroup::hasAtlas(LLTextureAtlas* atlasp) @@ -456,8 +500,10 @@ void LLSpatialGroup::validate() sg_assert(!isState(DIRTY)); sg_assert(!isDead()); - LLVector3 myMin = mBounds[0] - mBounds[1]; - LLVector3 myMax = mBounds[0] + mBounds[1]; + LLVector4a myMin; + myMin.setSub(mBounds[0], mBounds[1]); + LLVector4a myMax; + myMax.setAdd(mBounds[0], mBounds[1]); validateDrawMap(); @@ -489,16 +535,18 @@ void LLSpatialGroup::validate() group->validate(); //ensure all children are enclosed in this node - LLVector3 center = group->mBounds[0]; - LLVector3 size = group->mBounds[1]; + LLVector4a center = group->mBounds[0]; + LLVector4a size = group->mBounds[1]; - LLVector3 min = center - size; - LLVector3 max = center + size; + LLVector4a min; + min.setSub(center, size); + LLVector4a max; + max.setAdd(center, size); for (U32 j = 0; j < 3; j++) { - sg_assert(min.mV[j] >= myMin.mV[j]-0.02f); - sg_assert(max.mV[j] <= myMax.mV[j]+0.02f); + 
sg_assert(min[j] >= myMin[j]-0.02f); + sg_assert(max[j] <= myMax[j]+0.02f); } } @@ -508,8 +556,8 @@ void LLSpatialGroup::validate() void LLSpatialGroup::checkStates() { #if LL_OCTREE_PARANOIA_CHECK - LLOctreeStateCheck checker; - checker.traverse(mOctreeNode); + //LLOctreeStateCheck checker; + //checker.traverse(mOctreeNode); #endif } @@ -534,19 +582,17 @@ BOOL LLSpatialGroup::updateInGroup(LLDrawable *drawablep, BOOL immediate) LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); drawablep->updateSpatialExtents(); - validate_drawable(drawablep); OctreeNode* parent = mOctreeNode->getOctParent(); if (mOctreeNode->isInside(drawablep->getPositionGroup()) && (mOctreeNode->contains(drawablep) || - (drawablep->getBinRadius() > mOctreeNode->getSize().mdV[0] && + (drawablep->getBinRadius() > mOctreeNode->getSize()[0] && parent && parent->getElementCount() >= LL_OCTREE_MAX_CAPACITY))) { unbound(); setState(OBJECT_DIRTY); //setState(GEOM_DIRTY); - validate_drawable(drawablep); return TRUE; } @@ -564,7 +610,6 @@ BOOL LLSpatialGroup::addObject(LLDrawable *drawablep, BOOL add_all, BOOL from_oc else { drawablep->setSpatialGroup(this); - validate_drawable(drawablep); setState(OBJECT_DIRTY | GEOM_DIRTY); setOcclusionState(LLSpatialGroup::DISCARD_QUERY, LLSpatialGroup::STATE_MODE_ALL_CAMERAS); gPipeline.markRebuild(this, TRUE); @@ -665,7 +710,7 @@ void LLSpatialPartition::rebuildMesh(LLSpatialGroup* group) } -BOOL LLSpatialGroup::boundObjects(BOOL empty, LLVector3& minOut, LLVector3& maxOut) +BOOL LLSpatialGroup::boundObjects(BOOL empty, LLVector4a& minOut, LLVector4a& maxOut) { const OctreeNode* node = mOctreeNode; @@ -678,8 +723,8 @@ BOOL LLSpatialGroup::boundObjects(BOOL empty, LLVector3& minOut, LLVector3& maxO return FALSE; } - LLVector3& newMin = mObjectExtents[0]; - LLVector3& newMax = mObjectExtents[1]; + LLVector4a& newMin = mObjectExtents[0]; + LLVector4a& newMax = mObjectExtents[1]; if (isState(OBJECT_DIRTY)) { //calculate new bounding box @@ -688,10 +733,10 @@ BOOL 
LLSpatialGroup::boundObjects(BOOL empty, LLVector3& minOut, LLVector3& maxO //initialize bounding box to first element OctreeNode::const_element_iter i = node->getData().begin(); LLDrawable* drawablep = *i; - const LLVector3* minMax = drawablep->getSpatialExtents(); + const LLVector4a* minMax = drawablep->getSpatialExtents(); - newMin.setVec(minMax[0]); - newMax.setVec(minMax[1]); + newMin = minMax[0]; + newMax = minMax[1]; for (++i; i != node->getData().end(); ++i) { @@ -715,8 +760,10 @@ BOOL LLSpatialGroup::boundObjects(BOOL empty, LLVector3& minOut, LLVector3& maxO }*/ } - mObjectBounds[0] = (newMin + newMax) * 0.5f; - mObjectBounds[1] = (newMax - newMin) * 0.5f; + mObjectBounds[0].setAdd(newMin, newMax); + mObjectBounds[0].mul(0.5f); + mObjectBounds[1].setSub(newMax, newMin); + mObjectBounds[1].mul(0.5f); } if (empty) @@ -726,17 +773,8 @@ BOOL LLSpatialGroup::boundObjects(BOOL empty, LLVector3& minOut, LLVector3& maxO } else { - for (U32 i = 0; i < 3; i++) - { - if (newMin.mV[i] < minOut.mV[i]) - { - minOut.mV[i] = newMin.mV[i]; - } - if (newMax.mV[i] > maxOut.mV[i]) - { - maxOut.mV[i] = newMax.mV[i]; - } - } + minOut.setMin(newMin); + maxOut.setMax(newMax); } return TRUE; @@ -827,18 +865,19 @@ BOOL LLSpatialGroup::removeObject(LLDrawable *drawablep, BOOL from_octree) return TRUE; } -void LLSpatialGroup::shift(const LLVector3 &offset) +void LLSpatialGroup::shift(const LLVector4a &offset) { LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); - LLVector3d offsetd(offset); - mOctreeNode->setCenter(mOctreeNode->getCenter()+offsetd); + LLVector4a t = mOctreeNode->getCenter(); + t.add(offset); + mOctreeNode->setCenter(t); mOctreeNode->updateMinMax(); - mBounds[0] += offset; - mExtents[0] += offset; - mExtents[1] += offset; - mObjectBounds[0] += offset; - mObjectExtents[0] += offset; - mObjectExtents[1] += offset; + mBounds[0].add(offset); + mExtents[0].add(offset); + mExtents[1].add(offset); + mObjectBounds[0].add(offset); + mObjectExtents[0].add(offset); + 
mObjectExtents[1].add(offset); //if (!mSpatialPartition->mRenderByGroup) { @@ -850,10 +889,7 @@ void LLSpatialGroup::shift(const LLVector3 &offset) { for (U32 i = 0; i < 8; i++) { - F32* v = mOcclusionVerts+i*3; - v[0] += offset.mV[0]; - v[1] += offset.mV[1]; - v[2] += offset.mV[2]; + mOcclusionVerts[i].add(offset); } } } @@ -1119,8 +1155,6 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) : mDepth(0.f), mLastUpdateDistance(-1.f), mLastUpdateTime(gFrameTimeSeconds), - mViewAngle(0.f), - mLastUpdateViewAngle(-1.f), mAtlasList(4), mCurUpdatingTime(0), mCurUpdatingSlotp(NULL), @@ -1129,13 +1163,25 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) : sNodeCount++; LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); + mBounds = (LLVector4a*) _mm_malloc(sizeof(LLVector4a) * V4_COUNT, 16); + mExtents = mBounds + EXTENTS; + mObjectBounds = mBounds + OBJECT_BOUNDS; + mObjectExtents = mBounds + OBJECT_EXTENTS; + mViewAngle = mBounds+VIEW_ANGLE; + mLastUpdateViewAngle = mBounds+LAST_VIEW_ANGLE; + + mViewAngle->splat(0.f); + mLastUpdateViewAngle->splat(-1.f); + mExtents[0] = mExtents[1] = mObjectBounds[0] = mObjectBounds[0] = mObjectBounds[1] = + mObjectExtents[0] = mObjectExtents[1] = *mViewAngle; + sg_assert(mOctreeNode->getListenerCount() == 0); mOctreeNode->addListener(this); setState(SG_INITIAL_STATE_MASK); gPipeline.markRebuild(this, TRUE); - mBounds[0] = LLVector3(node->getCenter()); - mBounds[1] = LLVector3(node->getSize()); + mBounds[0] = node->getCenter(); + mBounds[1] = node->getSize(); part->mLODSeed = (part->mLODSeed+1)%part->mLODPeriod; mLODHash = part->mLODSeed; @@ -1172,8 +1218,8 @@ void LLSpatialGroup::updateDistance(LLCamera &camera) #endif if (!getData().empty()) { - mRadius = mSpatialPartition->mRenderByGroup ? mObjectBounds[1].magVec() : - (F32) mOctreeNode->getSize().magVec(); + mRadius = mSpatialPartition->mRenderByGroup ? 
mObjectBounds[1].length3() : + (F32) mOctreeNode->getSize().length3(); mDistance = mSpatialPartition->calcDistance(this, camera); mPixelArea = mSpatialPartition->calcPixelArea(this, camera); } @@ -1181,27 +1227,34 @@ void LLSpatialGroup::updateDistance(LLCamera &camera) F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera) { - LLVector3 eye = group->mObjectBounds[0] - camera.getOrigin(); + LLVector4a eye; + LLVector4a origin; + origin.load3(camera.getOrigin().mV); + + eye.setSub(group->mObjectBounds[0], origin); F32 dist = 0.f; if (group->mDrawMap.find(LLRenderPass::PASS_ALPHA) != group->mDrawMap.end()) { - LLVector3 v = eye; - dist = eye.normVec(); + LLVector4a v = eye; + + dist = eye.length3(); + eye.normalize3fast(); if (!group->isState(LLSpatialGroup::ALPHA_DIRTY)) { if (!group->mSpatialPartition->isBridge()) { - LLVector3 view_angle = LLVector3(eye * LLVector3(1,0,0), - eye * LLVector3(0,1,0), - eye * LLVector3(0,0,1)); + LLVector4a view_angle = eye; - if ((view_angle-group->mLastUpdateViewAngle).magVec() > 0.64f) + LLVector4a diff; + diff.setSub(view_angle, *group->mLastUpdateViewAngle); + + if (diff.length3() > 0.64f) { - group->mViewAngle = view_angle; - group->mLastUpdateViewAngle = view_angle; + *group->mViewAngle = view_angle; + *group->mLastUpdateViewAngle = view_angle; //for occasional alpha sorting within the group //NOTE: If there is a trivial way to detect that alpha sorting here would not change the render order, //not setting this node to dirty would be a very good thing @@ -1215,17 +1268,20 @@ F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera) LLVector3 at = camera.getAtAxis(); - //front of bounding box - for (U32 i = 0; i < 3; i++) - { - v.mV[i] -= group->mObjectBounds[1].mV[i]*0.25f * at.mV[i]; - } + LLVector4a ata; + ata.load3(at.mV); - group->mDepth = v * at; + LLVector4a t = ata; + //front of bounding box + t.mul(0.25f); + t.mul(group->mObjectBounds[1]); + v.sub(t); + + group->mDepth = 
v.dot3(ata); } else { - dist = eye.magVec(); + dist = eye.length3(); } if (dist < 16.f) @@ -1378,7 +1434,7 @@ void LLSpatialGroup::destroyGL() } } - delete [] mOcclusionVerts; + _mm_free(mOcclusionVerts); mOcclusionVerts = NULL; for (LLSpatialGroup::element_iter i = getData().begin(); i != getData().end(); ++i) @@ -1421,8 +1477,8 @@ BOOL LLSpatialGroup::rebound() } else { - LLVector3& newMin = mExtents[0]; - LLVector3& newMax = mExtents[1]; + LLVector4a& newMin = mExtents[0]; + LLVector4a& newMax = mExtents[1]; LLSpatialGroup* group = (LLSpatialGroup*) mOctreeNode->getChild(0)->getListener(0); group->clearState(SKIP_FRUSTUM_CHECK); group->rebound(); @@ -1436,26 +1492,19 @@ BOOL LLSpatialGroup::rebound() group = (LLSpatialGroup*) mOctreeNode->getChild(i)->getListener(0); group->clearState(SKIP_FRUSTUM_CHECK); group->rebound(); - const LLVector3& max = group->mExtents[1]; - const LLVector3& min = group->mExtents[0]; + const LLVector4a& max = group->mExtents[1]; + const LLVector4a& min = group->mExtents[0]; - for (U32 j = 0; j < 3; j++) - { - if (max.mV[j] > newMax.mV[j]) - { - newMax.mV[j] = max.mV[j]; - } - if (min.mV[j] < newMin.mV[j]) - { - newMin.mV[j] = min.mV[j]; - } - } + newMax.setMax(max); + newMin.setMin(min); } boundObjects(FALSE, newMin, newMax); - mBounds[0] = (newMin + newMax)*0.5f; - mBounds[1] = (newMax - newMin)*0.5f; + mBounds[0].setAdd(newMin, newMax); + mBounds[0].mul(0.5f); + mBounds[1].setSub(newMax, newMin); + mBounds[1].mul(0.5f); } setState(OCCLUSION_DIRTY); @@ -1540,7 +1589,7 @@ void LLSpatialGroup::doOcclusion(LLCamera* camera) } glBeginQueryARB(GL_SAMPLES_PASSED_ARB, mOcclusionQuery[LLViewerCamera::sCurCameraID]); - glVertexPointer(3, GL_FLOAT, 0, mOcclusionVerts); + glVertexPointer(3, GL_FLOAT, 16, mOcclusionVerts); if (camera->getOrigin().isExactlyZero()) { //origin is invalid, draw entire box glDrawRangeElements(GL_TRIANGLE_FAN, 0, 7, 8, @@ -1581,8 +1630,11 @@ LLSpatialPartition::LLSpatialPartition(U32 data_mask, BOOL render_by_group, 
U32 LLGLNamePool::registerPool(&sQueryPool); - mOctree = new LLSpatialGroup::OctreeRoot(LLVector3d(0,0,0), - LLVector3d(1,1,1), + LLVector4a center, size; + center.splat(0.f); + size.splat(1.f); + + mOctree = new LLSpatialGroup::OctreeRoot(center,size, NULL); new LLSpatialGroup(mOctree, this); } @@ -1602,7 +1654,6 @@ LLSpatialGroup *LLSpatialPartition::put(LLDrawable *drawablep, BOOL was_visible) LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); drawablep->updateSpatialExtents(); - validate_drawable(drawablep); //keep drawable from being garbage collected LLPointer ptr = drawablep; @@ -1686,16 +1737,16 @@ void LLSpatialPartition::move(LLDrawable *drawablep, LLSpatialGroup *curp, BOOL class LLSpatialShift : public LLSpatialGroup::OctreeTraveler { public: - LLSpatialShift(LLVector3 offset) : mOffset(offset) { } + const LLVector4a& mOffset; + + LLSpatialShift(const LLVector4a& offset) : mOffset(offset) { } virtual void visit(const LLSpatialGroup::OctreeNode* branch) { ((LLSpatialGroup*) branch->getListener(0))->shift(mOffset); } - - LLVector3 mOffset; }; -void LLSpatialPartition::shift(const LLVector3 &offset) +void LLSpatialPartition::shift(const LLVector4a &offset) { //shift octree node bounding boxes by offset LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION); LLSpatialShift shifter(offset); @@ -1857,7 +1908,7 @@ public: class LLOctreeCullVisExtents: public LLOctreeCullShadow { public: - LLOctreeCullVisExtents(LLCamera* camera, LLVector3& min, LLVector3& max) + LLOctreeCullVisExtents(LLCamera* camera, LLVector4a& min, LLVector4a& max) : LLOctreeCullShadow(camera), mMin(min), mMax(max), mEmpty(TRUE) { } virtual bool earlyFail(LLSpatialGroup* group) @@ -1924,8 +1975,8 @@ public: } BOOL mEmpty; - LLVector3& mMin; - LLVector3& mMax; + LLVector4a& mMin; + LLVector4a& mMax; }; class LLOctreeCullDetectVisible: public LLOctreeCullShadow @@ -2029,6 +2080,11 @@ void drawBox(const LLVector3& c, const LLVector3& r) gGL.end(); } +void drawBox(const LLVector4a& c, const LLVector4a& 
r) +{ + drawBox(reinterpret_cast<const LLVector3&>(c), reinterpret_cast<const LLVector3&>(r)); +} + void drawBoxOutline(const LLVector3& pos, const LLVector3& size) { LLVector3 v1 = size.scaledVec(LLVector3( 1, 1,1)); @@ -2075,6 +2131,11 @@ void drawBoxOutline(const LLVector3& pos, const LLVector3& size) gGL.end(); } +void drawBoxOutline(const LLVector4a& pos, const LLVector4a& size) +{ + drawBoxOutline(reinterpret_cast<const LLVector3&>(pos), reinterpret_cast<const LLVector3&>(size)); +} + class LLOctreeDirty : public LLOctreeTraveler { public: @@ -2118,14 +2179,21 @@ BOOL LLSpatialPartition::isOcclusionEnabled() BOOL LLSpatialPartition::getVisibleExtents(LLCamera& camera, LLVector3& visMin, LLVector3& visMax) { + LLVector4a visMina, visMaxa; + visMina.load3(visMin.mV); + visMaxa.load3(visMax.mV); + { LLFastTimer ftm(FTM_CULL_REBOUND); LLSpatialGroup* group = (LLSpatialGroup*) mOctree->getListener(0); group->rebound(); } - LLOctreeCullVisExtents vis(&camera, visMin, visMax); + LLOctreeCullVisExtents vis(&camera, visMina, visMaxa); vis.traverse(mOctree); + + visMin.set(visMina.getF32()); + visMax.set(visMaxa.getF32()); /* write back the max from visMaxa, not visMina */ return vis.mEmpty; } @@ -2188,25 +2256,36 @@ BOOL earlyFail(LLCamera* camera, LLSpatialGroup* group) } const F32 vel = SG_OCCLUSION_FUDGE*2.f; - LLVector3 c = group->mBounds[0]; - LLVector3 r = group->mBounds[1] + LLVector3(vel,vel,vel); - + LLVector4a fudge; + fudge.splat(vel); + + const LLVector4a& c = group->mBounds[0]; + LLVector4a r; + r.setAdd(group->mBounds[1], fudge); + /*if (r.magVecSquared() > 1024.0*1024.0) { return TRUE; }*/ - LLVector3 e = camera->getOrigin(); + LLVector4a e; + e.load3(camera->getOrigin().mV); - LLVector3 min = c - r; - LLVector3 max = c + r; + LLVector4a min; + min.setSub(c,r); + LLVector4a max; + max.setAdd(c,r); - for (U32 j = 0; j < 3; j++) + S32 lt = e.lessThan4(min).getComparisonMask() & 0x7; + if (lt) { - if (e.mV[j] < min.mV[j] || e.mV[j] > max.mV[j]) - { - return FALSE; - } + return FALSE; + } + + S32 gt = e.greaterThan4(max).getComparisonMask() & 0x7; + if (gt) + { + return FALSE;
} return TRUE; @@ -2411,7 +2490,13 @@ void renderOctree(LLSpatialGroup* group) } gGL.color4fv(col.mV); - drawBox(group->mObjectBounds[0], group->mObjectBounds[1]*1.01f+LLVector3(0.001f, 0.001f, 0.001f)); + LLVector4a fudge; + fudge.splat(0.001f); + LLVector4a size = group->mObjectBounds[1]; + size.mul(1.01f); + size.add(fudge); + + drawBox(group->mObjectBounds[0], size); /* draw the padded half-size computed above, not the bare fudge */ gGL.setSceneBlendType(LLRender::BT_ALPHA); @@ -2442,8 +2527,12 @@ void renderOctree(LLSpatialGroup* group) for (LLSpatialGroup::drawmap_elem_t::iterator j = i->second.begin(); j != i->second.end(); ++j) { LLDrawInfo* draw_info = *j; - LLVector3 center = (draw_info->mExtents[1] + draw_info->mExtents[0])*0.5f; - LLVector3 size = (draw_info->mExtents[1] - draw_info->mExtents[0])*0.5f; + LLVector4a center; + center.setAdd(draw_info->mExtents[1], draw_info->mExtents[0]); + center.mul(0.5f); + LLVector4a size; + size.setSub(draw_info->mExtents[1], draw_info->mExtents[0]); + size.mul(0.5f); drawBoxOutline(center, size); } } @@ -2493,7 +2582,7 @@ void renderVisibility(LLSpatialGroup* group, LLCamera* camera) else if (camera && group->mOcclusionVerts) { LLVertexBuffer::unbind(); - glVertexPointer(3, GL_FLOAT, 0, group->mOcclusionVerts); + glVertexPointer(3, GL_FLOAT, 16, group->mOcclusionVerts); glColor4f(1.0f, 0.f, 0.f, 0.5f); glDrawRangeElements(GL_TRIANGLE_FAN, 0, 7, 8, GL_UNSIGNED_BYTE, get_box_fan_indices(camera, group->mBounds[0])); @@ -2572,8 +2661,8 @@ void renderBoundingBox(LLDrawable* drawable, BOOL set_color = TRUE) } } - const LLVector3* ext; - LLVector3 pos, size; + const LLVector4a* ext; + LLVector4a pos, size; //render face bounding boxes for (S32 i = 0; i < drawable->getNumFaces(); i++) @@ -2582,20 +2671,21 @@ void renderBoundingBox(LLDrawable* drawable, BOOL set_color = TRUE) ext = facep->mExtents; - if (ext[0].isExactlyZero() && ext[1].isExactlyZero()) - { - continue; - } - pos = (ext[0] + ext[1]) * 0.5f; - size = (ext[1] - ext[0]) * 0.5f; + pos.setAdd(ext[0], ext[1]); + pos.mul(0.5f); +
size.setSub(ext[1], ext[0]); + size.mul(0.5f); + drawBoxOutline(pos,size); } //render drawable bounding box ext = drawable->getSpatialExtents(); - pos = (ext[0] + ext[1]) * 0.5f; - size = (ext[1] - ext[0]) * 0.5f; + pos.setAdd(ext[0], ext[1]); + pos.mul(0.5f); + size.setSub(ext[1], ext[0]); + size.mul(0.5f); LLViewerObject* vobj = drawable->getVObj(); if (vobj && vobj->onActiveList()) @@ -2651,8 +2741,13 @@ void renderTexturePriority(LLDrawable* drawable) // gGL.color4f(1,0,1,1); //} - LLVector3 center = (facep->mExtents[1]+facep->mExtents[0])*0.5f; - LLVector3 size = (facep->mExtents[1]-facep->mExtents[0])*0.5f + LLVector3(0.01f, 0.01f, 0.01f); + LLVector4a center; + center.setAdd(facep->mExtents[1],facep->mExtents[0]); + center.mul(0.5f); + LLVector4a size; + size.setSub(facep->mExtents[1],facep->mExtents[0]); + size.mul(0.5f); + size.add(LLVector4a(0.01f)); drawBox(center, size); /*S32 boost = imagep->getBoostLevel(); @@ -2676,7 +2771,6 @@ void renderPoints(LLDrawable* drawablep) { gGL.begin(LLRender::POINTS); gGL.color3f(1,1,1); - LLVector3 center(drawablep->getPositionGroup()); for (S32 i = 0; i < drawablep->getNumFaces(); i++) { gGL.vertex3fv(drawablep->getFace(i)->mCenterLocal.mV); @@ -2708,8 +2802,12 @@ void renderShadowFrusta(LLDrawInfo* params) LLGLEnable blend(GL_BLEND); gGL.setSceneBlendType(LLRender::BT_ADD); - LLVector3 center = (params->mExtents[1]+params->mExtents[0])*0.5f; - LLVector3 size = (params->mExtents[1]-params->mExtents[0])*0.5f; + LLVector4a center; + center.setAdd(params->mExtents[1], params->mExtents[0]); + center.mul(0.5f); + LLVector4a size; + size.setSub(params->mExtents[1],params->mExtents[0]); + size.mul(0.5f); if (gPipeline.mShadowCamera[4].AABBInFrustum(center, size)) { @@ -2753,10 +2851,14 @@ void renderLights(LLDrawable* drawablep) pushVerts(drawablep->getFace(i), LLVertexBuffer::MAP_VERTEX); } - const LLVector3* ext = drawablep->getSpatialExtents(); + const LLVector4a* ext = drawablep->getSpatialExtents(); - LLVector3 pos = 
(ext[0] + ext[1]) * 0.5f; - LLVector3 size = (ext[1] - ext[0]) * 0.5f; + LLVector4a pos; + pos.setAdd(ext[0], ext[1]); + pos.mul(0.5f); + LLVector4a size; + size.setSub(ext[1], ext[0]); + size.mul(0.5f); { LLGLDepthTest depth(GL_FALSE, GL_TRUE); @@ -2766,7 +2868,7 @@ void renderLights(LLDrawable* drawablep) gGL.color4f(1,1,0,1); F32 rad = drawablep->getVOVolume()->getLightRadius(); - drawBoxOutline(pos, LLVector3(rad,rad,rad)); + drawBoxOutline(pos, LLVector4a(rad)); } } @@ -2781,7 +2883,7 @@ public: mDir.setSub(mEnd, mStart); } - void visit(const LLOctreeNode* branch) + void visit(const LLOctreeNode* branch) { LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) branch->getListener(0); @@ -2859,10 +2961,14 @@ void renderRaycast(LLDrawable* drawablep) glPopMatrix(); // draw bounding box of prim - const LLVector3* ext = drawablep->getSpatialExtents(); + const LLVector4a* ext = drawablep->getSpatialExtents(); - LLVector3 pos = (ext[0] + ext[1]) * 0.5f; - LLVector3 size = (ext[1] - ext[0]) * 0.5f; + LLVector4a pos; + pos.setAdd(ext[0], ext[1]); + pos.mul(0.5f); + LLVector4a size; + size.setSub(ext[1], ext[0]); + size.mul(0.5f); LLGLDepthTest depth(GL_FALSE, GL_TRUE); gGL.color4f(0,0.5f,0.5f,1); @@ -2949,8 +3055,8 @@ public: return; } - LLVector3 nodeCenter = group->mBounds[0]; - LLVector3 octCenter = LLVector3(group->mOctreeNode->getCenter()); + LLVector4a nodeCenter = group->mBounds[0]; + LLVector4a octCenter = group->mOctreeNode->getCenter(); group->rebuildGeom(); group->rebuildMesh(); @@ -2979,8 +3085,14 @@ public: if (drawable->isState(LLDrawable::IN_REBUILD_Q2)) { gGL.color4f(0.6f, 0.6f, 0.1f, 1.f); - const LLVector3* ext = drawable->getSpatialExtents(); - drawBoxOutline((ext[0]+ext[1])*0.5f, (ext[1]-ext[0])*0.5f); + const LLVector4a* ext = drawable->getSpatialExtents(); + LLVector4a center; + center.setAdd(ext[0], ext[1]); + center.mul(0.5f); + LLVector4a size; + size.setSub(ext[1], ext[0]); + size.mul(0.5f); + drawBoxOutline(center, size); } } @@ -3211,7 
+3323,11 @@ void LLSpatialPartition::renderDebug() void LLSpatialGroup::drawObjectBox(LLColor4 col) { gGL.color4fv(col.mV); - drawBox(mObjectBounds[0], mObjectBounds[1]*1.01f+LLVector3(0.001f, 0.001f, 0.001f)); + LLVector4a size; + size = mObjectBounds[1]; /* [1] is the box half-size; [0] is the center passed to drawBox below */ + size.mul(1.01f); + size.add(LLVector4a(0.001f)); + drawBox(mObjectBounds[0], size); } @@ -3271,8 +3387,8 @@ public: LLSpatialGroup* group = (LLSpatialGroup*) child->getListener(0); - LLVector3 size; - LLVector3 center; + LLVector4a size; + LLVector4a center; size = group->mBounds[1]; center = group->mBounds[0]; @@ -3289,7 +3405,11 @@ public: local_end = mEnd * local_matrix; } - if (LLLineSegmentBoxIntersect(local_start, local_end, center, size)) + LLVector4a start, end; + start.load3(local_start.mV); + end.load3(local_end.mV); + + if (LLLineSegmentBoxIntersect(start, end, center, size)) { check(child); } @@ -3380,6 +3500,7 @@ LLDrawInfo::LLDrawInfo(U16 start, U16 end, U32 count, U32 offset, mDrawMode(LLRender::TRIANGLES) { mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset); + mExtents = (LLVector4a*) _mm_malloc(sizeof(LLVector4a)*2, 16); mDebugColor = (rand() << 16) + rand(); } @@ -3395,6 +3516,13 @@ LLDrawInfo::~LLDrawInfo() { mFace->setDrawInfo(NULL); } + + if (gDebugGL) + { + gPipeline.checkReferences(this); + } + + _mm_free(mExtents); } void LLDrawInfo::validate() diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index 9b252d1035..119945113a 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -45,7 +45,7 @@ #include "lldrawpool.h" #include "llface.h" #include "llviewercamera.h" - +#include "llvector4a.h" #include #define SG_STATE_INHERIT_MASK (OCCLUDED) @@ -57,12 +57,15 @@ class LLSpatialGroup; class LLTextureAtlas; class LLTextureAtlasSlot; +S32 AABBSphereIntersect(const LLVector4a& min, const LLVector4a& max, const LLVector3 &origin, const F32 &rad); +S32 AABBSphereIntersectR2(const LLVector4a& min, const LLVector4a& max,
const LLVector3 &origin, const F32 &radius_squared); + S32 AABBSphereIntersect(const LLVector3& min, const LLVector3& max, const LLVector3 &origin, const F32 &rad); S32 AABBSphereIntersectR2(const LLVector3& min, const LLVector3& max, const LLVector3 &origin, const F32 &radius_squared); void pushVerts(LLFace* face, U32 mask); // get index buffer for binary encoded axis vertex buffer given a box at center being viewed by given camera -U8* get_box_fan_indices(LLCamera* camera, const LLVector3& center); +U8* get_box_fan_indices(LLCamera* camera, const LLVector4a& center); class LLDrawInfo : public LLRefCount { @@ -70,6 +73,18 @@ protected: ~LLDrawInfo(); public: + + LLDrawInfo(const LLDrawInfo& rhs) + { + *this = rhs; + } + + const LLDrawInfo& operator=(const LLDrawInfo& rhs) + { + llerrs << "Illegal operation!" << llendl; + return *this; + } + LLDrawInfo(U16 start, U16 end, U32 count, U32 offset, LLViewerTexture* image, LLVertexBuffer* buffer, BOOL fullbright = FALSE, U8 bump = 0, BOOL particle = FALSE, F32 part_size = 0); @@ -77,6 +92,8 @@ public: void validate(); + LLVector4a* mExtents; + LLPointer mVertexBuffer; LLPointer mTexture; LLColor4U mGlowColor; @@ -95,7 +112,6 @@ public: LLSpatialGroup* mGroup; LLFace* mFace; //associated face F32 mDistance; - LLVector3 mExtents[2]; U32 mDrawMode; struct CompareTexture @@ -158,11 +174,24 @@ public: }; }; +LL_ALIGN_PREFIX(64) class LLSpatialGroup : public LLOctreeListener { friend class LLSpatialPartition; friend class LLOctreeStateCheck; public: + + LLSpatialGroup(const LLSpatialGroup& rhs) + { + *this = rhs; + } + + const LLSpatialGroup& operator=(const LLSpatialGroup& rhs) + { + llerrs << "Illegal operation!" 
<< llendl; + return *this; + } + static U32 sNodeCount; static BOOL sNoDelete; //deletion of spatial groups and draw info not allowed if TRUE @@ -273,8 +302,8 @@ public: BOOL isVisible() const; BOOL isRecentlyVisible() const; void setVisible(); - void shift(const LLVector3 &offset); - BOOL boundObjects(BOOL empty, LLVector3& newMin, LLVector3& newMax); + void shift(const LLVector4a &offset); + BOOL boundObjects(BOOL empty, LLVector4a& newMin, LLVector4a& newMax); void unbound(); BOOL rebound(); void buildOcclusion(); //rebuild mOcclusionVerts @@ -322,6 +351,27 @@ public: void addAtlas(LLTextureAtlas* atlasp, S8 recursive_level = 3) ; void removeAtlas(LLTextureAtlas* atlasp, BOOL remove_group = TRUE, S8 recursive_level = 3) ; void clearAtlasList() ; + +public: + + typedef enum + { + BOUNDS = 0, + EXTENTS = 2, + OBJECT_BOUNDS = 4, + OBJECT_EXTENTS = 6, + VIEW_ANGLE = 8, + LAST_VIEW_ANGLE = 9, + V4_COUNT = 10 + } eV4Index; + + LLVector4a* mBounds; // bounding box (center, size) of this node and all its children (tight fit to objects) + LLVector4a* mExtents; // extents (min, max) of this node and all its children + LLVector4a* mObjectExtents; // extents (min, max) of objects in this node + LLVector4a* mObjectBounds; // bounding box (center, size) of objects in this node + LLVector4a* mViewAngle; + LLVector4a* mLastUpdateViewAngle; + private: U32 mCurUpdatingTime ; //do not make the below two to use LLPointer @@ -349,14 +399,9 @@ public: F32 mBuilt; OctreeNode* mOctreeNode; LLSpatialPartition* mSpatialPartition; - LLVector3 mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects) - LLVector3 mExtents[2]; // extents (min, max) of this node and all its children - LLVector3 mObjectExtents[2]; // extents (min, max) of objects in this node - LLVector3 mObjectBounds[2]; // bounding box (center, size) of objects in this node - LLPointer mVertexBuffer; - F32* mOcclusionVerts; + LLVector4a* mOcclusionVerts; GLuint 
mOcclusionQuery[LLViewerCamera::NUM_CAMERAS]; U32 mBufferUsage; @@ -367,13 +412,10 @@ public: F32 mDepth; F32 mLastUpdateDistance; F32 mLastUpdateTime; - - LLVector3 mViewAngle; - LLVector3 mLastUpdateViewAngle; F32 mPixelArea; F32 mRadius; -}; +} LL_ALIGN_POSTFIX(64); class LLGeometryManager { @@ -409,7 +451,7 @@ public: // If the drawable moves, move it here. virtual void move(LLDrawable *drawablep, LLSpatialGroup *curp, BOOL immediate = FALSE); - virtual void shift(const LLVector3 &offset); + virtual void shift(const LLVector4a &offset); virtual F32 calcDistance(LLSpatialGroup* group, LLCamera& camera); virtual F32 calcPixelArea(LLSpatialGroup* group, LLCamera& camera); @@ -467,7 +509,7 @@ public: virtual void makeActive(); virtual void move(LLDrawable *drawablep, LLSpatialGroup *curp, BOOL immediate = FALSE); virtual BOOL updateMove(); - virtual void shiftPos(const LLVector3& vec); + virtual void shiftPos(const LLVector4a& vec); virtual void cleanupReferences(); virtual LLSpatialPartition* asPartition() { return this; } virtual LLSpatialBridge* asBridge() { return this; } @@ -658,7 +700,7 @@ class LLHUDBridge : public LLVolumeBridge { public: LLHUDBridge(LLDrawable* drawablep); - virtual void shiftPos(const LLVector3& vec); + virtual void shiftPos(const LLVector4a& vec); virtual F32 calcPixelArea(LLSpatialGroup* group, LLCamera& camera); }; @@ -675,7 +717,7 @@ class LLHUDPartition : public LLBridgePartition { public: LLHUDPartition(); - virtual void shift(const LLVector3 &offset); + virtual void shift(const LLVector4a &offset); }; extern const F32 SG_BOX_SIDE; diff --git a/indra/newview/llsurfacepatch.cpp b/indra/newview/llsurfacepatch.cpp index 48e4a6ccc7..06431d428f 100644 --- a/indra/newview/llsurfacepatch.cpp +++ b/indra/newview/llsurfacepatch.cpp @@ -860,8 +860,10 @@ void LLSurfacePatch::updateVisibility() F32 stride_per_distance = DEFAULT_DELTA_ANGLE / mSurfacep->getMetersPerGrid(); U32 grids_per_patch_edge = mSurfacep->getGridsPerPatchEdge(); - LLVector3 
center = mCenterRegion + mSurfacep->getOriginAgent(); - LLVector3 radius = LLVector3(mRadius, mRadius, mRadius); + LLVector4a center; + center.load3( (mCenterRegion + mSurfacep->getOriginAgent()).mV); + LLVector4a radius; + radius.splat(mRadius); // sphere in frustum on global coordinates if (LLViewerCamera::getInstance()->AABBInFrustumNoFarClip(center, radius)) diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index 1490f8153c..d31b0f51fd 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -900,9 +900,11 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot) render_ui(); } - gPipeline.rebuildGroups(); - + LLSpatialGroup::sNoDelete = FALSE; + gPipeline.clearReferences(); + + gPipeline.rebuildGroups(); } LLAppViewer::instance()->pingMainloopTimeout("Display:FrameStats"); @@ -1000,6 +1002,7 @@ void render_hud_attachments() gPipeline.renderGeom(hud_cam); LLSpatialGroup::sNoDelete = FALSE; + gPipeline.clearReferences(); render_hud_elements(); diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp index 3aecd0175d..0ed2d1da09 100644 --- a/indra/newview/llviewerobject.cpp +++ b/indra/newview/llviewerobject.cpp @@ -2863,21 +2863,26 @@ void LLViewerObject::setScale(const LLVector3 &scale, BOOL damped) } } -void LLViewerObject::updateSpatialExtents(LLVector3& newMin, LLVector3 &newMax) -{ - LLVector3 center = getRenderPosition(); - LLVector3 size = getScale(); - newMin.setVec(center-size); - newMax.setVec(center+size); - mDrawable->setPositionGroup((newMin + newMax) * 0.5f); +void LLViewerObject::updateSpatialExtents(LLVector4a& newMin, LLVector4a &newMax) +{ + LLVector4a center; + center.load3(getRenderPosition().mV); + LLVector4a size; + size.load3(getScale().mV); + newMin.setSub(center, size); + newMax.setAdd(center, size); + + mDrawable->setPositionGroup(center); } F32 LLViewerObject::getBinRadius() { if (mDrawable.notNull()) { - const LLVector3* ext 
= mDrawable->getSpatialExtents(); - return (ext[1]-ext[0]).magVec(); + const LLVector4a* ext = mDrawable->getSpatialExtents(); + LLVector4a diff; + diff.setSub(ext[1], ext[0]); + return diff.length3(); } return getScale().magVec(); @@ -3469,12 +3474,21 @@ BOOL LLViewerObject::lineSegmentBoundingBox(const LLVector3& start, const LLVect return FALSE; } - const LLVector3* ext = mDrawable->getSpatialExtents(); + const LLVector4a* ext = mDrawable->getSpatialExtents(); + + //VECTORIZE THIS + LLVector4a center; + center.setAdd(ext[1], ext[0]); + center.mul(0.5f); + LLVector4a size; + size.setSub(ext[1], ext[0]); + size.mul(0.5f); - LLVector3 center = (ext[1]+ext[0])*0.5f; - LLVector3 size = (ext[1]-ext[0])*0.5f; + LLVector4a starta, enda; + starta.load3(start.mV); + enda.load3(end.mV); - return LLLineSegmentBoxIntersect(start, end, center, size); + return LLLineSegmentBoxIntersect(starta, enda, center, size); } U8 LLViewerObject::getMediaType() const diff --git a/indra/newview/llviewerobject.h b/indra/newview/llviewerobject.h index 0fd0cbfa60..6ebd1cbe21 100644 --- a/indra/newview/llviewerobject.h +++ b/indra/newview/llviewerobject.h @@ -373,7 +373,7 @@ public: void markForUpdate(BOOL priority); void updateVolume(const LLVolumeParams& volume_params); - virtual void updateSpatialExtents(LLVector3& min, LLVector3& max); + virtual void updateSpatialExtents(LLVector4a& min, LLVector4a& max); virtual F32 getBinRadius(); LLBBox getBoundingBoxAgent() const; @@ -386,7 +386,7 @@ public: void clearDrawableState(U32 state, BOOL recursive = TRUE); // Called when the drawable shifts - virtual void onShift(const LLVector3 &shift_vector) { } + virtual void onShift(const LLVector4a &shift_vector) { } ////////////////////////////////////// // diff --git a/indra/newview/llviewerpartsim.cpp b/indra/newview/llviewerpartsim.cpp index 6b480ccf8e..41848e8b7a 100644 --- a/indra/newview/llviewerpartsim.cpp +++ b/indra/newview/llviewerpartsim.cpp @@ -161,8 +161,8 @@ 
LLViewerPartGroup::LLViewerPartGroup(const LLVector3 &center_agent, const F32 bo if (group != NULL) { - LLVector3 center(group->mOctreeNode->getCenter()); - LLVector3 size(group->mOctreeNode->getSize()); + LLVector3 center(group->mOctreeNode->getCenter().getF32()); + LLVector3 size(group->mOctreeNode->getSize().getF32()); size += LLVector3(0.01f, 0.01f, 0.01f); mMinObjPos = center - size; mMaxObjPos = center + size; diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp index ac109771dd..b097461822 100644 --- a/indra/newview/llvoavatar.cpp +++ b/indra/newview/llvoavatar.cpp @@ -1286,41 +1286,46 @@ void LLVOAvatar::updateDrawable(BOOL force_damped) clearChanged(SHIFTED); } -void LLVOAvatar::onShift(const LLVector3& shift_vector) +void LLVOAvatar::onShift(const LLVector4a& shift_vector) { - mLastAnimExtents[0] += shift_vector; - mLastAnimExtents[1] += shift_vector; + const LLVector3& shift = reinterpret_cast<const LLVector3&>(shift_vector); + mLastAnimExtents[0] += shift; + mLastAnimExtents[1] += shift; mNeedsImpostorUpdate = TRUE; mNeedsAnimUpdate = TRUE; } -void LLVOAvatar::updateSpatialExtents(LLVector3& newMin, LLVector3 &newMax) +void LLVOAvatar::updateSpatialExtents(LLVector4a& newMin, LLVector4a &newMax) { if (isImpostor() && !needsImpostorUpdate()) { LLVector3 delta = getRenderPosition() - - ((LLVector3(mDrawable->getPositionGroup())-mImpostorOffset)); + ((LLVector3(mDrawable->getPositionGroup().getF32())-mImpostorOffset)); - newMin = mLastAnimExtents[0] + delta; - newMax = mLastAnimExtents[1] + delta; + newMin.load3( (mLastAnimExtents[0] + delta).mV); + newMax.load3( (mLastAnimExtents[1] + delta).mV); } else { getSpatialExtents(newMin,newMax); - mLastAnimExtents[0] = newMin; - mLastAnimExtents[1] = newMax; - LLVector3 pos_group = (newMin+newMax)*0.5f; - mImpostorOffset = pos_group-getRenderPosition(); + mLastAnimExtents[0].set(newMin.getF32()); + mLastAnimExtents[1].set(newMax.getF32()); + LLVector4a pos_group; + pos_group.setAdd(newMin,newMax); +
pos_group.mul(0.5f); + mImpostorOffset = LLVector3(pos_group.getF32())-getRenderPosition(); mDrawable->setPositionGroup(pos_group); } } -void LLVOAvatar::getSpatialExtents(LLVector3& newMin, LLVector3& newMax) +void LLVOAvatar::getSpatialExtents(LLVector4a& newMin, LLVector4a& newMax) { - LLVector3 buffer(0.25f, 0.25f, 0.25f); - LLVector3 pos = getRenderPosition(); - newMin = pos - buffer; - newMax = pos + buffer; + LLVector4a buffer(0.25f); + LLVector4a pos; + pos.load3(getRenderPosition().mV); + newMin.setSub(pos, buffer); + newMax.setAdd(pos, buffer); + float max_attachment_span = DEFAULT_MAX_PRIM_SCALE * 5.0f; //stretch bounding box by joint positions @@ -1329,12 +1334,20 @@ void LLVOAvatar::getSpatialExtents(LLVector3& newMin, LLVector3& newMax) LLPolyMesh* mesh = i->second; for (S32 joint_num = 0; joint_num < mesh->mJointRenderData.count(); joint_num++) { - update_min_max(newMin, newMax, - mesh->mJointRenderData[joint_num]->mWorldMatrix->getTranslation()); + LLVector4a trans; + trans.load3( mesh->mJointRenderData[joint_num]->mWorldMatrix->getTranslation().mV); + update_min_max(newMin, newMax, trans); } } - mPixelArea = LLPipeline::calcPixelArea((newMin+newMax)*0.5f, (newMax-newMin)*0.5f, *LLViewerCamera::getInstance()); + LLVector4a center, size; + center.setAdd(newMin, newMax); + center.mul(0.5f); + + size.setSub(newMax,newMin); + size.mul(0.5f); + + mPixelArea = LLPipeline::calcPixelArea(center, size, *LLViewerCamera::getInstance()); //stretch bounding box by attachments for (attachment_map_t::iterator iter = mAttachmentPoints.begin(); @@ -1361,15 +1374,17 @@ void LLVOAvatar::getSpatialExtents(LLVector3& newMin, LLVector3& newMax) LLSpatialBridge* bridge = drawable->getSpatialBridge(); if (bridge) { - const LLVector3* ext = bridge->getSpatialExtents(); - LLVector3 distance = (ext[1] - ext[0]); + const LLVector4a* ext = bridge->getSpatialExtents(); + LLVector4a distance; + distance.setSub(ext[1], ext[0]); + LLVector4a max_span(max_attachment_span); + + S32 
lt = distance.lessThan4(max_span).getComparisonMask() & 0x7; // Only add the prim to spatial extents calculations if it isn't a megaprim. // max_attachment_span calculated at the start of the function // (currently 5 times our max prim size) - if (distance.mV[0] < max_attachment_span - && distance.mV[1] < max_attachment_span - && distance.mV[2] < max_attachment_span) + if (lt == 0x7) { update_min_max(newMin,newMax,ext[0]); update_min_max(newMin,newMax,ext[1]); @@ -1381,8 +1396,9 @@ void LLVOAvatar::getSpatialExtents(LLVector3& newMin, LLVector3& newMax) } //pad bounding box - newMin -= buffer; - newMax += buffer; + + newMin.sub(buffer); + newMax.add(buffer); } //----------------------------------------------------------------------------- @@ -2371,7 +2387,7 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update) if (isImpostor() && !mNeedsImpostorUpdate) { - LLVector3 ext[2]; + LLVector4a ext[2]; F32 distance; LLVector3 angle; @@ -2400,12 +2416,22 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update) } else { + //VECTORIZE THIS getSpatialExtents(ext[0], ext[1]); - if ((ext[1]-mImpostorExtents[1]).length() > 0.05f || - (ext[0]-mImpostorExtents[0]).length() > 0.05f) + LLVector4a diff; + diff.setSub(ext[1], mImpostorExtents[1]); + if (diff.length3() > 0.05f) { mNeedsImpostorUpdate = TRUE; } + else + { + diff.setSub(ext[0], mImpostorExtents[0]); + if (diff.length3() > 0.05f) + { + mNeedsImpostorUpdate = TRUE; + } + } } } } @@ -5151,9 +5177,13 @@ void LLVOAvatar::setPixelAreaAndAngle(LLAgent &agent) return; } - const LLVector3* ext = mDrawable->getSpatialExtents(); - LLVector3 center = (ext[1] + ext[0]) * 0.5f; - LLVector3 size = (ext[1]-ext[0])*0.5f; + const LLVector4a* ext = mDrawable->getSpatialExtents(); + LLVector4a center; + center.setAdd(ext[1], ext[0]); + center.mul(0.5f); + LLVector4a size; + size.setSub(ext[1], ext[0]); + size.mul(0.5f); mImpostorPixelArea = LLPipeline::calcPixelArea(center, size, *LLViewerCamera::getInstance()); @@ -5165,7 +5195,7 @@ void 
LLVOAvatar::setPixelAreaAndAngle(LLAgent &agent) } else { - F32 radius = size.length(); + F32 radius = size.length3(); mAppAngle = (F32) atan2( radius, range) * RAD_TO_DEG; } @@ -7546,9 +7576,9 @@ void LLVOAvatar::cacheImpostorValues() getImpostorValues(mImpostorExtents, mImpostorAngle, mImpostorDistance); } -void LLVOAvatar::getImpostorValues(LLVector3* extents, LLVector3& angle, F32& distance) const +void LLVOAvatar::getImpostorValues(LLVector4a* extents, LLVector3& angle, F32& distance) const { - const LLVector3* ext = mDrawable->getSpatialExtents(); + const LLVector4a* ext = mDrawable->getSpatialExtents(); extents[0] = ext[0]; extents[1] = ext[1]; diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h index a851b7a150..71c3ed1cc2 100644 --- a/indra/newview/llvoavatar.h +++ b/indra/newview/llvoavatar.h @@ -127,7 +127,7 @@ public: virtual BOOL isActive() const; // Whether this object needs to do an idleUpdate. virtual void updateTextures(); virtual S32 setTETexture(const U8 te, const LLUUID& uuid); // If setting a baked texture, need to request it from a non-local sim. 
- virtual void onShift(const LLVector3& shift_vector); + virtual void onShift(const LLVector4a& shift_vector); virtual U32 getPartitionType() const; virtual const LLVector3 getRenderPosition() const; virtual void updateDrawable(BOOL force_damped); @@ -135,8 +135,8 @@ public: virtual BOOL updateGeometry(LLDrawable *drawable); virtual void setPixelAreaAndAngle(LLAgent &agent); virtual void updateRegion(LLViewerRegion *regionp); - virtual void updateSpatialExtents(LLVector3& newMin, LLVector3 &newMax); - virtual void getSpatialExtents(LLVector3& newMin, LLVector3& newMax); + virtual void updateSpatialExtents(LLVector4a& newMin, LLVector4a &newMax); + virtual void getSpatialExtents(LLVector4a& newMin, LLVector4a& newMax); virtual BOOL lineSegmentIntersect(const LLVector3& start, const LLVector3& end, S32 face = -1, // which face to check, -1 = ALL_SIDES BOOL pick_transparent = FALSE, @@ -391,7 +391,7 @@ public: BOOL needsImpostorUpdate() const; const LLVector3& getImpostorOffset() const; const LLVector2& getImpostorDim() const; - void getImpostorValues(LLVector3* extents, LLVector3& angle, F32& distance) const; + void getImpostorValues(LLVector4a* extents, LLVector3& angle, F32& distance) const; void cacheImpostorValues(); void setImpostorDim(const LLVector2& dim); static void resetImpostors(); @@ -402,7 +402,7 @@ private: LLVector3 mImpostorOffset; LLVector2 mImpostorDim; BOOL mNeedsAnimUpdate; - LLVector3 mImpostorExtents[2]; + LL_ALIGN_16(LLVector4a mImpostorExtents[2]); LLVector3 mImpostorAngle; F32 mImpostorDistance; F32 mImpostorPixelArea; diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp index 3ba4ecad0c..b5fd8182c6 100644 --- a/indra/newview/llvopartgroup.cpp +++ b/indra/newview/llvopartgroup.cpp @@ -79,12 +79,14 @@ F32 LLVOPartGroup::getBinRadius() return mScale.mV[0]*2.f; } -void LLVOPartGroup::updateSpatialExtents(LLVector3& newMin, LLVector3& newMax) +void LLVOPartGroup::updateSpatialExtents(LLVector4a& newMin, LLVector4a& 
newMax) { const LLVector3& pos_agent = getPositionAgent(); - newMin = pos_agent - mScale; - newMax = pos_agent + mScale; - mDrawable->setPositionGroup(pos_agent); + newMin.load3( (pos_agent - mScale).mV); + newMax.load3( (pos_agent + mScale).mV); + LLVector4a pos; + pos.load3(pos_agent.mV); + mDrawable->setPositionGroup(pos); } BOOL LLVOPartGroup::idleUpdate(LLAgent &agent, LLWorld &world, const F64 &time) diff --git a/indra/newview/llvopartgroup.h b/indra/newview/llvopartgroup.h index 18583b4be9..771ae1c1eb 100644 --- a/indra/newview/llvopartgroup.h +++ b/indra/newview/llvopartgroup.h @@ -57,7 +57,7 @@ public: BOOL idleUpdate(LLAgent &agent, LLWorld &world, const F64 &time); virtual F32 getBinRadius(); - virtual void updateSpatialExtents(LLVector3& newMin, LLVector3& newMax); + virtual void updateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax); virtual U32 getPartitionType() const; /*virtual*/ void setPixelAreaAndAngle(LLAgent &agent); diff --git a/indra/newview/llvosurfacepatch.cpp b/indra/newview/llvosurfacepatch.cpp index eef62ddf1a..02e7e7e60f 100644 --- a/indra/newview/llvosurfacepatch.cpp +++ b/indra/newview/llvosurfacepatch.cpp @@ -995,7 +995,13 @@ BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector3& start, const LLVect //step one meter at a time until intersection point found - const LLVector3* ext = mDrawable->getSpatialExtents(); + //VECTORIZE THIS + const LLVector4a* exta = mDrawable->getSpatialExtents(); + + LLVector3 ext[2]; + ext[0].set(exta[0].getF32()); + ext[1].set(exta[1].getF32()); + F32 rad = (delta*tdelta).magVecSquared(); F32 t = 0.f; @@ -1057,13 +1063,16 @@ BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector3& start, const LLVect return FALSE; } -void LLVOSurfacePatch::updateSpatialExtents(LLVector3& newMin, LLVector3 &newMax) +void LLVOSurfacePatch::updateSpatialExtents(LLVector4a& newMin, LLVector4a &newMax) { LLVector3 posAgent = getPositionAgent(); LLVector3 scale = getScale(); - newMin = posAgent-scale*0.5f; // 
Changing to 2.f makes the culling a -little- better, but still wrong - newMax = posAgent+scale*0.5f; - mDrawable->setPositionGroup((newMin+newMax)*0.5f); + newMin.load3( (posAgent-scale*0.5f).mV); // Changing to 2.f makes the culling a -little- better, but still wrong + newMax.load3( (posAgent+scale*0.5f).mV); + LLVector4a pos; + pos.setAdd(newMin,newMax); + pos.mul(0.5f); + mDrawable->setPositionGroup(pos); } U32 LLVOSurfacePatch::getPartitionType() const diff --git a/indra/newview/llvosurfacepatch.h b/indra/newview/llvosurfacepatch.h index 10a5888526..15442e1947 100644 --- a/indra/newview/llvosurfacepatch.h +++ b/indra/newview/llvosurfacepatch.h @@ -78,7 +78,7 @@ public: /*virtual*/ void updateTextures(); /*virtual*/ void setPixelAreaAndAngle(LLAgent &agent); // generate accurate apparent angle and area - /*virtual*/ void updateSpatialExtents(LLVector3& newMin, LLVector3& newMax); + /*virtual*/ void updateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax); /*virtual*/ BOOL isActive() const; // Whether this object needs to do an idleUpdate. 
void setPatch(LLSurfacePatch *patchp); diff --git a/indra/newview/llvotree.cpp b/indra/newview/llvotree.cpp index b89c0cd638..d564643161 100644 --- a/indra/newview/llvotree.cpp +++ b/indra/newview/llvotree.cpp @@ -1238,7 +1238,7 @@ void LLVOTree::updateRadius() mDrawable->setRadius(32.0f); } -void LLVOTree::updateSpatialExtents(LLVector3& newMin, LLVector3& newMax) +void LLVOTree::updateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax) { F32 radius = getScale().length()*0.05f; LLVector3 center = getRenderPosition(); @@ -1248,9 +1248,11 @@ void LLVOTree::updateSpatialExtents(LLVector3& newMin, LLVector3& newMax) center += LLVector3(0, 0, size.mV[2]) * getRotation(); - newMin.set(center-size); - newMax.set(center+size); - mDrawable->setPositionGroup(center); + newMin.load3((center-size).mV); + newMax.load3((center+size).mV); + LLVector4a pos; + pos.load3(center.mV); + mDrawable->setPositionGroup(pos); } BOOL LLVOTree::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, S32 face, BOOL pick_transparent, S32 *face_hitp, @@ -1263,8 +1265,13 @@ BOOL LLVOTree::lineSegmentIntersect(const LLVector3& start, const LLVector3& end return FALSE; } - const LLVector3* ext = mDrawable->getSpatialExtents(); + const LLVector4a* exta = mDrawable->getSpatialExtents(); + //VECTORIZE THIS + LLVector3 ext[2]; + ext[0].set(exta[0].getF32()); + ext[1].set(exta[1].getF32()); + LLVector3 center = (ext[1]+ext[0])*0.5f; LLVector3 size = (ext[1]-ext[0]); diff --git a/indra/newview/llvotree.h b/indra/newview/llvotree.h index feac9e0675..2ce1b03d26 100644 --- a/indra/newview/llvotree.h +++ b/indra/newview/llvotree.h @@ -73,7 +73,7 @@ public: /*virtual*/ LLDrawable* createDrawable(LLPipeline *pipeline); /*virtual*/ BOOL updateGeometry(LLDrawable *drawable); - /*virtual*/ void updateSpatialExtents(LLVector3 &min, LLVector3 &max); + /*virtual*/ void updateSpatialExtents(LLVector4a &min, LLVector4a &max); virtual U32 getPartitionType() const; diff --git 
a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index a9f3abeef8..db9e0b88e1 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -701,7 +701,7 @@ void LLVOVolume::updateTextureVirtualSize() const LLTextureEntry *te = face->getTextureEntry(); LLViewerTexture *imagep = face->getTexture(); if (!imagep || !te || - face->mExtents[0] == face->mExtents[1]) + face->mExtents[0].equal3(face->mExtents[1])) { continue; } @@ -1332,7 +1332,7 @@ BOOL LLVOVolume::genBBoxes(BOOL force_global) { BOOL res = TRUE; - LLVector3 min,max; + LLVector4a min,max; BOOL rebuild = mDrawable->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION); @@ -1356,17 +1356,8 @@ BOOL LLVOVolume::genBBoxes(BOOL force_global) } else { - for (U32 i = 0; i < 3; i++) - { - if (face->mExtents[0].mV[i] < min.mV[i]) - { - min.mV[i] = face->mExtents[0].mV[i]; - } - if (face->mExtents[1].mV[i] > max.mV[i]) - { - max.mV[i] = face->mExtents[1].mV[i]; - } - } + min.setMin(face->mExtents[0]); + max.setMax(face->mExtents[1]); } } } @@ -1374,7 +1365,9 @@ BOOL LLVOVolume::genBBoxes(BOOL force_global) if (rebuild) { mDrawable->setSpatialExtents(min,max); - mDrawable->setPositionGroup((min+max)*0.5f); + min.add(max); + min.mul(0.5f); + mDrawable->setPositionGroup(min); } updateRadius(); @@ -3007,7 +3000,7 @@ void LLVOVolume::setSelected(BOOL sel) } } -void LLVOVolume::updateSpatialExtents(LLVector3& newMin, LLVector3& newMax) +void LLVOVolume::updateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax) { } @@ -3039,7 +3032,7 @@ F32 LLVOVolume::getBinRadius() } } - const LLVector3* ext = mDrawable->getSpatialExtents(); + const LLVector4a* ext = mDrawable->getSpatialExtents(); BOOL shrink_wrap = mDrawable->isAnimating(); BOOL alpha_wrap = FALSE; @@ -3071,7 +3064,10 @@ F32 LLVOVolume::getBinRadius() } else if (shrink_wrap) { - radius = (ext[1]-ext[0]).length()*0.5f; + LLVector4a rad; + rad.setSub(ext[1], ext[0]); + + radius = rad.length3()*0.5f; } else if 
(mDrawable->isStatic()) { @@ -3107,7 +3103,7 @@ const LLVector3 LLVOVolume::getPivotPositionAgent() const return LLViewerObject::getPivotPositionAgent(); } -void LLVOVolume::onShift(const LLVector3 &shift_vector) +void LLVOVolume::onShift(const LLVector4a &shift_vector) { if (mVolumeImpl) { @@ -3610,7 +3606,6 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) } } } - } continue; @@ -4217,7 +4212,7 @@ LLHUDPartition::LLHUDPartition() mLODPeriod = 1; } -void LLHUDPartition::shift(const LLVector3 &offset) +void LLHUDPartition::shift(const LLVector4a &offset) { //HUD objects don't shift with region crossing. That would be silly. } diff --git a/indra/newview/llvovolume.h b/indra/newview/llvovolume.h index 2776988a12..d5606034d0 100644 --- a/indra/newview/llvovolume.h +++ b/indra/newview/llvovolume.h @@ -66,7 +66,7 @@ public: virtual void onSetVolume(const LLVolumeParams &volume_params, const S32 detail) = 0; virtual void onSetScale(const LLVector3 &scale, BOOL damped) = 0; virtual void onParameterChanged(U16 param_type, LLNetworkData *data, BOOL in_use, bool local_origin) = 0; - virtual void onShift(const LLVector3 &shift_vector) = 0; + virtual void onShift(const LLVector4a &shift_vector) = 0; virtual bool isVolumeUnique() const = 0; // Do we need a unique LLVolume instance? virtual bool isVolumeGlobal() const = 0; // Are we in global space? virtual bool isActive() const = 0; // Is this object currently active? 
@@ -145,7 +145,7 @@ public: void markForUpdate(BOOL priority) { LLViewerObject::markForUpdate(priority); mVolumeChanged = TRUE; } - /*virtual*/ void onShift(const LLVector3 &shift_vector); // Called when the drawable shifts + /*virtual*/ void onShift(const LLVector4a &shift_vector); // Called when the drawable shifts /*virtual*/ void parameterChanged(U16 param_type, bool local_origin); /*virtual*/ void parameterChanged(U16 param_type, LLNetworkData* data, BOOL in_use, bool local_origin); @@ -201,7 +201,7 @@ public: void regenFaces(); BOOL genBBoxes(BOOL force_global); void preRebuild(); - virtual void updateSpatialExtents(LLVector3& min, LLVector3& max); + virtual void updateSpatialExtents(LLVector4a& min, LLVector4a& max); virtual F32 getBinRadius(); virtual U32 getPartitionType() const; diff --git a/indra/newview/llvowater.cpp b/indra/newview/llvowater.cpp index a8c4625f6e..7c1b22d432 100644 --- a/indra/newview/llvowater.cpp +++ b/indra/newview/llvowater.cpp @@ -258,15 +258,21 @@ void LLVOWater::setIsEdgePatch(const BOOL edge_patch) mIsEdgePatch = edge_patch; } -void LLVOWater::updateSpatialExtents(LLVector3 &newMin, LLVector3& newMax) +void LLVOWater::updateSpatialExtents(LLVector4a &newMin, LLVector4a& newMax) { - LLVector3 pos = getPositionAgent(); - LLVector3 scale = getScale(); - - newMin = pos - scale * 0.5f; - newMax = pos + scale * 0.5f; + LLVector4a pos; + pos.load3(getPositionAgent().mV); + LLVector4a scale; + scale.load3(getScale().mV); + scale.mul(0.5f); + + newMin.setSub(pos, scale); + newMax.setAdd(pos, scale); + + pos.setAdd(newMin,newMax); + pos.mul(0.5f); - mDrawable->setPositionGroup((newMin + newMax) * 0.5f); + mDrawable->setPositionGroup(pos); } U32 LLVOWater::getPartitionType() const diff --git a/indra/newview/llvowater.h b/indra/newview/llvowater.h index 3cc031e589..a868afe58b 100644 --- a/indra/newview/llvowater.h +++ b/indra/newview/llvowater.h @@ -66,7 +66,7 @@ public: /*virtual*/ BOOL idleUpdate(LLAgent &agent, LLWorld &world, const F64 
&time); /*virtual*/ LLDrawable* createDrawable(LLPipeline *pipeline); /*virtual*/ BOOL updateGeometry(LLDrawable *drawable); - /*virtual*/ void updateSpatialExtents(LLVector3& newMin, LLVector3& newMax); + /*virtual*/ void updateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax); /*virtual*/ void updateTextures(); /*virtual*/ void setPixelAreaAndAngle(LLAgent &agent); // generate accurate apparent angle and area diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 91c3805d3b..01027e5be6 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -1508,11 +1508,214 @@ F32 LLPipeline::calcPixelArea(LLVector3 center, LLVector3 size, LLCamera &camera return radius*radius * F_PI; } +//static +F32 LLPipeline::calcPixelArea(const LLVector4a& center, const LLVector4a& size, LLCamera &camera) +{ + LLVector4a origin; + origin.load3(camera.getOrigin().mV); + + LLVector4a lookAt; + lookAt.setSub(center, origin); + F32 dist = lookAt.length3(); + + //ramp down distance for nearby objects + //shrink dist by dist/16. + if (dist < 16.f) + { + dist /= 16.f; + dist *= dist; + dist *= 16.f; + } + + //get area of circle around node + F32 app_angle = atanf(size.length3()/dist); + F32 radius = app_angle*LLDrawable::sCurPixelAngle; + return radius*radius * F_PI; +} + void LLPipeline::grabReferences(LLCullResult& result) { sCull = &result; } +void LLPipeline::clearReferences() +{ + sCull = NULL; +} + +void check_references(LLSpatialGroup* group, LLDrawable* drawable) +{ + for (LLSpatialGroup::element_iter i = group->getData().begin(); i != group->getData().end(); ++i) + { + if (drawable == *i) + { + llerrs << "LLDrawable deleted while actively reference by LLPipeline." << llendl; + } + } +} + +void check_references(LLDrawable* drawable, LLFace* face) +{ + for (S32 i = 0; i < drawable->getNumFaces(); ++i) + { + if (drawable->getFace(i) == face) + { + llerrs << "LLFace deleted while actively referenced by LLPipeline." 
<< llendl; + } + } +} + +void check_references(LLSpatialGroup* group, LLFace* face) +{ + for (LLSpatialGroup::element_iter i = group->getData().begin(); i != group->getData().end(); ++i) + { + LLDrawable* drawable = *i; + check_references(drawable, face); + } +} + +void LLPipeline::checkReferences(LLFace* face) +{ +#if 0 + if (sCull) + { + for (LLCullResult::sg_list_t::iterator iter = sCull->beginVisibleGroups(); iter != sCull->endVisibleGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, face); + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginAlphaGroups(); iter != sCull->endAlphaGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, face); + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginDrawableGroups(); iter != sCull->endDrawableGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, face); + } + + for (LLCullResult::drawable_list_t::iterator iter = sCull->beginVisibleList(); iter != sCull->endVisibleList(); ++iter) + { + LLDrawable* drawable = *iter; + check_references(drawable, face); + } + } +#endif +} + +void LLPipeline::checkReferences(LLDrawable* drawable) +{ +#if 0 + if (sCull) + { + for (LLCullResult::sg_list_t::iterator iter = sCull->beginVisibleGroups(); iter != sCull->endVisibleGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, drawable); + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginAlphaGroups(); iter != sCull->endAlphaGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, drawable); + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginDrawableGroups(); iter != sCull->endDrawableGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, drawable); + } + + for (LLCullResult::drawable_list_t::iterator iter = sCull->beginVisibleList(); iter != sCull->endVisibleList(); ++iter) + { + if (drawable == *iter) + { + llerrs << 
"LLDrawable deleted while actively referenced by LLPipeline." << llendl; + } + } + } +#endif +} + +void check_references(LLSpatialGroup* group, LLDrawInfo* draw_info) +{ + for (LLSpatialGroup::draw_map_t::iterator i = group->mDrawMap.begin(); i != group->mDrawMap.end(); ++i) + { + LLSpatialGroup::drawmap_elem_t& draw_vec = i->second; + for (LLSpatialGroup::drawmap_elem_t::iterator j = draw_vec.begin(); j != draw_vec.end(); ++j) + { + LLDrawInfo* params = *j; + if (params == draw_info) + { + llerrs << "LLDrawInfo deleted while actively referenced by LLPipeline." << llendl; + } + } + } +} + + +void LLPipeline::checkReferences(LLDrawInfo* draw_info) +{ +#if 0 + if (sCull) + { + for (LLCullResult::sg_list_t::iterator iter = sCull->beginVisibleGroups(); iter != sCull->endVisibleGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, draw_info); + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginAlphaGroups(); iter != sCull->endAlphaGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, draw_info); + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginDrawableGroups(); iter != sCull->endDrawableGroups(); ++iter) + { + LLSpatialGroup* group = *iter; + check_references(group, draw_info); + } + } +#endif +} + +void LLPipeline::checkReferences(LLSpatialGroup* group) +{ +#if 0 + if (sCull) + { + for (LLCullResult::sg_list_t::iterator iter = sCull->beginVisibleGroups(); iter != sCull->endVisibleGroups(); ++iter) + { + if (group == *iter) + { + llerrs << "LLSpatialGroup deleted while actively referenced by LLPipeline." << llendl; + } + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginAlphaGroups(); iter != sCull->endAlphaGroups(); ++iter) + { + if (group == *iter) + { + llerrs << "LLSpatialGroup deleted while actively referenced by LLPipeline." 
<< llendl; + } + } + + for (LLCullResult::sg_list_t::iterator iter = sCull->beginDrawableGroups(); iter != sCull->endDrawableGroups(); ++iter) + { + if (group == *iter) + { + llerrs << "LLSpatialGroup deleted while actively referenced by LLPipeline." << llendl; + } + } + } +#endif +} + + BOOL LLPipeline::visibleObjectsInFrustum(LLCamera& camera) { for (LLWorld::region_list_t::const_iterator iter = LLWorld::getInstance()->getRegionList().begin(); @@ -1714,7 +1917,7 @@ void LLPipeline::markNotCulled(LLSpatialGroup* group, LLCamera& camera) } if (sMinRenderSize > 0.f && - llmax(llmax(group->mBounds[1].mV[0], group->mBounds[1].mV[1]), group->mBounds[1].mV[2]) < sMinRenderSize) + llmax(llmax(group->mBounds[1][0], group->mBounds[1][1]), group->mBounds[1][2]) < sMinRenderSize) { return; } @@ -2100,6 +2303,9 @@ void LLPipeline::shiftObjects(const LLVector3 &offset) glClear(GL_DEPTH_BUFFER_BIT); gDepthDirty = TRUE; + LLVector4a offseta; + offseta.load3(offset.mV); + for (LLDrawable::drawable_vector_t::iterator iter = mShiftList.begin(); iter != mShiftList.end(); iter++) { @@ -2108,7 +2314,7 @@ void LLPipeline::shiftObjects(const LLVector3 &offset) { continue; } - drawablep->shiftPos(offset); + drawablep->shiftPos(offseta); drawablep->clearState(LLDrawable::ON_SHIFT_LIST); } mShiftList.resize(0); @@ -2122,7 +2328,7 @@ void LLPipeline::shiftObjects(const LLVector3 &offset) LLSpatialPartition* part = region->getSpatialPartition(i); if (part) { - part->shift(offset); + part->shift(offseta); } } } @@ -2659,8 +2865,10 @@ void LLPipeline::postSort(LLCamera& camera) { if (sMinRenderSize > 0.f) { - LLVector3 bounds = (*k)->mExtents[1]-(*k)->mExtents[0]; - if (llmax(llmax(bounds.mV[0], bounds.mV[1]), bounds.mV[2]) > sMinRenderSize) + LLVector4a bounds; + bounds.setSub((*k)->mExtents[1],(*k)->mExtents[0]); + + if (llmax(llmax(bounds[0], bounds[1]), bounds[2]) > sMinRenderSize) { sCull->pushDrawInfo(j->first, *k); } @@ -6770,8 +6978,9 @@ void LLPipeline::renderDeferredLighting() } - 
LLVector3 center = drawablep->getPositionAgent(); - F32* c = center.mV; + LLVector4a center; + center.load3(drawablep->getPositionAgent().mV); + const F32* c = center.getF32(); F32 s = volume->getLightRadius()*1.5f; LLColor3 col = volume->getLightColor(); @@ -6787,7 +6996,9 @@ void LLPipeline::renderDeferredLighting() continue; } - if (camera->AABBInFrustumNoFarClip(center, LLVector3(s,s,s)) == 0) + LLVector4a sa; + sa.splat(s); + if (camera->AABBInFrustumNoFarClip(center, sa) == 0) { continue; } @@ -6865,8 +7076,9 @@ void LLPipeline::renderDeferredLighting() LLVOVolume* volume = drawablep->getVOVolume(); - LLVector3 center = drawablep->getPositionAgent(); - F32* c = center.mV; + LLVector4a center; + center.load3(drawablep->getPositionAgent().mV); + const F32* c = center.getF32(); F32 s = volume->getLightRadius()*1.5f; sVisibleLightCount++; @@ -8952,7 +9164,7 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar) stateSort(*LLViewerCamera::getInstance(), result); - const LLVector3* ext = avatar->mDrawable->getSpatialExtents(); + const LLVector4a* ext = avatar->mDrawable->getSpatialExtents(); LLVector3 pos(avatar->getRenderPosition()+avatar->getImpostorOffset()); LLCamera camera = *viewer_camera; @@ -8961,18 +9173,23 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar) LLVector2 tdim; - LLVector3 half_height = (ext[1]-ext[0])*0.5f; - LLVector3 left = camera.getLeftAxis(); - left *= left; - left.normalize(); + LLVector4a half_height; + half_height.setSub(ext[1], ext[0]); + half_height.mul(0.5f); + + LLVector4a left; + left.load3(camera.getLeftAxis().mV); + left.mul(left); + left.normalize3fast(); - LLVector3 up = camera.getUpAxis(); - up *= up; - up.normalize(); + LLVector4a up; + up.load3(camera.getUpAxis().mV); + up.mul(up); + up.normalize3fast(); - tdim.mV[0] = fabsf(half_height * left); - tdim.mV[1] = fabsf(half_height * up); + tdim.mV[0] = fabsf(half_height.dot3(left)); + tdim.mV[1] = fabsf(half_height.dot3(up)); glMatrixMode(GL_PROJECTION); 
glPushMatrix(); diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index c9384f5ba2..52f943cd1d 100644 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -217,6 +217,7 @@ public: //calculate pixel area of given box from vantage point of given camera static F32 calcPixelArea(LLVector3 center, LLVector3 size, LLCamera& camera); + static F32 calcPixelArea(const LLVector4a& center, const LLVector4a& size, LLCamera &camera); void stateSort(LLCamera& camera, LLCullResult& result); void stateSort(LLSpatialGroup* group, LLCamera& camera); @@ -229,6 +230,14 @@ public: void renderGroups(LLRenderPass* pass, U32 type, U32 mask, BOOL texture); void grabReferences(LLCullResult& result); + void clearReferences(); + + //check references will assert that there are no references in sCullResult to the provided data + void checkReferences(LLFace* face); + void checkReferences(LLDrawable* drawable); + void checkReferences(LLDrawInfo* draw_info); + void checkReferences(LLSpatialGroup* group); + void renderGeom(LLCamera& camera, BOOL forceVBOUpdate = FALSE); void renderGeomDeferred(LLCamera& camera); -- cgit v1.2.3