diff options
Diffstat (limited to 'indra/llmath')
-rw-r--r-- | indra/llmath/llcamera.cpp | 222 | ||||
-rw-r--r-- | indra/llmath/llcamera.h | 9 | ||||
-rw-r--r-- | indra/llmath/llvolume.cpp | 487 | ||||
-rw-r--r-- | indra/llmath/llvolume.h | 18 | ||||
-rw-r--r-- | indra/llmath/v3color.h | 51 | ||||
-rw-r--r-- | indra/llmath/v4color.h | 41 |
6 files changed, 344 insertions, 484 deletions
diff --git a/indra/llmath/llcamera.cpp b/indra/llmath/llcamera.cpp index 9034182072..18d704dd0f 100644 --- a/indra/llmath/llcamera.cpp +++ b/indra/llmath/llcamera.cpp @@ -311,104 +311,6 @@ int LLCamera::sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 rad return 0; } -// HACK: This version is still around because the version below doesn't work -// unless the agent planes are initialized. -// Return 1 if sphere is in frustum, 2 if fully in frustum, otherwise 0. -// NOTE: 'center' is in absolute frame. -int LLCamera::sphereInFrustumOld(const LLVector3 &sphere_center, const F32 radius) const -{ - // Returns 1 if sphere is in frustum, 0 if not. - // modified so that default view frust is along X with Z vertical - F32 x, y, z, rightDist, leftDist, topDist, bottomDist; - - // Subtract the view position - //LLVector3 relative_center; - //relative_center = sphere_center - getOrigin(); - LLVector3 rel_center(sphere_center); - rel_center -= mOrigin; - - bool all_in = TRUE; - - // Transform relative_center.x to camera frame - x = mXAxis * rel_center; - if (x < MIN_NEAR_PLANE - radius) - { - return 0; - } - else if (x < MIN_NEAR_PLANE + radius) - { - all_in = FALSE; - } - - if (x > mFarPlane + radius) - { - return 0; - } - else if (x > mFarPlane - radius) - { - all_in = FALSE; - } - - // Transform relative_center.y to camera frame - y = mYAxis * rel_center; - - // distance to plane is the dot product of (x, y, 0) * plane_normal - rightDist = x * mLocalPlanes[PLANE_RIGHT][VX] + y * mLocalPlanes[PLANE_RIGHT][VY]; - if (rightDist < -radius) - { - return 0; - } - else if (rightDist < radius) - { - all_in = FALSE; - } - - leftDist = x * mLocalPlanes[PLANE_LEFT][VX] + y * mLocalPlanes[PLANE_LEFT][VY]; - if (leftDist < -radius) - { - return 0; - } - else if (leftDist < radius) - { - all_in = FALSE; - } - - // Transform relative_center.y to camera frame - z = mZAxis * rel_center; - - topDist = x * mLocalPlanes[PLANE_TOP][VX] + z * mLocalPlanes[PLANE_TOP][VZ]; - if 
(topDist < -radius) - { - return 0; - } - else if (topDist < radius) - { - all_in = FALSE; - } - - bottomDist = x * mLocalPlanes[PLANE_BOTTOM][VX] + z * mLocalPlanes[PLANE_BOTTOM][VZ]; - if (bottomDist < -radius) - { - return 0; - } - else if (bottomDist < radius) - { - all_in = FALSE; - } - - if (all_in) - { - return 2; - } - - return 1; -} - - -// HACK: This (presumably faster) version only currently works if you set up the -// frustum planes using GL. At some point we should get those planes through another -// mechanism, and then we can get rid of the "old" version above. - // Return 1 if sphere is in frustum, 2 if fully in frustum, otherwise 0. // NOTE: 'center' is in absolute frame. int LLCamera::sphereInFrustum(const LLVector3 &sphere_center, const F32 radius) const @@ -463,65 +365,6 @@ F32 LLCamera::heightInPixels(const LLVector3 &center, F32 radius ) const } } -// If pos is visible, return the distance from pos to the camera. -// Use fudge distance to scale rad against top/bot/left/right planes -// Otherwise, return -distance -F32 LLCamera::visibleDistance(const LLVector3 &pos, F32 rad, F32 fudgedist, U32 planemask) const -{ - if (mFixedDistance > 0) - { - return mFixedDistance; - } - LLVector3 dvec = pos - mOrigin; - // Check visibility - F32 dist = dvec.magVec(); - if (dist > rad) - { - F32 dp,tdist; - dp = dvec * mXAxis; - if (dp < -rad) - return -dist; - - rad *= fudgedist; - LLVector3 tvec(pos); - for (int p=0; p<PLANE_NUM; p++) - { - if (!(planemask & (1<<p))) - continue; - tdist = -(mWorldPlanes[p].dist(tvec)); - if (tdist > rad) - return -dist; - } - } - return dist; -} - -// Like visibleDistance, except uses mHorizPlanes[], which are left and right -// planes perpindicular to (0,0,1) in world space -F32 LLCamera::visibleHorizDistance(const LLVector3 &pos, F32 rad, F32 fudgedist, U32 planemask) const -{ - if (mFixedDistance > 0) - { - return mFixedDistance; - } - LLVector3 dvec = pos - mOrigin; - // Check visibility - F32 dist = dvec.magVec(); - if
(dist > rad) - { - rad *= fudgedist; - LLVector3 tvec(pos); - for (int p=0; p<HORIZ_PLANE_NUM; p++) - { - if (!(planemask & (1<<p))) - continue; - F32 tdist = -(mHorizPlanes[p].dist(tvec)); - if (tdist > rad) - return -dist; - } - } - return dist; -} // ---------------- friends and operators ---------------- @@ -536,18 +379,6 @@ std::ostream& operator<<(std::ostream &s, const LLCamera &C) s << " Aspect = " << C.getAspect() << "\n"; s << " NearPlane = " << C.mNearPlane << "\n"; s << " FarPlane = " << C.mFarPlane << "\n"; - s << " TopPlane = " << C.mLocalPlanes[LLCamera::PLANE_TOP][VX] << " " - << C.mLocalPlanes[LLCamera::PLANE_TOP][VY] << " " - << C.mLocalPlanes[LLCamera::PLANE_TOP][VZ] << "\n"; - s << " BottomPlane = " << C.mLocalPlanes[LLCamera::PLANE_BOTTOM][VX] << " " - << C.mLocalPlanes[LLCamera::PLANE_BOTTOM][VY] << " " - << C.mLocalPlanes[LLCamera::PLANE_BOTTOM][VZ] << "\n"; - s << " LeftPlane = " << C.mLocalPlanes[LLCamera::PLANE_LEFT][VX] << " " - << C.mLocalPlanes[LLCamera::PLANE_LEFT][VY] << " " - << C.mLocalPlanes[LLCamera::PLANE_LEFT][VZ] << "\n"; - s << " RightPlane = " << C.mLocalPlanes[LLCamera::PLANE_RIGHT][VX] << " " - << C.mLocalPlanes[LLCamera::PLANE_RIGHT][VY] << " " - << C.mLocalPlanes[LLCamera::PLANE_RIGHT][VZ] << "\n"; s << "}"; return s; } @@ -675,26 +506,6 @@ void LLCamera::calcRegionFrustumPlanes(const LLVector3& shift, F32 far_clip_dist void LLCamera::calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom) { - LLVector3 a, b, c; - - // For each plane we need to define 3 points (LLVector3's) in camera view space. - // The order in which we pass the points to planeFromPoints() matters, because the - // plane normal has a degeneracy of 2; we want it pointing _into_ the frustum. 
- - a.setVec(0.0f, 0.0f, 0.0f); - b.setVec(mFarPlane, right, top); - c.setVec(mFarPlane, right, bottom); - mLocalPlanes[PLANE_RIGHT].setVec(a, b, c); - - c.setVec(mFarPlane, left, top); - mLocalPlanes[PLANE_TOP].setVec(a, c, b); - - b.setVec(mFarPlane, left, bottom); - mLocalPlanes[PLANE_LEFT].setVec(a, b, c); - - c.setVec(mFarPlane, right, bottom); - mLocalPlanes[PLANE_BOTTOM].setVec( a, c, b); - //calculate center and radius squared of frustum in world absolute coordinates static LLVector3 const X_AXIS(1.f, 0.f, 0.f); mFrustCenter = X_AXIS*mFarPlane*0.5f; @@ -718,39 +529,6 @@ void LLCamera::calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2) calculateFrustumPlanes(left, right, top, bottom); } -void LLCamera::calculateWorldFrustumPlanes() -{ - F32 d; - LLVector3 center = mOrigin - mXAxis*mNearPlane; - mWorldPlanePos = center; - LLVector3 pnorm; - for (int p = 0; p < PLANE_NUM; p++) - { - mLocalPlanes[p].getVector3(pnorm); - LLVector3 norm = rotateToAbsolute(pnorm); - norm.normVec(); - d = -(center * norm); - mWorldPlanes[p] = LLPlane(norm, d); - } - // horizontal planes, perpindicular to (0,0,1); - LLVector3 zaxis(0, 0, 1.0f); - F32 yaw = getYaw(); - { - LLVector3 tnorm; - mLocalPlanes[PLANE_LEFT].getVector3(tnorm); - tnorm.rotVec(yaw, zaxis); - d = -(mOrigin * tnorm); - mHorizPlanes[HORIZ_PLANE_LEFT] = LLPlane(tnorm, d); - } - { - LLVector3 tnorm; - mLocalPlanes[PLANE_RIGHT].getVector3(tnorm); - tnorm.rotVec(yaw, zaxis); - d = -(mOrigin * tnorm); - mHorizPlanes[HORIZ_PLANE_RIGHT] = LLPlane(tnorm, d); - } -} - // NOTE: this is the OpenGL matrix that will transform the default OpenGL view // (-Z=at, Y=up) to the default view of the LLCamera class (X=at, Z=up): // diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index d0afa0e88f..27eaa614c9 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -131,14 +131,10 @@ private: S32 mViewHeightInPixels; // for ViewHeightInPixels() only F32 mNearPlane; F32 mFarPlane; - 
LL_ALIGN_16(LLPlane mLocalPlanes[PLANE_NUM]); F32 mFixedDistance; // Always return this distance, unless < 0 LLVector3 mFrustCenter; // center of frustum and radius squared for ultra-quick exclusion test F32 mFrustRadiusSquared; - LL_ALIGN_16(LLPlane mWorldPlanes[PLANE_NUM]); - LL_ALIGN_16(LLPlane mHorizPlanes[HORIZ_PLANE_NUM]); - U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in LLVector3 mWorldPlanePos; // Position of World Planes (may be offset from camera) @@ -184,7 +180,6 @@ public: return atan2f(mXAxis[VZ], xylen); } - const LLPlane& getWorldPlane(S32 index) const { return mWorldPlanes[index]; } const LLVector3& getWorldPlanePos() const { return mWorldPlanePos; } // Copy mView, mAspect, mNearPlane, and mFarPlane to buffer. @@ -200,7 +195,6 @@ public: // Returns 1 if partly in, 2 if fully in. // NOTE: 'center' is in absolute frame. - S32 sphereInFrustumOld(const LLVector3 &center, const F32 radius) const; S32 sphereInFrustum(const LLVector3 &center, const F32 radius) const; S32 pointInFrustum(const LLVector3 &point) const { return sphereInFrustum(point, 0.0f); } S32 sphereInFrustumFull(const LLVector3 &center, const F32 radius) const { return sphereInFrustum(center, radius); } @@ -217,8 +211,6 @@ public: F32 heightInPixels(const LLVector3 &center, F32 radius ) const; // return the distance from pos to camera if visible (-distance if not visible) - F32 visibleDistance(const LLVector3 &pos, F32 rad, F32 fudgescale = 1.0f, U32 planemask = PLANE_ALL_MASK) const; - F32 visibleHorizDistance(const LLVector3 &pos, F32 rad, F32 fudgescale = 1.0f, U32 planemask = HORIZ_PLANE_ALL_MASK) const; void setFixedDistance(F32 distance) { mFixedDistance = distance; } friend std::ostream& operator<<(std::ostream &s, const LLCamera &C); @@ -227,7 +219,6 @@ protected: void calculateFrustumPlanes(); void calculateFrustumPlanes(F32 left, F32 right, F32 top, F32 bottom); void calculateFrustumPlanesFromWindow(F32 x1, F32 y1, F32 x2, F32 y2); - void
calculateWorldFrustumPlanes(); } LL_ALIGN_POSTFIX(16); diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 40f7b1e9fb..91e463cc32 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -32,6 +32,7 @@ #include <stdint.h> #endif #include <cmath> +#include <unordered_map> #include "llerror.h" @@ -52,6 +53,11 @@ #include "llmeshoptimizer.h" #include "lltimer.h" +#include "mikktspace/mikktspace.h" +#include "mikktspace/mikktspace.c" // insert mikktspace implementation into llvolume object file + +#include "meshoptimizer/meshoptimizer.h" + #define DEBUG_SILHOUETTE_BINORMALS 0 #define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette #define DEBUG_SILHOUETTE_EDGE_MAP 0 // DaveP: Use this to display edge map using the silhouette @@ -2093,7 +2099,9 @@ void LLVolume::regen() void LLVolume::genTangents(S32 face) { - mVolumeFaces[face].createTangents(); + // generate legacy tangents for the specified face + llassert(!isMeshAssetLoaded() || mVolumeFaces[face].mTangents != nullptr); // if this is a complete mesh asset, we should already have tangents + mVolumeFaces[face].createTangents(); } LLVolume::~LLVolume() @@ -2433,11 +2441,10 @@ bool LLVolume::unpackVolumeFacesInternal(const LLSD& mdl) LLSD::Binary pos = mdl[i]["Position"]; LLSD::Binary norm = mdl[i]["Normal"]; + LLSD::Binary tangent = mdl[i]["Tangent"]; LLSD::Binary tc = mdl[i]["TexCoord0"]; LLSD::Binary idx = mdl[i]["TriangleList"]; - - //copy out indices S32 num_indices = idx.size() / 2; const S32 indices_to_discard = num_indices % 3; @@ -2492,6 +2499,16 @@ bool LLVolume::unpackVolumeFacesInternal(const LLSD& mdl) min_tc.setValue(mdl[i]["TexCoord0Domain"]["Min"]); max_tc.setValue(mdl[i]["TexCoord0Domain"]["Max"]); + //unpack normalized scale/translation + if (mdl[i].has("NormalizedScale")) + { + face.mNormalizedScale.setValue(mdl[i]["NormalizedScale"]); + } + else + { + face.mNormalizedScale.set(1, 1, 1); + } + LLVector4a pos_range; 
pos_range.setSub(max_pos, min_pos); LLVector2 tc_range2 = max_tc - min_tc; @@ -2542,6 +2559,34 @@ bool LLVolume::unpackVolumeFacesInternal(const LLSD& mdl) } } +#if 0 // keep this code for now in case we decide to add support for on-the-wire tangents + { + if (!tangent.empty()) + { + face.allocateTangents(face.mNumVertices); + U16* t = (U16*)&(tangent[0]); + + // NOTE: tangents coming from the asset may not be mikkt space, but they should always be used by the GLTF shaders to + // maintain compliance with the GLTF spec + LLVector4a* t_out = face.mTangents; + + for (U32 j = 0; j < num_verts; ++j) + { + t_out->set((F32)t[0], (F32)t[1], (F32)t[2], (F32) t[3]); + t_out->div(65535.f); + t_out->mul(2.f); + t_out->sub(1.f); + + F32* tp = t_out->getF32ptr(); + tp[3] = tp[3] < 0.f ? -1.f : 1.f; + + t_out++; + t += 4; + } + } + } +#endif + { if (!tc.empty()) { @@ -2745,7 +2790,7 @@ bool LLVolume::unpackVolumeFacesInternal(const LLSD& mdl) } } - if (!cacheOptimize()) + if (!cacheOptimize(true)) { // Out of memory? LL_WARNS() << "Failed to optimize!" 
<< LL_ENDL; @@ -2786,11 +2831,11 @@ void LLVolume::copyVolumeFaces(const LLVolume* volume) mSculptLevel = 0; } -bool LLVolume::cacheOptimize() +bool LLVolume::cacheOptimize(bool gen_tangents) { for (S32 i = 0; i < mVolumeFaces.size(); ++i) { - if (!mVolumeFaces[i].cacheOptimize()) + if (!mVolumeFaces[i].cacheOptimize(gen_tangents)) { return false; } @@ -3306,12 +3351,12 @@ BOOL LLVolume::isFlat(S32 face) bool LLVolumeParams::isSculpt() const { - return mSculptID.notNull(); + return (mSculptType & LL_SCULPT_TYPE_MASK) != LL_SCULPT_TYPE_NONE; } bool LLVolumeParams::isMeshSculpt() const { - return isSculpt() && ((mSculptType & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH); + return (mSculptType & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH; } bool LLVolumeParams::operator==(const LLVolumeParams &params) const @@ -3726,6 +3771,7 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) { //attempt to approximate the number of triangles that will result from generating a volume LoD set for the //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost + LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME; F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; for (S32 i = 0; i < 4; i++) { @@ -4073,7 +4119,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en { if (tangent_out != NULL) // if the caller wants tangents, we may need to generate them { - genTangents(i); + genTangents(i); } if (isUnique()) @@ -4861,6 +4907,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) } mOptimized = src.mOptimized; + mNormalizedScale = src.mNormalizedScale; //delete return *this; @@ -5383,256 +5430,218 @@ public: } }; +// data structures for tangent generation -bool LLVolumeFace::cacheOptimize() -{ //optimize for vertex cache according to Forsyth method: - //
http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html - - llassert(!mOptimized); - mOptimized = TRUE; +struct MikktData +{ + LLVolumeFace* face; + std::vector<LLVector3> p; + std::vector<LLVector3> n; + std::vector<LLVector2> tc; + std::vector<LLVector4> w; + std::vector<LLVector4> t; - LLVCacheLRU cache; - - if (mNumVertices < 3 || mNumIndices < 3) - { //nothing to do - return true; - } + MikktData(LLVolumeFace* f) + : face(f) + { + U32 count = face->mNumIndices; - //mapping of vertices to triangles and indices - std::vector<LLVCacheVertexData> vertex_data; + p.resize(count); + n.resize(count); + tc.resize(count); + t.resize(count); - //mapping of triangles do vertices - std::vector<LLVCacheTriangleData> triangle_data; + if (face->mWeights) + { + w.resize(count); + } - try - { - triangle_data.resize(mNumIndices / 3); - vertex_data.resize(mNumVertices); - for (U32 i = 0; i < mNumIndices; i++) - { //populate vertex data and triangle data arrays - U16 idx = mIndices[i]; - U32 tri_idx = i / 3; + LLVector3 inv_scale(1.f / face->mNormalizedScale.mV[0], 1.f / face->mNormalizedScale.mV[1], 1.f / face->mNormalizedScale.mV[2]); + - if (idx >= mNumVertices) + for (int i = 0; i < face->mNumIndices; ++i) + { + U32 idx = face->mIndices[i]; + + p[i].set(face->mPositions[idx].getF32ptr()); + p[i].scaleVec(face->mNormalizedScale); //put mesh in original coordinate frame when reconstructing tangents + n[i].set(face->mNormals[idx].getF32ptr()); + n[i].scaleVec(inv_scale); + n[i].normalize(); + tc[i].set(face->mTexCoords[idx]); + + if (idx >= face->mNumVertices) { // invalid index // replace with a valid index to avoid crashes - idx = mNumVertices - 1; - mIndices[i] = idx; + idx = face->mNumVertices - 1; + face->mIndices[i] = idx; // Needs better logging LL_DEBUGS_ONCE("LLVOLUME") << "Invalid index, substituting" << LL_ENDL; } - vertex_data[idx].mTriangles.push_back(&(triangle_data[tri_idx])); - vertex_data[idx].mIdx = idx; - triangle_data[tri_idx].mVertex[i % 3] = 
&(vertex_data[idx]); + if (face->mWeights) + { + w[i].set(face->mWeights[idx].getF32ptr()); + } } } - catch (std::bad_alloc&) - { - // resize or push_back failed - LL_WARNS("LLVOLUME") << "Resize for " << mNumVertices << " vertices failed" << LL_ENDL; - return false; - } +}; - /*F32 pre_acmr = 1.f; - //measure cache misses from before rebuild - { - LLVCacheFIFO test_cache; - for (U32 i = 0; i < mNumIndices; ++i) - { - test_cache.addVertex(&vertex_data[mIndices[i]]); - } - for (U32 i = 0; i < mNumVertices; i++) - { - vertex_data[i].mCacheTag = -1; - } +bool LLVolumeFace::cacheOptimize(bool gen_tangents) +{ //optimize for vertex cache according to Forsyth method: + LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME; + llassert(!mOptimized); + mOptimized = TRUE; - pre_acmr = (F32) test_cache.mMisses/(mNumIndices/3); - }*/ + if (gen_tangents && mNormals && mTexCoords) + { // generate mikkt space tangents before cache optimizing since the index buffer may change + // a bit of a hack to do this here, but this function gets called exactly once for the lifetime of a mesh + // and is executed on a background thread + SMikkTSpaceInterface ms; - for (U32 i = 0; i < mNumVertices; i++) - { //initialize score values (no cache -- might try a fifo cache here) - LLVCacheVertexData& data = vertex_data[i]; + ms.m_getNumFaces = [](const SMikkTSpaceContext* pContext) + { + MikktData* data = (MikktData*)pContext->m_pUserData; + LLVolumeFace* face = data->face; + return face->mNumIndices / 3; + }; - data.mScore = find_vertex_score(data); - data.mActiveTriangles = data.mTriangles.size(); + ms.m_getNumVerticesOfFace = [](const SMikkTSpaceContext* pContext, const int iFace) + { + return 3; + }; - for (U32 j = 0; j < data.mActiveTriangles; ++j) - { - data.mTriangles[j]->mScore += data.mScore; - } - } + ms.m_getPosition = [](const SMikkTSpaceContext* pContext, float fvPosOut[], const int iFace, const int iVert) + { + MikktData* data = (MikktData*)pContext->m_pUserData; + F32* v = data->p[iFace * 3 + 
iVert].mV; + fvPosOut[0] = v[0]; + fvPosOut[1] = v[1]; + fvPosOut[2] = v[2]; + }; + + ms.m_getNormal = [](const SMikkTSpaceContext* pContext, float fvNormOut[], const int iFace, const int iVert) + { + MikktData* data = (MikktData*)pContext->m_pUserData; + F32* n = data->n[iFace * 3 + iVert].mV; + fvNormOut[0] = n[0]; + fvNormOut[1] = n[1]; + fvNormOut[2] = n[2]; + }; + + ms.m_getTexCoord = [](const SMikkTSpaceContext* pContext, float fvTexcOut[], const int iFace, const int iVert) + { + MikktData* data = (MikktData*)pContext->m_pUserData; + F32* tc = data->tc[iFace * 3 + iVert].mV; + fvTexcOut[0] = tc[0]; + fvTexcOut[1] = tc[1]; + }; - //sort triangle data by score - std::sort(triangle_data.begin(), triangle_data.end()); + ms.m_setTSpaceBasic = [](const SMikkTSpaceContext* pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert) + { + MikktData* data = (MikktData*)pContext->m_pUserData; + S32 i = iFace * 3 + iVert; + + data->t[i].set(fvTangent); + data->t[i].mV[3] = fSign; + }; - std::vector<U16> new_indices; + ms.m_setTSpace = nullptr; - LLVCacheTriangleData* tri; + MikktData data(this); - //prime pump by adding first triangle to cache; - tri = &(triangle_data[0]); - cache.addTriangle(tri); - new_indices.push_back(tri->mVertex[0]->mIdx); - new_indices.push_back(tri->mVertex[1]->mIdx); - new_indices.push_back(tri->mVertex[2]->mIdx); - tri->complete(); + SMikkTSpaceContext ctx = { &ms, &data }; - U32 breaks = 0; - for (U32 i = 1; i < mNumIndices/3; ++i) - { - cache.updateScores(); - tri = cache.mBestTriangle; - if (!tri) - { - breaks++; - for (U32 j = 0; j < triangle_data.size(); ++j) - { - if (triangle_data[j].mActive) - { - tri = &(triangle_data[j]); - break; - } - } - } - - cache.addTriangle(tri); - new_indices.push_back(tri->mVertex[0]->mIdx); - new_indices.push_back(tri->mVertex[1]->mIdx); - new_indices.push_back(tri->mVertex[2]->mIdx); - tri->complete(); - } + genTangSpaceDefault(&ctx); - for (U32 i = 0; i < mNumIndices; ++i) - { 
- mIndices[i] = new_indices[i]; - } + //re-weld + meshopt_Stream mos[] = + { + { &data.p[0], sizeof(LLVector3), sizeof(LLVector3) }, + { &data.n[0], sizeof(LLVector3), sizeof(LLVector3) }, + { &data.t[0], sizeof(LLVector4), sizeof(LLVector4) }, + { &data.tc[0], sizeof(LLVector2), sizeof(LLVector2) }, + { data.w.empty() ? nullptr : &data.w[0], sizeof(LLVector4), sizeof(LLVector4) } + }; - /*F32 post_acmr = 1.f; - //measure cache misses from after rebuild - { - LLVCacheFIFO test_cache; - for (U32 i = 0; i < mNumVertices; i++) - { - vertex_data[i].mCacheTag = -1; - } + std::vector<U32> remap; + remap.resize(data.p.size()); - for (U32 i = 0; i < mNumIndices; ++i) - { - test_cache.addVertex(&vertex_data[mIndices[i]]); - } - - post_acmr = (F32) test_cache.mMisses/(mNumIndices/3); - }*/ + U32 stream_count = data.w.empty() ? 4 : 5; - //optimize for pre-TnL cache - - //allocate space for new buffer - S32 num_verts = mNumVertices; - S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector4a* pos = (LLVector4a*) ll_aligned_malloc<64>(sizeof(LLVector4a)*2*num_verts+size); - if (pos == NULL) - { - LL_WARNS("LLVOLUME") << "Allocation of positions vector[" << sizeof(LLVector4a) * 2 * num_verts + size << "] failed. 
" << LL_ENDL; - return false; - } - LLVector4a* norm = pos + num_verts; - LLVector2* tc = (LLVector2*) (norm + num_verts); + U32 vert_count = meshopt_generateVertexRemapMulti(&remap[0], nullptr, data.p.size(), data.p.size(), mos, stream_count); - LLVector4a* wght = NULL; - if (mWeights) - { - wght = (LLVector4a*)ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - if (wght == NULL) - { - ll_aligned_free<64>(pos); - LL_WARNS("LLVOLUME") << "Allocation of weights[" << sizeof(LLVector4a) * num_verts << "] failed" << LL_ENDL; - return false; - } - } + if (vert_count < 65535) + { + std::vector<U32> indices; + indices.resize(mNumIndices); - LLVector4a* binorm = NULL; - if (mTangents) - { - binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - if (binorm == NULL) - { - ll_aligned_free<64>(pos); - ll_aligned_free_16(wght); - LL_WARNS("LLVOLUME") << "Allocation of binormals[" << sizeof(LLVector4a)*num_verts << "] failed" << LL_ENDL; - return false; - } - } + //copy results back into volume + resizeVertices(vert_count); - //allocate mapping of old indices to new indices - std::vector<S32> new_idx; + if (!data.w.empty()) + { + allocateWeights(vert_count); + } - try - { - new_idx.resize(mNumVertices, -1); - } - catch (std::bad_alloc&) - { - ll_aligned_free<64>(pos); - ll_aligned_free_16(wght); - ll_aligned_free_16(binorm); - LL_WARNS("LLVOLUME") << "Resize failed: " << mNumVertices << LL_ENDL; - return false; - } + allocateTangents(mNumVertices); - S32 cur_idx = 0; - for (U32 i = 0; i < mNumIndices; ++i) - { - U16 idx = mIndices[i]; - if (new_idx[idx] == -1) - { //this vertex hasn't been added yet - new_idx[idx] = cur_idx; + for (int i = 0; i < mNumIndices; ++i) + { + U32 src_idx = i; + U32 dst_idx = remap[i]; + mIndices[i] = dst_idx; - //copy vertex data - pos[cur_idx] = mPositions[idx]; - norm[cur_idx] = mNormals[idx]; - tc[cur_idx] = mTexCoords[idx]; - if (mWeights) - { - wght[cur_idx] = mWeights[idx]; - } - if (mTangents) - { - binorm[cur_idx] = 
mTangents[idx]; - } + mPositions[dst_idx].load3(data.p[src_idx].mV); + mNormals[dst_idx].load3(data.n[src_idx].mV); + mTexCoords[dst_idx] = data.tc[src_idx]; - cur_idx++; - } - } + mTangents[dst_idx].loadua(data.t[src_idx].mV); - for (U32 i = 0; i < mNumIndices; ++i) - { - mIndices[i] = new_idx[mIndices[i]]; - } - - ll_aligned_free<64>(mPositions); - // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer - ll_aligned_free_16(mWeights); - ll_aligned_free_16(mTangents); -#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS - ll_aligned_free_16(mJointIndices); - ll_aligned_free_16(mJustWeights); - mJustWeights = NULL; - mJointIndices = NULL; // filled in later as necessary by skinning code for acceleration -#endif + if (mWeights) + { + mWeights[dst_idx].loadua(data.w[src_idx].mV); + } + } + } + else + { + // blew past the max vertex size limit, use legacy tangent generation which never adds verts + createTangents(); + } - mPositions = pos; - mNormals = norm; - mTexCoords = tc; - mWeights = wght; - mTangents = binorm; + // put back in normalized coordinate frame + LLVector4a inv_scale(1.f/mNormalizedScale.mV[0], 1.f / mNormalizedScale.mV[1], 1.f / mNormalizedScale.mV[2]); + LLVector4a scale; + scale.load3(mNormalizedScale.mV); + scale.getF32ptr()[3] = 1.f; + + for (int i = 0; i < mNumVertices; ++i) + { + mPositions[i].mul(inv_scale); + mNormals[i].mul(scale); + mNormals[i].normalize3(); + F32 w = mTangents[i].getF32ptr()[3]; + mTangents[i].mul(scale); + mTangents[i].normalize3(); + mTangents[i].getF32ptr()[3] = w; + } + } - //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); - //LL_INFOS() << result << LL_ENDL; + // cache optimize index buffer + + // meshopt needs scratch space, do some pointer shuffling to avoid an extra index buffer copy + U16* src_indices = mIndices; + mIndices = nullptr; + resizeIndices(mNumIndices); + + meshopt_optimizeVertexCache<U16>(mIndices, src_indices, 
mNumIndices, mNumVertices); + + ll_aligned_free_16(src_indices); return true; } @@ -6442,35 +6451,31 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe void LLVolumeFace::createTangents() { - LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME + LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME; - if (!mTangents) - { - allocateTangents(mNumVertices); - - //generate tangents - //LLVector4a* pos = mPositions; - //LLVector2* tc = (LLVector2*) mTexCoords; - LLVector4a* binorm = (LLVector4a*) mTangents; + if (!mTangents) + { + allocateTangents(mNumVertices); + + //generate tangents + LLVector4a* ptr = (LLVector4a*)mTangents; - LLVector4a* end = mTangents+mNumVertices; - while (binorm < end) - { - (*binorm++).clear(); - } + LLVector4a* end = mTangents + mNumVertices; + while (ptr < end) + { + (*ptr++).clear(); + } - binorm = mTangents; + CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices / 3, mIndices, mTangents); - CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); + //normalize normals + for (U32 i = 0; i < mNumVertices; i++) + { + //bump map/planar projection code requires normals to be normalized + mNormals[i].normalize3fast(); + } + } - //normalize tangents - for (U32 i = 0; i < mNumVertices; i++) - { - //binorm[i].normalize3fast(); - //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast(); - } - } } void LLVolumeFace::resizeVertices(S32 num_verts) diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1509241623..ad6a669531 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -908,7 +908,7 @@ public: void remap(); void optimize(F32 angle_cutoff = 2.f); - bool cacheOptimize(); + bool cacheOptimize(bool gen_tangents = false); void createOctree(F32 scaler = 0.25f, const LLVector4a& center = LLVector4a(0,0,0), const LLVector4a& size = LLVector4a(0.5f,0.5f,0.5f)); void destroyOctree(); @@ -960,10 +960,6 
@@ public: // indexes for mPositions/mNormals/mTexCoords U16* mIndices; - // vertex buffer filled in by LLFace to cache this volume face geometry in vram - // (declared as a LLPointer to LLRefCount to avoid dependency on LLVertexBuffer) - mutable LLPointer<LLRefCount> mVertexBuffer; - std::vector<S32> mEdge; //list of skin weights for rigged volumes @@ -985,6 +981,11 @@ public: //whether or not face has been cache optimized BOOL mOptimized; + // if this is a mesh asset, scale and translation that were applied + // when encoding the source mesh into a unit cube + // used for regenerating tangents + LLVector3 mNormalizedScale = LLVector3(1,1,1); + private: LLOctreeNode<LLVolumeTriangle, LLVolumeTriangle*>* mOctree; LLVolumeTriangle* mOctreeTriangles; @@ -1033,7 +1034,7 @@ public: void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } void regen(); - void genTangents(S32 face); + void genTangents(S32 face); BOOL isConvex() const; BOOL isCap(S32 face); @@ -1087,7 +1088,10 @@ public: void copyVolumeFaces(const LLVolume* volume); void copyFacesTo(std::vector<LLVolumeFace> &faces) const; void copyFacesFrom(const std::vector<LLVolumeFace> &faces); - bool cacheOptimize(); + + // use meshoptimizer to optimize index buffer for vertex shader cache + // gen_tangents - if true, generate MikkTSpace tangents if needed before optimizing index buffer + bool cacheOptimize(bool gen_tangents = false); private: void sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data, U8 sculpt_type); diff --git a/indra/llmath/v3color.h b/indra/llmath/v3color.h index 43a632408c..d925f56e97 100644 --- a/indra/llmath/v3color.h +++ b/indra/llmath/v3color.h @@ -33,6 +33,7 @@ class LLVector4; #include "llerror.h" #include "llmath.h" #include "llsd.h" +#include "v3math.h" // needed for linearColor3v implemtation below #include <string.h> // LLColor3 = |r g b| @@ -87,6 +88,16 @@ public: const LLColor3& set(F32 x, F32 y, F32 z); // Sets LLColor3 to 
(x, y, z) const LLColor3& set(const LLColor3 &vec); // Sets LLColor3 to vec const LLColor3& set(const F32 *vec); // Sets LLColor3 to vec + + // set from a vector of unknown type and size + // may leave some data unmodified + template<typename T> + const LLColor3& set(const std::vector<T>& v); + + // write to a vector of unknown type and size + // maye leave some data unmodified + template<typename T> + void write(std::vector<T>& v) const; F32 magVec() const; // deprecated F32 magVecSquared() const; // deprecated @@ -484,13 +495,45 @@ inline const LLColor3 srgbColor3(const LLColor3 &a) { return srgbColor; } -inline const LLColor3 linearColor3(const LLColor3 &a) { +inline const LLColor3 linearColor3p(const F32* v) { LLColor3 linearColor; - linearColor.mV[0] = sRGBtoLinear(a.mV[0]); - linearColor.mV[1] = sRGBtoLinear(a.mV[1]); - linearColor.mV[2] = sRGBtoLinear(a.mV[2]); + linearColor.mV[0] = sRGBtoLinear(v[0]); + linearColor.mV[1] = sRGBtoLinear(v[1]); + linearColor.mV[2] = sRGBtoLinear(v[2]); return linearColor; } +template<class T> +inline const LLColor3 linearColor3(const T& a) { + return linearColor3p(a.mV); +} + +template<class T> +inline const LLVector3 linearColor3v(const T& a) { + return LLVector3(linearColor3p(a.mV).mV); +} + +template<typename T> +const LLColor3& LLColor3::set(const std::vector<T>& v) +{ + for (S32 i = 0; i < llmin((S32)v.size(), 3); ++i) + { + mV[i] = v[i]; + } + + return *this; +} + +// write to a vector of unknown type and size +// maye leave some data unmodified +template<typename T> +void LLColor3::write(std::vector<T>& v) const +{ + for (int i = 0; i < llmin((S32)v.size(), 3); ++i) + { + v[i] = mV[i]; + } +} + #endif diff --git a/indra/llmath/v4color.h b/indra/llmath/v4color.h index 175edf1471..daa61594fb 100644 --- a/indra/llmath/v4color.h +++ b/indra/llmath/v4color.h @@ -88,8 +88,18 @@ class LLColor4 const LLColor4& set(const LLColor3 &vec); // Sets LLColor4 to LLColor3 vec (no change in alpha) const LLColor4& set(const LLColor3 
&vec, F32 a); // Sets LLColor4 to LLColor3 vec, with alpha specified const LLColor4& set(const F32 *vec); // Sets LLColor4 to vec - const LLColor4& set(const LLColor4U& color4u); // Sets LLColor4 to color4u, rescaled. + const LLColor4& set(const F64 *vec); // Sets LLColor4 to (double)vec + const LLColor4& set(const LLColor4U& color4u); // Sets LLColor4 to color4u, rescaled. + // set from a vector of unknown type and size + // may leave some data unmodified + template<typename T> + const LLColor4& set(const std::vector<T>& v); + + // write to a vector of unknown type and size + // maye leave some data unmodified + template<typename T> + void write(std::vector<T>& v) const; const LLColor4& setAlpha(F32 a); @@ -334,6 +344,15 @@ inline const LLColor4& LLColor4::set(const F32 *vec) return (*this); } +inline const LLColor4& LLColor4::set(const F64 *vec) +{ + mV[VX] = static_cast<F32>(vec[VX]); + mV[VY] = static_cast<F32>(vec[VY]); + mV[VZ] = static_cast<F32>(vec[VZ]); + mV[VW] = static_cast<F32>(vec[VW]); + return (*this); +} + // deprecated inline const LLColor4& LLColor4::setVec(F32 x, F32 y, F32 z) { @@ -680,5 +699,25 @@ inline const LLColor4 linearColor4(const LLColor4 &a) return linearColor; } +template<typename T> +const LLColor4& LLColor4::set(const std::vector<T>& v) +{ + for (S32 i = 0; i < llmin((S32)v.size(), 4); ++i) + { + mV[i] = v[i]; + } + + return *this; +} + +template<typename T> +void LLColor4::write(std::vector<T>& v) const +{ + for (int i = 0; i < llmin((S32)v.size(), 4); ++i) + { + v[i] = mV[i]; + } +} + #endif |