1 files changed, 462 insertions, 307 deletions
diff --git a/indra/newview/llmodelpreview.cpp b/indra/newview/llmodelpreview.cpp
index a9e80ab5da..5d81d2c9b3 100644
--- a/indra/newview/llmodelpreview.cpp
+++ b/indra/newview/llmodelpreview.cpp
@@ -41,6 +41,7 @@
 #include "lliconctrl.h"
 #include "llmatrix4a.h"
 #include "llmeshrepository.h"
+#include "llmeshoptimizer.h"
 #include "llrender.h"
 #include "llsdutil_math.h"
 #include "llskinningutil.h"
@@ -66,7 +67,6 @@
 #include "lltabcontainer.h"
 #include "lltextbox.h"
 
-#include "glod/glod.h"
 #include <boost/algorithm/string.hpp>
 
 bool LLModelPreview::sIgnoreLoadedCallback = false;
@@ -89,19 +89,6 @@ static const F32 PREVIEW_ZOOM_LIMIT(10.f);
 
 const F32 SKIN_WEIGHT_CAMERA_DISTANCE = 16.f;
 
-BOOL stop_gloderror()
-{
-    GLuint error = glodGetError();
-
-    if (error != GLOD_NO_ERROR)
-    {
-        LL_WARNS() << "GLOD error detected, cannot generate LOD: " << std::hex << error << LL_ENDL;
-        return TRUE;
-    }
-
-    return FALSE;
-}
-
 LLViewerFetchedTexture* bindMaterialDiffuseTexture(const LLImportMaterial& material)
 {
     LLViewerFetchedTexture *texture = LLViewerTextureManager::getFetchedTexture(material.getDiffuseMap(), FTT_DEFAULT, TRUE, LLGLTexture::BOOST_PREVIEW);
@@ -193,6 +180,7 @@ LLModelPreview::LLModelPreview(S32 width, S32 height, LLFloater* fmp)
     mPreviewLOD = 0;
     mModelLoader = NULL;
     mMaxTriangleLimit = 0;
+    mMinTriangleLimit = 0;
     mDirty = false;
     mGenLOD = false;
     mLoading = false;
@@ -200,10 +188,6 @@ LLModelPreview::LLModelPreview(S32 width, S32 height, LLFloater* fmp)
     mLoadState = LLModelLoader::STARTING;
     mGroup = 0;
     mLODFrozen = false;
-    mBuildShareTolerance = 0.f;
-    mBuildQueueMode = GLOD_QUEUE_GREEDY;
-    mBuildBorderMode = GLOD_BORDER_UNLOCK;
-    mBuildOperator = GLOD_OPERATOR_EDGE_COLLAPSE;
 
     for (U32 i = 0; i < LLModel::NUM_LODS; ++i)
     {
@@ -211,10 +195,6 @@ LLModelPreview::LLModelPreview(S32 width, S32 height, LLFloater* fmp)
         mRequestedCreaseAngle[i] = -1.f;
         mRequestedLoDMode[i] = 0;
         mRequestedErrorThreshold[i] = 0.f;
-        mRequestedBuildOperator[i] = 0;
-        mRequestedQueueMode[i] = 0;
-        mRequestedBorderMode[i] = 0;
-        mRequestedShareTolerance[i] = 0.f;
     }
 
     mViewOption["show_textures"] = false;
@@ -224,23 +204,11 @@ LLModelPreview::LLModelPreview(S32 width, S32 height, LLFloater* fmp)
     mHasPivot = false;
     mModelPivot = LLVector3(0.0f, 0.0f, 0.0f);
 
-    glodInit();
-
     createPreviewAvatar();
 }
 
 LLModelPreview::~LLModelPreview()
 {
-    // glod apparently has internal mem alignment issues that are angering
-    // the heap-check code in windows, these should be hunted down in that
-    // TP code, if possible
-    //
-    // kernel32.dll!HeapFree()  + 0x14 bytes	
-    // msvcr100.dll!free(void * pBlock)  Line 51	C
-    // glod.dll!glodGetGroupParameteriv()  + 0x119 bytes	
-    // glod.dll!glodShutdown()  + 0x77 bytes	
-    //
-    //glodShutdown();
     if (mModelLoader)
     {
         mModelLoader->shutdown();
@@ -591,7 +559,7 @@ void LLModelPreview::rebuildUploadData()
                 bool upload_skinweights = fmp && fmp->childGetValue("upload_skin").asBoolean();
                 if (upload_skinweights && high_lod_model->mSkinInfo.mJointNames.size() > 0)
                 {
-                    LLQuaternion bind_rot = LLSkinningUtil::getUnscaledQuaternion(high_lod_model->mSkinInfo.mBindShapeMatrix);
+                    LLQuaternion bind_rot = LLSkinningUtil::getUnscaledQuaternion(LLMatrix4(high_lod_model->mSkinInfo.mBindShapeMatrix));
                     LLQuaternion identity;
                     if (!bind_rot.isEqualEps(identity, 0.01))
                     {
@@ -826,11 +794,6 @@ void LLModelPreview::loadModel(std::string filename, S32 lod, bool force_disable
 
     mLODFile[lod] = filename;
 
-    if (lod == LLModel::LOD_HIGH)
-    {
-        clearGLODGroup();
-    }
-
     std::map<std::string, std::string> joint_alias_map;
     getJointAliases(joint_alias_map);
 
@@ -931,7 +894,6 @@ void LLModelPreview::clearIncompatible(S32 lod)
                 if (i == LLModel::LOD_HIGH)
                 {
                     mBaseModel = mModel[lod];
-                    clearGLODGroup();
                     mBaseScene = mScene[lod];
                     mVertexBuffer[5].clear();
                 }
@@ -940,23 +902,6 @@ void LLModelPreview::clearIncompatible(S32 lod)
     }
 }
 
-void LLModelPreview::clearGLODGroup()
-{
-    if (mGroup)
-    {
-        for (std::map<LLPointer<LLModel>, U32>::iterator iter = mObject.begin(); iter != mObject.end(); ++iter)
-        {
-            glodDeleteObject(iter->second);
-            stop_gloderror();
-        }
-        mObject.clear();
-
-        glodDeleteGroup(mGroup);
-        stop_gloderror();
-        mGroup = 0;
-    }
-}
-
 void LLModelPreview::loadModelCallback(S32 loaded_lod)
 {
     assert_main_thread();
@@ -1108,7 +1053,6 @@ void LLModelPreview::loadModelCallback(S32 loaded_lod)
             }
 
             mBaseModel = mModel[loaded_lod];
-            clearGLODGroup();
 
             mBaseScene = mScene[loaded_lod];
             mVertexBuffer[5].clear();
@@ -1339,240 +1283,442 @@ void LLModelPreview::restoreNormals()
     updateStatusMessages();
 }
 
-void LLModelPreview::genLODs(S32 which_lod, U32 decimation, bool enforce_tri_limit)
+// Runs per object, but likely it is a better way to run per model+submodels
+// returns a ratio of base model indices to resulting indices
+// returns -1 in case of failure
+F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *target_model, F32 indices_decimator, F32 error_threshold, bool sloppy)
 {
-    // Allow LoD from -1 to LLModel::LOD_PHYSICS
-    if (which_lod < -1 || which_lod > LLModel::NUM_LODS - 1)
+    // Figure out buffer size
+    S32 size_indices = 0;
+    S32 size_vertices = 0;
+
+    for (U32 face_idx = 0; face_idx < base_model->getNumVolumeFaces(); ++face_idx)
     {
-        std::ostringstream out;
-        out << "Invalid level of detail: " << which_lod;
-        LL_WARNS() << out.str() << LL_ENDL;
-        LLFloaterModelPreview::addStringToLog(out, false);
-        assert(which_lod >= -1 && which_lod < LLModel::NUM_LODS);
-        return;
+        const LLVolumeFace &face = base_model->getVolumeFace(face_idx);
+        size_indices += face.mNumIndices;
+        size_vertices += face.mNumVertices;
     }
 
-    if (mBaseModel.empty())
+    // Allocate buffers, note that we are using U32 buffer instead of U16
+    U32* combined_indices = (U32*)ll_aligned_malloc_32(size_indices * sizeof(U32));
+    U32* output_indices = (U32*)ll_aligned_malloc_32(size_indices * sizeof(U32));
+
+    // extra space for normals and text coords
+    S32 tc_bytes_size = ((size_vertices * sizeof(LLVector2)) + 0xF) & ~0xF;
+    LLVector4a* combined_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * size_vertices + tc_bytes_size);
+    LLVector4a* combined_normals = combined_positions + size_vertices;
+    LLVector2* combined_tex_coords = (LLVector2*)(combined_normals + size_vertices);
+
+    // copy indices and vertices into new buffers
+    S32 combined_positions_shift = 0;
+    S32 indices_idx_shift = 0;
+    S32 combined_indices_shift = 0;
+    for (U32 face_idx = 0; face_idx < base_model->getNumVolumeFaces(); ++face_idx)
     {
-        return;
+        const LLVolumeFace &face = base_model->getVolumeFace(face_idx);
+
+        // vertices
+        S32 copy_bytes = face.mNumVertices * sizeof(LLVector4a);
+        LLVector4a::memcpyNonAliased16((F32*)(combined_positions + combined_positions_shift), (F32*)face.mPositions, copy_bytes);
+
+        // normals
+        LLVector4a::memcpyNonAliased16((F32*)(combined_normals + combined_positions_shift), (F32*)face.mNormals, copy_bytes);
+
+        // tex coords
+        copy_bytes = face.mNumVertices * sizeof(LLVector2);
+        memcpy((void*)(combined_tex_coords + combined_positions_shift), (void*)face.mTexCoords, copy_bytes);
+
+        combined_positions_shift += face.mNumVertices;
+
+        // indices, sadly can't do dumb memcpy for indices, need to adjust each value
+        for (S32 i = 0; i < face.mNumIndices; ++i)
+        {
+            U16 idx = face.mIndices[i];
+
+            combined_indices[combined_indices_shift] = idx + indices_idx_shift;
+            combined_indices_shift++;
+        }
+        indices_idx_shift += face.mNumVertices;
     }
 
-    LLVertexBuffer::unbind();
+    // Now that we have buffers, optimize
+    S32 target_indices = 0;
+    F32 result_code = 0; // how far from original the model is, 1 == 100%
+    S32 new_indices = 0;
+
+    target_indices = llmax(3, llfloor(size_indices / indices_decimator)); // leave at least one triangle
+    new_indices = LLMeshOptimizer::simplifyU32(
+        output_indices,
+        combined_indices,
+        size_indices,
+        combined_positions,
+        size_vertices,
+        LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_VERTEX],
+        target_indices,
+        error_threshold,
+        sloppy,
+        &result_code);
 
-    bool no_ff = LLGLSLShader::sNoFixedFunction;
-    LLGLSLShader* shader = LLGLSLShader::sCurBoundShaderPtr;
-    LLGLSLShader::sNoFixedFunction = false;
 
-    if (shader)
+    if (result_code < 0)
     {
-        shader->unbind();
+        LL_WARNS() << "Negative result code from meshoptimizer for model " << target_model->mLabel
+            << " target Indices: " << target_indices
+            << " new Indices: " << new_indices
+            << " original count: " << size_indices << LL_ENDL;
     }
 
-    stop_gloderror();
-    static U32 cur_name = 1;
+    // repack back into individual faces
 
-    S32 limit = -1;
+    LLVector4a* buffer_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * size_vertices + tc_bytes_size);
+    LLVector4a* buffer_normals = buffer_positions + size_vertices;
+    LLVector2* buffer_tex_coords = (LLVector2*)(buffer_normals + size_vertices);
+    S32 buffer_idx_size = (size_indices * sizeof(U16) + 0xF) & ~0xF;
+    U16* buffer_indices = (U16*)ll_aligned_malloc_16(buffer_idx_size);
+    S32* old_to_new_positions_map = new S32[size_vertices];
 
-    U32 triangle_count = 0;
+    S32 buf_positions_copied = 0;
+    S32 buf_indices_copied = 0;
+    indices_idx_shift = 0;
+    S32 valid_faces = 0;
 
-    U32 instanced_triangle_count = 0;
-
-    //get the triangle count for the whole scene
-    for (LLModelLoader::scene::iterator iter = mBaseScene.begin(), endIter = mBaseScene.end(); iter != endIter; ++iter)
+    // Crude method to copy indices back into face
+    for (U32 face_idx = 0; face_idx < base_model->getNumVolumeFaces(); ++face_idx)
     {
-        for (LLModelLoader::model_instance_list::iterator instance = iter->second.begin(), end_instance = iter->second.end(); instance != end_instance; ++instance)
+        const LLVolumeFace &face = base_model->getVolumeFace(face_idx);
+
+        // reset data for new run
+        buf_positions_copied = 0;
+        buf_indices_copied = 0;
+        bool copy_triangle = false;
+        S32 range = indices_idx_shift + face.mNumVertices;
+
+        for (S32 i = 0; i < size_vertices; i++)
         {
-            LLModel* mdl = instance->mModel;
-            if (mdl)
+            old_to_new_positions_map[i] = -1;
+        }
+
+        // Copy relevant indices and vertices
+        for (S32 i = 0; i < new_indices; ++i)
+        {
+            U32 idx = output_indices[i];
+
+            if ((i % 3) == 0)
             {
-                instanced_triangle_count += mdl->getNumTriangles();
+                copy_triangle = idx >= indices_idx_shift && idx < range;
             }
-        }
-    }
 
-    //get the triangle count for the non-instanced set of models
-    for (U32 i = 0; i < mBaseModel.size(); ++i)
-    {
-        triangle_count += mBaseModel[i]->getNumTriangles();
-    }
+            if (copy_triangle)
+            {
+                if (old_to_new_positions_map[idx] == -1)
+                {
+                    // New position, need to copy it
+                    // Validate size
+                    if (buf_positions_copied >= U16_MAX)
+                    {
+                        // Normally this shouldn't happen since the whole point is to reduce amount of vertices
+                        // but it might happen if user tries to run optimization with too large triangle or error value
+                        // so fallback to 'per face' mode or verify requested limits and copy base model as is.
+                        LL_WARNS() << "Over triangle limit. Failed to optimize in 'per object' mode, falling back to per face variant for"
+                            << " model " << target_model->mLabel
+                            << " target Indices: " << target_indices
+                            << " new Indices: " << new_indices
+                            << " original count: " << size_indices
+                            << " error treshold: " << error_threshold
+                            << LL_ENDL;
+                        return -1;
+                    }
 
-    //get ratio of uninstanced triangles to instanced triangles
-    F32 triangle_ratio = (F32)triangle_count / (F32)instanced_triangle_count;
+                    // Copy vertice, normals, tcs
+                    buffer_positions[buf_positions_copied] = combined_positions[idx];
+                    buffer_normals[buf_positions_copied] = combined_normals[idx];
+                    buffer_tex_coords[buf_positions_copied] = combined_tex_coords[idx];
 
-    U32 base_triangle_count = triangle_count;
+                    old_to_new_positions_map[idx] = buf_positions_copied;
 
-    U32 type_mask = LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL | LLVertexBuffer::MAP_TEXCOORD0;
+                    buffer_indices[buf_indices_copied] = (U16)buf_positions_copied;
+                    buf_positions_copied++;
+                }
+                else
+                {
+                    // existing position
+                    buffer_indices[buf_indices_copied] = (U16)old_to_new_positions_map[idx];
+                }
+                buf_indices_copied++;
+            }
+        }
 
-    U32 lod_mode = 0;
+        if (buf_positions_copied >= U16_MAX)
+        {
+            break;
+        }
 
-    F32 lod_error_threshold = 0;
+        LLVolumeFace &new_face = target_model->getVolumeFace(face_idx);
+        //new_face = face; //temp
 
-    // The LoD should be in range from Lowest to High
-    if (which_lod > -1 && which_lod < NUM_LOD)
-    {
-        LLCtrlSelectionInterface* iface = mFMP->childGetSelectionInterface("lod_mode_" + lod_name[which_lod]);
-        if (iface)
+        if (buf_indices_copied < 3)
         {
-            lod_mode = iface->getFirstSelectedIndex();
+            // face was optimized away
+            new_face.resizeIndices(3);
+            new_face.resizeVertices(1);
+            memset(new_face.mIndices, 0, sizeof(U16) * 3);
+            new_face.mPositions[0].clear(); // set first vertice to 0
+            new_face.mNormals[0].clear();
+            new_face.mTexCoords[0].setZero();
         }
+        else
+        {
+            new_face.resizeIndices(buf_indices_copied);
+            new_face.resizeVertices(buf_positions_copied);
 
-        lod_error_threshold = mFMP->childGetValue("lod_error_threshold_" + lod_name[which_lod]).asReal();
-    }
+            S32 idx_size = (buf_indices_copied * sizeof(U16) + 0xF) & ~0xF;
+            LLVector4a::memcpyNonAliased16((F32*)new_face.mIndices, (F32*)buffer_indices, idx_size);
 
-    if (which_lod != -1)
-    {
-        mRequestedLoDMode[which_lod] = lod_mode;
+            LLVector4a::memcpyNonAliased16((F32*)new_face.mPositions, (F32*)buffer_positions, buf_positions_copied * sizeof(LLVector4a));
+            LLVector4a::memcpyNonAliased16((F32*)new_face.mNormals, (F32*)buffer_normals, buf_positions_copied * sizeof(LLVector4a));
+
+            U32 tex_size = (buf_positions_copied * sizeof(LLVector2) + 0xF)&~0xF;
+            LLVector4a::memcpyNonAliased16((F32*)new_face.mTexCoords, (F32*)buffer_tex_coords, tex_size);
+
+            valid_faces++;
+        }
+
+        indices_idx_shift += face.mNumVertices;
     }
 
-    if (lod_mode == 0)
+    delete[]old_to_new_positions_map;
+    ll_aligned_free<64>(combined_positions);
+    ll_aligned_free<64>(buffer_positions);
+    ll_aligned_free_32(output_indices);
+    ll_aligned_free_16(buffer_indices);
+    ll_aligned_free_32(combined_indices);
+
+    if (new_indices < 3 || valid_faces == 0)
     {
-        lod_mode = GLOD_TRIANGLE_BUDGET;
+        // Model should have at least one visible triangle
 
-        // The LoD should be in range from Lowest to High
-        if (which_lod > -1 && which_lod < NUM_LOD)
+        if (!sloppy)
         {
-            limit = mFMP->childGetValue("lod_triangle_limit_" + lod_name[which_lod]).asInteger();
-            //convert from "scene wide" to "non-instanced" triangle limit
-            limit = (S32)((F32)limit*triangle_ratio);
+            // Should only happen with sloppy
+            // non sloppy shouldn't be capable of optimizing mesh away
+            LL_WARNS() << "Failed to generate triangles"
+                << " model " << target_model->mLabel
+                << " target Indices: " << target_indices
+                << " new Indices: " << new_indices
+                << " original count: " << size_indices
+                << " error treshold: " << error_threshold
+                << LL_ENDL;
         }
+
+        return -1;
+    }
+
+    return (F32)size_indices / (F32)new_indices;
+}
+
+F32 LLModelPreview::genMeshOptimizerPerFace(LLModel *base_model, LLModel *target_model, U32 face_idx, F32 indices_decimator, F32 error_threshold, bool sloppy)
+{
+    const LLVolumeFace &face = base_model->getVolumeFace(face_idx);
+    S32 size_indices = face.mNumIndices;
+    // todo: do not allocate per each face, add one large buffer somewhere
+    // faces have limited amount of indices
+    S32 size = (size_indices * sizeof(U16) + 0xF) & ~0xF;
+    U16* output = (U16*)ll_aligned_malloc_16(size);
+
+    S32 target_indices = 0;
+    F32 result_code = 0; // how far from original the model is, 1 == 100%
+    S32 new_indices = 0;
+
+    target_indices = llmax(3, llfloor(size_indices / indices_decimator)); // leave at least one triangle
+    new_indices = LLMeshOptimizer::simplify(
+        output,
+        face.mIndices,
+        size_indices,
+        face.mPositions,
+        face.mNumVertices,
+        LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_VERTEX],
+        target_indices,
+        error_threshold,
+        sloppy,
+        &result_code);
+
+
+    if (result_code < 0)
+    {
+        LL_WARNS() << "Negative result code from meshoptimizer for face " << face_idx
+            << " of model " << target_model->mLabel
+            << " target Indices: " << target_indices
+            << " new Indices: " << new_indices
+            << " original count: " << size_indices
+            << " error treshold: " << error_threshold
+            << LL_ENDL;
+    }
+
+    LLVolumeFace &new_face = target_model->getVolumeFace(face_idx);
+
+    // Copy old values
+    new_face = face;
+
+
+    if (new_indices < 3)
+    {
+        if (!sloppy)
+        {
+            // meshopt_optimizeSloppy() can optimize triangles away even if target_indices is > 2,
+            // but optimize() isn't supposed to
+            LL_INFOS() << "No indices generated by meshoptimizer for face " << face_idx
+                << " of model " << target_model->mLabel
+                << " target Indices: " << target_indices
+                << " original count: " << size_indices
+                << " error treshold: " << error_threshold
+                << LL_ENDL;
+        }
+
+        // Face got optimized away
+        // Generate empty triangle
+        new_face.resizeIndices(3);
+        new_face.resizeVertices(1);
+        memset(new_face.mIndices, 0, sizeof(U16) * 3);
+        new_face.mPositions[0].clear(); // set first vertice to 0
+        new_face.mNormals[0].clear();
+        new_face.mTexCoords[0].setZero();
     }
     else
     {
-        lod_mode = GLOD_ERROR_THRESHOLD;
-    }
+        // Assign new values
+        new_face.resizeIndices(new_indices); // will wipe out mIndices, so new_face can't substitute output
+        S32 idx_size = (new_indices * sizeof(U16) + 0xF) & ~0xF;
+        LLVector4a::memcpyNonAliased16((F32*)new_face.mIndices, (F32*)output, idx_size);
 
-    bool object_dirty = false;
+        // clear unused values
+        new_face.optimize();
+    }
 
-    if (mGroup == 0)
+    ll_aligned_free_16(output);
+     
+    if (new_indices < 3)
     {
-        object_dirty = true;
-        mGroup = cur_name++;
-        glodNewGroup(mGroup);
+        // At least one triangle is needed
+        return -1;
     }
 
-    if (object_dirty)
+    return (F32)size_indices / (F32)new_indices;
+}
+
+void LLModelPreview::genMeshOptimizerLODs(S32 which_lod, S32 meshopt_mode, U32 decimation, bool enforce_tri_limit)
+{
+    LL_INFOS() << "Generating lod " << which_lod << " using meshoptimizer" << LL_ENDL;
+    // Allow LoD from -1 to LLModel::LOD_PHYSICS
+    if (which_lod < -1 || which_lod > LLModel::NUM_LODS - 1)
     {
-        for (LLModelLoader::model_list::iterator iter = mBaseModel.begin(); iter != mBaseModel.end(); ++iter)
-        { //build GLOD objects for each model in base model list
-            LLModel* mdl = *iter;
+        std::ostringstream out;
+        out << "Invalid level of detail: " << which_lod;
+        LL_WARNS() << out.str() << LL_ENDL;
+        LLFloaterModelPreview::addStringToLog(out, false);
+        assert(lod >= -1 && lod < LLModel::NUM_LODS);
+        return;
+    }
 
-            if (mObject[mdl] != 0)
-            {
-                glodDeleteObject(mObject[mdl]);
-            }
+    if (mBaseModel.empty())
+    {
+        return;
+    }
 
-            mObject[mdl] = cur_name++;
+    //get the triangle count for all base models
+    S32 base_triangle_count = 0;
+    for (S32 i = 0; i < mBaseModel.size(); ++i)
+    {
+        base_triangle_count += mBaseModel[i]->getNumTriangles();
+    }
 
-            glodNewObject(mObject[mdl], mGroup, GLOD_DISCRETE);
-            stop_gloderror();
+    // Urgh...
+    // TODO: add interface to mFMP to get error treshold or let mFMP write one into LLModelPreview
+    // We should not be accesing views from other class!
+    U32 lod_mode = LIMIT_TRIANGLES;
+    F32 indices_decimator = 0;
+    F32 triangle_limit = 0;
+    F32 lod_error_threshold = 1; //100%
 
-            if (iter == mBaseModel.begin() && !mdl->mSkinWeights.empty())
-            { //regenerate vertex buffer for skinned models to prevent animation feedback during LOD generation
-                mVertexBuffer[5].clear();
-            }
+    // If requesting a single lod
+    if (which_lod > -1 && which_lod < NUM_LOD)
+    {
+        LLCtrlSelectionInterface* iface = mFMP->childGetSelectionInterface("lod_mode_" + lod_name[which_lod]);
+        if (iface)
+        {
+            lod_mode = iface->getFirstSelectedIndex();
+        }
 
-            if (mVertexBuffer[5].empty())
+        if (lod_mode == LIMIT_TRIANGLES)
+        {
+            if (!enforce_tri_limit)
             {
-                genBuffers(5, false);
-            }
+                triangle_limit = base_triangle_count;
+                // reset to default value for this lod
+                F32 pw = pow((F32)decimation, (F32)(LLModel::LOD_HIGH - which_lod));
 
-            U32 tri_count = 0;
-            for (U32 i = 0; i < mVertexBuffer[5][mdl].size(); ++i)
+                triangle_limit /= pw; //indices_ratio can be 1/pw
+            }
+            else
             {
-                LLVertexBuffer* buff = mVertexBuffer[5][mdl][i];
-                buff->setBuffer(type_mask & buff->getTypeMask());
 
-                U32 num_indices = mVertexBuffer[5][mdl][i]->getNumIndices();
-                if (num_indices > 2)
-                {
-                    glodInsertElements(mObject[mdl], i, GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT, (U8*)mVertexBuffer[5][mdl][i]->getIndicesPointer(), 0, 0.f);
-                }
-                tri_count += num_indices / 3;
-                stop_gloderror();
-            }
+                // UI spacifies limit for all models of single lod
+                triangle_limit = mFMP->childGetValue("lod_triangle_limit_" + lod_name[which_lod]).asInteger();
 
-            glodBuildObject(mObject[mdl]);
-            stop_gloderror();
+            }
+            // meshoptimizer doesn't use triangle limit, it uses indices limit, so convert it to aproximate ratio
+            indices_decimator = (F32)base_triangle_count / triangle_limit;
         }
+        else
+        {
+            lod_error_threshold = mFMP->childGetValue("lod_error_threshold_" + lod_name[which_lod]).asReal();
+        }
+    }
+    else
+    {
+        // we are genrating all lods and each lod will get own indices_decimator
+        indices_decimator = 1;
+        triangle_limit = base_triangle_count;
     }
 
+    mMaxTriangleLimit = base_triangle_count;
+    mMinTriangleLimit = mBaseModel.size();
+
+    // Build models
 
     S32 start = LLModel::LOD_HIGH;
     S32 end = 0;
 
     if (which_lod != -1)
     {
-        start = end = which_lod;
+        start = which_lod;
+        end = which_lod;
     }
 
-    mMaxTriangleLimit = base_triangle_count;
-
     for (S32 lod = start; lod >= end; --lod)
     {
         if (which_lod == -1)
         {
+            // we are genrating all lods and each lod gets own indices_ratio
             if (lod < start)
             {
-                triangle_count /= decimation;
-            }
-        }
-        else
-        {
-            if (enforce_tri_limit)
-            {
-                triangle_count = limit;
-            }
-            else
-            {
-                for (S32 j = LLModel::LOD_HIGH; j>which_lod; --j)
-                {
-                    triangle_count /= decimation;
-                }
+                indices_decimator *= decimation;
+                triangle_limit /= decimation;
             }
         }
 
+        mRequestedTriangleCount[lod] = llmax(mMinTriangleLimit, (S32)triangle_limit);
+        mRequestedErrorThreshold[lod] = lod_error_threshold;
+        mRequestedLoDMode[lod] = lod_mode;
+
         mModel[lod].clear();
         mModel[lod].resize(mBaseModel.size());
         mVertexBuffer[lod].clear();
 
-        U32 actual_tris = 0;
-        U32 actual_verts = 0;
-        U32 submeshes = 0;
-
-        mRequestedTriangleCount[lod] = (S32)((F32)triangle_count / triangle_ratio);
-        mRequestedErrorThreshold[lod] = lod_error_threshold;
-
-        glodGroupParameteri(mGroup, GLOD_ADAPT_MODE, lod_mode);
-        stop_gloderror();
-
-        glodGroupParameteri(mGroup, GLOD_ERROR_MODE, GLOD_OBJECT_SPACE_ERROR);
-        stop_gloderror();
-
-        glodGroupParameterf(mGroup, GLOD_OBJECT_SPACE_ERROR_THRESHOLD, lod_error_threshold);
-        stop_gloderror();
-
-        if (lod_mode != GLOD_TRIANGLE_BUDGET)
-        {
-            glodGroupParameteri(mGroup, GLOD_MAX_TRIANGLES, 0);
-        }
-        else
-        {
-            //SH-632: always add 1 to desired amount to avoid decimating below desired amount
-            glodGroupParameteri(mGroup, GLOD_MAX_TRIANGLES, triangle_count + 1);
-        }
-
-        stop_gloderror();
-        glodAdaptGroup(mGroup);
-        stop_gloderror();
 
         for (U32 mdl_idx = 0; mdl_idx < mBaseModel.size(); ++mdl_idx)
         {
             LLModel* base = mBaseModel[mdl_idx];
 
-            GLint patch_count = 0;
-            glodGetObjectParameteriv(mObject[base], GLOD_NUM_PATCHES, &patch_count);
-            stop_gloderror();
-
             LLVolumeParams volume_params;
             volume_params.setType(LL_PCODE_PROFILE_SQUARE, LL_PCODE_PATH_LINE);
             mModel[lod][mdl_idx] = new LLModel(volume_params, 0.f);
@@ -1581,74 +1727,102 @@ void LLModelPreview::genLODs(S32 which_lod, U32 decimation, bool enforce_tri_lim
 
             mModel[lod][mdl_idx]->mLabel = name;
             mModel[lod][mdl_idx]->mSubmodelID = base->mSubmodelID;
-
-            GLint* sizes = new GLint[patch_count * 2];
-            glodGetObjectParameteriv(mObject[base], GLOD_PATCH_SIZES, sizes);
-            stop_gloderror();
-
-            GLint* names = new GLint[patch_count];
-            glodGetObjectParameteriv(mObject[base], GLOD_PATCH_NAMES, names);
-            stop_gloderror();
-
-            mModel[lod][mdl_idx]->setNumVolumeFaces(patch_count);
+            mModel[lod][mdl_idx]->setNumVolumeFaces(base->getNumVolumeFaces());
 
             LLModel* target_model = mModel[lod][mdl_idx];
 
-            for (GLint i = 0; i < patch_count; ++i)
-            {
-                type_mask = mVertexBuffer[5][base][i]->getTypeMask();
+            S32 model_meshopt_mode = meshopt_mode;
 
-                LLPointer<LLVertexBuffer> buff = new LLVertexBuffer(type_mask, 0);
+            // Ideally this should run not per model,
+            // but combine all submodels with origin model as well
+            if (model_meshopt_mode == MESH_OPTIMIZER_COMBINE)
+            {
+                // Run meshoptimizer for each model/object, up to 8 faces in one model.
 
-                if (sizes[i * 2 + 1] > 0 && sizes[i * 2] > 0)
+                // Ideally this should run not per model,
+                // but combine all submodels with origin model as well
+                F32 res = genMeshOptimizerPerModel(base, target_model, indices_decimator, lod_error_threshold, false);
+                if (res < 0)
                 {
-                    if (!buff->allocateBuffer(sizes[i * 2 + 1], sizes[i * 2], true))
+                    // U16 vertices overflow, shouldn't happen, but just in case
+                    for (U32 face_idx = 0; face_idx < base->getNumVolumeFaces(); ++face_idx)
                     {
-                        // Todo: find a way to stop preview in this case instead of crashing
-                        LL_ERRS() << "Failed buffer allocation during preview LOD generation."
-                            << " Vertices: " << sizes[i * 2 + 1]
-                            << " Indices: " << sizes[i * 2] << LL_ENDL;
+                        genMeshOptimizerPerFace(base, target_model, face_idx, indices_decimator, lod_error_threshold, false);
                     }
-                    buff->setBuffer(type_mask);
-                    glodFillElements(mObject[base], names[i], GL_UNSIGNED_SHORT, (U8*)buff->getIndicesPointer());
-                    stop_gloderror();
                 }
-                else
+            }
+
+            if (model_meshopt_mode == MESH_OPTIMIZER_SLOPPY)
+            {
+                // Run meshoptimizer for each face
+                for (U32 face_idx = 0; face_idx < base->getNumVolumeFaces(); ++face_idx)
                 {
-                    // This face was eliminated or we failed to allocate buffer,
-                    // attempt to create a dummy triangle (one vertex, 3 indices, all 0)
-                    buff->allocateBuffer(1, 3, true);
-                    memset((U8*)buff->getMappedData(), 0, buff->getSize());
-                    memset((U8*)buff->getIndicesPointer(), 0, buff->getIndicesSize());
+                    genMeshOptimizerPerFace(base, target_model, face_idx, indices_decimator, lod_error_threshold, true);
                 }
 
-                buff->validateRange(0, buff->getNumVerts() - 1, buff->getNumIndices(), 0);
+                LL_INFOS() << "Model " << target_model->getName()
+                    << " lod " << which_lod
+                    << " simplified using per face method." << LL_ENDL;
+            }
 
-                LLStrider<LLVector3> pos;
-                LLStrider<LLVector3> norm;
-                LLStrider<LLVector2> tc;
-                LLStrider<U16> index;
+            if (model_meshopt_mode == MESH_OPTIMIZER_AUTO)
+            {
+                // Switches between 'combine' method and 'per model sloppy' based on combine's result.
+                F32 allowed_ratio_drift = 2.f;
+                F32 res_ratio = genMeshOptimizerPerModel(base, target_model, indices_decimator, lod_error_threshold, false);
 
-                buff->getVertexStrider(pos);
-                if (type_mask & LLVertexBuffer::MAP_NORMAL)
+                if (res_ratio < 0)
                 {
-                    buff->getNormalStrider(norm);
+                    // U16 vertices overflow, shouldn't happen, but just in case
+                    for (U32 face_idx = 0; face_idx < base->getNumVolumeFaces(); ++face_idx)
+                    {
+                        genMeshOptimizerPerFace(base, target_model, face_idx, indices_decimator, lod_error_threshold, false);
+                    }
                 }
-                if (type_mask & LLVertexBuffer::MAP_TEXCOORD0)
+                else if (res_ratio * allowed_ratio_drift < indices_decimator)
                 {
-                    buff->getTexCoord0Strider(tc);
-                }
+                    // Try sloppy variant if normal one failed to simplify model enough.
+                    res_ratio = genMeshOptimizerPerModel(base, target_model, indices_decimator, lod_error_threshold, true);
+
+                    // Sloppy has a tendecy to error into lower side, so a request for 100
+                    // triangles turns into ~70, so check for significant difference from target decimation
+                    F32 sloppy_ratio_drift = 1.4f;
+                    if (lod_mode == LIMIT_TRIANGLES
+                        && (res_ratio > indices_decimator * sloppy_ratio_drift || res_ratio < 0))
+                    {
+                        // Apply a correction to compensate.
 
-                buff->getIndexStrider(index);
+                        // (indices_decimator / res_ratio) by itself is likely to overshoot to a differend
+                        // side due to overal lack of precision, and we don't need an ideal result, which
+                        // likely does not exist, just a better one, so a partial correction is enough.
+                        F32 sloppy_decimator = indices_decimator * (indices_decimator / res_ratio + 1) / 2;
+                        res_ratio = genMeshOptimizerPerModel(base, target_model, sloppy_decimator, lod_error_threshold, true);
+                    }
 
-                target_model->setVolumeFaceData(names[i], pos, norm, tc, index, buff->getNumVerts(), buff->getNumIndices());
-                actual_tris += buff->getNumIndices() / 3;
-                actual_verts += buff->getNumVerts();
-                ++submeshes;
 
-                if (!validate_face(target_model->getVolumeFace(names[i])))
+                    if (res_ratio < 0)
+                    {
+                        // Sloppy variant failed to generate triangles.
+                        // Can happen with models that are too simple as is.
+                        // Fallback to normal method or use lower decimator.
+                        genMeshOptimizerPerModel(base, target_model, indices_decimator, lod_error_threshold, false);
+
+                        LL_INFOS() << "Model " << target_model->getName()
+                            << " lod " << which_lod
+                            << " simplified using per model method." << LL_ENDL;
+                    }
+                    else
+                    {
+                        LL_INFOS() << "Model " << target_model->getName()
+                            << " lod " << which_lod
+                            << " sloppily simplified using per model method." << LL_ENDL;
+                    }
+                }
+                else
                 {
-                    LL_ERRS() << "Invalid face generated during LOD generation." << LL_ENDL;
+                    LL_INFOS() << "Model " << target_model->getName()
+                        << " lod " << which_lod
+                        << " simplified using per model method." << LL_ENDL;
                 }
             }
 
@@ -1658,6 +1832,7 @@ void LLModelPreview::genLODs(S32 which_lod, U32 decimation, bool enforce_tri_lim
             target_model->mPosition = base->mPosition;
             target_model->mSkinWeights = base->mSkinWeights;
             target_model->mSkinInfo = base->mSkinInfo;
+
             //copy material list
             target_model->mMaterialList = base->mMaterialList;
 
@@ -1665,9 +1840,6 @@ void LLModelPreview::genLODs(S32 which_lod, U32 decimation, bool enforce_tri_lim
             {
                 LL_ERRS() << "Invalid model generated when creating LODs" << LL_ENDL;
             }
-
-            delete[] sizes;
-            delete[] names;
         }
 
         //rebuild scene based on mBaseScene
@@ -1695,13 +1867,6 @@ void LLModelPreview::genLODs(S32 which_lod, U32 decimation, bool enforce_tri_lim
     }
 
     mResourceCost = calcResourceCost();
-
-    LLVertexBuffer::unbind();
-    LLGLSLShader::sNoFixedFunction = no_ff;
-    if (shader)
-    {
-        shader->bind();
-    }
 }
 
 void LLModelPreview::updateStatusMessages()
@@ -1821,6 +1986,7 @@ void LLModelPreview::updateStatusMessages()
     if (mMaxTriangleLimit == 0)
     {
         mMaxTriangleLimit = total_tris[LLModel::LOD_HIGH];
+        mMinTriangleLimit = mUploadData.size();
     }
 
     mHasDegenerate = false;
@@ -2264,7 +2430,7 @@ void LLModelPreview::updateLodControls(S32 lod)
     S32 lod_mode = lod_combo->getCurrentIndex();
     if (lod_mode == LOD_FROM_FILE) // LoD from file
     {
-        fmp->mLODMode[lod] = 0;
+        fmp->mLODMode[lod] = LOD_FROM_FILE;
         for (U32 i = 0; i < num_file_controls; ++i)
         {
             mFMP->childSetVisible(file_controls[i] + lod_name[lod], true);
@@ -2277,7 +2443,7 @@ void LLModelPreview::updateLodControls(S32 lod)
     }
     else if (lod_mode == USE_LOD_ABOVE) // use LoD above
     {
-        fmp->mLODMode[lod] = 2;
+        fmp->mLODMode[lod] = USE_LOD_ABOVE;
         for (U32 i = 0; i < num_file_controls; ++i)
         {
             mFMP->childSetVisible(file_controls[i] + lod_name[lod], false);
@@ -2303,7 +2469,7 @@ void LLModelPreview::updateLodControls(S32 lod)
     }
     else // auto generate, the default case for all LoDs except High
     {
-        fmp->mLODMode[lod] = 1;
+        fmp->mLODMode[lod] = MESH_OPTIMIZER_AUTO;
 
         //don't actually regenerate lod when refreshing UI
         mLODFrozen = true;
@@ -2323,6 +2489,7 @@ void LLModelPreview::updateLodControls(S32 lod)
         LLSpinCtrl* limit = mFMP->getChild<LLSpinCtrl>("lod_triangle_limit_" + lod_name[lod]);
 
         limit->setMaxValue(mMaxTriangleLimit);
+        limit->setMinValue(mMinTriangleLimit);
         limit->forceSetValue(mRequestedTriangleCount[lod]);
 
         threshold->forceSetValue(mRequestedErrorThreshold[lod]);
@@ -2335,7 +2502,8 @@ void LLModelPreview::updateLodControls(S32 lod)
             threshold->setVisible(false);
 
             limit->setMaxValue(mMaxTriangleLimit);
-            limit->setIncrement(mMaxTriangleLimit / 32);
+            limit->setMinValue(mMinTriangleLimit);
+            limit->setIncrement(llmax((U32)1, mMaxTriangleLimit / 32));
         }
         else
         {
@@ -2499,6 +2667,8 @@ void LLModelPreview::genBuffers(S32 lod, bool include_skin_weights)
                 *(index_strider++) = vf.mIndices[i];
             }
 
+            vb->flush();
+
             mVertexBuffer[lod][mdl].push_back(vb);
 
             vertex_count += num_vertices;
@@ -2724,8 +2894,6 @@ BOOL LLModelPreview::render()
     LLMutexLock lock(this);
     mNeedsUpdate = FALSE;
 
-    bool use_shaders = LLGLSLShader::sNoFixedFunction;
-
     bool edges = mViewOption["show_edges"];
     bool joint_overrides = mViewOption["show_joint_overrides"];
     bool joint_positions = mViewOption["show_joint_positions"];
@@ -2743,10 +2911,8 @@ BOOL LLModelPreview::render()
     LLGLDisable fog(GL_FOG);
 
     {
-        if (use_shaders)
-        {
-            gUIProgram.bind();
-        }
+        gUIProgram.bind();
+
         //clear background to grey
         gGL.matrixMode(LLRender::MM_PROJECTION);
         gGL.pushMatrix();
@@ -2765,10 +2931,7 @@ BOOL LLModelPreview::render()
 
         gGL.matrixMode(LLRender::MM_MODELVIEW);
         gGL.popMatrix();
-        if (use_shaders)
-        {
-            gUIProgram.unbind();
-        }
+        gUIProgram.unbind();
     }
 
     LLFloaterModelPreview* fmp = LLFloaterModelPreview::sInstance;
@@ -2921,10 +3084,7 @@ BOOL LLModelPreview::render()
         refresh();
     }
 
-    if (use_shaders)
-    {
-        gObjectPreviewProgram.bind();
-    }
+    gObjectPreviewProgram.bind();
 
     gGL.loadIdentity();
     gPipeline.enableLightsPreview();
@@ -2957,7 +3117,6 @@ BOOL LLModelPreview::render()
     {
         genBuffers(-1, skin_weight);
         //genBuffers(3);
-        //genLODs();
     }
 
     if (!mModel[mPreviewLOD].empty())
@@ -2985,6 +3144,11 @@ BOOL LLModelPreview::render()
             genBuffers(mPreviewLOD, skin_weight);
         }
 
+        if (physics && mVertexBuffer[LLModel::LOD_PHYSICS].empty())
+        {
+            genBuffers(LLModel::LOD_PHYSICS, false);
+        }
+
         if (!skin_weight)
         {
             for (LLMeshUploadThread::instance_list::iterator iter = mUploadData.begin(); iter != mUploadData.end(); ++iter)
@@ -3046,6 +3210,7 @@ BOOL LLModelPreview::render()
                         glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
                         glLineWidth(1.f);
                     }
+                    buffer->flush();
                 }
                 gGL.popMatrix();
             }
@@ -3126,7 +3291,7 @@ BOOL LLModelPreview::render()
                                         }
 
                                         gGL.diffuseColor4ubv(hull_colors[i].mV);
-                                        LLVertexBuffer::drawArrays(LLRender::TRIANGLES, physics.mMesh[i].mPositions, physics.mMesh[i].mNormals);
+                                        LLVertexBuffer::drawArrays(LLRender::TRIANGLES, physics.mMesh[i].mPositions);
 
                                         if (explode > 0.f)
                                         {
@@ -3139,11 +3304,6 @@ BOOL LLModelPreview::render()
 
                         if (render_mesh)
                         {
-                            if (mVertexBuffer[LLModel::LOD_PHYSICS].empty())
-                            {
-                                genBuffers(LLModel::LOD_PHYSICS, false);
-                            }
-
                             U32 num_models = mVertexBuffer[LLModel::LOD_PHYSICS][model].size();
                             if (pass > 0){
                                 for (U32 i = 0; i < num_models; ++i)
@@ -3163,6 +3323,8 @@ BOOL LLModelPreview::render()
 
                                     glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
                                     glLineWidth(1.f);
+
+                                    buffer->flush();
                                 }
                             }
                         }
@@ -3207,11 +3369,6 @@ BOOL LLModelPreview::render()
 
                                 if (physics.mHull.empty())
                                 {
-                                    if (mVertexBuffer[LLModel::LOD_PHYSICS].empty())
-                                    {
-                                        genBuffers(LLModel::LOD_PHYSICS, false);
-                                    }
-
                                     U32 num_models = mVertexBuffer[LLModel::LOD_PHYSICS][model].size();
                                     for (U32 v = 0; v < num_models; ++v)
                                     {
@@ -3238,6 +3395,8 @@ BOOL LLModelPreview::render()
                                                 buffer->draw(LLRender::POINTS, 3, i);
                                             }
                                         }
+
+                                        buffer->flush();
                                     }
                                 }
                             }
@@ -3298,7 +3457,7 @@ BOOL LLModelPreview::render()
                                 LLJoint *joint = getPreviewAvatar()->getJoint(skin->mJointNums[j]);
                                 if (joint)
                                 {
-                                    const LLVector3& jointPos = skin->mAlternateBindMatrix[j].getTranslation();
+                                    const LLVector3& jointPos = LLVector3(skin->mAlternateBindMatrix[j].getTranslation());
                                     if (joint->aboveJointPosThreshold(jointPos))
                                     {
                                         bool override_changed;
@@ -3340,11 +3499,10 @@ BOOL LLModelPreview::render()
                             //build matrix palette
 
                             LLMatrix4a mat[LL_MAX_JOINTS_PER_MESH_OBJECT];
-                            LLSkinningUtil::initSkinningMatrixPalette((LLMatrix4*)mat, joint_count,
+                            LLSkinningUtil::initSkinningMatrixPalette(mat, joint_count,
                                 skin, getPreviewAvatar());
 
-                            LLMatrix4a bind_shape_matrix;
-                            bind_shape_matrix.loadu(skin->mBindShapeMatrix);
+                            const LLMatrix4a& bind_shape_matrix = skin->mBindShapeMatrix;
                             U32 max_joints = LLSkinningUtil::getMaxJointCount();
                             for (U32 j = 0; j < buffer->getNumVerts(); ++j)
                             {
@@ -3427,10 +3585,7 @@ BOOL LLModelPreview::render()
         }
     }
 
-    if (use_shaders)
-    {
-        gObjectPreviewProgram.unbind();
-    }
+    gObjectPreviewProgram.unbind();
 
     gGL.popMatrix();
 
@@ -3544,7 +3699,7 @@ bool LLModelPreview::lodQueryCallback()
         {
             S32 lod = preview->mLodsQuery.back();
             preview->mLodsQuery.pop_back();
-            preview->genLODs(lod);
+            preview->genMeshOptimizerLODs(lod, MESH_OPTIMIZER_AUTO);
 
             if (preview->mLookUpLodFiles && (lod == LLModel::LOD_HIGH))
             {
@@ -3559,11 +3714,11 @@ bool LLModelPreview::lodQueryCallback()
     return true;
 }
 
-void LLModelPreview::onLODParamCommit(S32 lod, bool enforce_tri_limit)
+void LLModelPreview::onLODMeshOptimizerParamCommit(S32 requested_lod, bool enforce_tri_limit, S32 mode)
 {
     if (!mLODFrozen)
     {
-        genLODs(lod, 3, enforce_tri_limit);
+        genMeshOptimizerLODs(requested_lod, mode, 3, enforce_tri_limit);
         refresh();
     }
 }