From b08340f1831005ae227577899a64408cc939a12d Mon Sep 17 00:00:00 2001
From: Andrey Kleshchev <andreykproductengine@lindenlab.com>
Date: Fri, 10 Jun 2022 19:43:14 +0300
Subject: SL-17475 Remap models before simplification
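
Generate a vertex remap with the meshoptimizer library before running
simplification, so that vertices sharing position, normal and texture
coordinates are welded first.  LLMeshOptimizer gains generateRemapMulti()
plus index/position/normal/UV remap helpers; LLVolumeFace::remap() and
LLModel::remapVolumeFaces() apply them per face when loading DAE models,
and genMeshOptimizerPerModel() remaps the combined buffers before calling
simplifyU32().  generateShadowIndexBuffer() is split into multi-stream
U16/U32 variants, vertex cache optimization helpers are added, and the
MESH_OPTIMIZER_COMBINE LOD mode is renamed to MESH_OPTIMIZER_PRECISE.

A minimal call-order sketch (not part of the patch): remap_then_simplify
and its arguments are illustrative only, and the simplify() call mirrors
the existing call sites touched below.

    #include "llvolume.h"          // LLVolumeFace, LLVector4a
    #include "llmeshoptimizer.h"   // LLMeshOptimizer::simplify
    #include <vector>

    // Illustrative sketch: weld duplicate vertices, then build a lower LOD.
    static S32 remap_then_simplify(LLVolumeFace& face, S32 target_indices, F32 error_threshold)
    {
        face.remap(); // new: merges vertices identical in position/normal/UV

        std::vector<U16> lod_indices(face.mNumIndices);
        F32 result_error = 0.f; // how far from the original, 1 == 100%
        S32 new_indices = LLMeshOptimizer::simplify(
            lod_indices.data(),
            face.mIndices,
            face.mNumIndices,
            face.mPositions,
            face.mNumVertices,
            sizeof(LLVector4a),   // position stride
            target_indices,
            error_threshold,
            false,                // sloppy
            &result_error);

        return new_indices;       // simplified index count; < 3 means failure
    }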

---
 indra/llmath/CMakeLists.txt               |   3 +
 indra/llmath/llvolume.cpp                 |  42 ++++++++
 indra/llmath/llvolume.h                   |   4 +
 indra/llmeshoptimizer/llmeshoptimizer.cpp | 170 ++++++++++++++++++++++++++++--
 indra/llmeshoptimizer/llmeshoptimizer.h   |  70 +++++++++++-
 indra/llprimitive/lldaeloader.cpp         |   2 +-
 indra/llprimitive/llmodel.cpp             |   8 ++
 indra/llprimitive/llmodel.h               |   1 +
 indra/newview/llfloatermodelpreview.cpp   |   4 +-
 indra/newview/llmodelpreview.cpp          |  81 +++++++++-----
 indra/newview/llmodelpreview.h            |   2 +-
 11 files changed, 343 insertions(+), 44 deletions(-)

diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt
index 552e820127..4617309606 100644
--- a/indra/llmath/CMakeLists.txt
+++ b/indra/llmath/CMakeLists.txt
@@ -4,12 +4,14 @@ project(llmath)
 
 include(00-Common)
 include(LLCommon)
+include(LLMeshOptimizer)
 include(bugsplat)
 include(Boost)
 
 include_directories(
     ${LLCOMMON_INCLUDE_DIRS}
     ${LLCOMMON_SYSTEM_INCLUDE_DIRS}
+    ${LLMESHOPTIMIZER_INCLUDE_DIRS}
     )
 
 set(llmath_SOURCE_FILES
@@ -109,6 +111,7 @@ add_library (llmath ${llmath_SOURCE_FILES})
 
 target_link_libraries(llmath
     ${LLCOMMON_LIBRARIES}
+    ${LLMESHOPTIMIZER_LIBRARIES}
     )
 
 # Add tests
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 5099920f32..23f372f6e3 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -49,6 +49,7 @@
 #include "llsdserialize.h"
 #include "llvector4a.h"
 #include "llmatrix4a.h"
+#include "llmeshoptimizer.h"
 #include "lltimer.h"
 
 #define DEBUG_SILHOUETTE_BINORMALS 0
@@ -4952,6 +4953,47 @@ bool LLVolumeFace::VertexMapData::ComparePosition::operator()(const LLVector3& a
 	return a.mV[2] < b.mV[2];
 }
 
+void LLVolumeFace::remap()
+{
+    // generate a remap buffer
+    std::vector<unsigned int> remap(mNumIndices);
+    S32 remap_vertices_count = LLMeshOptimizer::generateRemapMulti(&remap[0],
+        NULL,
+        mNumIndices,
+        mPositions,
+        mNormals,
+        mTexCoords,
+        mNumVertices);
+
+    // Allocate new buffers
+    U16* remap_indices = (U16*)ll_aligned_malloc_16(mNumIndices * sizeof(U16));
+
+    S32 tc_bytes_size = ((remap_vertices_count * sizeof(LLVector2)) + 0xF) & ~0xF;
+    LLVector4a* remap_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * remap_vertices_count + tc_bytes_size);
+    LLVector4a* remap_normals = remap_positions + remap_vertices_count;
+    LLVector2* remap_tex_coords = (LLVector2*)(remap_normals + remap_vertices_count);
+
+    // fill the buffers
+    LLMeshOptimizer::remapIndexBufferU16(remap_indices, mIndices, mNumIndices, &remap[0]);
+    LLMeshOptimizer::remapPositionsBuffer(remap_positions, mPositions, mNumVertices, &remap[0]);
+    LLMeshOptimizer::remapNormalsBuffer(remap_normals, mNormals, mNumVertices, &remap[0]);
+    LLMeshOptimizer::remapUVBuffer(remap_tex_coords, mTexCoords, mNumVertices, &remap[0]);
+
+    // free unused buffers
+    ll_aligned_free_16(mIndices);
+    ll_aligned_free<64>(mPositions);
+    ll_aligned_free_16(mTangents);
+
+    mTangents = NULL;
+
+    mIndices = remap_indices;
+    mPositions = remap_positions;
+    mNormals = remap_normals;
+    mTexCoords = remap_tex_coords;
+    mNumVertices = remap_vertices_count;
+    mNumAllocatedVertices = remap_vertices_count;
+}
+
 void LLVolumeFace::optimize(F32 angle_cutoff)
 {
 	LLVolumeFace new_face;
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index c0b224b1ff..9697952f5b 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -902,6 +902,10 @@ public:
 		typedef std::map<LLVector3, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap;
 	};
 
+    // Removes duplicate vertices, taking positions,
+    // normals and texture coordinates into account.
+    void remap();
+
 	void optimize(F32 angle_cutoff = 2.f);
 	bool cacheOptimize();
 
diff --git a/indra/llmeshoptimizer/llmeshoptimizer.cpp b/indra/llmeshoptimizer/llmeshoptimizer.cpp
index a879389c5a..8570887ddd 100644
--- a/indra/llmeshoptimizer/llmeshoptimizer.cpp
+++ b/indra/llmeshoptimizer/llmeshoptimizer.cpp
@@ -28,6 +28,9 @@
 
 #include "meshoptimizer.h"
 
+#include "llmath.h"
+#include "v2math.h"
+
 LLMeshOptimizer::LLMeshOptimizer()
 {
     // Todo: Looks like for memory management, we can add allocator and deallocator callbacks
@@ -40,24 +43,173 @@ LLMeshOptimizer::~LLMeshOptimizer()
 }
 
 //static
-void LLMeshOptimizer::generateShadowIndexBuffer(U16 *destination,
-    const U16 *indices,
+void LLMeshOptimizer::generateShadowIndexBufferU32(U32 *destination,
+    const U32 *indices,
     U64 index_count,
-    const LLVector4a *vertex_positions,
-    U64 vertex_count,
-    U64 vertex_positions_stride
+    const LLVector4a * vertex_positions,
+    const LLVector4a * normals,
+    const LLVector2 * text_coords,
+    U64 vertex_count
 )
 {
-    meshopt_generateShadowIndexBuffer<unsigned short>(destination,
+    meshopt_Stream streams[3];
+
+    S32 index = 0;
+    if (vertex_positions)
+    {
+        streams[index].data = (const float*)vertex_positions;
+        // Despite being LLVector4a, only x, y and z are in use
+        streams[index].size = sizeof(F32) * 3;
+        streams[index].stride = sizeof(F32) * 4;
+        index++;
+    }
+    if (normals)
+    {
+        streams[index].data = (const float*)normals;
+        streams[index].size = sizeof(F32) * 3;
+        streams[index].stride = sizeof(F32) * 4;
+        index++;
+    }
+    if (text_coords)
+    {
+        streams[index].data = (const float*)text_coords;
+        streams[index].size = sizeof(F32) * 2;
+        streams[index].stride = sizeof(F32) * 2;
+        index++;
+    }
+
+    if (index == 0)
+    {
+        // no valid data streams, nothing to generate from
+        return;
+    }
+
+    meshopt_generateShadowIndexBufferMulti<unsigned int>(destination,
         indices,
         index_count,
-        (const float*)vertex_positions, // verify that it is correct to convert to float
         vertex_count,
-        sizeof(LLVector4a),
-        vertex_positions_stride
+        streams,
+        index
         );
 }
 
+//static
+void LLMeshOptimizer::generateShadowIndexBufferU16(U16 *destination,
+    const U16 *indices,
+    U64 index_count,
+    const LLVector4a * vertex_positions,
+    const LLVector4a * normals,
+    const LLVector2 * text_coords,
+    U64 vertex_count
+)
+{
+    meshopt_Stream streams[3];
+
+    S32 index = 0;
+    if (vertex_positions)
+    {
+        streams[index].data = (const float*)vertex_positions;
+        streams[index].size = sizeof(F32) * 3;
+        streams[index].stride = sizeof(F32) * 4;
+        index++;
+    }
+    if (normals)
+    {
+        streams[index].data = (const float*)normals;
+        streams[index].size = sizeof(F32) * 3;
+        streams[index].stride = sizeof(F32) * 4;
+        index++;
+    }
+    if (text_coords)
+    {
+        streams[index].data = (const float*)text_coords;
+        streams[index].size = sizeof(F32) * 2;
+        streams[index].stride = sizeof(F32) * 2;
+        index++;
+    }
+
+    if (index == 0)
+    {
+        // no valid data streams, nothing to generate from
+        return;
+    }
+
+    meshopt_generateShadowIndexBufferMulti<unsigned short>(destination,
+        indices,
+        index_count,
+        vertex_count,
+        streams,
+        index);
+}
+
+void LLMeshOptimizer::optimizeVertexCacheU32(U32 * destination, const U32 * indices, U64 index_count, U64 vertex_count)
+{
+    meshopt_optimizeVertexCache<unsigned int>(destination, indices, index_count, vertex_count);
+}
+
+void LLMeshOptimizer::optimizeVertexCacheU16(U16 * destination, const U16 * indices, U64 index_count, U64 vertex_count)
+{
+    meshopt_optimizeVertexCache<unsigned short>(destination, indices, index_count, vertex_count);
+}
+
+size_t LLMeshOptimizer::generateRemapMulti(
+    unsigned int* remap,
+    const U32 * indices,
+    U64 index_count,
+    const LLVector4a * vertex_positions,
+    const LLVector4a * normals,
+    const LLVector2 * text_coords,
+    U64 vertex_count)
+{
+    meshopt_Stream streams[] = {
+       {(const float*)vertex_positions, sizeof(F32) * 3, sizeof(F32) * 4},
+       {(const float*)normals, sizeof(F32) * 3, sizeof(F32) * 4},
+       {(const float*)text_coords, sizeof(F32) * 2, sizeof(F32) * 2},
+    };
+
+    return meshopt_generateVertexRemapMulti(&remap[0], indices, index_count, vertex_count, streams, sizeof(streams) / sizeof(streams[0]));
+}
+
+void LLMeshOptimizer::remapIndexBufferU32(U32 * destination_indices,
+    const U32 * indices,
+    U64 index_count,
+    const unsigned int* remap)
+{
+    meshopt_remapIndexBuffer<unsigned int>(destination_indices, indices, index_count, remap);
+}
+
+void LLMeshOptimizer::remapIndexBufferU16(U16 * destination_indices,
+    const U16 * indices,
+    U64 index_count,
+    const unsigned int* remap)
+{
+    meshopt_remapIndexBuffer<unsigned short>(destination_indices, indices, index_count, remap);
+}
+
+void LLMeshOptimizer::remapPositionsBuffer(LLVector4a * destination_vertices,
+    const LLVector4a * vertex_positions,
+    U64 vertex_count,
+    const unsigned int* remap)
+{
+    meshopt_remapVertexBuffer((float*)destination_vertices, (const float*)vertex_positions, vertex_count, sizeof(LLVector4a), remap);
+}
+
+void LLMeshOptimizer::remapNormalsBuffer(LLVector4a * destination_normals,
+    const LLVector4a * normals,
+    U64 normals_count,
+    const unsigned int* remap)
+{
+    meshopt_remapVertexBuffer((float*)destination_normals, (const float*)normals, normals_count, sizeof(LLVector4a), remap);
+}
+
+void LLMeshOptimizer::remapUVBuffer(LLVector2 * destination_uvs,
+    const LLVector2 * uv_positions,
+    U64 uv_count,
+    const unsigned int* remap)
+{
+    meshopt_remapVertexBuffer((float*)destination_uvs, (const float*)uv_positions, uv_count, sizeof(LLVector2), remap);
+}
+
 //static
 U64 LLMeshOptimizer::simplifyU32(U32 *destination,
     const U32 *indices,
diff --git a/indra/llmeshoptimizer/llmeshoptimizer.h b/indra/llmeshoptimizer/llmeshoptimizer.h
index e8dd16dae9..c76f8a5a89 100644
--- a/indra/llmeshoptimizer/llmeshoptimizer.h
+++ b/indra/llmeshoptimizer/llmeshoptimizer.h
@@ -28,7 +28,8 @@
 
 #include "linden_common.h"
 
-#include "llmath.h"
+class LLVector4a;
+class LLVector2;
 
 class LLMeshOptimizer
 {
@@ -36,13 +37,74 @@ public:
     LLMeshOptimizer();
     ~LLMeshOptimizer();
 
-    static void generateShadowIndexBuffer(
+    static void generateShadowIndexBufferU32(
+        U32 *destination,
+        const U32 *indices,
+        U64 index_count,
+        const LLVector4a * vertex_positions,
+        const LLVector4a * normals,
+        const LLVector2 * text_coords,
+        U64 vertex_count);
+
+    static void generateShadowIndexBufferU16(
         U16 *destination,
         const U16 *indices,
         U64 index_count,
-        const LLVector4a *vertex_positions,
+        const LLVector4a * vertex_positions,
+        const LLVector4a * normals,
+        const LLVector2 * text_coords,
+        U64 vertex_count);
+
+    static void optimizeVertexCacheU32(
+        U32 *destination,
+        const U32 *indices,
+        U64 index_count,
+        U64 vertex_count);
+
+    static void optimizeVertexCacheU16(
+        U16 *destination,
+        const U16 *indices,
+        U64 index_count,
+        U64 vertex_count);
+
+    // Remap functions
+
+    static size_t generateRemapMulti(
+        unsigned int* remap,
+        const U32 * indices,
+        U64 index_count,
+        const LLVector4a * vertex_positions,
+        const LLVector4a * normals,
+        const LLVector2 * text_coords,
+        U64 vertex_count);
+
+    static void remapIndexBufferU32(U32 * destination_indices,
+        const U32 * indices,
+        U64 index_count,
+        const unsigned int* remap);
+
+    static void remapIndexBufferU16(U16 * destination_indices,
+        const U16 * indices,
+        U64 index_count,
+        const unsigned int* remap);
+
+
+    static void remapPositionsBuffer(LLVector4a * destination_vertices,
+        const LLVector4a * vertex_positions,
         U64 vertex_count,
-        U64 vertex_positions_stride);
+        const unsigned int* remap);
+
+    static void remapNormalsBuffer(LLVector4a * destination_normals,
+        const LLVector4a * normals,
+        U64 normals_count,
+        const unsigned int* remap);
+
+    static void remapUVBuffer(LLVector2 * destination_uvs,
+        const LLVector2 * uv_positions,
+        U64 uv_count,
+        const unsigned int* remap);
+
+    // Simplification
 
     // returns amount of indices in destiantion
     // sloppy engages a variant of a mechanizm that does not respect topology as much
diff --git a/indra/llprimitive/lldaeloader.cpp b/indra/llprimitive/lldaeloader.cpp
index e89690438e..68654486a4 100644
--- a/indra/llprimitive/lldaeloader.cpp
+++ b/indra/llprimitive/lldaeloader.cpp
@@ -2577,7 +2577,7 @@ bool LLDAELoader::loadModelsFromDomMesh(domMesh* mesh, std::vector<LLModel*>& mo
 
 		if (!mNoOptimize)
 		{
-			ret->optimizeVolumeFaces();
+			ret->remapVolumeFaces();
 		}
 
 		volume_faces = remainder.size();
diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp
index 204ff63712..a2716ecfab 100644
--- a/indra/llprimitive/llmodel.cpp
+++ b/indra/llprimitive/llmodel.cpp
@@ -107,6 +107,14 @@ void LLModel::offsetMesh( const LLVector3& pivotPoint )
 	}
 }
 
+void LLModel::remapVolumeFaces()
+{
+    for (U32 i = 0; i < getNumVolumeFaces(); ++i)
+    {
+        mVolumeFaces[i].remap();
+    }
+}
+
 void LLModel::optimizeVolumeFaces()
 {
 	for (U32 i = 0; i < getNumVolumeFaces(); ++i)
diff --git a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h
index 3881b1338c..354ceb26b7 100644
--- a/indra/llprimitive/llmodel.h
+++ b/indra/llprimitive/llmodel.h
@@ -184,6 +184,7 @@ public:
 	void sortVolumeFacesByMaterialName();
 	void normalizeVolumeFaces();
 	void trimVolumeFacesToSize(U32 new_count = LL_SCULPT_MESH_MAX_FACES, LLVolume::face_list_t* remainder = NULL);
+    void remapVolumeFaces();
 	void optimizeVolumeFaces();
 	void offsetMesh( const LLVector3& pivotPoint );
 	void getNormalizedScaleTranslation(LLVector3& scale_out, LLVector3& translation_out);
diff --git a/indra/newview/llfloatermodelpreview.cpp b/indra/newview/llfloatermodelpreview.cpp
index fe5120376c..58fbdba315 100644
--- a/indra/newview/llfloatermodelpreview.cpp
+++ b/indra/newview/llfloatermodelpreview.cpp
@@ -741,7 +741,7 @@ void LLFloaterModelPreview::onLODParamCommit(S32 lod, bool enforce_tri_limit)
     {
     case LLModelPreview::MESH_OPTIMIZER_AUTO:
     case LLModelPreview::MESH_OPTIMIZER_SLOPPY:
-    case LLModelPreview::MESH_OPTIMIZER_COMBINE:
+    case LLModelPreview::MESH_OPTIMIZER_PRECISE:
         mModelPreview->onLODMeshOptimizerParamCommit(lod, enforce_tri_limit, mode);
         break;
     default:
@@ -1745,7 +1745,7 @@ void LLFloaterModelPreview::onLoDSourceCommit(S32 lod)
     S32 index = lod_source_combo->getCurrentIndex();
 	if (index == LLModelPreview::MESH_OPTIMIZER_AUTO
         || index == LLModelPreview::MESH_OPTIMIZER_SLOPPY
-        || index == LLModelPreview::MESH_OPTIMIZER_COMBINE)
+        || index == LLModelPreview::MESH_OPTIMIZER_PRECISE)
 	{ //rebuild LoD to update triangle counts
 		onLODParamCommit(lod, true);
 	}
diff --git a/indra/newview/llmodelpreview.cpp b/indra/newview/llmodelpreview.cpp
index 859d987fc3..707a8b970f 100644
--- a/indra/newview/llmodelpreview.cpp
+++ b/indra/newview/llmodelpreview.cpp
@@ -1223,6 +1223,7 @@ void LLModelPreview::restoreNormals()
 // returns -1 in case of failure
 F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *target_model, F32 indices_decimator, F32 error_threshold, bool sloppy)
 {
+    // I. Weld faces together
     // Figure out buffer size
     S32 size_indices = 0;
     S32 size_vertices = 0;
@@ -1281,7 +1282,35 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
         indices_idx_shift += face.mNumVertices;
     }
 
-    // Now that we have buffers, optimize
+    // II. Remap.
+    std::vector<unsigned int> remap(size_indices);
+    S32 size_remap_vertices = LLMeshOptimizer::generateRemapMulti(&remap[0],
+        combined_indices,
+        size_indices,
+        combined_positions,
+        combined_normals,
+        combined_tex_coords,
+        size_vertices);
+
+    // Allocate new buffers
+    U32* remap_indices = (U32*)ll_aligned_malloc_32(size_indices * sizeof(U32));
+
+    S32 remap_tc_bytes_size = ((size_remap_vertices * sizeof(LLVector2)) + 0xF) & ~0xF;
+    LLVector4a* remap_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * size_remap_vertices + remap_tc_bytes_size);
+    LLVector4a* remap_normals = remap_positions + size_remap_vertices;
+    LLVector2* remap_tex_coords = (LLVector2*)(remap_normals + size_remap_vertices);
+
+    // fill the buffers
+    LLMeshOptimizer::remapIndexBufferU32(remap_indices, combined_indices, size_indices, &remap[0]);
+    LLMeshOptimizer::remapPositionsBuffer(remap_positions, combined_positions, size_vertices, &remap[0]);
+    LLMeshOptimizer::remapNormalsBuffer(remap_normals, combined_normals, size_vertices, &remap[0]);
+    LLMeshOptimizer::remapUVBuffer(remap_tex_coords, combined_tex_coords, size_vertices, &remap[0]);
+
+    // free unused buffers
+    ll_aligned_free<64>(combined_positions);
+    ll_aligned_free_32(combined_indices);
+
+    // III. Simplify
     S32 target_indices = 0;
     F32 result_error = 0; // how far from original the model is, 1 == 100%
     S32 new_indices = 0;
@@ -1294,19 +1323,19 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
     {
         target_indices = 3;
     }
+
     new_indices = LLMeshOptimizer::simplifyU32(
         output_indices,
-        combined_indices,
+        remap_indices,
         size_indices,
-        combined_positions,
-        size_vertices,
+        remap_positions,
+        size_remap_vertices,
         LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_VERTEX],
         target_indices,
         error_threshold,
         sloppy,
         &result_error);
 
-
     if (result_error < 0)
     {
         LL_WARNS() << "Negative result error from meshoptimizer for model " << target_model->mLabel
@@ -1315,24 +1344,25 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
             << " original count: " << size_indices << LL_ENDL;
     }
 
+    ll_aligned_free_32(remap_indices);
+
     if (new_indices < 3)
     {
         // Model should have at least one visible triangle
-        ll_aligned_free<64>(combined_positions);
+        ll_aligned_free<64>(remap_positions);
         ll_aligned_free_32(output_indices);
-        ll_aligned_free_32(combined_indices);
 
         return -1;
     }
 
-    // repack back into individual faces
+    // IV. Repack back into individual faces
 
-    LLVector4a* buffer_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * size_vertices + tc_bytes_size);
-    LLVector4a* buffer_normals = buffer_positions + size_vertices;
-    LLVector2* buffer_tex_coords = (LLVector2*)(buffer_normals + size_vertices);
+    LLVector4a* buffer_positions = (LLVector4a*)ll_aligned_malloc<64>(sizeof(LLVector4a) * 2 * size_remap_vertices + tc_bytes_size);
+    LLVector4a* buffer_normals = buffer_positions + size_remap_vertices;
+    LLVector2* buffer_tex_coords = (LLVector2*)(buffer_normals + size_remap_vertices);
     S32 buffer_idx_size = (size_indices * sizeof(U16) + 0xF) & ~0xF;
     U16* buffer_indices = (U16*)ll_aligned_malloc_16(buffer_idx_size);
-    S32* old_to_new_positions_map = new S32[size_vertices];
+    S32* old_to_new_positions_map = new S32[size_remap_vertices];
 
     S32 buf_positions_copied = 0;
     S32 buf_indices_copied = 0;
@@ -1350,7 +1380,7 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
         bool copy_triangle = false;
         S32 range = indices_idx_shift + face.mNumVertices;
 
-        for (S32 i = 0; i < size_vertices; i++)
+        for (S32 i = 0; i < size_remap_vertices; i++)
         {
             old_to_new_positions_map[i] = -1;
         }
@@ -1408,9 +1438,9 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
                     }
 
                     // Copy vertice, normals, tcs
-                    buffer_positions[buf_positions_copied] = combined_positions[idx];
-                    buffer_normals[buf_positions_copied] = combined_normals[idx];
-                    buffer_tex_coords[buf_positions_copied] = combined_tex_coords[idx];
+                    buffer_positions[buf_positions_copied] = remap_positions[idx];
+                    buffer_normals[buf_positions_copied] = remap_normals[idx];
+                    buffer_tex_coords[buf_positions_copied] = remap_tex_coords[idx];
 
                     old_to_new_positions_map[idx] = buf_positions_copied;
 
@@ -1465,11 +1495,10 @@ F32 LLModelPreview::genMeshOptimizerPerModel(LLModel *base_model, LLModel *targe
     }
 
     delete[]old_to_new_positions_map;
-    ll_aligned_free<64>(combined_positions);
+    ll_aligned_free<64>(remap_positions);
     ll_aligned_free<64>(buffer_positions);
     ll_aligned_free_32(output_indices);
     ll_aligned_free_16(buffer_indices);
-    ll_aligned_free_32(combined_indices);
 
     if (new_indices < 3 || valid_faces == 0)
     {
@@ -1488,10 +1517,9 @@ F32 LLModelPreview::genMeshOptimizerPerFace(LLModel *base_model, LLModel *target
     {
         return -1;
     }
-    // todo: do not allocate per each face, add one large buffer somewhere
-    // faces have limited amount of indices
+
     S32 size = (size_indices * sizeof(U16) + 0xF) & ~0xF;
-    U16* output = (U16*)ll_aligned_malloc_16(size);
+    U16* output_indices = (U16*)ll_aligned_malloc_16(size);
 
     S32 target_indices = 0;
     F32 result_error = 0; // how far from original the model is, 1 == 100%
@@ -1505,8 +1533,9 @@ F32 LLModelPreview::genMeshOptimizerPerFace(LLModel *base_model, LLModel *target
     {
         target_indices = 3;
     }
+
     new_indices = LLMeshOptimizer::simplify(
-        output,
+        output_indices,
         face.mIndices,
         size_indices,
         face.mPositions,
@@ -1517,7 +1546,6 @@ F32 LLModelPreview::genMeshOptimizerPerFace(LLModel *base_model, LLModel *target
         sloppy,
         &result_error);
 
-
     if (result_error < 0)
     {
         LL_WARNS() << "Negative result error from meshoptimizer for face " << face_idx
@@ -1534,7 +1562,6 @@ F32 LLModelPreview::genMeshOptimizerPerFace(LLModel *base_model, LLModel *target
     // Copy old values
     new_face = face;
 
-
     if (new_indices < 3)
     {
         if (!sloppy)
@@ -1563,13 +1590,13 @@ F32 LLModelPreview::genMeshOptimizerPerFace(LLModel *base_model, LLModel *target
         // Assign new values
         new_face.resizeIndices(new_indices); // will wipe out mIndices, so new_face can't substitute output
         S32 idx_size = (new_indices * sizeof(U16) + 0xF) & ~0xF;
-        LLVector4a::memcpyNonAliased16((F32*)new_face.mIndices, (F32*)output, idx_size);
+        LLVector4a::memcpyNonAliased16((F32*)new_face.mIndices, (F32*)output_indices, idx_size);
 
         // clear unused values
         new_face.optimize();
     }
 
-    ll_aligned_free_16(output);
+    ll_aligned_free_16(output_indices);
      
     if (new_indices < 3)
     {
@@ -1711,7 +1738,7 @@ void LLModelPreview::genMeshOptimizerLODs(S32 which_lod, S32 meshopt_mode, U32 d
 
             // Ideally this should run not per model,
             // but combine all submodels with origin model as well
-            if (model_meshopt_mode == MESH_OPTIMIZER_COMBINE)
+            if (model_meshopt_mode == MESH_OPTIMIZER_PRECISE)
             {
                 // Run meshoptimizer for each model/object, up to 8 faces in one model.
 
diff --git a/indra/newview/llmodelpreview.h b/indra/newview/llmodelpreview.h
index 9e32215e6a..727fe79373 100644
--- a/indra/newview/llmodelpreview.h
+++ b/indra/newview/llmodelpreview.h
@@ -125,7 +125,7 @@ public:
     {
         LOD_FROM_FILE = 0,
         MESH_OPTIMIZER_AUTO, // automatically selects method based on model or face
-        MESH_OPTIMIZER_COMBINE, // combines faces into a single model, simplifies, then splits back into faces
+        MESH_OPTIMIZER_PRECISE, // combines faces into a single model, simplifies, then splits back into faces
         MESH_OPTIMIZER_SLOPPY, // uses sloppy method, works per face
         USE_LOD_ABOVE,
     } eLoDMode;
-- 
cgit v1.2.3