From 7f021738ef2d74f78093dbe4fa37cbfa6645e05a Mon Sep 17 00:00:00 2001
From: Nicky <nicky.dasmijn@posteo.nl>
Date: Wed, 10 Apr 2024 21:02:40 +0200
Subject: Fix ASAN errors from LLVector4a::memcpyNonAliased16

Found by running with -fsanitize=thread
Suggestion to avoid accessing invalid memory:

In both cases memory is allocated but can be accessed beyond its bounds.

In LLPolyMesh it can be off by at least one (+x%2), and I am not even sure that, even in the best case, it will always be a multiple of 16.

In LLViewerJointMesh::updateFaceData the code tries to account for padding by rounding the copy size up to the next multiple of 16 bytes, but the allocation in LLPolyMeshSharedData::allocateVertexData is done without any padding. Thus the sizes need not match.

Replacing the calls with memcpy, as a quick fix to see if the error goes away, stopped the address sanitizer from complaining.

It is up for debate whether memcpy is a good replacement. LLVector4a::memcpyNonAliased16 was invented for performance. On the other hand, one could argue that nowadays every stdlib maintainer very heavily optimizes functions like memcpy and can take advantage of CPU features the old LL implementation does not take into account.

AVX comes to mind. In any case, I did not measure any of this.
---
 indra/newview/llviewerjointmesh.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'indra/newview/llviewerjointmesh.cpp')

diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 5d46c695b7..b5ccb3591f 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -405,13 +405,19 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 		
 				F32* tc = (F32*) tex_coordsp.get();
 				F32* vw = (F32*) vertex_weightsp.get();
-				F32* cw = (F32*) clothing_weightsp.get();	
+				F32* cw = (F32*) clothing_weightsp.get();
 
-				S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF;
-				LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size);
-				S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF;	
-				LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size);	
-				LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));	
+				//S32 tc_size = (num_verts*2*sizeof(F32)+0xF) & ~0xF;
+				//LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), tc_size);
+				//S32 vw_size = (num_verts*sizeof(F32)+0xF) & ~0xF;
+				//LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), vw_size);
+
+                // Both allocated in LLPolyMeshSharedData::allocateVertexData(unsigned int)
+
+                memcpy(tc, mMesh->getTexCoords(), num_verts*2*sizeof(F32) );
+                memcpy(vw, mMesh->getWeights(), num_verts*sizeof(F32) );
+
+                LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));
 			}
 
 			const U32 idx_count = mMesh->getNumFaces()*3;
-- 
cgit v1.2.3