75 files changed, 3684 insertions, 934 deletions
diff --git a/indra/llfilesystem/lldir.cpp b/indra/llfilesystem/lldir.cpp
index a18dc0a4f1..99d4850610 100644
--- a/indra/llfilesystem/lldir.cpp
+++ b/indra/llfilesystem/lldir.cpp
@@ -721,6 +721,8 @@ std::vector<std::string> LLDir::findSkinnedFilenames(const std::string& subdir,
                                                      const std::string& filename,
                                                      ESkinConstraint constraint) const
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
+
     // Recognize subdirs that have no localization.
     static const std::set<std::string> sUnlocalized = list_of
         ("")                        // top-level directory not localized
diff --git a/indra/llimage/llimagej2c.cpp b/indra/llimage/llimagej2c.cpp
index 4ec95bbcc3..753e5d24df 100644
--- a/indra/llimage/llimagej2c.cpp
+++ b/indra/llimage/llimagej2c.cpp
@@ -276,16 +276,20 @@ S32 LLImageJ2C::calcDataSizeJ2C(S32 w, S32 h, S32 comp, S32 discard_level, F32 r
     // Estimate the number of layers. This is consistent with what's done for j2c encoding in LLImageJ2CKDU::encodeImpl().
     constexpr S32 precision = 8; // assumed bitrate per component channel, might change in future for HDR support
     constexpr S32 max_components = 4; // assumed the file has four components; three color and alpha
-    S32 nb_layers = 1;
-    const S32 surface = w*h;
-    S32 s = 64*64;
-    S32 totalbytes = (S32)(s * max_components * precision * rate); // first level computed before loop
-    while (surface > s)
+    // Use MAX_IMAGE_SIZE_DEFAULT (currently 2048) if either dimension is unknown (zero)
+    S32 width  = (w > 0) ? w : 2048;
+    S32 height = (h > 0) ? h : 2048;
+    S32 max_dimension = llmax(width, height); // Find largest dimension
+    S32 block_area = MAX_BLOCK_SIZE * MAX_BLOCK_SIZE; // Calculated initial block area from established max block size (currently 64)
+    block_area *= (max_dimension / MAX_BLOCK_SIZE / max_components); // Adjust initial block area by ratio of largest dimension to block size per component
+    S32 totalbytes = (S32) (block_area * max_components * precision); // First block layer computed before loop without compression rate
+    S32 block_layers = 1; // Start at layer 1 since first block layer is computed outside loop
+    while (block_layers < 6) // Walk five layers for the five discards in JPEG2000
     {
-        if (nb_layers <= (5 - discard_level))
-            totalbytes += (S32)(s * max_components * precision * rate);
-        nb_layers++;
-        s *= 4;
+        if (block_layers <= (5 - discard_level))  // Walk backwards from discard 5 to required discard layer.
+            totalbytes += (S32) (block_area * max_components * precision * rate); // Add each block layer reduced by assumed compression rate
+        block_layers++; // Move to next layer
+        block_area *= 4; // Increase block area by power of four
     }
 
     totalbytes /= 8; // to bytes
diff --git a/indra/llinventory/llpermissionsflags.h b/indra/llinventory/llpermissionsflags.h
index aaf1fd3afb..0ab4de0ab2 100644
--- a/indra/llinventory/llpermissionsflags.h
+++ b/indra/llinventory/llpermissionsflags.h
@@ -37,48 +37,48 @@ typedef U32 PermissionBit;
 // Do you have permission to transfer ownership of the object or
 // item. Fair use rules dictate that if you cannot copy, you can
 // always transfer.
-const PermissionBit PERM_TRANSFER           = (1 << 13); // 0x00002000
+constexpr PermissionBit PERM_TRANSFER           = (1 << 13); // 0x00002000
 
 // objects, scale or change textures
 // parcels, allow building on it
-const PermissionBit PERM_MODIFY             = (1 << 14); // 0x00004000
+constexpr PermissionBit PERM_MODIFY             = (1 << 14); // 0x00004000
 
 // objects, allow copy
-const PermissionBit PERM_COPY               = (1 << 15); // 0x00008000
+constexpr PermissionBit PERM_COPY               = (1 << 15); // 0x00008000
 
 // parcels, allow entry, deprecated
-//const PermissionBit PERM_ENTER            = (1 << 16); // 0x00010000
+//constexpr PermissionBit PERM_ENTER            = (1 << 16); // 0x00010000
 
 // parcels, allow terraform, deprecated
-//const PermissionBit PERM_TERRAFORM        = (1 << 17); // 0x00020000
+//constexpr PermissionBit PERM_TERRAFORM        = (1 << 17); // 0x00020000
 
 // NOTA BENE: This flag is NO LONGER USED!!! However, it is possible that some
 // objects in the universe have it set so DON"T USE IT going forward.
-//const PermissionBit PERM_OWNER_DEBIT      = (1 << 18); // 0x00040000
+//constexpr PermissionBit PERM_OWNER_DEBIT      = (1 << 18); // 0x00040000
 
 // objects, can grab/translate/rotate
-const PermissionBit PERM_MOVE               = (1 << 19); // 0x00080000
+constexpr PermissionBit PERM_MOVE               = (1 << 19); // 0x00080000
 
 // parcels, avatars take damage, deprecated
 //const PermissionBit   PERM_DAMAGE         = (1 << 20); // 0x00100000
 
 // don't use bit 31 -- printf/scanf with "%x" assume signed numbers
-const PermissionBit PERM_RESERVED           = ((U32)1) << 31;
+constexpr PermissionBit PERM_RESERVED           = ((U32)1) << 31;
 
-const PermissionMask PERM_NONE              = 0x00000000;
-const PermissionMask PERM_ALL               = 0x7FFFFFFF;
-//const PermissionMask PERM_ALL_PARCEL      = PERM_MODIFY | PERM_ENTER | PERM_TERRAFORM | PERM_DAMAGE;
-const PermissionMask PERM_ITEM_UNRESTRICTED =  PERM_MODIFY | PERM_COPY | PERM_TRANSFER;
+constexpr PermissionMask PERM_NONE              = 0x00000000;
+constexpr PermissionMask PERM_ALL               = 0x7FFFFFFF;
+//constexpr PermissionMask PERM_ALL_PARCEL      = PERM_MODIFY | PERM_ENTER | PERM_TERRAFORM | PERM_DAMAGE;
+constexpr PermissionMask PERM_ITEM_UNRESTRICTED =  PERM_MODIFY | PERM_COPY | PERM_TRANSFER;
 
 
 // Useful stuff for transmission.
 // Which permissions field are we trying to change?
-const U8 PERM_BASE      = 0x01;
+constexpr U8 PERM_BASE      = 0x01;
 // TODO: Add another PERM_OWNER operation type for allowOperationBy  DK 04/03/06
-const U8 PERM_OWNER     = 0x02;
-const U8 PERM_GROUP     = 0x04;
-const U8 PERM_EVERYONE  = 0x08;
-const U8 PERM_NEXT_OWNER = 0x10;
+constexpr U8 PERM_OWNER     = 0x02;
+constexpr U8 PERM_GROUP     = 0x04;
+constexpr U8 PERM_EVERYONE  = 0x08;
+constexpr U8 PERM_NEXT_OWNER = 0x10;
 
 // This is just a quickie debugging key
 // no modify: PERM_ALL & ~PERM_MODIFY                  = 0x7fffbfff
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 56ac22ca18..700e61467b 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -2710,7 +2710,7 @@ bool LLVolume::unpackVolumeFacesInternal(const LLSD& mdl)
 }
 
 
-bool LLVolume::isMeshAssetLoaded()
+bool LLVolume::isMeshAssetLoaded() const
 {
     return mIsMeshAssetLoaded;
 }
@@ -2733,7 +2733,7 @@ void LLVolume::setMeshAssetUnavaliable(bool unavaliable)
     }
 }
 
-bool LLVolume::isMeshAssetUnavaliable()
+bool LLVolume::isMeshAssetUnavaliable() const
 {
     return mIsMeshAssetUnavaliable;
 }
@@ -3730,6 +3730,138 @@ S32 LLVolume::getNumTriangles(S32* vcount) const
     return triangle_count;
 }
 
+// Builds the silhouette edge map for this face into 'edge', one entry per index.
+// Each entry is either -1 (no neighbor across that edge) or a value that
+// LLVolume::generateSilhouetteVertices uses to index its per-triangle facing table.
+// 'edge' is left empty when no map is produced: the mesh asset is loaded, the
+// face is a cap that does not match the uncut cube cap test below, or
+// mNumIndices disagrees with the grid the map is derived from.
+void LLVolumeFace::generateSilhouetteEdge(const LLVolume* volume, std::vector<S32>& edge) const
+{
+    llassert(edge.empty()); // edge is supposed to be a scratch array
+
+    if (volume->isMeshAssetLoaded()) { return; }
+
+    if (mTypeMask & CAP_MASK)
+    {
+        // Logic copied from LLVolumeFace::createCap - indicates a face created via
+        // createUnCutCubeCap.
+        const auto& path_params = volume->getParams().getPathParams();
+        const bool uncut_cube_cap =
+            !(mTypeMask & (HOLLOW_MASK | OPEN_MASK)) &&
+            path_params.getBegin() == 0.0f &&
+            path_params.getEnd() == 1.0f &&
+            volume->getParams().getProfileParams().getCurveType() == LL_PCODE_PROFILE_SQUARE &&
+            path_params.getCurveType() == LL_PCODE_PATH_LINE;
+        if (!uncut_cube_cap)
+        {
+            return;
+        }
+
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME("llvfgse - CAP_MASK");
+
+        const LLAlignedArray<LLVector4a,64>& profile = volume->getProfile().mProfile;
+        const S32 grid_size = (profile.size()-1)/4;
+        const S32 expected_size = 6*grid_size*grid_size;
+        llassert(mNumIndices == expected_size);
+        if (mNumIndices != expected_size)
+        {
+            // The loop below writes expected_size entries; bail out instead of
+            // overrunning 'edge' in builds where llassert is compiled out.
+            return;
+        }
+        edge.resize(mNumIndices);
+
+        const bool top_cap = (mTypeMask & TOP_MASK) != 0;
+        S32 cur_edge = 0;
+        for (S32 gx = 0; gx < grid_size; gx++)
+        {
+            for (S32 gy = 0; gy < grid_size; gy++)
+            {
+                const S32 edge_value = grid_size * 2 * gy + gx * 2;
+                const bool has_low_x  = gx > 0;
+                const bool has_high_x = gx < grid_size - 1;
+                const bool has_low_y  = gy > 0;
+                const bool has_high_y = gy < grid_size - 1;
+
+                // Both caps fill the same six slots; the bottom cap runs the four
+                // border tests in the reverse order of the top cap.
+                const bool interior[6] =
+                {
+                    top_cap ? has_low_x  : has_low_y,
+                    top_cap ? has_high_y : has_high_x,
+                    true,
+                    top_cap ? has_high_x : has_high_y,
+                    top_cap ? has_low_y  : has_low_x,
+                    true
+                };
+                for (const bool inside : interior)
+                {
+                    // -1 marks a grid border edge so that it gets highlighted
+                    edge[cur_edge++] = inside ? edge_value : -1;
+                }
+            }
+        }
+    }
+    else if (mTypeMask & (END_MASK | SIDE_MASK))
+    {
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME("llvfgse - END_MASK or SIDE_MASK");
+
+        const S32 row_stride = (mNumS-1)*2; // two map values per quad, (mNumS-1) quads per step in T
+        const S32 expected_size = 6*(mNumS-1)*(mNumT-1);
+        llassert(mNumIndices == expected_size);
+        if (mNumIndices != expected_size)
+        {
+            // Same guard as for caps: never write past the end of 'edge'.
+            return;
+        }
+        edge.resize(mNumIndices);
+
+        // A quad on the border either wraps to the opposite border or has no neighbor.
+        const bool flat_face = mTypeMask & FLAT_MASK;
+        const bool wrap_t = !(mNumT <= 3 || volume->getPath().isOpen());
+        const bool wrap_s = !(flat_face || volume->getProfile().isOpen());
+
+        S32 cur_edge = 0;
+        for (S32 t = 0; t < (mNumT-1); t++)
+        {
+            for (S32 s = 0; s < (mNumS-1); s++)
+            {
+                const S32 quad = row_stride*t + s*2;
+
+                // bottom left/top right neighbor face
+                edge[cur_edge++] = quad + 1;
+
+                // top right/top left neighbor face, else wrap on T, else no neighbor
+                edge[cur_edge++] = (t < mNumT-2) ? quad + row_stride + 1
+                                 : wrap_t        ? s*2 + 1
+                                                 : -1;
+
+                // top left/bottom left neighbor face, else wrap on S, else no neighbor
+                edge[cur_edge++] = (s > 0) ? quad - 1
+                                 : wrap_s  ? row_stride*t + (mNumS-2)*2 + 1
+                                           : -1;
+
+                // bottom left/bottom right neighbor face, else wrap on T, else no neighbor
+                edge[cur_edge++] = (t > 0) ? quad - row_stride
+                                 : wrap_t  ? row_stride*(mNumT-2) + s*2
+                                           : -1;
+
+                // bottom right/top right neighbor face, else wrap on S, else no neighbor
+                edge[cur_edge++] = (s < mNumS-2) ? quad + 2
+                                 : wrap_s        ? row_stride*t
+                                                 : -1;
+
+                // top right/bottom left neighbor face
+                edge[cur_edge++] = quad;
+            }
+        }
+    }
+    else
+    {
+        LL_ERRS() << "Unknown/uninitialized face type!" << LL_ENDL;
+    }
+}
 
 //-----------------------------------------------------------------------------
 // generateSilhouetteVertices()
@@ -3761,6 +3962,13 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
     }
 
     S32 cur_index = 0;
+    // Scratch array for per-face silhouette edge information. This also has a
+    // lot of dev-only debug information that we might not care about anymore.
+    // (see DEBUG_SILHOUETTE_EDGE_MAP)
+    // *TODO: Consider removing the debug associated with
+    // DEBUG_SILHOUETTE_EDGE_MAP, and remove its associated computational
+    // overhead in generateSilhouetteEdge.
+    std::vector<S32> edge;
     //for each face
     for (face_list_t::iterator iter = mVolumeFaces.begin();
          iter != mVolumeFaces.end(); ++iter)
@@ -3768,7 +3976,16 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
         LLVolumeFace& face = *iter;
 
         if (!(face_mask & (0x1 << cur_index++)) ||
-             face.mNumIndices == 0 || face.mEdge.empty())
+             face.mNumIndices == 0)
+        {
+            continue;
+        }
+        // Attempt to generate "edge" info for this silhouette, which is used
+        // for some prims. If the edge array remains empty, then this
+        // silhouette generation method is not supported for this face.
+        edge.clear();
+        face.generateSilhouetteEdge(this, edge);
+        if (edge.empty())
         {
             continue;
         }
@@ -3782,7 +3999,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
             {
                 for (S32 k = 0; k < 3; k++)
                 {
-                    S32 index = face.mEdge[j * 3 + k];
+                    S32 index = edge[j * 3 + k];
 
                     if (index == -1)
                     {
@@ -3834,7 +4051,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 
                 //for each edge
                 for (S32 k = 0; k < 3; k++) {
-                    S32 nIndex = face.mEdge[j*3+k];
+                    S32 nIndex = edge[j*3+k];
                     if (nIndex <= -1) {
                         continue;
                     }
@@ -3949,7 +4166,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
                     // *FIX IF NEEDED:  this does not deal with neighboring degenerate faces
                     for (S32 k = 0; k < 3; k++)
                     {
-                        S32 index = face.mEdge[j*3+k];
+                        S32 index = edge[j*3+k];
                         if (index != -1)
                         {
                             fFacing[j] = fFacing[index];
@@ -3961,10 +4178,10 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 
                 //for each edge
                 for (S32 k = 0; k < 3; k++) {
-                    S32 index = face.mEdge[j*3+k];
+                    S32 index = edge[j*3+k];
                     if (index != -1 && fFacing[index] == (AWAY | TOWARDS)) {
                         //our neighbor is degenerate, make him face our direction
-                        fFacing[face.mEdge[j*3+k]] = fFacing[j];
+                        fFacing[edge[j*3+k]] = fFacing[j];
                         continue;
                     }
 
@@ -5775,30 +5992,16 @@ bool LLVolumeFace::createUnCutCubeCap(LLVolume* volume, bool partial_build)
 
     if (!partial_build)
     {
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME("llvfcuccm - generate indices");
+
         resizeIndices(grid_size*grid_size*6);
-        if (!volume->isMeshAssetLoaded())
-        {
-            S32 size = grid_size * grid_size * 6;
-            try
-            {
-                mEdge.resize(size);
-            }
-            catch (std::bad_alloc&)
-            {
-                LL_WARNS("LLVOLUME") << "Resize of mEdge to " << size << " failed" << LL_ENDL;
-                return false;
-            }
-        }
 
         U16* out = mIndices;
 
         S32 idxs[] = {0,1,(grid_size+1)+1,(grid_size+1)+1,(grid_size+1),0};
 
-        int cur_edge = 0;
-
         for(S32 gx = 0;gx<grid_size;gx++)
         {
-
             for(S32 gy = 0;gy<grid_size;gy++)
             {
                 if (mTypeMask & TOP_MASK)
@@ -5808,47 +6011,6 @@ bool LLVolumeFace::createUnCutCubeCap(LLVolume* volume, bool partial_build)
                         *out++ = ((gy*(grid_size+1))+gx+idxs[i]);
                     }
 
-                    S32 edge_value = grid_size * 2 * gy + gx * 2;
-
-                    if (gx > 0)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1; // Mark face to higlight it
-                    }
-
-                    if (gy < grid_size - 1)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1;
-                    }
-
-                    mEdge[cur_edge++] = edge_value;
-
-                    if (gx < grid_size - 1)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1;
-                    }
-
-                    if (gy > 0)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1;
-                    }
-
-                    mEdge[cur_edge++] = edge_value;
                 }
                 else
                 {
@@ -5856,48 +6018,6 @@ bool LLVolumeFace::createUnCutCubeCap(LLVolume* volume, bool partial_build)
                     {
                         *out++ = ((gy*(grid_size+1))+gx+idxs[i]);
                     }
-
-                    S32 edge_value = grid_size * 2 * gy + gx * 2;
-
-                    if (gy > 0)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1;
-                    }
-
-                    if (gx < grid_size - 1)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1;
-                    }
-
-                    mEdge[cur_edge++] = edge_value;
-
-                    if (gy < grid_size - 1)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1;
-                    }
-
-                    if (gx > 0)
-                    {
-                        mEdge[cur_edge++] = edge_value;
-                    }
-                    else
-                    {
-                        mEdge[cur_edge++] = -1;
-                    }
-
-                    mEdge[cur_edge++] = edge_value;
                 }
             }
         }
@@ -6377,6 +6497,8 @@ void LLVolumeFace::createTangents()
 
 void LLVolumeFace::resizeVertices(S32 num_verts)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;
+
     ll_aligned_free<64>(mPositions);
     //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer
     ll_aligned_free_16(mTangents);
@@ -6499,6 +6621,8 @@ void LLVolumeFace::allocateJointIndices(S32 num_verts)
 
 void LLVolumeFace::resizeIndices(S32 num_indices)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;
+
     ll_aligned_free_16(mIndices);
     llassert(num_indices % 3 == 0);
 
@@ -6591,19 +6715,6 @@ bool LLVolumeFace::createSide(LLVolume* volume, bool partial_build)
     {
         resizeVertices(num_vertices);
         resizeIndices(num_indices);
-
-        if (!volume->isMeshAssetLoaded())
-        {
-            try
-            {
-                mEdge.resize(num_indices);
-            }
-            catch (std::bad_alloc&)
-            {
-                LL_WARNS("LLVOLUME") << "Resize of mEdge to " << num_indices << " failed" << LL_ENDL;
-                return false;
-            }
-        }
     }
 
     LL_CHECK_MEMORY
@@ -6618,6 +6729,7 @@ bool LLVolumeFace::createSide(LLVolume* volume, bool partial_build)
     bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2;
 
     // Copy the vertices into the array
+    { LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME("llvfcs - copy verts");
     for (t = mBeginT; t < end_t; t++)
     {
         tt = path_data[t].mTexT;
@@ -6702,6 +6814,7 @@ bool LLVolumeFace::createSide(LLVolume* volume, bool partial_build)
             cur_vertex++;
         }
     }
+    }
     LL_CHECK_MEMORY
 
     mCenter->clear();
@@ -6755,11 +6868,11 @@ bool LLVolumeFace::createSide(LLVolume* volume, bool partial_build)
     mCenter->mul(0.5f);
 
     S32 cur_index = 0;
-    S32 cur_edge = 0;
-    bool flat_face = mTypeMask & FLAT_MASK;
 
     if (!partial_build)
     {
+        LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME("llvfcs - generate indices");
+
         // Now we generate the indices.
         for (t = 0; t < (mNumT-1); t++)
         {
@@ -6771,64 +6884,6 @@ bool LLVolumeFace::createSide(LLVolume* volume, bool partial_build)
                 mIndices[cur_index++] = s   + mNumS*t;          //bottom left
                 mIndices[cur_index++] = s+1 + mNumS*t;          //bottom right
                 mIndices[cur_index++] = s+1 + mNumS*(t+1);      //top right
-
-                // bottom left/top right neighbor face
-                mEdge[cur_edge++] = (mNumS-1)*2*t+s*2+1;
-
-                if (t < mNumT-2)
-                {   // top right/top left neighbor face
-                    mEdge[cur_edge++] = (mNumS-1)*2*(t+1)+s*2+1;
-                }
-                else if (mNumT <= 3 || volume->getPath().isOpen())
-                {   // no neighbor
-                    mEdge[cur_edge++] = -1;
-                }
-                else
-                {   // wrap on T
-                    mEdge[cur_edge++] = s*2+1;
-                }
-
-                if (s > 0)
-                {   // top left/bottom left neighbor face
-                    mEdge[cur_edge++] = (mNumS-1)*2*t+s*2-1;
-                }
-                else if (flat_face || volume->getProfile().isOpen())
-                {   // no neighbor
-                    mEdge[cur_edge++] = -1;
-                }
-                else
-                {   // wrap on S
-                    mEdge[cur_edge++] = (mNumS-1)*2*t+(mNumS-2)*2+1;
-                }
-
-                if (t > 0)
-                {   // bottom left/bottom right neighbor face
-                    mEdge[cur_edge++] = (mNumS-1)*2*(t-1)+s*2;
-                }
-                else if (mNumT <= 3 || volume->getPath().isOpen())
-                {   // no neighbor
-                    mEdge[cur_edge++] = -1;
-                }
-                else
-                {   // wrap on T
-                    mEdge[cur_edge++] = (mNumS-1)*2*(mNumT-2)+s*2;
-                }
-
-                if (s < mNumS-2)
-                {   // bottom right/top right neighbor face
-                    mEdge[cur_edge++] = (mNumS-1)*2*t+(s+1)*2;
-                }
-                else if (flat_face || volume->getProfile().isOpen())
-                {   // no neighbor
-                    mEdge[cur_edge++] = -1;
-                }
-                else
-                {   // wrap on S
-                    mEdge[cur_edge++] = (mNumS-1)*2*t;
-                }
-
-                // top right/bottom left neighbor face
-                mEdge[cur_edge++] = (mNumS-1)*2*t+s*2;
             }
         }
     }
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index bbb2a16b0b..27c5fc5a49 100644
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -918,6 +918,15 @@ public:
     // Get a reference to the octree, which may be null
     const LLVolumeOctree* getOctree() const;
 
+    // Part of silhouette generation (used by selection outlines)
+    // Populates the provided edge array with numbers corresponding to
+    // *partial* logic of whether a particular index should be rendered
+    // as a silhouette edge. -1 indicates the index should be rendered as a
+    // silhouette edge. See generateSilhouetteVertices for the full logic.
+    // Silhouette edges can only be generated for some types of prims. If a
+    // silhouette edge cannot be generated, the edge array will be left empty.
+    void generateSilhouetteEdge(const LLVolume* volume, std::vector<S32>& edge) const;
+
     enum
     {
         SINGLE_MASK =   0x0001,
@@ -963,8 +972,6 @@ public:
     // indexes for mPositions/mNormals/mTexCoords
     U16* mIndices;
 
-    std::vector<S32>    mEdge;
-
     //list of skin weights for rigged volumes
     // format is mWeights[vertex_index].mV[influence] = <joint_index>.<weight>
     // mWeights.size() should be empty or match mVertices.size()
@@ -1113,9 +1120,9 @@ private:
 
 public:
     virtual void setMeshAssetLoaded(bool loaded);
-    virtual bool isMeshAssetLoaded();
+    virtual bool isMeshAssetLoaded() const;
     virtual void setMeshAssetUnavaliable(bool unavaliable);
-    virtual bool isMeshAssetUnavaliable();
+    virtual bool isMeshAssetUnavaliable() const;
 
  protected:
     bool mUnique;
diff --git a/indra/llrender/llcubemaparray.cpp b/indra/llrender/llcubemaparray.cpp
index be69b997da..4f5e13765a 100644
--- a/indra/llrender/llcubemaparray.cpp
+++ b/indra/llrender/llcubemaparray.cpp
@@ -125,27 +125,25 @@ void LLCubeMapArray::allocate(U32 resolution, U32 components, U32 count, bool us
     mImage->setHasMipMaps(use_mips);
 
     bind(0);
+    free_cur_tex_image();
 
     U32 format = components == 4 ? GL_RGBA16F : GL_RGB16F;
-
     U32 mip = 0;
-
-    free_cur_tex_image();
-
-    while (resolution >= 1)
+    U32 mip_resolution = resolution;
+    while (mip_resolution >= 1)
     {
-        glTexImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, format, resolution, resolution, count * 6, 0,
+        glTexImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, format, mip_resolution, mip_resolution, count * 6, 0,
             GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
 
         if (!use_mips)
         {
             break;
         }
-        resolution /= 2;
+        mip_resolution /= 2;
         ++mip;
     }
 
-    alloc_tex_image(resolution * 6, resolution, format);
+    alloc_tex_image(resolution, resolution, format, count * 6);
 
     mImage->setAddressMode(LLTexUnit::TAM_CLAMP);
 
diff --git a/indra/llrender/llfontbitmapcache.cpp b/indra/llrender/llfontbitmapcache.cpp
index 46c2e89797..ee9cfd0719 100644
--- a/indra/llrender/llfontbitmapcache.cpp
+++ b/indra/llrender/llfontbitmapcache.cpp
@@ -117,7 +117,7 @@ bool LLFontBitmapCache::nextOpenPos(S32 width, S32& pos_x, S32& pos_y, EFontGlyp
             }
 
             // Make corresponding GL image.
-            mImageGLVec[bitmap_idx].push_back(new LLImageGL(image_raw, false));
+            mImageGLVec[bitmap_idx].push_back(new LLImageGL(image_raw, false, false));
             LLImageGL* image_gl = getImageGL(bitmap_type, bitmap_num);
 
             // Start at beginning of the new image.
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp
index 7959b3bb57..c14efe8ab4 100644
--- a/indra/llrender/llgl.cpp
+++ b/indra/llrender/llgl.cpp
@@ -2742,7 +2742,7 @@ LLGLDepthTest::LLGLDepthTest(GLboolean depth_enabled, GLboolean write_enabled, G
 : mPrevDepthEnabled(sDepthEnabled), mPrevDepthFunc(sDepthFunc), mPrevWriteEnabled(sWriteEnabled)
 {
     stop_glerror();
-
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
     checkState();
 
     if (!depth_enabled)
@@ -2775,6 +2775,7 @@ LLGLDepthTest::LLGLDepthTest(GLboolean depth_enabled, GLboolean write_enabled, G
 
 LLGLDepthTest::~LLGLDepthTest()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE;
     checkState();
     if (sDepthEnabled != mPrevDepthEnabled )
     {
diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index e76a30a954..daa768e8ab 100644
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -1552,6 +1552,45 @@ void LLGLSLShader::uniform4fv(U32 index, U32 count, const GLfloat* v)
     }
 }
 
+// Uploads 'count' unsigned 4-component values from 'v' to the uniform stored
+// at mUniform[index]. When count == 1 the upload is skipped if the value
+// cached in mValue already matches, unless a component is too large for the
+// F32-based cache to represent exactly.
+void LLGLSLShader::uniform4uiv(U32 index, U32 count, const GLuint* v)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
+    llassert(sCurBoundShaderPtr == this);
+
+    if (mProgramObject)
+    {
+        if (mUniform.size() <= index)
+        {
+            LL_WARNS_ONCE("Shader") << "Uniform index out of bounds. Size: " << (S32)mUniform.size() << " index: " << index << LL_ENDL;
+            llassert(false);
+            return;
+        }
+
+        if (mUniform[index] >= 0)
+        {
+            // The cached copy is built from F32 casts, which are exact only for
+            // integers below 2^24. Above that, distinct GLuint values can round
+            // to the same float, so the cache must not be used to skip the upload.
+            constexpr GLuint F32_EXACT_LIMIT = 1u << 24;
+            const bool lossy = v[0] >= F32_EXACT_LIMIT || v[1] >= F32_EXACT_LIMIT ||
+                               v[2] >= F32_EXACT_LIMIT || v[3] >= F32_EXACT_LIMIT;
+
+            const auto& iter = mValue.find(mUniform[index]);
+            LLVector4 vec((F32)v[0], (F32)v[1], (F32)v[2], (F32)v[3]);
+            if (lossy || iter == mValue.end() || shouldChange(iter->second, vec) || count != 1)
+            {
+                LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
+                glUniform4uiv(mUniform[index], count, v);
+                mValue[mUniform[index]] = vec;
+            }
+        }
+    }
+}
+
 void LLGLSLShader::uniformMatrix2fv(U32 index, U32 count, GLboolean transpose, const GLfloat* v)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
@@ -1886,6 +1914,24 @@ void LLGLSLShader::uniform4fv(const LLStaticHashedString& uniform, U32 count, co
     }
 }
 
+// Uploads 'count' uvec4 values from 'v' to the uniform looked up by name via getUniformLocation().
+void LLGLSLShader::uniform4uiv(const LLStaticHashedString& uniform, U32 count, const GLuint* v)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
+    const GLint location = getUniformLocation(uniform);
+    if (location < 0)
+    {
+        // Negative location: there is nothing to set for this uniform.
+        return;
+    }
+
+    // mValue caches F32 components, which cannot hold every 32-bit integer exactly, so a
+    // compare against the cache could skip a genuine change: always upload unsigned data.
+    glUniform4uiv(location, count, v);
+    // Still record the (F32-converted) value under this location in mValue.
+    mValue[location] = LLVector4((F32)v[0], (F32)v[1], (F32)v[2], (F32)v[3]);
+}
+
 void LLGLSLShader::uniformMatrix4fv(const LLStaticHashedString& uniform, U32 count, GLboolean transpose, const GLfloat* v)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h
index 86e5625dca..f3c41cd819 100644
--- a/indra/llrender/llglslshader.h
+++ b/indra/llrender/llglslshader.h
@@ -208,6 +208,7 @@ public:
     void uniform2fv(U32 index, U32 count, const GLfloat* v);
     void uniform3fv(U32 index, U32 count, const GLfloat* v);
     void uniform4fv(U32 index, U32 count, const GLfloat* v);
+    void uniform4uiv(U32 index, U32 count, const GLuint* v);
     void uniform2i(const LLStaticHashedString& uniform, GLint i, GLint j);
     void uniformMatrix2fv(U32 index, U32 count, GLboolean transpose, const GLfloat* v);
     void uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, const GLfloat* v);
@@ -223,6 +224,7 @@ public:
     void uniform2fv(const LLStaticHashedString& uniform, U32 count, const GLfloat* v);
     void uniform3fv(const LLStaticHashedString& uniform, U32 count, const GLfloat* v);
     void uniform4fv(const LLStaticHashedString& uniform, U32 count, const GLfloat* v);
+    void uniform4uiv(const LLStaticHashedString& uniform, U32 count, const GLuint* v);
     void uniformMatrix4fv(const LLStaticHashedString& uniform, U32 count, GLboolean transpose, const GLfloat* v);
 
     void setMinimumAlpha(F32 minimum);
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index e4b176ff69..68c20048ec 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -67,12 +67,13 @@ static U64 sTextureBytes = 0;
 
 // track a texture alloc on the currently bound texture.
 // asserts that no currently tracked alloc exists
-void LLImageGLMemory::alloc_tex_image(U32 width, U32 height, U32 pixformat)
+void LLImageGLMemory::alloc_tex_image(U32 width, U32 height, U32 intformat, U32 count)
 {
     U32 texUnit = gGL.getCurrentTexUnitIndex();
     llassert(texUnit == 0); // allocations should always be done on tex unit 0
     U32 texName = gGL.getTexUnit(texUnit)->getCurrTexture();
-    U64 size = LLImageGL::dataFormatBytes(pixformat, width, height);
+    U64 size = LLImageGL::dataFormatBytes(intformat, width, height);
+    size *= count;
 
     llassert(size >= 0);
 
@@ -280,6 +281,15 @@ S32 LLImageGL::dataFormatBits(S32 dataformat)
 {
     switch (dataformat)
     {
+    case GL_COMPRESSED_RED:                         return 8;
+    case GL_COMPRESSED_RG:                          return 16;
+    case GL_COMPRESSED_RGB:                         return 24;
+    case GL_COMPRESSED_SRGB:                        return 32;
+    case GL_COMPRESSED_RGBA:                        return 32;
+    case GL_COMPRESSED_SRGB_ALPHA:                  return 32;
+    case GL_COMPRESSED_LUMINANCE:                   return 8;
+    case GL_COMPRESSED_LUMINANCE_ALPHA:             return 16;
+    case GL_COMPRESSED_ALPHA:                       return 8;
     case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:          return 4;
     case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:    return 4;
     case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:          return 8;
@@ -287,21 +297,35 @@ S32 LLImageGL::dataFormatBits(S32 dataformat)
     case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:          return 8;
     case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:    return 8;
     case GL_LUMINANCE:                              return 8;
+    case GL_LUMINANCE8:                             return 8;
     case GL_ALPHA:                                  return 8;
+    case GL_ALPHA8:                                 return 8;
     case GL_RED:                                    return 8;
+    case GL_R8:                                     return 8;
     case GL_COLOR_INDEX:                            return 8;
     case GL_LUMINANCE_ALPHA:                        return 16;
+    case GL_LUMINANCE8_ALPHA8:                      return 16;
+    case GL_RG:                                     return 16;
+    case GL_RG8:                                    return 16;
     case GL_RGB:                                    return 24;
     case GL_SRGB:                                   return 24;
     case GL_RGB8:                                   return 24;
     case GL_RGBA:                                   return 32;
+    case GL_RGBA8:                                  return 32;
     case GL_SRGB_ALPHA:                             return 32;
     case GL_BGRA:                                   return 32;      // Used for QuickTime media textures on the Mac
     case GL_DEPTH_COMPONENT:                        return 24;
+    case GL_DEPTH_COMPONENT24:                      return 24;
+    case GL_R16F:                                   return 16;
+    case GL_RG16F:                                  return 32;
     case GL_RGB16F:                                 return 48;
     case GL_RGBA16F:                                return 64;
+    case GL_R32F:                                   return 32;
+    case GL_RG32F:                                  return 64;
+    case GL_RGB32F:                                 return 96;
+    case GL_RGBA32F:                                return 128;
     default:
-        LL_ERRS() << "LLImageGL::Unknown format: " << dataformat << LL_ENDL;
+        LL_ERRS() << "LLImageGL::Unknown format: " << std::hex << dataformat << std::dec << LL_ENDL;
         return 0;
     }
 }
@@ -344,13 +368,14 @@ S32 LLImageGL::dataFormatComponents(S32 dataformat)
       case GL_RED:                              return 1;
       case GL_COLOR_INDEX:                      return 1;
       case GL_LUMINANCE_ALPHA:                  return 2;
+      case GL_RG:                               return 2;
       case GL_RGB:                              return 3;
       case GL_SRGB:                             return 3;
       case GL_RGBA:                             return 4;
       case GL_SRGB_ALPHA:                       return 4;
       case GL_BGRA:                             return 4;       // Used for QuickTime media textures on the Mac
       default:
-        LL_ERRS() << "LLImageGL::Unknown format: " << dataformat << LL_ENDL;
+        LL_ERRS() << "LLImageGL::Unknown format: " << std::hex << dataformat << std::dec << LL_ENDL;
         return 0;
     }
 }
@@ -411,29 +436,29 @@ bool LLImageGL::create(LLPointer<LLImageGL>& dest, const LLImageRaw* imageraw, b
 
 //----------------------------------------------------------------------------
 
-LLImageGL::LLImageGL(bool usemipmaps)
+LLImageGL::LLImageGL(bool usemipmaps/* = true*/, bool allow_compression/* = true*/)
 :   mSaveData(0), mExternalTexture(false)
 {
-    init(usemipmaps);
+    init(usemipmaps, allow_compression);
     setSize(0, 0, 0);
     sImageList.insert(this);
     sCount++;
 }
 
-LLImageGL::LLImageGL(U32 width, U32 height, U8 components, bool usemipmaps)
+LLImageGL::LLImageGL(U32 width, U32 height, U8 components, bool usemipmaps/* = true*/, bool allow_compression/* = true*/)
 :   mSaveData(0), mExternalTexture(false)
 {
     llassert( components <= 4 );
-    init(usemipmaps);
+    init(usemipmaps, allow_compression);
     setSize(width, height, components);
     sImageList.insert(this);
     sCount++;
 }
 
-LLImageGL::LLImageGL(const LLImageRaw* imageraw, bool usemipmaps)
+LLImageGL::LLImageGL(const LLImageRaw* imageraw, bool usemipmaps/* = true*/, bool allow_compression/* = true*/)
 :   mSaveData(0), mExternalTexture(false)
 {
-    init(usemipmaps);
+    init(usemipmaps, allow_compression);
     setSize(0, 0, 0);
     sImageList.insert(this);
     sCount++;
@@ -450,7 +475,7 @@ LLImageGL::LLImageGL(
     LLGLenum formatType,
     LLTexUnit::eTextureAddressMode addressMode)
 {
-    init(false);
+    init(false, true);
     mTexName = texName;
     mTarget = target;
     mComponents = components;
@@ -472,7 +497,7 @@ LLImageGL::~LLImageGL()
     }
 }
 
-void LLImageGL::init(bool usemipmaps)
+void LLImageGL::init(bool usemipmaps, bool allow_compression)
 {
 #if LL_IMAGEGL_THREAD_CHECK
     mActiveThread = LLThread::currentID();
@@ -502,7 +527,7 @@ void LLImageGL::init(bool usemipmaps)
     mHeight = 0;
     mCurrentDiscardLevel = -1;
 
-    mAllowCompression = true;
+    mAllowCompression = allow_compression;
 
     mTarget = GL_TEXTURE_2D;
     mBindTarget = LLTexUnit::TT_TEXTURE;
@@ -1230,90 +1255,122 @@ void LLImageGL::deleteTextures(S32 numTextures, const U32 *textures)
 void LLImageGL::setManualImage(U32 target, S32 miplevel, S32 intformat, S32 width, S32 height, U32 pixformat, U32 pixtype, const void* pixels, bool allow_compression)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
-    bool use_scratch = false;
-    U32* scratch = NULL;
+    std::unique_ptr<U32[]> scratch;
     if (LLRender::sGLCoreProfile)
     {
-        if (pixformat == GL_ALPHA && pixtype == GL_UNSIGNED_BYTE)
-        { //GL_ALPHA is deprecated, convert to RGBA
-            if (pixels != nullptr)
-            {
-                use_scratch = true;
-                scratch = new(std::nothrow) U32[width * height];
-                if (!scratch)
-                {
-                    LLError::LLUserWarningMsg::showOutOfMemory();
-                    LL_ERRS() << "Failed to allocate " << (U32)(width * height * sizeof(U32))
-                              << " bytes for a manual image W" << width << " H" << height << LL_ENDL;
-                }
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
+        if (gGLManager.mGLVersion >= 3.29f)
+        {
+            if (pixformat == GL_ALPHA)
+            { //GL_ALPHA is deprecated, convert to RGBA
+                const GLint mask[] = { GL_ZERO, GL_ZERO, GL_ZERO, GL_RED };
+                glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask);
+                pixformat = GL_RED;
+                intformat = GL_R8;
+            }
 
-                U32 pixel_count = (U32)(width * height);
-                for (U32 i = 0; i < pixel_count; i++)
-                {
-                    U8* pix = (U8*)&scratch[i];
-                    pix[0] = pix[1] = pix[2] = 0;
-                    pix[3] = ((U8*)pixels)[i];
-                }
+            if (pixformat == GL_LUMINANCE)
+            { //GL_LUMINANCE is deprecated, convert to GL_RGBA
+                const GLint mask[] = { GL_RED, GL_RED, GL_RED, GL_ONE };
+                glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask);
+                pixformat = GL_RED;
+                intformat = GL_R8;
             }
 
-            pixformat = GL_RGBA;
-            intformat = GL_RGBA8;
+            if (pixformat == GL_LUMINANCE_ALPHA)
+            { //GL_LUMINANCE_ALPHA is deprecated, convert to RGBA
+                const GLint mask[] = { GL_RED, GL_RED, GL_RED, GL_GREEN };
+                glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask);
+                pixformat = GL_RG;
+                intformat = GL_RG8;
+            }
         }
-
-        if (pixformat == GL_LUMINANCE_ALPHA && pixtype == GL_UNSIGNED_BYTE)
-        { //GL_LUMINANCE_ALPHA is deprecated, convert to RGBA
-            if (pixels != nullptr)
-            {
-                use_scratch = true;
-                scratch = new(std::nothrow) U32[width * height];
-                if (!scratch)
+        else
+        {
+            if (pixformat == GL_ALPHA && pixtype == GL_UNSIGNED_BYTE)
+            { //GL_ALPHA is deprecated, convert to RGBA
+                if (pixels != nullptr)
                 {
-                    LLError::LLUserWarningMsg::showOutOfMemory();
-                    LL_ERRS() << "Failed to allocate " << (U32)(width * height * sizeof(U32))
-                        << " bytes for a manual image W" << width << " H" << height << LL_ENDL;
-                }
+                    scratch.reset(new(std::nothrow) U32[width * height]);
+                    if (!scratch)
+                    {
+                        LLError::LLUserWarningMsg::showOutOfMemory();
+                        LL_ERRS() << "Failed to allocate " << (U32)(width * height * sizeof(U32))
+                            << " bytes for a manual image W" << width << " H" << height << LL_ENDL;
+                    }
 
-                U32 pixel_count = (U32)(width * height);
-                for (U32 i = 0; i < pixel_count; i++)
-                {
-                    U8 lum = ((U8*)pixels)[i * 2 + 0];
-                    U8 alpha = ((U8*)pixels)[i * 2 + 1];
+                    U32 pixel_count = (U32)(width * height);
+                    for (U32 i = 0; i < pixel_count; i++)
+                    {
+                        U8* pix = (U8*)&scratch[i];
+                        pix[0] = pix[1] = pix[2] = 0;
+                        pix[3] = ((U8*)pixels)[i];
+                    }
 
-                    U8* pix = (U8*)&scratch[i];
-                    pix[0] = pix[1] = pix[2] = lum;
-                    pix[3] = alpha;
+                    pixels = scratch.get();
                 }
-            }
 
-            pixformat = GL_RGBA;
-            intformat = GL_RGBA8;
-        }
+                pixformat = GL_RGBA;
+                intformat = GL_RGBA8;
+            }
 
-        if (pixformat == GL_LUMINANCE && pixtype == GL_UNSIGNED_BYTE)
-        { //GL_LUMINANCE_ALPHA is deprecated, convert to RGB
-            if (pixels != nullptr)
-            {
-                use_scratch = true;
-                scratch = new(std::nothrow) U32[width * height];
-                if (!scratch)
+            if (pixformat == GL_LUMINANCE_ALPHA && pixtype == GL_UNSIGNED_BYTE)
+            { //GL_LUMINANCE_ALPHA is deprecated, convert to RGBA
+                if (pixels != nullptr)
                 {
-                    LLError::LLUserWarningMsg::showOutOfMemory();
-                    LL_ERRS() << "Failed to allocate " << (U32)(width * height * sizeof(U32))
-                        << " bytes for a manual image W" << width << " H" << height << LL_ENDL;
+                    scratch.reset(new(std::nothrow) U32[width * height]);
+                    if (!scratch)
+                    {
+                        LLError::LLUserWarningMsg::showOutOfMemory();
+                        LL_ERRS() << "Failed to allocate " << (U32)(width * height * sizeof(U32))
+                            << " bytes for a manual image W" << width << " H" << height << LL_ENDL;
+                    }
+
+                    U32 pixel_count = (U32)(width * height);
+                    for (U32 i = 0; i < pixel_count; i++)
+                    {
+                        U8 lum = ((U8*)pixels)[i * 2 + 0];
+                        U8 alpha = ((U8*)pixels)[i * 2 + 1];
+
+                        U8* pix = (U8*)&scratch[i];
+                        pix[0] = pix[1] = pix[2] = lum;
+                        pix[3] = alpha;
+                    }
+
+                    pixels = scratch.get();
                 }
 
-                U32 pixel_count = (U32)(width * height);
-                for (U32 i = 0; i < pixel_count; i++)
+                pixformat = GL_RGBA;
+                intformat = GL_RGBA8;
+            }
+
+            if (pixformat == GL_LUMINANCE && pixtype == GL_UNSIGNED_BYTE)
+            { //GL_LUMINANCE is deprecated, convert to RGB
+                if (pixels != nullptr)
                 {
-                    U8 lum = ((U8*)pixels)[i];
+                    scratch.reset(new(std::nothrow) U32[width * height]);
+                    if (!scratch)
+                    {
+                        LLError::LLUserWarningMsg::showOutOfMemory();
+                        LL_ERRS() << "Failed to allocate " << (U32)(width * height * sizeof(U32))
+                            << " bytes for a manual image W" << width << " H" << height << LL_ENDL;
+                    }
+
+                    U32 pixel_count = (U32)(width * height);
+                    for (U32 i = 0; i < pixel_count; i++)
+                    {
+                        U8 lum = ((U8*)pixels)[i];
+
+                        U8* pix = (U8*)&scratch[i];
+                        pix[0] = pix[1] = pix[2] = lum;
+                        pix[3] = 255;
+                    }
 
-                    U8* pix = (U8*)&scratch[i];
-                    pix[0] = pix[1] = pix[2] = lum;
-                    pix[3] = 255;
+                    pixels = scratch.get();
                 }
+                pixformat = GL_RGBA;
+                intformat = GL_RGB8;
             }
-            pixformat = GL_RGBA;
-            intformat = GL_RGB8;
         }
     }
 
@@ -1322,6 +1379,14 @@ void LLImageGL::setManualImage(U32 target, S32 miplevel, S32 intformat, S32 widt
     {
         switch (intformat)
         {
+        case GL_RED:
+        case GL_R8:
+            intformat = GL_COMPRESSED_RED;
+            break;
+        case GL_RG:
+        case GL_RG8:
+            intformat = GL_COMPRESSED_RG;
+            break;
         case GL_RGB:
         case GL_RGB8:
             intformat = GL_COMPRESSED_RGB;
@@ -1350,12 +1415,8 @@ void LLImageGL::setManualImage(U32 target, S32 miplevel, S32 intformat, S32 widt
         case GL_ALPHA8:
             intformat = GL_COMPRESSED_ALPHA;
             break;
-        case GL_RED:
-        case GL_R8:
-            intformat = GL_COMPRESSED_RED;
-            break;
         default:
-            LL_WARNS() << "Could not compress format: " << std::hex << intformat << LL_ENDL;
+            LL_WARNS() << "Could not compress format: " << std::hex << intformat << std::dec << LL_ENDL;
             break;
         }
     }
@@ -1371,7 +1432,7 @@ void LLImageGL::setManualImage(U32 target, S32 miplevel, S32 intformat, S32 widt
         if (!use_sub_image)
         {
             LL_PROFILE_ZONE_NAMED("glTexImage2D alloc + copy");
-            glTexImage2D(target, miplevel, intformat, width, height, 0, pixformat, pixtype, use_scratch ? scratch : pixels);
+            glTexImage2D(target, miplevel, intformat, width, height, 0, pixformat, pixtype, pixels);
         }
         else
         {
@@ -1381,21 +1442,16 @@ void LLImageGL::setManualImage(U32 target, S32 miplevel, S32 intformat, S32 widt
                 glTexImage2D(target, miplevel, intformat, width, height, 0, pixformat, pixtype, nullptr);
             }
 
-            U8* src = (U8*)(use_scratch ? scratch : pixels);
+            U8* src = (U8*)(pixels);
             if (src)
             {
                 LL_PROFILE_ZONE_NAMED("glTexImage2D copy");
                 sub_image_lines(target, miplevel, 0, 0, width, height, pixformat, pixtype, src, width);
             }
         }
-        alloc_tex_image(width, height, pixformat);
+        alloc_tex_image(width, height, intformat, 1);
     }
     stop_glerror();
-
-    if (use_scratch)
-    {
-        delete[] scratch;
-    }
 }
 
 //create an empty GL texture: just create a texture name
@@ -2374,11 +2430,11 @@ bool LLImageGL::scaleDown(S32 desired_discard)
         gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, temp_texname, true);
         {
             LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE("scaleDown - glTexImage2D");
-            glTexImage2D(mTarget, 0, mFormatPrimary, desired_width, desired_height, 0, mFormatPrimary, mFormatType, NULL);
+            glTexImage2D(mTarget, 0, mFormatInternal, desired_width, desired_height, 0, mFormatPrimary, mFormatType, NULL);
         }
 
         // account for new texture getting created
-        alloc_tex_image(desired_width, desired_height, mFormatPrimary);
+        alloc_tex_image(desired_width, desired_height, mFormatInternal, 1);
 
         // Use render-to-texture to scale down the texture
         {
@@ -2432,10 +2488,10 @@ bool LLImageGL::scaleDown(S32 desired_discard)
         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
 
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, sScratchPBO);
-        glTexImage2D(mTarget, 0, mFormatPrimary, desired_width, desired_height, 0, mFormatPrimary, mFormatType, nullptr);
+        glTexImage2D(mTarget, 0, mFormatInternal, desired_width, desired_height, 0, mFormatPrimary, mFormatType, nullptr);
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 
-        alloc_tex_image(desired_width, desired_height, mFormatPrimary);
+        alloc_tex_image(desired_width, desired_height, mFormatInternal, 1);
 
         if (mHasMipMaps)
         {
diff --git a/indra/llrender/llimagegl.h b/indra/llrender/llimagegl.h
index 5073701c30..a8b94bd5b0 100644
--- a/indra/llrender/llimagegl.h
+++ b/indra/llrender/llimagegl.h
@@ -50,7 +50,7 @@ class LLWindow;
 
 namespace LLImageGLMemory
 {
-    void alloc_tex_image(U32 width, U32 height, U32 pixformat);
+    void alloc_tex_image(U32 width, U32 height, U32 intformat, U32 count);
     void free_tex_image(U32 texName);
     void free_tex_images(U32 count, const U32* texNames);
     void free_cur_tex_image();
@@ -101,9 +101,9 @@ public:
     static bool create(LLPointer<LLImageGL>& dest, const LLImageRaw* imageraw, bool usemipmaps = true);
 
 public:
-    LLImageGL(bool usemipmaps = true);
-    LLImageGL(U32 width, U32 height, U8 components, bool usemipmaps = true);
-    LLImageGL(const LLImageRaw* imageraw, bool usemipmaps = true);
+    LLImageGL(bool usemipmaps = true, bool allow_compression = true);
+    LLImageGL(U32 width, U32 height, U8 components, bool usemipmaps = true, bool allow_compression = true);
+    LLImageGL(const LLImageRaw* imageraw, bool usemipmaps = true, bool allow_compression = true);
 
     // For wrapping textures created via GL elsewhere with our API only. Use with caution.
     LLImageGL(LLGLuint mTexName, U32 components, LLGLenum target, LLGLint  formatInternal, LLGLenum formatPrimary, LLGLenum formatType, LLTexUnit::eTextureAddressMode addressMode);
@@ -203,7 +203,7 @@ public:
 
     LLGLenum getTexTarget()const { return mTarget; }
 
-    void init(bool usemipmaps);
+    void init(bool usemipmaps, bool allow_compression);
     virtual void cleanup(); // Clean up the LLImageGL so it can be reinitialized.  Be careful when using this in derived class destructors
 
     void setNeedsAlphaAndPickMask(bool need_mask);
diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp
index f700201ace..21a0820d32 100644
--- a/indra/llrender/llrendertarget.cpp
+++ b/indra/llrender/llrendertarget.cpp
@@ -475,12 +475,10 @@ void LLRenderTarget::clear(U32 mask_in)
 
 U32 LLRenderTarget::getTexture(U32 attachment) const
 {
-    if (attachment > mTex.size()-1)
-    {
-        LL_ERRS() << "Invalid attachment index." << LL_ENDL;
-    }
-    if (mTex.empty())
+    if (attachment >= mTex.size())
     {
+        LL_WARNS() << "Invalid attachment index " << attachment << " for size " << mTex.size() << LL_ENDL;
+        llassert(false);
         return 0;
     }
     return mTex[attachment];
diff --git a/indra/llrender/llshadermgr.cpp b/indra/llrender/llshadermgr.cpp
index 512ef340f9..e9608491a4 100644
--- a/indra/llrender/llshadermgr.cpp
+++ b/indra/llrender/llshadermgr.cpp
@@ -595,8 +595,6 @@ GLuint LLShaderMgr::loadShaderFile(const std::string& filename, S32 & shader_lev
                 extra_code_text[extra_code_count++] = strdup("precision highp float;\n");
             }
         }
-
-        extra_code_text[extra_code_count++] = strdup("#define FXAA_GLSL_130 1\n");
     }
 
     // Use alpha float to store bit flags
diff --git a/indra/llui/llfloater.cpp b/indra/llui/llfloater.cpp
index ff174d8470..6ad74c09e6 100644
--- a/indra/llui/llfloater.cpp
+++ b/indra/llui/llfloater.cpp
@@ -1983,6 +1983,9 @@ void LLFloater::onClickCloseBtn(bool app_quitting)
 // virtual
 void LLFloater::draw()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
+    LL_PROFILE_ZONE_TEXT(getTitle().c_str(), getTitle().length());
+
     const F32 alpha = getCurrentTransparency();
 
     // draw background
diff --git a/indra/newview/VIEWER_VERSION.txt b/indra/newview/VIEWER_VERSION.txt
index 346a7e3aa1..e0eaaa0bbc 100644
--- a/indra/newview/VIEWER_VERSION.txt
+++ b/indra/newview/VIEWER_VERSION.txt
@@ -1 +1 @@
-7.1.10
+7.1.11
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index 5e7ae32b81..6b8f698b0b 100644
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -9821,6 +9821,17 @@
       <key>Value</key>
       <string>00000000-0000-0000-0000-000000000000</string>
     </map>
+    <key>RenderCASSharpness</key>
+    <map>
+        <key>Comment</key>
+        <string>Level of sharpening to apply via Contrast Adaptive Sharpening (0.0(off) - 1.0)</string>
+        <key>Persist</key>
+        <integer>1</integer>
+        <key>Type</key>
+        <string>F32</string>
+        <key>Value</key>
+        <real>0.4</real>
+    </map>
     <key>ReplaySession</key>
     <map>
       <key>Comment</key>
@@ -13158,17 +13169,6 @@
       <key>Value</key>
       <integer>0</integer>
     </map>
-    <key>VoiceEffectExpiryWarningTime</key>
-    <map>
-      <key>Comment</key>
-      <string>How much notice to give of Voice Morph subscriptions expiry, in seconds.</string>
-      <key>Persist</key>
-      <integer>1</integer>
-      <key>Type</key>
-      <string>S32</string>
-      <key>Value</key>
-      <integer>259200</integer>
-    </map>
     <key>VoiceMorphingEnabled</key>
     <map>
       <key>Comment</key>
diff --git a/indra/newview/app_settings/shaders/class1/deferred/CASF.glsl b/indra/newview/app_settings/shaders/class1/deferred/CASF.glsl
new file mode 100644
index 0000000000..96d08058cf
--- /dev/null
+++ b/indra/newview/app_settings/shaders/class1/deferred/CASF.glsl
@@ -0,0 +1,2558 @@
+/**
+ * @file CASF.glsl
+ *
+ * $LicenseInfo:firstyear=2024&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2024, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+/*[EXTRA_CODE_HERE]*/
+
+#ifndef A_CPU
+#define A_GPU
+#define A_GLSL
+#define CAS_BETTER_DIAGONALS
+#define CAS_SLOW
+
+out vec4 frag_color;
+in vec2 vary_fragcoord;
+
+uniform sampler2D diffuseRect;
+uniform vec2 out_screen_res;
+uniform uvec4 cas_param_0;
+uniform uvec4 cas_param_1;
+
+vec3 srgb_to_linear(vec3 cs);
+vec3 linear_to_srgb(vec3 cl);
+#endif
+
+#ifndef SHADER_PORTABILITY
+//==============================================================================================================================
+//
+//                                               [A] SHADER PORTABILITY 1.20210629
+//
+//==============================================================================================================================
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+// MIT LICENSE
+// ===========
+// Copyright (c) 2014 Michal Drobot (for concepts used in "FLOAT APPROXIMATIONS").
+// -----------
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+// -----------
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+// Software.
+// -----------
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+#define A_2PI 6.28318530718
+#ifdef A_CPU
+ // Supporting user defined overrides.
+ #ifndef A_RESTRICT
+  #define A_RESTRICT __restrict
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifndef A_STATIC
+  #define A_STATIC static
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Same types across CPU and GPU.
+ // Predicate uses 32-bit integer (C friendly bool).
+ typedef uint32_t AP1;
+ typedef float AF1;
+ typedef double AD1;
+ typedef uint8_t AB1;
+ typedef uint16_t AW1;
+ typedef uint32_t AU1;
+ typedef uint64_t AL1;
+ typedef int8_t ASB1;
+ typedef int16_t ASW1;
+ typedef int32_t ASU1;
+ typedef int64_t ASL1;
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AD1_(a) ((AD1)(a))
+ #define AF1_(a) ((AF1)(a))
+ #define AL1_(a) ((AL1)(a))
+ #define AU1_(a) ((AU1)(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASL1_(a) ((ASL1)(a))
+ #define ASU1_(a) ((ASU1)(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AU1 AU1_AF1(AF1 a){union{AF1 f;AU1 u;}bits;bits.f=a;return bits.u;}
+//------------------------------------------------------------------------------------------------------------------------------
+ #define A_TRUE 1
+ #define A_FALSE 0
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                                       CPU/GPU PORTING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Get CPU and GPU to share all setup code, without duplicate code paths.
+// This uses a lower-case prefix for special vector constructs.
+//  - In C restrict pointers are used.
+//  - In the shading language, in/inout/out arguments are used.
+// This depends on the ability to access a vector value in both languages via array syntax (aka color[2]).
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                     VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
+//==============================================================================================================================
+ #define retAD2 AD1 *A_RESTRICT
+ #define retAD3 AD1 *A_RESTRICT
+ #define retAD4 AD1 *A_RESTRICT
+ #define retAF2 AF1 *A_RESTRICT
+ #define retAF3 AF1 *A_RESTRICT
+ #define retAF4 AF1 *A_RESTRICT
+ #define retAL2 AL1 *A_RESTRICT
+ #define retAL3 AL1 *A_RESTRICT
+ #define retAL4 AL1 *A_RESTRICT
+ #define retAU2 AU1 *A_RESTRICT
+ #define retAU3 AU1 *A_RESTRICT
+ #define retAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define inAD2 AD1 *A_RESTRICT
+ #define inAD3 AD1 *A_RESTRICT
+ #define inAD4 AD1 *A_RESTRICT
+ #define inAF2 AF1 *A_RESTRICT
+ #define inAF3 AF1 *A_RESTRICT
+ #define inAF4 AF1 *A_RESTRICT
+ #define inAL2 AL1 *A_RESTRICT
+ #define inAL3 AL1 *A_RESTRICT
+ #define inAL4 AL1 *A_RESTRICT
+ #define inAU2 AU1 *A_RESTRICT
+ #define inAU3 AU1 *A_RESTRICT
+ #define inAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define inoutAD2 AD1 *A_RESTRICT
+ #define inoutAD3 AD1 *A_RESTRICT
+ #define inoutAD4 AD1 *A_RESTRICT
+ #define inoutAF2 AF1 *A_RESTRICT
+ #define inoutAF3 AF1 *A_RESTRICT
+ #define inoutAF4 AF1 *A_RESTRICT
+ #define inoutAL2 AL1 *A_RESTRICT
+ #define inoutAL3 AL1 *A_RESTRICT
+ #define inoutAL4 AL1 *A_RESTRICT
+ #define inoutAU2 AU1 *A_RESTRICT
+ #define inoutAU3 AU1 *A_RESTRICT
+ #define inoutAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define outAD2 AD1 *A_RESTRICT
+ #define outAD3 AD1 *A_RESTRICT
+ #define outAD4 AD1 *A_RESTRICT
+ #define outAF2 AF1 *A_RESTRICT
+ #define outAF3 AF1 *A_RESTRICT
+ #define outAF4 AF1 *A_RESTRICT
+ #define outAL2 AL1 *A_RESTRICT
+ #define outAL3 AL1 *A_RESTRICT
+ #define outAL4 AL1 *A_RESTRICT
+ #define outAU2 AU1 *A_RESTRICT
+ #define outAU3 AU1 *A_RESTRICT
+ #define outAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ #define varAD2(x) AD1 x[2]
+ #define varAD3(x) AD1 x[3]
+ #define varAD4(x) AD1 x[4]
+ #define varAF2(x) AF1 x[2]
+ #define varAF3(x) AF1 x[3]
+ #define varAF4(x) AF1 x[4]
+ #define varAL2(x) AL1 x[2]
+ #define varAL3(x) AL1 x[3]
+ #define varAL4(x) AL1 x[4]
+ #define varAU2(x) AU1 x[2]
+ #define varAU3(x) AU1 x[3]
+ #define varAU4(x) AU1 x[4]
+//------------------------------------------------------------------------------------------------------------------------------
+ #define initAD2(x,y) {x,y}
+ #define initAD3(x,y,z) {x,y,z}
+ #define initAD4(x,y,z,w) {x,y,z,w}
+ #define initAF2(x,y) {x,y}
+ #define initAF3(x,y,z) {x,y,z}
+ #define initAF4(x,y,z,w) {x,y,z,w}
+ #define initAL2(x,y) {x,y}
+ #define initAL3(x,y,z) {x,y,z}
+ #define initAL4(x,y,z,w) {x,y,z,w}
+ #define initAU2(x,y) {x,y}
+ #define initAU3(x,y,z) {x,y,z}
+ #define initAU4(x,y,z,w) {x,y,z,w}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     SCALAR RETURN OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// TODO
+// ====
+//  - Replace transcendentals with manual versions.
+//==============================================================================================================================
+ #ifdef A_GCC
+  A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);}
+  A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);}
+  A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));}
+  A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_llabs(ASL1_(a)));}
+ #else
+  A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);}
+  A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);}
+  A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));}
+  A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(llabs(ASL1_(a)));} // llabs keeps all 64 bits; labs((long)x) truncates where 'long' is 32-bit (LLP64).
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC
+  A_STATIC AD1 ACosD1(AD1 a){return __builtin_cos(a);}
+  A_STATIC AF1 ACosF1(AF1 a){return __builtin_cosf(a);}
+ #else
+  A_STATIC AD1 ACosD1(AD1 a){return cos(a);}
+  A_STATIC AF1 ACosF1(AF1 a){return cosf(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ADotD2(inAD2 a,inAD2 b){return a[0]*b[0]+a[1]*b[1];}
+ A_STATIC AD1 ADotD3(inAD3 a,inAD3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];}
+ A_STATIC AD1 ADotD4(inAD4 a,inAD4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];}
+ A_STATIC AF1 ADotF2(inAF2 a,inAF2 b){return a[0]*b[0]+a[1]*b[1];}
+ A_STATIC AF1 ADotF3(inAF3 a,inAF3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];}
+ A_STATIC AF1 ADotF4(inAF4 a,inAF4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];}
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC
+  A_STATIC AD1 AExp2D1(AD1 a){return __builtin_exp2(a);}
+  A_STATIC AF1 AExp2F1(AF1 a){return __builtin_exp2f(a);}
+ #else
+  A_STATIC AD1 AExp2D1(AD1 a){return exp2(a);}
+  A_STATIC AF1 AExp2F1(AF1 a){return exp2f(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC
+  A_STATIC AD1 AFloorD1(AD1 a){return __builtin_floor(a);}
+  A_STATIC AF1 AFloorF1(AF1 a){return __builtin_floorf(a);}
+ #else
+  A_STATIC AD1 AFloorD1(AD1 a){return floor(a);}
+  A_STATIC AF1 AFloorF1(AF1 a){return floorf(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ALerpD1(AD1 a,AD1 b,AD1 c){return b*c+(-a*c+a);}
+ A_STATIC AF1 ALerpF1(AF1 a,AF1 b,AF1 c){return b*c+(-a*c+a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC
+  A_STATIC AD1 ALog2D1(AD1 a){return __builtin_log2(a);}
+  A_STATIC AF1 ALog2F1(AF1 a){return __builtin_log2f(a);}
+ #else
+  A_STATIC AD1 ALog2D1(AD1 a){return log2(a);}
+  A_STATIC AF1 ALog2F1(AF1 a){return log2f(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 AMaxD1(AD1 a,AD1 b){return a>b?a:b;}
+ A_STATIC AF1 AMaxF1(AF1 a,AF1 b){return a>b?a:b;}
+ A_STATIC AL1 AMaxL1(AL1 a,AL1 b){return a>b?a:b;}
+ A_STATIC AU1 AMaxU1(AU1 a,AU1 b){return a>b?a:b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // These follow the convention that the A integer types carry no signedness until they are operated on.
+ A_STATIC AL1 AMaxSL1(AL1 a,AL1 b){return (ASL1_(a)>ASL1_(b))?a:b;}
+ A_STATIC AU1 AMaxSU1(AU1 a,AU1 b){return (ASU1_(a)>ASU1_(b))?a:b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 AMinD1(AD1 a,AD1 b){return a<b?a:b;}
+ A_STATIC AF1 AMinF1(AF1 a,AF1 b){return a<b?a:b;}
+ A_STATIC AL1 AMinL1(AL1 a,AL1 b){return a<b?a:b;}
+ A_STATIC AU1 AMinU1(AU1 a,AU1 b){return a<b?a:b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AL1 AMinSL1(AL1 a,AL1 b){return (ASL1_(a)<ASL1_(b))?a:b;}
+ A_STATIC AU1 AMinSU1(AU1 a,AU1 b){return (ASU1_(a)<ASU1_(b))?a:b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ARcpD1(AD1 a){return 1.0/a;}
+ A_STATIC AF1 ARcpF1(AF1 a){return 1.0f/a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AL1 AShrSL1(AL1 a,AL1 b){return AL1_(ASL1_(a)>>ASL1_(b));}
+ A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));}
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC
+  A_STATIC AD1 ASinD1(AD1 a){return __builtin_sin(a);}
+  A_STATIC AF1 ASinF1(AF1 a){return __builtin_sinf(a);}
+ #else
+  A_STATIC AD1 ASinD1(AD1 a){return sin(a);}
+  A_STATIC AF1 ASinF1(AF1 a){return sinf(a);}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_GCC
+  A_STATIC AD1 ASqrtD1(AD1 a){return __builtin_sqrt(a);}
+  A_STATIC AF1 ASqrtF1(AF1 a){return __builtin_sqrtf(a);}
+ #else
+  A_STATIC AD1 ASqrtD1(AD1 a){return sqrt(a);}
+  A_STATIC AF1 ASqrtF1(AF1 a){return sqrtf(a);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                               SCALAR RETURN OPS - DEPENDENT
+//==============================================================================================================================
+ A_STATIC AD1 AClampD1(AD1 x,AD1 n,AD1 m){return AMaxD1(n,AMinD1(x,m));}
+ A_STATIC AF1 AClampF1(AF1 x,AF1 n,AF1 m){return AMaxF1(n,AMinF1(x,m));}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 AFractD1(AD1 a){return a-AFloorD1(a);}
+ A_STATIC AF1 AFractF1(AF1 a){return a-AFloorF1(a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 APowD1(AD1 a,AD1 b){return AExp2D1(b*ALog2D1(a));}
+ A_STATIC AF1 APowF1(AF1 a,AF1 b){return AExp2F1(b*ALog2F1(a));}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ARsqD1(AD1 a){return ARcpD1(ASqrtD1(a));}
+ A_STATIC AF1 ARsqF1(AF1 a){return ARcpF1(ASqrtF1(a));}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC AD1 ASatD1(AD1 a){return AMinD1(1.0,AMaxD1(0.0,a));}
+ A_STATIC AF1 ASatF1(AF1 a){return AMinF1(1.0f,AMaxF1(0.0f,a));}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         VECTOR OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are added as needed for production or prototyping, so not necessarily a complete set.
+// They follow a convention of taking in a destination and also returning the destination value to increase utility.
+//==============================================================================================================================
+ A_STATIC retAD2 opAAbsD2(outAD2 d,inAD2 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);return d;}
+ A_STATIC retAD3 opAAbsD3(outAD3 d,inAD3 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);return d;}
+ A_STATIC retAD4 opAAbsD4(outAD4 d,inAD4 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);d[3]=AAbsD1(a[3]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAbsF2(outAF2 d,inAF2 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);return d;}
+ A_STATIC retAF3 opAAbsF3(outAF3 d,inAF3 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);return d;}
+ A_STATIC retAF4 opAAbsF4(outAF4 d,inAF4 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);d[3]=AAbsF1(a[3]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;}
+ A_STATIC retAD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;}
+ A_STATIC retAD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;}
+ A_STATIC retAF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;}
+ A_STATIC retAF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;}
+ A_STATIC retAD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;}
+ A_STATIC retAD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;}
+ A_STATIC retAF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;}
+ A_STATIC retAF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opACpyD2(outAD2 d,inAD2 a){d[0]=a[0];d[1]=a[1];return d;}
+ A_STATIC retAD3 opACpyD3(outAD3 d,inAD3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;}
+ A_STATIC retAD4 opACpyD4(outAD4 d,inAD4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opACpyF2(outAF2 d,inAF2 a){d[0]=a[0];d[1]=a[1];return d;}
+ A_STATIC retAF3 opACpyF3(outAF3 d,inAF3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;}
+ A_STATIC retAF4 opACpyF4(outAF4 d,inAF4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);return d;}
+ A_STATIC retAD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);return d;}
+ A_STATIC retAD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);d[3]=ALerpD1(a[3],b[3],c[3]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);return d;}
+ A_STATIC retAF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);return d;}
+ A_STATIC retAF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);d[3]=ALerpF1(a[3],b[3],c[3]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);return d;}
+ A_STATIC retAD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);return d;}
+ A_STATIC retAD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);d[3]=ALerpD1(a[3],b[3],c);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);return d;}
+ A_STATIC retAF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);return d;}
+ A_STATIC retAF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);d[3]=ALerpF1(a[3],b[3],c);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);return d;}
+ A_STATIC retAD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);return d;}
+ A_STATIC retAD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);d[3]=AMaxD1(a[3],b[3]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);return d;}
+ A_STATIC retAF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);return d;}
+ A_STATIC retAF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);d[3]=AMaxF1(a[3],b[3]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);return d;}
+ A_STATIC retAD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);return d;}
+ A_STATIC retAD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);d[3]=AMinD1(a[3],b[3]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);return d;}
+ A_STATIC retAF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);return d;}
+ A_STATIC retAF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);d[3]=AMinF1(a[3],b[3]);return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;}
+ A_STATIC retAD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;}
+ A_STATIC retAD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;}
+ A_STATIC retAF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;}
+ A_STATIC retAF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;}
+ A_STATIC retAD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;}
+ A_STATIC retAD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;}
+ A_STATIC retAF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;}
+ A_STATIC retAF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opANegD2(outAD2 d,inAD2 a){d[0]=-a[0];d[1]=-a[1];return d;}
+ A_STATIC retAD3 opANegD3(outAD3 d,inAD3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;}
+ A_STATIC retAD4 opANegD4(outAD4 d,inAD4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opANegF2(outAF2 d,inAF2 a){d[0]=-a[0];d[1]=-a[1];return d;}
+ A_STATIC retAF3 opANegF3(outAF3 d,inAF3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;}
+ A_STATIC retAF4 opANegF4(outAF4 d,inAF4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;}
+//==============================================================================================================================
+ A_STATIC retAD2 opARcpD2(outAD2 d,inAD2 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);return d;}
+ A_STATIC retAD3 opARcpD3(outAD3 d,inAD3 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);return d;}
+ A_STATIC retAD4 opARcpD4(outAD4 d,inAD4 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);d[3]=ARcpD1(a[3]);return d;}
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opARcpF2(outAF2 d,inAF2 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);return d;}
+ A_STATIC retAF3 opARcpF3(outAF3 d,inAF3 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);return d;}
+ A_STATIC retAF4 opARcpF4(outAF4 d,inAF4 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);d[3]=ARcpF1(a[3]);return d;}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     HALF FLOAT PACKING
+//==============================================================================================================================
+ // Convert a 32-bit float to a 16-bit half (in lower 16-bits of output).
+ // Same fast table technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+ // Both tables are indexed by the float's top 9 bits (sign+exponent); mantissa bits shifted out are truncated, not rounded.
+ // Supports denormals. Conversion rules are to make computations possibly "safer" on the GPU,
+ //  -INF & -NaN -> -65504 (0xfbff), as does any finite input too negative for half
+ //  +INF & +NaN -> +65504 (0x7bff), as does any finite input too large for half
+ A_STATIC AU1 AU1_AH1_AF1(AF1 f){
+  static AW1 base[512]={ // Half bits contributed by sign+exponent: entries 0-255 positive, 256-511 negative (0x8000 set).
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+   0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100,
+   0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,
+   0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+   0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100,
+   0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00,
+   0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+   0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff};
+  static AB1 shift[512]={ // Right-shift applied to the 23-bit float mantissa; 0x18 (24) discards it entirely.
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
+   0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
+   0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
+   0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
+   0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+   0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18};
+  union{AF1 f;AU1 u;}bits;bits.f=f;AU1 u=bits.u;AU1 i=u>>23;return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Used to output packed constant: half(a[0]) in the low 16 bits plus half(a[1]) shifted into the high 16 bits.
+ A_STATIC AU1 AU1_AH2_AF2(inAF2 a){AU1 lo=AU1_AH1_AF1(a[0]);AU1 hi=AU1_AH1_AF1(a[1]);return lo+(hi<<16);}
+#endif
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                            GLSL
+//==============================================================================================================================
+#if defined(A_GLSL) && defined(A_GPU)
+ #ifndef A_SKIP_EXT // Defining A_SKIP_EXT suppresses every #extension directive in this group.
+  #ifdef A_LONG // Extensions required when A_LONG is defined.
+   #extension GL_ARB_gpu_shader_int64:require
+   #extension GL_NV_shader_atomic_int64:require
+  #endif
+//------------------------------------------------------------------------------------------------------------------------------
+  #ifdef A_WAVE // Subgroup extensions required when A_WAVE is defined.
+   #extension GL_KHR_shader_subgroup_arithmetic:require
+   #extension GL_KHR_shader_subgroup_ballot:require
+   #extension GL_KHR_shader_subgroup_quad:require
+   #extension GL_KHR_shader_subgroup_shuffle:require
+  #endif
+ #endif
+//==============================================================================================================================
+ #define AP1 bool // Boolean scalar; AP2..AP4 are the matching 2-4 component vectors.
+ #define AP2 bvec2
+ #define AP3 bvec3
+ #define AP4 bvec4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AF1 float // Float scalar; AF2..AF4 are the matching 2-4 component vectors.
+ #define AF2 vec2
+ #define AF3 vec3
+ #define AF4 vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1 uint // Unsigned integer scalar; AU2..AU4 are the matching 2-4 component vectors.
+ #define AU2 uvec2
+ #define AU3 uvec3
+ #define AU4 uvec4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASU1 int // Signed integer scalar; ASU2..ASU4 are the matching 2-4 component vectors.
+ #define ASU2 ivec2
+ #define ASU3 ivec3
+ #define ASU4 ivec4
+//==============================================================================================================================
+ #define AF1_AU1(x) uintBitsToFloat(AU1(x)) // Reinterpret unsigned integer bits as float (bit pattern kept, no numeric conversion).
+ #define AF2_AU2(x) uintBitsToFloat(AU2(x))
+ #define AF3_AU3(x) uintBitsToFloat(AU3(x))
+ #define AF4_AU4(x) uintBitsToFloat(AU4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1_AF1(x) floatBitsToUint(AF1(x)) // Reinterpret float bits as unsigned integer (bit pattern kept, no numeric conversion).
+ #define AU2_AF2(x) floatBitsToUint(AF2(x))
+ #define AU3_AF3(x) floatBitsToUint(AF3(x))
+ #define AU4_AF4(x) floatBitsToUint(AF4(x))
+//==============================================================================================================================
+ AF1 AF1_x(AF1 s){AF1 r=AF1(s);return r;} // Broadcast helpers: replicate one float into every component.
+ AF2 AF2_x(AF1 s){AF2 r=AF2(s,s);return r;}
+ AF3 AF3_x(AF1 s){AF3 r=AF3(s,s,s);return r;}
+ AF4 AF4_x(AF1 s){AF4 r=AF4(s,s,s,s);return r;}
+ #define AF1_(a) AF1_x(AF1(a)) // Macro forms cast the argument to AF1 before broadcasting.
+ #define AF2_(a) AF2_x(AF1(a))
+ #define AF3_(a) AF3_x(AF1(a))
+ #define AF4_(a) AF4_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_x(AU1 s){AU1 r=AU1(s);return r;} // Broadcast helpers: replicate one uint into every component.
+ AU2 AU2_x(AU1 s){AU2 r=AU2(s,s);return r;}
+ AU3 AU3_x(AU1 s){AU3 r=AU3(s,s,s);return r;}
+ AU4 AU4_x(AU1 s){AU4 r=AU4(s,s,s,s);return r;}
+ #define AU1_(a) AU1_x(AU1(a)) // Macro forms cast the argument to AU1 before broadcasting.
+ #define AU2_(a) AU2_x(AU1(a))
+ #define AU3_(a) AU3_x(AU1(a))
+ #define AU4_(a) AU4_x(AU1(a))
+//==============================================================================================================================
+ AU1 AAbsSU1(AU1 a){ASU1 s=ASU1(a);return AU1(abs(s));} // abs() of the value viewed as signed, returned as unsigned.
+ AU2 AAbsSU2(AU2 a){ASU2 s=ASU2(a);return AU2(abs(s));}
+ AU3 AAbsSU3(AU3 a){ASU3 s=ASU3(a);return AU3(abs(s));}
+ AU4 AAbsSU4(AU4 a){ASU4 s=ASU4(a);return AU4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 ABfe(AU1 src,AU1 off,AU1 bits){ASU1 o=ASU1(off);ASU1 n=ASU1(bits);return bitfieldExtract(src,o,n);} // Extract 'bits' bits at 'off'.
+ AU1 ABfi(AU1 src,AU1 ins,AU1 mask){AU1 kept=src&(~mask);AU1 put=ins&mask;return put|kept;} // Take 'ins' where mask=1, else 'src'.
+ // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate.
+ AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){ASU1 n=ASU1(bits);return bitfieldInsert(src,ins,0,n);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_MED3_F32. Clamp 'x' to the range [n,m] via the clamp() builtin.
+ AF1 AClampF1(AF1 x,AF1 n,AF1 m){AF1 r=clamp(x,n,m);return r;}
+ AF2 AClampF2(AF2 x,AF2 n,AF2 m){AF2 r=clamp(x,n,m);return r;}
+ AF3 AClampF3(AF3 x,AF3 n,AF3 m){AF3 r=clamp(x,n,m);return r;}
+ AF4 AClampF4(AF4 x,AF4 n,AF4 m){AF4 r=clamp(x,n,m);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_FRACT_F32 (note DX frac() is different). Forwards to the fract() builtin.
+ AF1 AFractF1(AF1 x){AF1 r=fract(x);return r;}
+ AF2 AFractF2(AF2 x){AF2 r=fract(x);return r;}
+ AF3 AFractF3(AF3 x){AF3 r=fract(x);return r;}
+ AF4 AFractF4(AF4 x){AF4 r=fract(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ALerpF1(AF1 x,AF1 y,AF1 a){AF1 r=mix(x,y,a);return r;} // Interpolate from x to y by a, via the mix() builtin.
+ AF2 ALerpF2(AF2 x,AF2 y,AF2 a){AF2 r=mix(x,y,a);return r;}
+ AF3 ALerpF3(AF3 x,AF3 y,AF3 a){AF3 r=mix(x,y,a);return r;}
+ AF4 ALerpF4(AF4 x,AF4 y,AF4 a){AF4 r=mix(x,y,a);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_MAX3_F32. Largest of three floats, per component.
+ AF1 AMax3F1(AF1 x,AF1 y,AF1 z){AF1 yz=max(y,z);return max(x,yz);}
+ AF2 AMax3F2(AF2 x,AF2 y,AF2 z){AF2 yz=max(y,z);return max(x,yz);}
+ AF3 AMax3F3(AF3 x,AF3 y,AF3 z){AF3 yz=max(y,z);return max(x,yz);}
+ AF4 AMax3F4(AF4 x,AF4 y,AF4 z){AF4 yz=max(y,z);return max(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3SU1(AU1 x,AU1 y,AU1 z){ASU1 yz=max(ASU1(y),ASU1(z));return AU1(max(ASU1(x),yz));} // Signed max of three.
+ AU2 AMax3SU2(AU2 x,AU2 y,AU2 z){ASU2 yz=max(ASU2(y),ASU2(z));return AU2(max(ASU2(x),yz));}
+ AU3 AMax3SU3(AU3 x,AU3 y,AU3 z){ASU3 yz=max(ASU3(y),ASU3(z));return AU3(max(ASU3(x),yz));}
+ AU4 AMax3SU4(AU4 x,AU4 y,AU4 z){ASU4 yz=max(ASU4(y),ASU4(z));return AU4(max(ASU4(x),yz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3U1(AU1 x,AU1 y,AU1 z){AU1 yz=max(y,z);return max(x,yz);} // Unsigned max of three.
+ AU2 AMax3U2(AU2 x,AU2 y,AU2 z){AU2 yz=max(y,z);return max(x,yz);}
+ AU3 AMax3U3(AU3 x,AU3 y,AU3 z){AU3 yz=max(y,z);return max(x,yz);}
+ AU4 AMax3U4(AU4 x,AU4 y,AU4 z){AU4 yz=max(y,z);return max(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMaxSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(max(sa,sb));} // Signed max of two, carried as unsigned.
+ AU2 AMaxSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(max(sa,sb));}
+ AU3 AMaxSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(max(sa,sb));}
+ AU4 AMaxSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(max(sa,sb));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Clamp has an easier pattern match for med3 when some ordering is known.
+ // V_MED3_F32. Median of three: max(min(x,y),min(max(x,y),z)), per component.
+ AF1 AMed3F1(AF1 x,AF1 y,AF1 z){AF1 lo=min(x,y);AF1 hi=max(x,y);return max(lo,min(hi,z));}
+ AF2 AMed3F2(AF2 x,AF2 y,AF2 z){AF2 lo=min(x,y);AF2 hi=max(x,y);return max(lo,min(hi,z));}
+ AF3 AMed3F3(AF3 x,AF3 y,AF3 z){AF3 lo=min(x,y);AF3 hi=max(x,y);return max(lo,min(hi,z));}
+ AF4 AMed3F4(AF4 x,AF4 y,AF4 z){AF4 lo=min(x,y);AF4 hi=max(x,y);return max(lo,min(hi,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // V_MIN3_F32. Smallest of three floats, per component.
+ AF1 AMin3F1(AF1 x,AF1 y,AF1 z){AF1 yz=min(y,z);return min(x,yz);}
+ AF2 AMin3F2(AF2 x,AF2 y,AF2 z){AF2 yz=min(y,z);return min(x,yz);}
+ AF3 AMin3F3(AF3 x,AF3 y,AF3 z){AF3 yz=min(y,z);return min(x,yz);}
+ AF4 AMin3F4(AF4 x,AF4 y,AF4 z){AF4 yz=min(y,z);return min(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3SU1(AU1 x,AU1 y,AU1 z){ASU1 yz=min(ASU1(y),ASU1(z));return AU1(min(ASU1(x),yz));} // Signed min of three.
+ AU2 AMin3SU2(AU2 x,AU2 y,AU2 z){ASU2 yz=min(ASU2(y),ASU2(z));return AU2(min(ASU2(x),yz));}
+ AU3 AMin3SU3(AU3 x,AU3 y,AU3 z){ASU3 yz=min(ASU3(y),ASU3(z));return AU3(min(ASU3(x),yz));}
+ AU4 AMin3SU4(AU4 x,AU4 y,AU4 z){ASU4 yz=min(ASU4(y),ASU4(z));return AU4(min(ASU4(x),yz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3U1(AU1 x,AU1 y,AU1 z){AU1 yz=min(y,z);return min(x,yz);} // Unsigned min of three.
+ AU2 AMin3U2(AU2 x,AU2 y,AU2 z){AU2 yz=min(y,z);return min(x,yz);}
+ AU3 AMin3U3(AU3 x,AU3 y,AU3 z){AU3 yz=min(y,z);return min(x,yz);}
+ AU4 AMin3U4(AU4 x,AU4 y,AU4 z){AU4 yz=min(y,z);return min(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMinSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(min(sa,sb));} // Signed min of two, carried as unsigned.
+ AU2 AMinSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(min(sa,sb));}
+ AU3 AMinSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(min(sa,sb));}
+ AU4 AMinSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(min(sa,sb));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently.
+ // V_COS_F32. The input is scaled by A_2PI before calling cos().
+ AF1 ANCosF1(AF1 x){AF1 scaled=x*AF1_(A_2PI);return cos(scaled);}
+ AF2 ANCosF2(AF2 x){AF2 scaled=x*AF2_(A_2PI);return cos(scaled);}
+ AF3 ANCosF3(AF3 x){AF3 scaled=x*AF3_(A_2PI);return cos(scaled);}
+ AF4 ANCosF4(AF4 x){AF4 scaled=x*AF4_(A_2PI);return cos(scaled);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Normalized trig. Valid input domain is {-256 to +256}. No GLSL compiler intrinsic exists to map to this currently.
+ // V_SIN_F32. The input is scaled by A_2PI before calling sin().
+ AF1 ANSinF1(AF1 x){AF1 scaled=x*AF1_(A_2PI);return sin(scaled);}
+ AF2 ANSinF2(AF2 x){AF2 scaled=x*AF2_(A_2PI);return sin(scaled);}
+ AF3 ANSinF3(AF3 x){AF3 scaled=x*AF3_(A_2PI);return sin(scaled);}
+ AF4 ANSinF4(AF4 x){AF4 scaled=x*AF4_(A_2PI);return sin(scaled);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARcpF1(AF1 x){AF1 one=AF1_(1.0);return one/x;} // Reciprocal: 1.0/x per component.
+ AF2 ARcpF2(AF2 x){AF2 one=AF2_(1.0);return one/x;}
+ AF3 ARcpF3(AF3 x){AF3 one=AF3_(1.0);return one/x;}
+ AF4 ARcpF4(AF4 x){AF4 one=AF4_(1.0);return one/x;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARsqF1(AF1 x){AF1 root=sqrt(x);return AF1_(1.0)/root;} // Reciprocal square root: 1.0/sqrt(x) per component.
+ AF2 ARsqF2(AF2 x){AF2 root=sqrt(x);return AF2_(1.0)/root;}
+ AF3 ARsqF3(AF3 x){AF3 root=sqrt(x);return AF3_(1.0)/root;}
+ AF4 ARsqF4(AF4 x){AF4 root=sqrt(x);return AF4_(1.0)/root;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ASatF1(AF1 x){AF1 lo=AF1_(0.0);AF1 hi=AF1_(1.0);return clamp(x,lo,hi);} // Saturate: clamp to [0.0,1.0].
+ AF2 ASatF2(AF2 x){AF2 lo=AF2_(0.0);AF2 hi=AF2_(1.0);return clamp(x,lo,hi);}
+ AF3 ASatF3(AF3 x){AF3 lo=AF3_(0.0);AF3 hi=AF3_(1.0);return clamp(x,lo,hi);}
+ AF4 ASatF4(AF4 x){AF4 lo=AF4_(0.0);AF4 hi=AF4_(1.0);return clamp(x,lo,hi);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AShrSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(sa>>sb);} // Right shift done on the signed view of 'a'.
+ AU2 AShrSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(sa>>sb);}
+ AU3 AShrSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(sa>>sb);}
+ AU4 AShrSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(sa>>sb);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          GLSL BYTE
+//==============================================================================================================================
+ #ifdef A_BYTE
+  #define AB1 uint8_t // Unsigned 8-bit scalar; AB2..AB4 are the matching vectors.
+  #define AB2 u8vec2
+  #define AB3 u8vec3
+  #define AB4 u8vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASB1 int8_t // Signed 8-bit scalar; ASB2..ASB4 are the matching vectors.
+  #define ASB2 i8vec2
+  #define ASB3 i8vec3
+  #define ASB4 i8vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  AB1 AB1_x(AB1 s){AB1 r=AB1(s);return r;} // Broadcast helpers: replicate one byte into every component.
+  AB2 AB2_x(AB1 s){AB2 r=AB2(s,s);return r;}
+  AB3 AB3_x(AB1 s){AB3 r=AB3(s,s,s);return r;}
+  AB4 AB4_x(AB1 s){AB4 r=AB4(s,s,s,s);return r;}
+  #define AB1_(a) AB1_x(AB1(a)) // Macro forms cast the argument to AB1 before broadcasting.
+  #define AB2_(a) AB2_x(AB1(a))
+  #define AB3_(a) AB3_x(AB1(a))
+  #define AB4_(a) AB4_x(AB1(a))
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         GLSL DOUBLE
+//==============================================================================================================================
+ #ifdef A_DUBL
+  #define AD1 double // Double-precision scalar; AD2..AD4 are the matching vectors.
+  #define AD2 dvec2
+  #define AD3 dvec3
+  #define AD4 dvec4
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 AD1_x(AD1 s){AD1 r=AD1(s);return r;} // Broadcast helpers: replicate one double into every component.
+  AD2 AD2_x(AD1 s){AD2 r=AD2(s,s);return r;}
+  AD3 AD3_x(AD1 s){AD3 r=AD3(s,s,s);return r;}
+  AD4 AD4_x(AD1 s){AD4 r=AD4(s,s,s,s);return r;}
+  #define AD1_(a) AD1_x(AD1(a)) // Macro forms cast the argument to AD1 before broadcasting.
+  #define AD2_(a) AD2_x(AD1(a))
+  #define AD3_(a) AD3_x(AD1(a))
+  #define AD4_(a) AD4_x(AD1(a))
+//==============================================================================================================================
+  AD1 AFractD1(AD1 x){AD1 r=fract(x);return r;} // Forwards to the fract() builtin.
+  AD2 AFractD2(AD2 x){AD2 r=fract(x);return r;}
+  AD3 AFractD3(AD3 x){AD3 r=fract(x);return r;}
+  AD4 AFractD4(AD4 x){AD4 r=fract(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ALerpD1(AD1 x,AD1 y,AD1 a){AD1 r=mix(x,y,a);return r;} // Interpolate from x to y by a, via the mix() builtin.
+  AD2 ALerpD2(AD2 x,AD2 y,AD2 a){AD2 r=mix(x,y,a);return r;}
+  AD3 ALerpD3(AD3 x,AD3 y,AD3 a){AD3 r=mix(x,y,a);return r;}
+  AD4 ALerpD4(AD4 x,AD4 y,AD4 a){AD4 r=mix(x,y,a);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARcpD1(AD1 x){AD1 one=AD1_(1.0);return one/x;} // Reciprocal: 1.0/x per component.
+  AD2 ARcpD2(AD2 x){AD2 one=AD2_(1.0);return one/x;}
+  AD3 ARcpD3(AD3 x){AD3 one=AD3_(1.0);return one/x;}
+  AD4 ARcpD4(AD4 x){AD4 one=AD4_(1.0);return one/x;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARsqD1(AD1 x){AD1 root=sqrt(x);return AD1_(1.0)/root;} // Reciprocal square root: 1.0/sqrt(x) per component.
+  AD2 ARsqD2(AD2 x){AD2 root=sqrt(x);return AD2_(1.0)/root;}
+  AD3 ARsqD3(AD3 x){AD3 root=sqrt(x);return AD3_(1.0)/root;}
+  AD4 ARsqD4(AD4 x){AD4 root=sqrt(x);return AD4_(1.0)/root;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ASatD1(AD1 x){AD1 lo=AD1_(0.0);AD1 hi=AD1_(1.0);return clamp(x,lo,hi);} // Saturate: clamp to [0.0,1.0].
+  AD2 ASatD2(AD2 x){AD2 lo=AD2_(0.0);AD2 hi=AD2_(1.0);return clamp(x,lo,hi);}
+  AD3 ASatD3(AD3 x){AD3 lo=AD3_(0.0);AD3 hi=AD3_(1.0);return clamp(x,lo,hi);}
+  AD4 ASatD4(AD4 x){AD4 lo=AD4_(0.0);AD4 hi=AD4_(1.0);return clamp(x,lo,hi);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         GLSL LONG
+//==============================================================================================================================
+ #ifdef A_LONG
+  #define AL1 uint64_t // Unsigned 64-bit scalar; AL2..AL4 are the matching vectors.
+  #define AL2 u64vec2
+  #define AL3 u64vec3
+  #define AL4 u64vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASL1 int64_t // Signed 64-bit scalar; ASL2..ASL4 are the matching vectors.
+  #define ASL2 i64vec2
+  #define ASL3 i64vec3
+  #define ASL4 i64vec4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AL1_AU2(x) packUint2x32(AU2(x)) // Pack two 32-bit uints into one 64-bit value.
+  #define AU2_AL1(x) unpackUint2x32(AL1(x)) // Split one 64-bit value into two 32-bit uints.
+//------------------------------------------------------------------------------------------------------------------------------
+  AL1 AL1_x(AL1 s){AL1 r=AL1(s);return r;} // Broadcast helpers: replicate one 64-bit value into every component.
+  AL2 AL2_x(AL1 s){AL2 r=AL2(s,s);return r;}
+  AL3 AL3_x(AL1 s){AL3 r=AL3(s,s,s);return r;}
+  AL4 AL4_x(AL1 s){AL4 r=AL4(s,s,s,s);return r;}
+  #define AL1_(a) AL1_x(AL1(a)) // Macro forms cast the argument to AL1 before broadcasting.
+  #define AL2_(a) AL2_x(AL1(a))
+  #define AL3_(a) AL3_x(AL1(a))
+  #define AL4_(a) AL4_x(AL1(a))
+//==============================================================================================================================
+  AL1 AAbsSL1(AL1 a){ASL1 s=ASL1(a);return AL1(abs(s));} // abs() of the value viewed as signed 64-bit, returned as unsigned.
+  AL2 AAbsSL2(AL2 a){ASL2 s=ASL2(a);return AL2(abs(s));}
+  AL3 AAbsSL3(AL3 a){ASL3 s=ASL3(a);return AL3(abs(s));}
+  AL4 AAbsSL4(AL4 a){ASL4 s=ASL4(a);return AL4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASL1(a),ASL1(b)));} // Signed 64-bit max; ASL (not 32-bit ASU) casts keep all 64 bits.
+  AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASL2(a),ASL2(b)));}
+  AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASL3(a),ASL3(b)));}
+  AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASL4(a),ASL4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASL1(a),ASL1(b)));} // Signed 64-bit min; ASL (not 32-bit ASU) casts keep all 64 bits.
+  AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASL2(a),ASL2(b)));}
+  AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASL3(a),ASL3(b)));}
+  AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASL4(a),ASL4(b)));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      WAVE OPERATIONS
+//==============================================================================================================================
+ #ifdef A_WAVE
+  // Where 'x' must be a compile time literal. Each wrapper forwards to subgroupShuffleXor(v,x).
+  AF1 AWaveXorF1(AF1 v,AU1 x){AF1 r=subgroupShuffleXor(v,x);return r;}
+  AF2 AWaveXorF2(AF2 v,AU1 x){AF2 r=subgroupShuffleXor(v,x);return r;}
+  AF3 AWaveXorF3(AF3 v,AU1 x){AF3 r=subgroupShuffleXor(v,x);return r;}
+  AF4 AWaveXorF4(AF4 v,AU1 x){AF4 r=subgroupShuffleXor(v,x);return r;}
+  AU1 AWaveXorU1(AU1 v,AU1 x){AU1 r=subgroupShuffleXor(v,x);return r;}
+  AU2 AWaveXorU2(AU2 v,AU1 x){AU2 r=subgroupShuffleXor(v,x);return r;}
+  AU3 AWaveXorU3(AU3 v,AU1 x){AU3 r=subgroupShuffleXor(v,x);return r;}
+  AU4 AWaveXorU4(AU4 v,AU1 x){AU4 r=subgroupShuffleXor(v,x);return r;}
+ #endif
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                            HLSL
+//
+//
+//==============================================================================================================================
+#if defined(A_HLSL) && defined(A_GPU)
+ #ifdef A_HLSL_6_2 // With A_HLSL_6_2 defined the aliases map to the explicitly sized 32-bit type names.
+  #define AP1 bool // Boolean scalar; AP2..AP4 are the matching vectors.
+  #define AP2 bool2
+  #define AP3 bool3
+  #define AP4 bool4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AF1 float32_t // 32-bit float scalar; AF2..AF4 are the matching vectors.
+  #define AF2 float32_t2
+  #define AF3 float32_t3
+  #define AF4 float32_t4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AU1 uint32_t // 32-bit unsigned integer scalar; AU2..AU4 are the matching vectors.
+  #define AU2 uint32_t2
+  #define AU3 uint32_t3
+  #define AU4 uint32_t4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASU1 int32_t // 32-bit signed integer scalar; ASU2..ASU4 are the matching vectors.
+  #define ASU2 int32_t2
+  #define ASU3 int32_t3
+  #define ASU4 int32_t4
+ #else // Otherwise the aliases map to the plain bool/float/uint/int type names.
+  #define AP1 bool
+  #define AP2 bool2
+  #define AP3 bool3
+  #define AP4 bool4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AF1 float
+  #define AF2 float2
+  #define AF3 float3
+  #define AF4 float4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define AU1 uint
+  #define AU2 uint2
+  #define AU3 uint3
+  #define AU4 uint4
+//------------------------------------------------------------------------------------------------------------------------------
+  #define ASU1 int
+  #define ASU2 int2
+  #define ASU3 int3
+  #define ASU4 int4
+ #endif
+//==============================================================================================================================
+ #define AF1_AU1(x) asfloat(AU1(x)) // Reinterpret unsigned integer bits as float (bit pattern kept, no numeric conversion).
+ #define AF2_AU2(x) asfloat(AU2(x))
+ #define AF3_AU3(x) asfloat(AU3(x))
+ #define AF4_AU4(x) asfloat(AU4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1_AF1(x) asuint(AF1(x)) // Reinterpret float bits as unsigned integer (bit pattern kept, no numeric conversion).
+ #define AU2_AF2(x) asuint(AF2(x))
+ #define AU3_AF3(x) asuint(AF3(x))
+ #define AU4_AF4(x) asuint(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_AH1_AF1_x(AF1 a){AU1 h=f32tof16(a);return h;} // Float to half bits via f32tof16().
+ #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_AH2_AF2_x(AF2 a){AU1 lo=f32tof16(a.x);AU1 hi=f32tof16(a.y);return lo|(hi<<16);} // a.x in low 16 bits, a.y in high 16 bits.
+ #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a))
+ #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 AF2_AH2_AU1_x(AU1 x){AF1 lo=f16tof32(x&0xFFFF);AF1 hi=f16tof32(x>>16);return AF2(lo,hi);} // Unpack: low half to .x, high half to .y.
+ #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x))
+//==============================================================================================================================
+ AF1 AF1_x(AF1 s){AF1 r=AF1(s);return r;} // Broadcast helpers: replicate one float into every component.
+ AF2 AF2_x(AF1 s){AF2 r=AF2(s,s);return r;}
+ AF3 AF3_x(AF1 s){AF3 r=AF3(s,s,s);return r;}
+ AF4 AF4_x(AF1 s){AF4 r=AF4(s,s,s,s);return r;}
+ #define AF1_(a) AF1_x(AF1(a)) // Macro forms cast the argument to AF1 before broadcasting.
+ #define AF2_(a) AF2_x(AF1(a))
+ #define AF3_(a) AF3_x(AF1(a))
+ #define AF4_(a) AF4_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AU1_x(AU1 s){AU1 r=AU1(s);return r;} // Broadcast helpers: replicate one uint into every component.
+ AU2 AU2_x(AU1 s){AU2 r=AU2(s,s);return r;}
+ AU3 AU3_x(AU1 s){AU3 r=AU3(s,s,s);return r;}
+ AU4 AU4_x(AU1 s){AU4 r=AU4(s,s,s,s);return r;}
+ #define AU1_(a) AU1_x(AU1(a)) // Macro forms cast the argument to AU1 before broadcasting.
+ #define AU2_(a) AU2_x(AU1(a))
+ #define AU3_(a) AU3_x(AU1(a))
+ #define AU4_(a) AU4_x(AU1(a))
+//==============================================================================================================================
+ AU1 AAbsSU1(AU1 a){ASU1 s=ASU1(a);return AU1(abs(s));} // absolute value of 'a' treated as signed; result returned as unsigned
+ AU2 AAbsSU2(AU2 a){ASU2 s=ASU2(a);return AU2(abs(s));}
+ AU3 AAbsSU3(AU3 a){ASU3 s=ASU3(a);return AU3(abs(s));}
+ AU4 AAbsSU4(AU4 a){ASU4 s=ASU4(a);return AU4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 ABfe(AU1 src,AU1 off,AU1 bits){AU1 field=src>>off;return field&((1u<<bits)-1);} // extract 'bits' bits of 'src' starting at bit 'off'
+ AU1 ABfi(AU1 src,AU1 ins,AU1 mask){AU1 keep=src&(~mask);AU1 put=ins&mask;return put|keep;} // take 'ins' where mask=1, 'src' where mask=0
+ AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){AU1 lowMask=(1u<<bits)-1;AU1 keep=src&(~lowMask);return (ins&lowMask)|keep;} // ABfi with a low 'bits'-wide mask
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AClampF1(AF1 x,AF1 n,AF1 m){AF1 capped=min(x,m);return max(n,capped);} // cap x at m first, then raise to at least n
+ AF2 AClampF2(AF2 x,AF2 n,AF2 m){AF2 capped=min(x,m);return max(n,capped);}
+ AF3 AClampF3(AF3 x,AF3 n,AF3 m){AF3 capped=min(x,m);return max(n,capped);}
+ AF4 AClampF4(AF4 x,AF4 n,AF4 m){AF4 capped=min(x,m);return max(n,capped);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AFractF1(AF1 x){AF1 whole=floor(x);return x-whole;} // fractional part computed as x-floor(x)
+ AF2 AFractF2(AF2 x){AF2 whole=floor(x);return x-whole;}
+ AF3 AFractF3(AF3 x){AF3 whole=floor(x);return x-whole;}
+ AF4 AFractF4(AF4 x){AF4 whole=floor(x);return x-whole;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ALerpF1(AF1 x,AF1 y,AF1 a){AF1 r=lerp(x,y,a);return r;} // thin wrapper over the lerp intrinsic
+ AF2 ALerpF2(AF2 x,AF2 y,AF2 a){AF2 r=lerp(x,y,a);return r;}
+ AF3 ALerpF3(AF3 x,AF3 y,AF3 a){AF3 r=lerp(x,y,a);return r;}
+ AF4 ALerpF4(AF4 x,AF4 y,AF4 a){AF4 r=lerp(x,y,a);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AMax3F1(AF1 x,AF1 y,AF1 z){AF1 yz=max(y,z);return max(x,yz);} // largest of three floats
+ AF2 AMax3F2(AF2 x,AF2 y,AF2 z){AF2 yz=max(y,z);return max(x,yz);}
+ AF3 AMax3F3(AF3 x,AF3 y,AF3 z){AF3 yz=max(y,z);return max(x,yz);}
+ AF4 AMax3F4(AF4 x,AF4 y,AF4 z){AF4 yz=max(y,z);return max(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3SU1(AU1 x,AU1 y,AU1 z){ASU1 yz=max(ASU1(y),ASU1(z));return AU1(max(ASU1(x),yz));} // largest of three, compared as signed
+ AU2 AMax3SU2(AU2 x,AU2 y,AU2 z){ASU2 yz=max(ASU2(y),ASU2(z));return AU2(max(ASU2(x),yz));}
+ AU3 AMax3SU3(AU3 x,AU3 y,AU3 z){ASU3 yz=max(ASU3(y),ASU3(z));return AU3(max(ASU3(x),yz));}
+ AU4 AMax3SU4(AU4 x,AU4 y,AU4 z){ASU4 yz=max(ASU4(y),ASU4(z));return AU4(max(ASU4(x),yz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMax3U1(AU1 x,AU1 y,AU1 z){AU1 yz=max(y,z);return max(x,yz);} // largest of three, compared as unsigned
+ AU2 AMax3U2(AU2 x,AU2 y,AU2 z){AU2 yz=max(y,z);return max(x,yz);}
+ AU3 AMax3U3(AU3 x,AU3 y,AU3 z){AU3 yz=max(y,z);return max(x,yz);}
+ AU4 AMax3U4(AU4 x,AU4 y,AU4 z){AU4 yz=max(y,z);return max(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMaxSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(max(sa,sb));} // larger of two, compared as signed
+ AU2 AMaxSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(max(sa,sb));}
+ AU3 AMaxSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(max(sa,sb));}
+ AU4 AMaxSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(max(sa,sb));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AMed3F1(AF1 x,AF1 y,AF1 z){AF1 lo=min(x,y);AF1 hi=max(x,y);return max(lo,min(hi,z));} // median of three: z limited to [min(x,y),max(x,y)]
+ AF2 AMed3F2(AF2 x,AF2 y,AF2 z){AF2 lo=min(x,y);AF2 hi=max(x,y);return max(lo,min(hi,z));}
+ AF3 AMed3F3(AF3 x,AF3 y,AF3 z){AF3 lo=min(x,y);AF3 hi=max(x,y);return max(lo,min(hi,z));}
+ AF4 AMed3F4(AF4 x,AF4 y,AF4 z){AF4 lo=min(x,y);AF4 hi=max(x,y);return max(lo,min(hi,z));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AMin3F1(AF1 x,AF1 y,AF1 z){AF1 yz=min(y,z);return min(x,yz);} // smallest of three floats
+ AF2 AMin3F2(AF2 x,AF2 y,AF2 z){AF2 yz=min(y,z);return min(x,yz);}
+ AF3 AMin3F3(AF3 x,AF3 y,AF3 z){AF3 yz=min(y,z);return min(x,yz);}
+ AF4 AMin3F4(AF4 x,AF4 y,AF4 z){AF4 yz=min(y,z);return min(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3SU1(AU1 x,AU1 y,AU1 z){ASU1 yz=min(ASU1(y),ASU1(z));return AU1(min(ASU1(x),yz));} // smallest of three, compared as signed
+ AU2 AMin3SU2(AU2 x,AU2 y,AU2 z){ASU2 yz=min(ASU2(y),ASU2(z));return AU2(min(ASU2(x),yz));}
+ AU3 AMin3SU3(AU3 x,AU3 y,AU3 z){ASU3 yz=min(ASU3(y),ASU3(z));return AU3(min(ASU3(x),yz));}
+ AU4 AMin3SU4(AU4 x,AU4 y,AU4 z){ASU4 yz=min(ASU4(y),ASU4(z));return AU4(min(ASU4(x),yz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMin3U1(AU1 x,AU1 y,AU1 z){AU1 yz=min(y,z);return min(x,yz);} // smallest of three, compared as unsigned
+ AU2 AMin3U2(AU2 x,AU2 y,AU2 z){AU2 yz=min(y,z);return min(x,yz);}
+ AU3 AMin3U3(AU3 x,AU3 y,AU3 z){AU3 yz=min(y,z);return min(x,yz);}
+ AU4 AMin3U4(AU4 x,AU4 y,AU4 z){AU4 yz=min(y,z);return min(x,yz);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AMinSU1(AU1 a,AU1 b){ASU1 sa=ASU1(a);ASU1 sb=ASU1(b);return AU1(min(sa,sb));} // smaller of two, compared as signed
+ AU2 AMinSU2(AU2 a,AU2 b){ASU2 sa=ASU2(a);ASU2 sb=ASU2(b);return AU2(min(sa,sb));}
+ AU3 AMinSU3(AU3 a,AU3 b){ASU3 sa=ASU3(a);ASU3 sb=ASU3(b);return AU3(min(sa,sb));}
+ AU4 AMinSU4(AU4 a,AU4 b){ASU4 sa=ASU4(a);ASU4 sb=ASU4(b);return AU4(min(sa,sb));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ANCosF1(AF1 x){AF1 k=AF1_(A_2PI);return cos(x*k);} // cosine of x scaled by A_2PI
+ AF2 ANCosF2(AF2 x){AF2 k=AF2_(A_2PI);return cos(x*k);}
+ AF3 ANCosF3(AF3 x){AF3 k=AF3_(A_2PI);return cos(x*k);}
+ AF4 ANCosF4(AF4 x){AF4 k=AF4_(A_2PI);return cos(x*k);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ANSinF1(AF1 x){AF1 k=AF1_(A_2PI);return sin(x*k);} // sine of x scaled by A_2PI
+ AF2 ANSinF2(AF2 x){AF2 k=AF2_(A_2PI);return sin(x*k);}
+ AF3 ANSinF3(AF3 x){AF3 k=AF3_(A_2PI);return sin(x*k);}
+ AF4 ANSinF4(AF4 x){AF4 k=AF4_(A_2PI);return sin(x*k);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARcpF1(AF1 x){AF1 r=rcp(x);return r;} // thin wrapper over the rcp intrinsic
+ AF2 ARcpF2(AF2 x){AF2 r=rcp(x);return r;}
+ AF3 ARcpF3(AF3 x){AF3 r=rcp(x);return r;}
+ AF4 ARcpF4(AF4 x){AF4 r=rcp(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ARsqF1(AF1 x){AF1 r=rsqrt(x);return r;} // thin wrapper over the rsqrt intrinsic
+ AF2 ARsqF2(AF2 x){AF2 r=rsqrt(x);return r;}
+ AF3 ARsqF3(AF3 x){AF3 r=rsqrt(x);return r;}
+ AF4 ARsqF4(AF4 x){AF4 r=rsqrt(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ASatF1(AF1 x){AF1 r=saturate(x);return r;} // thin wrapper over the saturate intrinsic
+ AF2 ASatF2(AF2 x){AF2 r=saturate(x);return r;}
+ AF3 ASatF3(AF3 x){AF3 r=saturate(x);return r;}
+ AF4 ASatF4(AF4 x){AF4 r=saturate(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AShrSU1(AU1 a,AU1 b){ASU1 v=ASU1(a);ASU1 n=ASU1(b);return AU1(v>>n);} // right shift performed on the signed reinterpretation of 'a'
+ AU2 AShrSU2(AU2 a,AU2 b){ASU2 v=ASU2(a);ASU2 n=ASU2(b);return AU2(v>>n);}
+ AU3 AShrSU3(AU3 a,AU3 b){ASU3 v=ASU3(a);ASU3 n=ASU3(b);return AU3(v>>n);}
+ AU4 AShrSU4(AU4 a,AU4 b){ASU4 v=ASU4(a);ASU4 n=ASU4(b);return AU4(v>>n);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          HLSL BYTE
+//==============================================================================================================================
+ #ifdef A_BYTE
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         HLSL DOUBLE
+//==============================================================================================================================
+ #ifdef A_DUBL // 64-bit float type aliases and helpers; compiled only when A_DUBL is defined
+  #ifdef A_HLSL_6_2
+   #define AD1 float64_t
+   #define AD2 float64_t2
+   #define AD3 float64_t3
+   #define AD4 float64_t4
+  #else
+   #define AD1 double
+   #define AD2 double2
+   #define AD3 double3
+   #define AD4 double4
+  #endif
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 AD1_x(AD1 a){AD1 v=AD1(a);return v;} // ADn_x: replicate one double scalar into every component of an n-wide vector
+  AD2 AD2_x(AD1 a){AD2 v=AD2(a,a);return v;}
+  AD3 AD3_x(AD1 a){AD3 v=AD3(a,a,a);return v;}
+  AD4 AD4_x(AD1 a){AD4 v=AD4(a,a,a,a);return v;}
+  #define AD1_(s) AD1_x(AD1(s))
+  #define AD2_(s) AD2_x(AD1(s))
+  #define AD3_(s) AD3_x(AD1(s))
+  #define AD4_(s) AD4_x(AD1(s))
+//==============================================================================================================================
+  AD1 AFractD1(AD1 a){AD1 whole=floor(a);return a-whole;} // fractional part computed as a-floor(a)
+  AD2 AFractD2(AD2 a){AD2 whole=floor(a);return a-whole;}
+  AD3 AFractD3(AD3 a){AD3 whole=floor(a);return a-whole;}
+  AD4 AFractD4(AD4 a){AD4 whole=floor(a);return a-whole;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ALerpD1(AD1 x,AD1 y,AD1 a){AD1 r=lerp(x,y,a);return r;} // thin wrapper over the lerp intrinsic
+  AD2 ALerpD2(AD2 x,AD2 y,AD2 a){AD2 r=lerp(x,y,a);return r;}
+  AD3 ALerpD3(AD3 x,AD3 y,AD3 a){AD3 r=lerp(x,y,a);return r;}
+  AD4 ALerpD4(AD4 x,AD4 y,AD4 a){AD4 r=lerp(x,y,a);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARcpD1(AD1 x){AD1 r=rcp(x);return r;} // thin wrapper over the rcp intrinsic
+  AD2 ARcpD2(AD2 x){AD2 r=rcp(x);return r;}
+  AD3 ARcpD3(AD3 x){AD3 r=rcp(x);return r;}
+  AD4 ARcpD4(AD4 x){AD4 r=rcp(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ARsqD1(AD1 x){AD1 r=rsqrt(x);return r;} // thin wrapper over the rsqrt intrinsic
+  AD2 ARsqD2(AD2 x){AD2 r=rsqrt(x);return r;}
+  AD3 ARsqD3(AD3 x){AD3 r=rsqrt(x);return r;}
+  AD4 ARsqD4(AD4 x){AD4 r=rsqrt(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD1 ASatD1(AD1 x){AD1 r=saturate(x);return r;} // thin wrapper over the saturate intrinsic
+  AD2 ASatD2(AD2 x){AD2 r=saturate(x);return r;}
+  AD3 ASatD3(AD3 x){AD3 r=saturate(x);return r;}
+  AD4 ASatD4(AD4 x){AD4 r=saturate(x);return r;}
+ #endif
+//==============================================================================================================================
+//                                                         HLSL WAVE
+//==============================================================================================================================
+ #ifdef A_WAVE
+  // Returns 'v' as read from lane (WaveGetLaneIndex()^x). Where 'x' must be a compile time literal. Name suffix gives type and width of 'v'.
+  AF1 AWaveXorF1(AF1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AF2 AWaveXorF2(AF2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AF3 AWaveXorF3(AF3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AF4 AWaveXorF4(AF4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU1 AWaveXorU1(AU1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+  AU2 AWaveXorU2(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU2 AWaveXorU1(AU2 v,AU1 x){return AWaveXorU2(v,x);} // 2nd def: legacy misnamed overload kept for existing callers
+  AU3 AWaveXorU3(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU3 AWaveXorU1(AU3 v,AU1 x){return AWaveXorU3(v,x);} // 2nd def: legacy misnamed overload kept for existing callers
+  AU4 AWaveXorU4(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} AU4 AWaveXorU1(AU4 v,AU1 x){return AWaveXorU4(v,x);} // 2nd def: legacy misnamed overload kept for existing callers
+ #endif
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                          GPU COMMON
+//
+//
+//==============================================================================================================================
+#ifdef A_GPU
+ // Negative and positive infinity, built by reinterpreting the raw bit patterns 0x7f800000 (A_INFP_F) and 0xff800000 (A_INFN_F) as float.
+ #define A_INFP_F AF1_AU1(0x7f800000u)
+ #define A_INFN_F AF1_AU1(0xff800000u)
+//------------------------------------------------------------------------------------------------------------------------------
+ // Copy sign from 's' to positive 'd': the sign bit (0x80000000) of 's' is OR-ed into the bits of 'd', so 'd' must have its sign bit clear.
+ AF1 ACpySgnF1(AF1 d,AF1 s){AU1 sgn=AU1_AF1(s)&AU1_(0x80000000u);return AF1_AU1(AU1_AF1(d)|sgn);}
+ AF2 ACpySgnF2(AF2 d,AF2 s){AU2 sgn=AU2_AF2(s)&AU2_(0x80000000u);return AF2_AU2(AU2_AF2(d)|sgn);}
+ AF3 ACpySgnF3(AF3 d,AF3 s){AU3 sgn=AU3_AF3(s)&AU3_(0x80000000u);return AF3_AU3(AU3_AF3(d)|sgn);}
+ AF4 ACpySgnF4(AF4 d,AF4 s){AU4 sgn=AU4_AF4(s)&AU4_(0x80000000u);return AF4_AU4(AU4_AF4(d)|sgn);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Single operation to return (useful to create a mask to use in lerp for branch free logic),
+ //  m=NaN := 0
+ //  m>=0  := 0
+ //  m<0   := 1
+ // Uses the following useful floating point logic,
+ //  saturate(+a*(-INF)==-INF) := 0
+ //  saturate( 0*(-INF)== NaN) := 0
+ //  saturate(-a*(-INF)==+INF) := 1
+ AF1 ASignedF1(AF1 m){AF1 p=m*AF1_(A_INFN_F);return ASatF1(p);} // saturate(m*(-INF)): mask is 1 for m<0, else 0
+ AF2 ASignedF2(AF2 m){AF2 p=m*AF2_(A_INFN_F);return ASatF2(p);}
+ AF3 ASignedF3(AF3 m){AF3 p=m*AF3_(A_INFN_F);return ASatF3(p);}
+ AF4 ASignedF4(AF4 m){AF4 p=m*AF4_(A_INFN_F);return ASatF4(p);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AGtZeroF1(AF1 m){AF1 p=m*AF1_(A_INFP_F);return ASatF1(p);} // saturate(m*(+INF)): same trick as ASignedF* with +INF, selecting m>0
+ AF2 AGtZeroF2(AF2 m){AF2 p=m*AF2_(A_INFP_F);return ASatF2(p);}
+ AF3 AGtZeroF3(AF3 m){AF3 p=m*AF3_(A_INFP_F);return ASatF3(p);}
+ AF4 AGtZeroF4(AF4 m){AF4 p=m*AF4_(A_INFP_F);return ASatF4(p);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                [FIS] FLOAT INTEGER SORTABLE
+//------------------------------------------------------------------------------------------------------------------------------
+// Float to integer sortable.
+//  - If sign bit=0, flip the sign bit (positives).
+//  - If sign bit=1, flip all bits     (negatives).
+// Integer sortable to float.
+//  - If sign bit=1, flip the sign bit (positives).
+//  - If sign bit=0, flip all bits     (negatives).
+// Has nice side effects.
+//  - Larger integers are more positive values.
+//  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+// Burns 3 ops for conversion {shift,or,xor}.
+//==============================================================================================================================
+ AU1 AFisToU1(AU1 x){AU1 signFill=AShrSU1(x,AU1_(31));return x^(signFill|AU1_(0x80000000));} // signFill is all ones when bit 31 is set
+ AU1 AFisFromU1(AU1 x){AU1 invFill=~AShrSU1(x,AU1_(31));return x^(invFill|AU1_(0x80000000));} // invFill is all ones when bit 31 is clear
+//------------------------------------------------------------------------------------------------------------------------------
+ // Just adjust high 16-bit value (useful when upper part of 32-bit word is a 16-bit float value); XOR mask is limited to bits 16..31 so the low half is untouched.
+ AU1 AFisToHiU1(AU1 x){return x^((( AShrSU1(x,AU1_(15)))&AU1_(0xffff0000))|AU1_(0x80000000));}
+ AU1 AFisFromHiU1(AU1 x){return x^(((~AShrSU1(x,AU1_(15)))&AU1_(0xffff0000))|AU1_(0x80000000));}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                               [BUC] BYTE UNSIGNED CONVERSION
+//------------------------------------------------------------------------------------------------------------------------------
+// Designed to use the optimal conversion, enables the scaling to possibly be factored into other computation.
+// Works on a range of {0 to A_BUC_<32,16>}, for <32-bit, and 16-bit> respectively.
+//------------------------------------------------------------------------------------------------------------------------------
+// OPCODE NOTES
+// ============
+// GCN does not do UNORM or SNORM for bytes in opcodes.
+//  - V_CVT_F32_UBYTE{0,1,2,3} - Unsigned byte to float.
+//  - V_CVT_PKACCUM_U8_F32 - Float to unsigned byte (does bit-field insert into 32-bit integer).
+// V_PERM_B32 does byte packing with ability to zero fill bytes as well.
+//  - Can pull out byte values from two sources, and zero fill upper 8-bits of packed hi and lo.
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U1() - Designed for V_CVT_F32_UBYTE* and V_CVT_PKACCUM_U8_F32 ops.
+// ====   =====
+//    0 : 0
+//    1 : 1
+//     ...
+//  255 : 255
+//      : 256 (just outside the encoding range)
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32.
+// ====   =====
+//    0 : 0
+//    1 : 1/512
+//    2 : 1/256
+//     ...
+//   64 : 1/8
+//  128 : 1/4
+//  255 : 255/512
+//      : 1/2 (just outside the encoding range)
+//------------------------------------------------------------------------------------------------------------------------------
+// OPTIMAL IMPLEMENTATIONS ON AMD ARCHITECTURES
+// ============================================
+// r=ABuc0FromU1(i)
+//   V_CVT_F32_UBYTE0 r,i
+// --------------------------------------------
+// r=ABuc0ToU1(d,i)
+//   V_CVT_PKACCUM_U8_F32 r,i,0,d
+// --------------------------------------------
+// d=ABuc0FromU2(i)
+//   Where 'k0' is an SGPR with 0x0E0A
+//   Where 'k1' is an SGPR with {32768.0} packed into the lower 16-bits
+//   V_PERM_B32 d,i.x,i.y,k0
+//   V_PK_FMA_F16 d,d,k1.x,0
+// --------------------------------------------
+// r=ABuc0ToU2(d,i)
+//   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+//   Where 'k1' is an SGPR with 0x????
+//   Where 'k2' is an SGPR with 0x????
+//   V_PK_FMA_F16 i,i,k0.x,0
+//   V_PERM_B32 r.x,i,i,k1
+//   V_PERM_B32 r.y,i,i,k2
+//==============================================================================================================================
+ // Peak range for 32-bit and 16-bit operations: largest encodable value, 255 for the U1 path and 255/512 for the 16-bit (U2) path.
+ #define A_BUC_32 (255.0)
+ #define A_BUC_16 (255.0/512.0)
+//==============================================================================================================================
+ #if 1
+  // ABuc{0,1,2,3}ToU1(d,i): convert float 'i' to uint, limit it to 255, and insert it as byte N of 'd' (the other bytes of 'd' are kept).
+  // Designed to be one V_CVT_PKACCUM_U8_F32; the extra min is required to pattern match to V_CVT_PKACCUM_U8_F32.
+  AU1 ABuc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i),255u)    )&(0x000000ffu));} // NOTE(review): no lower clamp - presumably callers pass i>=0; confirm
+  AU1 ABuc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i),255u)<< 8)&(0x0000ff00u));}
+  AU1 ABuc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i),255u)<<16)&(0x00ff0000u));}
+  AU1 ABuc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i),255u)<<24)&(0xff000000u));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // ABuc{0,1,2,3}FromU1(i): extract byte N of 'i' and return it as a float (0 to 255). Designed to be one V_CVT_F32_UBYTE*.
+  AF1 ABuc0FromU1(AU1 i){return AF1((i    )&255u);}
+  AF1 ABuc1FromU1(AU1 i){return AF1((i>> 8)&255u);}
+  AF1 ABuc2FromU1(AU1 i){return AF1((i>>16)&255u);}
+  AF1 ABuc3FromU1(AU1 i){return AF1((i>>24)&255u);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                 [BSC] BYTE SIGNED CONVERSION
+//------------------------------------------------------------------------------------------------------------------------------
+// Similar to [BUC].
+// Works on a range of {-/+ A_BSC_<32,16>}, for <32-bit, and 16-bit> respectively.
+//------------------------------------------------------------------------------------------------------------------------------
+// ENCODING (without zero-based encoding)
+// ========
+//   0 = unused (can be used to mean something else)
+//   1 = lowest value
+// 128 = exact zero center (zero based encoding)
+// 255 = highest value
+//------------------------------------------------------------------------------------------------------------------------------
+// Zero-based [Zb] flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+// This is useful if there is a desire for cleared values to decode as zero.
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABsc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32.
+// ====   =====
+//    0 : -128/512 (unused)
+//    1 : -127/512
+//    2 : -126/512
+//     ...
+//  128 : 0
+//     ...
+//  255 : 127/512
+//      : 1/4 (just outside the encoding range)
+//==============================================================================================================================
+ // Peak range for 32-bit and 16-bit operations: largest encodable magnitude, 127 for the U1 path and 127/512 for the 16-bit (U2) path.
+ #define A_BSC_32 (127.0)
+ #define A_BSC_16 (127.0/512.0)
+//==============================================================================================================================
+ #if 1
+  AU1 ABsc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i+128.0),255u)    )&(0x000000ffu));} // ToU1: bias 'i' by +128, limit to 255, insert as byte N of 'd'
+  AU1 ABsc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i+128.0),255u)<< 8)&(0x0000ff00u));} // NOTE(review): no lower clamp - presumably callers pass i>=-128; confirm
+  AU1 ABsc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i+128.0),255u)<<16)&(0x00ff0000u));}
+  AU1 ABsc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i+128.0),255u)<<24)&(0xff000000u));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 ABsc0ToZbU1(AU1 d,AF1 i){return ((d&0xffffff00u)|((min(AU1(trunc(i)+128.0),255u)    )&(0x000000ffu)))^0x00000080u;} // ToZbU1: as ToU1, then flip bit 7 of byte N (zero-based form)
+  AU1 ABsc1ToZbU1(AU1 d,AF1 i){return ((d&0xffff00ffu)|((min(AU1(trunc(i)+128.0),255u)<< 8)&(0x0000ff00u)))^0x00008000u;} // NOTE(review): applies trunc(i) before the bias, unlike ToU1 - confirm intended
+  AU1 ABsc2ToZbU1(AU1 d,AF1 i){return ((d&0xff00ffffu)|((min(AU1(trunc(i)+128.0),255u)<<16)&(0x00ff0000u)))^0x00800000u;}
+  AU1 ABsc3ToZbU1(AU1 d,AF1 i){return ((d&0x00ffffffu)|((min(AU1(trunc(i)+128.0),255u)<<24)&(0xff000000u)))^0x80000000u;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 ABsc0FromU1(AU1 i){return AF1((i    )&255u)-128.0;} // FromU1: extract byte N as float and remove the +128 bias
+  AF1 ABsc1FromU1(AU1 i){return AF1((i>> 8)&255u)-128.0;}
+  AF1 ABsc2FromU1(AU1 i){return AF1((i>>16)&255u)-128.0;}
+  AF1 ABsc3FromU1(AU1 i){return AF1((i>>24)&255u)-128.0;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AF1 ABsc0FromZbU1(AU1 i){return AF1(((i    )&255u)^0x80u)-128.0;} // FromZbU1: extract byte N, flip bit 7 back, then remove the +128 bias
+  AF1 ABsc1FromZbU1(AU1 i){return AF1(((i>> 8)&255u)^0x80u)-128.0;}
+  AF1 ABsc2FromZbU1(AU1 i){return AF1(((i>>16)&255u)^0x80u)-128.0;}
+  AF1 ABsc3FromZbU1(AU1 i){return AF1(((i>>24)&255u)^0x80u)-128.0;}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                    FLOAT APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN",
+//  - Idea dates back to SGI, then to Quake 3, etc.
+//  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+//     - sqrt(x)=rsqrt(x)*x
+//     - rcp(x)=rsqrt(x)*rsqrt(x) for positive x
+//  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+//------------------------------------------------------------------------------------------------------------------------------
+// These below are from perhaps less complete searching for optimal.
+// Used FP16 normal range for testing with +4096 32-bit step size for sampling error.
+// So these match up well with the half approximations.
+//==============================================================================================================================
+ AF1 APrxLoSqrtF1(AF1 a){AU1 bits=AU1_AF1(a);return AF1_AU1((bits>>AU1_(1))+AU1_(0x1fbc4639));} // Lo: integer math on the float's bit pattern only
+ AF1 APrxLoRcpF1(AF1 a){AU1 bits=AU1_AF1(a);return AF1_AU1(AU1_(0x7ef07ebb)-bits);}
+ AF1 APrxMedRcpF1(AF1 a){AF1 est=AF1_AU1(AU1_(0x7ef19fff)-AU1_AF1(a));AF1 fix=-est*a+AF1_(2.0);return est*fix;} // Med: bit-pattern estimate, then est*(2-est*a)
+ AF1 APrxLoRsqF1(AF1 a){AU1 halved=AU1_AF1(a)>>AU1_(1);return AF1_AU1(AU1_(0x5f347d74)-halved);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 APrxLoSqrtF2(AF2 a){AU2 bits=AU2_AF2(a);return AF2_AU2((bits>>AU2_(1))+AU2_(0x1fbc4639));}
+ AF2 APrxLoRcpF2(AF2 a){AU2 bits=AU2_AF2(a);return AF2_AU2(AU2_(0x7ef07ebb)-bits);}
+ AF2 APrxMedRcpF2(AF2 a){AF2 est=AF2_AU2(AU2_(0x7ef19fff)-AU2_AF2(a));AF2 fix=-est*a+AF2_(2.0);return est*fix;}
+ AF2 APrxLoRsqF2(AF2 a){AU2 halved=AU2_AF2(a)>>AU2_(1);return AF2_AU2(AU2_(0x5f347d74)-halved);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF3 APrxLoSqrtF3(AF3 a){AU3 bits=AU3_AF3(a);return AF3_AU3((bits>>AU3_(1))+AU3_(0x1fbc4639));}
+ AF3 APrxLoRcpF3(AF3 a){AU3 bits=AU3_AF3(a);return AF3_AU3(AU3_(0x7ef07ebb)-bits);}
+ AF3 APrxMedRcpF3(AF3 a){AF3 est=AF3_AU3(AU3_(0x7ef19fff)-AU3_AF3(a));AF3 fix=-est*a+AF3_(2.0);return est*fix;}
+ AF3 APrxLoRsqF3(AF3 a){AU3 halved=AU3_AF3(a)>>AU3_(1);return AF3_AU3(AU3_(0x5f347d74)-halved);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF4 APrxLoSqrtF4(AF4 a){AU4 bits=AU4_AF4(a);return AF4_AU4((bits>>AU4_(1))+AU4_(0x1fbc4639));}
+ AF4 APrxLoRcpF4(AF4 a){AU4 bits=AU4_AF4(a);return AF4_AU4(AU4_(0x7ef07ebb)-bits);}
+ AF4 APrxMedRcpF4(AF4 a){AF4 est=AF4_AU4(AU4_(0x7ef19fff)-AU4_AF4(a));AF4 fix=-est*a+AF4_(2.0);return est*fix;}
+ AF4 APrxLoRsqF4(AF4 a){AU4 halved=AU4_AF4(a)>>AU4_(1);return AF4_AU4(AU4_(0x5f347d74)-halved);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                    PQ APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// PQ is very close to x^(1/8). The functions below use the fast float approximation method to do
+// PQ<~>Gamma2 (4th power and fast 4th root) and PQ<~>Linear (8th power and fast 8th root). Maximum error is ~0.2%.
+//==============================================================================================================================
+// Helpers: Quart(a) returns a^4 (two squarings), Oct(a) returns a^8 (three squarings), per component.
+ AF1 Quart(AF1 a) { AF1 p2 = a * a; return p2 * p2; }
+ AF1 Oct(AF1 a) { AF1 p2 = a * a; AF1 p4 = p2 * p2; return p4 * p4; }
+ AF2 Quart(AF2 a) { AF2 p2 = a * a; return p2 * p2; }
+ AF2 Oct(AF2 a) { AF2 p2 = a * a; AF2 p4 = p2 * p2; return p4 * p4; }
+ AF3 Quart(AF3 a) { AF3 p2 = a * a; return p2 * p2; }
+ AF3 Oct(AF3 a) { AF3 p2 = a * a; AF3 p4 = p2 * p2; return p4 * p4; }
+ AF4 Quart(AF4 a) { AF4 p2 = a * a; return p2 * p2; }
+ AF4 Oct(AF4 a) { AF4 p2 = a * a; AF4 p4 = p2 * p2; return p4 * p4; }
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF1 APrxPQToGamma2(AF1 a) { AF1 p4 = Quart(a); return p4; } // PQ -> Gamma2 is the 4th power
+ AF1 APrxPQToLinear(AF1 a) { AF1 p8 = Oct(a); return p8; } // PQ -> Linear is the 8th power
+ AF1 APrxLoGamma2ToPQ(AF1 a) { AU1 quarter = AU1_AF1(a) >> AU1_(2); return AF1_AU1(quarter + AU1_(0x2F9A4E46)); } // Lo: bit-pattern 4th-root estimate
+ AF1 APrxMedGamma2ToPQ(AF1 a) { AF1 est = AF1_AU1((AU1_AF1(a) >> AU1_(2)) + AU1_(0x2F9A4E46)); AF1 est4 = Quart(est); AF1 err = est4 - a; return est - est * err / (AF1_(4.0) * est4); } // Med: Lo estimate plus one Newton step on est^4=a
+ AF1 APrxHighGamma2ToPQ(AF1 a) { AF1 root2 = sqrt(a); return sqrt(root2); } // High: two nested sqrt calls
+ AF1 APrxLoLinearToPQ(AF1 a) { AU1 eighth = AU1_AF1(a) >> AU1_(3); return AF1_AU1(eighth + AU1_(0x378D8723)); } // Lo: bit-pattern 8th-root estimate
+ AF1 APrxMedLinearToPQ(AF1 a) { AF1 est = AF1_AU1((AU1_AF1(a) >> AU1_(3)) + AU1_(0x378D8723)); AF1 est8 = Oct(est); AF1 err = est8 - a; return est - est * err / (AF1_(8.0) * est8); } // Med: Lo estimate plus one Newton step on est^8=a
+ AF1 APrxHighLinearToPQ(AF1 a) { AF1 root2 = sqrt(a); AF1 root4 = sqrt(root2); return sqrt(root4); } // High: three nested sqrt calls
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF2 APrxPQToGamma2(AF2 a) { AF2 p4 = Quart(a); return p4; } // 2-component overloads; same per-component math as the AF1 versions
+ AF2 APrxPQToLinear(AF2 a) { AF2 p8 = Oct(a); return p8; }
+ AF2 APrxLoGamma2ToPQ(AF2 a) { AU2 quarter = AU2_AF2(a) >> AU2_(2); return AF2_AU2(quarter + AU2_(0x2F9A4E46)); }
+ AF2 APrxMedGamma2ToPQ(AF2 a) { AF2 est = AF2_AU2((AU2_AF2(a) >> AU2_(2)) + AU2_(0x2F9A4E46)); AF2 est4 = Quart(est); AF2 err = est4 - a; return est - est * err / (AF1_(4.0) * est4); }
+ AF2 APrxHighGamma2ToPQ(AF2 a) { AF2 root2 = sqrt(a); return sqrt(root2); }
+ AF2 APrxLoLinearToPQ(AF2 a) { AU2 eighth = AU2_AF2(a) >> AU2_(3); return AF2_AU2(eighth + AU2_(0x378D8723)); }
+ AF2 APrxMedLinearToPQ(AF2 a) { AF2 est = AF2_AU2((AU2_AF2(a) >> AU2_(3)) + AU2_(0x378D8723)); AF2 est8 = Oct(est); AF2 err = est8 - a; return est - est * err / (AF1_(8.0) * est8); }
+ AF2 APrxHighLinearToPQ(AF2 a) { AF2 root2 = sqrt(a); AF2 root4 = sqrt(root2); return sqrt(root4); }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // Three-component (AF3) PQ approximations; each component is processed independently.
+ // 'Lo' = shifted-integer-image estimate, 'Med' = estimate plus one Newton-Raphson step, 'High' = nested sqrt().
+ AF3 APrxPQToGamma2(AF3 a)
+ {
+  return Quart(a); // 4th power
+ }
+ AF3 APrxPQToLinear(AF3 a)
+ {
+  return Oct(a); // 8th power
+ }
+ AF3 APrxLoGamma2ToPQ(AF3 a)
+ {
+  AU3 bits = AU3_AF3(a) >> AU3_(2);
+  return AF3_AU3(bits + AU3_(0x2F9A4E46));
+ }
+ AF3 APrxMedGamma2ToPQ(AF3 a)
+ {
+  AF3 est = AF3_AU3((AU3_AF3(a) >> AU3_(2)) + AU3_(0x2F9A4E46));
+  AF3 est4 = Quart(est);
+  // Newton-Raphson step for est^4 = a.
+  AF3 step = est * (est4 - a) / (AF1_(4.0) * est4);
+  return est - step;
+ }
+ AF3 APrxHighGamma2ToPQ(AF3 a)
+ {
+  AF3 root2 = sqrt(a);
+  return sqrt(root2);
+ }
+ AF3 APrxLoLinearToPQ(AF3 a)
+ {
+  AU3 bits = AU3_AF3(a) >> AU3_(3);
+  return AF3_AU3(bits + AU3_(0x378D8723));
+ }
+ AF3 APrxMedLinearToPQ(AF3 a)
+ {
+  AF3 est = AF3_AU3((AU3_AF3(a) >> AU3_(3)) + AU3_(0x378D8723));
+  AF3 est8 = Oct(est);
+  // Newton-Raphson step for est^8 = a.
+  AF3 step = est * (est8 - a) / (AF1_(8.0) * est8);
+  return est - step;
+ }
+ AF3 APrxHighLinearToPQ(AF3 a)
+ {
+  AF3 root2 = sqrt(a);
+  AF3 root4 = sqrt(root2);
+  return sqrt(root4);
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // Four-component (AF4) PQ approximations; each component is processed independently.
+ // 'Lo' = shifted-integer-image estimate, 'Med' = estimate plus one Newton-Raphson step, 'High' = nested sqrt().
+ AF4 APrxPQToGamma2(AF4 a)
+ {
+  return Quart(a); // 4th power
+ }
+ AF4 APrxPQToLinear(AF4 a)
+ {
+  return Oct(a); // 8th power
+ }
+ AF4 APrxLoGamma2ToPQ(AF4 a)
+ {
+  AU4 bits = AU4_AF4(a) >> AU4_(2);
+  return AF4_AU4(bits + AU4_(0x2F9A4E46));
+ }
+ AF4 APrxMedGamma2ToPQ(AF4 a)
+ {
+  AF4 est = AF4_AU4((AU4_AF4(a) >> AU4_(2)) + AU4_(0x2F9A4E46));
+  AF4 est4 = Quart(est);
+  // Newton-Raphson step for est^4 = a.
+  AF4 step = est * (est4 - a) / (AF1_(4.0) * est4);
+  return est - step;
+ }
+ AF4 APrxHighGamma2ToPQ(AF4 a)
+ {
+  AF4 root2 = sqrt(a);
+  return sqrt(root2);
+ }
+ AF4 APrxLoLinearToPQ(AF4 a)
+ {
+  AU4 bits = AU4_AF4(a) >> AU4_(3);
+  return AF4_AU4(bits + AU4_(0x378D8723));
+ }
+ AF4 APrxMedLinearToPQ(AF4 a)
+ {
+  AF4 est = AF4_AU4((AU4_AF4(a) >> AU4_(3)) + AU4_(0x378D8723));
+  AF4 est8 = Oct(est);
+  // Newton-Raphson step for est^8 = a.
+  AF4 step = est * (est8 - a) / (AF1_(8.0) * est8);
+  return est - step;
+ }
+ AF4 APrxHighLinearToPQ(AF4 a)
+ {
+  AF4 root2 = sqrt(a);
+  AF4 root4 = sqrt(root2);
+  return sqrt(root4);
+ }
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                    PARABOLIC SIN & COS
+//------------------------------------------------------------------------------------------------------------------------------
+// Approximate answers to transcendental questions.
+//------------------------------------------------------------------------------------------------------------------------------
+//==============================================================================================================================
+ #if 1
+  // Valid input range is {-1 to 1} representing {0 to 2 pi}.
+  // Output range is {-1/4 to 1/4} representing {-1 to 1}.
+  // Parabolic sine: x*|x|-x.
+  AF1 APSinF1(AF1 x){AF1 m=abs(x);return x*m-x;} // MAD.
+  AF2 APSinF2(AF2 x){AF2 m=abs(x);return x*m-x;}
+  // Parabolic cosine: remap the input (scale, offset, fract, back to {-1 to 1}) and evaluate the parabolic sine there.
+  AF1 APCosF1(AF1 x){AF1 t=AFractF1(x*AF1_(0.5)+AF1_(0.75));AF1 s=t*AF1_(2.0)-AF1_(1.0);return APSinF1(s);} // 3x MAD, FRACT
+  AF2 APCosF2(AF2 x){AF2 t=AFractF2(x*AF2_(0.5)+AF2_(0.75));AF2 s=t*AF2_(2.0)-AF2_(1.0);return APSinF2(s);}
+  // Returns {sin,cos} in {.x,.y} using one packed APSinF2() evaluation.
+  AF2 APSinCosF1(AF1 x){AF1 t=AFractF1(x*AF1_(0.5)+AF1_(0.75));AF1 s=t*AF1_(2.0)-AF1_(1.0);AF2 both=AF2(x,s);return APSinF2(both);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     [ZOL] ZERO ONE LOGIC
+//------------------------------------------------------------------------------------------------------------------------------
+// Conditional free logic designed for easy 16-bit packing, and backwards porting to 32-bit.
+//------------------------------------------------------------------------------------------------------------------------------
+// 0 := false
+// 1 := true
+//------------------------------------------------------------------------------------------------------------------------------
+// AndNot(x,y)   -> !(x&y) .... One op.
+// AndOr(x,y,z)  -> (x&y)|z ... One op.
+// GtZero(x)     -> x>0.0 ..... One op.
+// Sel(x,y,z)    -> x?y:z ..... Two ops, has no precision loss.
+// Signed(x)     -> x<0.0 ..... One op.
+// ZeroPass(x,y) -> x?0:y ..... Two ops, 'y' is a pass through safe for aliasing as integer.
+//------------------------------------------------------------------------------------------------------------------------------
+// OPTIMIZATION NOTES
+// ==================
+// - On Vega to use 2 constants in a packed op, pass in as one AW2 or one AH2 'k.xy' and use as 'k.xx' and 'k.yy'.
+//   For example 'a.xy*k.xx+k.yy'.
+//==============================================================================================================================
+ #if 1
+  // Integer zero/one logic. With operands restricted to {0,1}: min() acts as AND, XOR with 1 as NOT, max() as OR.
+  AU1 AZolAndU1(AU1 x,AU1 y){AU1 r=min(x,y);return r;}
+  AU2 AZolAndU2(AU2 x,AU2 y){AU2 r=min(x,y);return r;}
+  AU3 AZolAndU3(AU3 x,AU3 y){AU3 r=min(x,y);return r;}
+  AU4 AZolAndU4(AU4 x,AU4 y){AU4 r=min(x,y);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 AZolNotU1(AU1 x){return AU1_(1)^x;}
+  AU2 AZolNotU2(AU2 x){return AU2_(1)^x;}
+  AU3 AZolNotU3(AU3 x){return AU3_(1)^x;}
+  AU4 AZolNotU4(AU4 x){return AU4_(1)^x;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AU1 AZolOrU1(AU1 x,AU1 y){AU1 r=max(x,y);return r;}
+  AU2 AZolOrU2(AU2 x,AU2 y){AU2 r=max(x,y);return r;}
+  AU3 AZolOrU3(AU3 x,AU3 y){AU3 r=max(x,y);return r;}
+  AU4 AZolOrU4(AU4 x,AU4 y){AU4 r=max(x,y);return r;}
+//==============================================================================================================================
+  // Float zero/one -> unsigned zero/one by value conversion.
+  AU1 AZolF1ToU1(AF1 x){AU1 r=AU1(x);return r;}
+  AU2 AZolF2ToU2(AF2 x){AU2 r=AU2(x);return r;}
+  AU3 AZolF3ToU3(AF3 x){AU3 r=AU3(x);return r;}
+  AU4 AZolF4ToU4(AF4 x){AU4 r=AU4(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Logical NOT fused with the float -> unsigned conversion: converts (1.0-x).
+  // 2 ops, denormals don't work in 32-bit on PC (and if they are enabled, OMOD is disabled).
+  AU1 AZolNotF1ToU1(AF1 x){AF1 inv=AF1_(1.0)-x;return AU1(inv);}
+  AU2 AZolNotF2ToU2(AF2 x){AF2 inv=AF2_(1.0)-x;return AU2(inv);}
+  AU3 AZolNotF3ToU3(AF3 x){AF3 inv=AF3_(1.0)-x;return AU3(inv);}
+  AU4 AZolNotF4ToU4(AF4 x){AF4 inv=AF4_(1.0)-x;return AU4(inv);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Unsigned zero/one -> float zero/one by value conversion.
+  AF1 AZolU1ToF1(AU1 x){AF1 r=AF1(x);return r;}
+  AF2 AZolU2ToF2(AU2 x){AF2 r=AF2(x);return r;}
+  AF3 AZolU3ToF3(AU3 x){AF3 r=AF3(x);return r;}
+  AF4 AZolU4ToF4(AU4 x){AF4 r=AF4(x);return r;}
+//==============================================================================================================================
+  // Float zero/one logic. Operands are expected to be 0.0 (false) or 1.0 (true) unless noted.
+  // AND: min(x,y).
+  AF1 AZolAndF1(AF1 x,AF1 y){AF1 r=min(x,y);return r;}
+  AF2 AZolAndF2(AF2 x,AF2 y){AF2 r=min(x,y);return r;}
+  AF3 AZolAndF3(AF3 x,AF3 y){AF3 r=min(x,y);return r;}
+  AF4 AZolAndF4(AF4 x,AF4 y){AF4 r=min(x,y);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // AndNot: !(x&y) computed as 1-x*y. (These keep the 'ASol' prefix they were introduced with.)
+  AF1 ASolAndNotF1(AF1 x,AF1 y){AF1 nx=-x;return nx*y+AF1_(1.0);}
+  AF2 ASolAndNotF2(AF2 x,AF2 y){AF2 nx=-x;return nx*y+AF2_(1.0);}
+  AF3 ASolAndNotF3(AF3 x,AF3 y){AF3 nx=-x;return nx*y+AF3_(1.0);}
+  AF4 ASolAndNotF4(AF4 x,AF4 y){AF4 nx=-x;return nx*y+AF4_(1.0);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // AndOr: (x&y)|z computed as saturate(x*y+z).
+  AF1 AZolAndOrF1(AF1 x,AF1 y,AF1 z){AF1 t=x*y+z;return ASatF1(t);}
+  AF2 AZolAndOrF2(AF2 x,AF2 y,AF2 z){AF2 t=x*y+z;return ASatF2(t);}
+  AF3 AZolAndOrF3(AF3 x,AF3 y,AF3 z){AF3 t=x*y+z;return ASatF3(t);}
+  AF4 AZolAndOrF4(AF4 x,AF4 y,AF4 z){AF4 t=x*y+z;return ASatF4(t);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // GtZero: x>0.0 computed as saturate(x*+INF); 'x' may be any float here.
+  AF1 AZolGtZeroF1(AF1 x){AF1 t=x*AF1_(A_INFP_F);return ASatF1(t);}
+  AF2 AZolGtZeroF2(AF2 x){AF2 t=x*AF2_(A_INFP_F);return ASatF2(t);}
+  AF3 AZolGtZeroF3(AF3 x){AF3 t=x*AF3_(A_INFP_F);return ASatF3(t);}
+  AF4 AZolGtZeroF4(AF4 x){AF4 t=x*AF4_(A_INFP_F);return ASatF4(t);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // NOT: 1-x.
+  AF1 AZolNotF1(AF1 x){AF1 one=AF1_(1.0);return one-x;}
+  AF2 AZolNotF2(AF2 x){AF2 one=AF2_(1.0);return one-x;}
+  AF3 AZolNotF3(AF3 x){AF3 one=AF3_(1.0);return one-x;}
+  AF4 AZolNotF4(AF4 x){AF4 one=AF4_(1.0);return one-x;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // OR: max(x,y).
+  AF1 AZolOrF1(AF1 x,AF1 y){AF1 r=max(x,y);return r;}
+  AF2 AZolOrF2(AF2 x,AF2 y){AF2 r=max(x,y);return r;}
+  AF3 AZolOrF3(AF3 x,AF3 y){AF3 r=max(x,y);return r;}
+  AF4 AZolOrF4(AF4 x,AF4 y){AF4 r=max(x,y);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Select: x?y:z for x in {0.0,1.0}. Computed as x*y+((-x)*z+z), so the chosen operand is multiplied by exactly 1 and the other by exactly 0.
+  AF1 AZolSelF1(AF1 x,AF1 y,AF1 z){AF1 nx=-x;AF1 keepZ=nx*z+z;return x*y+keepZ;}
+  AF2 AZolSelF2(AF2 x,AF2 y,AF2 z){AF2 nx=-x;AF2 keepZ=nx*z+z;return x*y+keepZ;}
+  AF3 AZolSelF3(AF3 x,AF3 y,AF3 z){AF3 nx=-x;AF3 keepZ=nx*z+z;return x*y+keepZ;}
+  AF4 AZolSelF4(AF4 x,AF4 y,AF4 z){AF4 nx=-x;AF4 keepZ=nx*z+z;return x*y+keepZ;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Signed: x<0.0 computed as saturate(x*-INF); 'x' may be any float here.
+  AF1 AZolSignedF1(AF1 x){AF1 t=x*AF1_(A_INFN_F);return ASatF1(t);}
+  AF2 AZolSignedF2(AF2 x){AF2 t=x*AF2_(A_INFN_F);return ASatF2(t);}
+  AF3 AZolSignedF3(AF3 x){AF3 t=x*AF3_(A_INFN_F);return ASatF3(t);}
+  AF4 AZolSignedF4(AF4 x){AF4 t=x*AF4_(A_INFN_F);return ASatF4(t);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // ZeroPass: x?0:y. The test and the pass-through are done on the unsigned-integer image of the operands
+  // (AU*_AF* / AF*_AU* are defined elsewhere; presumably bit reinterpretation - confirm), so 'y' is returned unmodified.
+  // NOTE(review): in the vector overloads the condition is a vector '!=' feeding '?:'. Whether that selects per component
+  // or all-or-nothing depends on the shading language (GLSL '!=' on vectors yields a single bool) - confirm for each target.
+  AF1 AZolZeroPassF1(AF1 x,AF1 y){return AF1_AU1((AU1_AF1(x)!=AU1_(0))?AU1_(0):AU1_AF1(y));}
+  AF2 AZolZeroPassF2(AF2 x,AF2 y){return AF2_AU2((AU2_AF2(x)!=AU2_(0))?AU2_(0):AU2_AF2(y));}
+  AF3 AZolZeroPassF3(AF3 x,AF3 y){return AF3_AU3((AU3_AF3(x)!=AU3_(0))?AU3_(0):AU3_AF3(y));}
+  AF4 AZolZeroPassF4(AF4 x,AF4 y){return AF4_AU4((AU4_AF4(x)!=AU4_(0))?AU4_(0):AU4_AF4(y));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      COLOR CONVERSIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are all linear to/from some other space (where 'linear' has been shortened out of the function name).
+// So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'.
+// These are branch free implementations.
+// The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion.
+//------------------------------------------------------------------------------------------------------------------------------
+// TRANSFER FUNCTIONS
+// ==================
+// 709 ..... Rec709 used for some HDTVs
+// Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native
+// Pq ...... PQ native for HDR10
+// Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type
+// Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations)
+// Three ... Gamma 3.0, less fast, but good for HDR.
+//------------------------------------------------------------------------------------------------------------------------------
+// KEEPING TO SPEC
+// ===============
+// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+//  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+//  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+// Also there is a slight step in the transition regions.
+// Precision of the coefficients in the spec being the likely cause.
+// Main usage case of the sRGB code is to do the linear->sRGB conversion in a compute shader before store.
+// This is to work around lack of hardware (typically only ROP does the conversion for free).
+// To "correct" the linear segment, would be to introduce error, because hardware decode of sRGB->linear is fixed (and free).
+// So this header keeps with the spec.
+// For linear->sRGB transforms, the linear segment in some respects reduces error, because rounding in that region is linear.
+// Rounding in the curved region in hardware (and fast software code) introduces error due to rounding in non-linear.
+//------------------------------------------------------------------------------------------------------------------------------
+// FOR PQ
+// ======
+// Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2.
+// All constants are only specified to FP32 precision.
+// External PQ source reference,
+//  - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl
+//------------------------------------------------------------------------------------------------------------------------------
+// PACKED VERSIONS
+// ===============
+// These are the A*H2() functions.
+// There are no PQ functions, as FP16 did not seem to have enough precision for the conversion.
+// The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors.
+// Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least).
+//------------------------------------------------------------------------------------------------------------------------------
+// NOTES
+// =====
+// Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case.
+//==============================================================================================================================
+ #if 1
+  // Linear -> Rec.709 encode, branch free.
+  //  j = {encoded knee (0.018*4.5), linear-segment slope, curve exponent}, k = {curve scale, curve offset}.
+  // The result is the median of {knee, c*slope, curve(c)}: the linear segment below the knee, the power curve above it.
+  // The median is written out with min/max rather than clamp(knee,lin,curve): for small 'c' lin>curve, and clamp() is
+  // undefined when minVal>maxVal (a min(max()) implementation would return the curve, which is negative at c=0).
+  AF1 ATo709F1(AF1 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
+   AF1 t=j.x  ;AF1 l=c*j.y  ;AF1 p=pow(c,j.z  )*k.x  +k.y  ;
+   return max(min(t,l),min(max(t,l),p));}
+  AF2 ATo709F2(AF2 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
+   AF2 t=j.xx ;AF2 l=c*j.yy ;AF2 p=pow(c,j.zz )*k.xx +k.yy ;
+   return max(min(t,l),min(max(t,l),p));}
+  AF3 ATo709F3(AF3 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
+   AF3 t=j.xxx;AF3 l=c*j.yyy;AF3 p=pow(c,j.zzz)*k.xxx+k.yyy;
+   return max(min(t,l),min(max(t,l),p));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Linear -> gamma encode: c^rcpX, with the scalar exponent broadcast to the width of 'c'.
+  // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma().
+  AF1 AToGammaF1(AF1 c,AF1 rcpX){AF1 e=AF1_(rcpX);return pow(c,e);}
+  AF2 AToGammaF2(AF2 c,AF1 rcpX){AF2 e=AF2_(rcpX);return pow(c,e);}
+  AF3 AToGammaF3(AF3 c,AF1 rcpX){AF3 e=AF3_(rcpX);return pow(c,e);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Linear -> PQ encode: ((0.835938+18.8516*p)/(1+18.6875*p))^78.8438 with p=x^0.159302.
+  // All three overloads share the 'F1' suffix; the vector forms are selected by argument type.
+  AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302));
+   AF1 num=AF1_(0.835938)+AF1_(18.8516)*p;AF1 den=AF1_(1.0)+AF1_(18.6875)*p;
+   return pow(num/den,AF1_(78.8438));}
+  AF2 AToPqF1(AF2 x){AF2 p=pow(x,AF2_(0.159302));
+   AF2 num=AF2_(0.835938)+AF2_(18.8516)*p;AF2 den=AF2_(1.0)+AF2_(18.6875)*p;
+   return pow(num/den,AF2_(78.8438));}
+  AF3 AToPqF1(AF3 x){AF3 p=pow(x,AF3_(0.159302));
+   AF3 num=AF3_(0.835938)+AF3_(18.8516)*p;AF3 den=AF3_(1.0)+AF3_(18.6875)*p;
+   return pow(num/den,AF3_(78.8438));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Linear -> sRGB encode, branch free.
+  //  j = {encoded threshold (0.0031308*12.92), linear-segment slope, curve exponent}, k = {curve scale, curve offset}.
+  // The result is the median of {threshold, c*slope, curve(c)}: the linear segment below the threshold, the power curve above.
+  // The median is written out with min/max rather than clamp(threshold,lin,curve): for small 'c' lin>curve, and clamp() is
+  // undefined when minVal>maxVal (a min(max()) implementation would return the curve, which is negative at c=0).
+  AF1 AToSrgbF1(AF1 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);
+   AF1 t=j.x  ;AF1 l=c*j.y  ;AF1 p=pow(c,j.z  )*k.x  +k.y  ;
+   return max(min(t,l),min(max(t,l),p));}
+  AF2 AToSrgbF2(AF2 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);
+   AF2 t=j.xx ;AF2 l=c*j.yy ;AF2 p=pow(c,j.zz )*k.xx +k.yy ;
+   return max(min(t,l),min(max(t,l),p));}
+  AF3 AToSrgbF3(AF3 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);
+   AF3 t=j.xxx;AF3 l=c*j.yyy;AF3 p=pow(c,j.zzz)*k.xxx+k.yyy;
+   return max(min(t,l),min(max(t,l),p));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Linear -> gamma 2.0 encode: square root.
+  AF1 AToTwoF1(AF1 c){AF1 r=sqrt(c);return r;}
+  AF2 AToTwoF2(AF2 c){AF2 r=sqrt(c);return r;}
+  AF3 AToTwoF3(AF3 c){AF3 r=sqrt(c);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Linear -> gamma 3.0 encode: c^(1/3).
+  AF1 AToThreeF1(AF1 c){AF1 e=AF1_(1.0/3.0);return pow(c,e);}
+  AF2 AToThreeF2(AF2 c){AF2 e=AF2_(1.0/3.0);return pow(c,e);}
+  AF3 AToThreeF3(AF3 c){AF3 e=AF3_(1.0/3.0);return pow(c,e);}
+ #endif
+//==============================================================================================================================
+ #if 1
+  // Rec.709 encoded -> linear decode, branch free.
+  //  j = {encoded knee, 1/slope of the linear segment, curve exponent}, k = {curve scale, curve offset}.
+  // 'c' is the ENCODED value, so the segment test compares it against the encoded knee 0.081 (=0.018*4.5),
+  // not against the linear-space knee 0.081/4.5.
+  // Unfortunately median won't work here.
+  AF1 AFrom709F1(AF1 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+   return AZolSelF1(AZolSignedF1(c-j.x  ),c*j.y  ,pow(c*k.x  +k.y  ,j.z  ));}
+  AF2 AFrom709F2(AF2 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+   return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+  AF3 AFrom709F3(AF3 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+   return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Gamma encoded -> linear decode: c^x, with the scalar exponent broadcast to the width of 'c'.
+  AF1 AFromGammaF1(AF1 c,AF1 x){AF1 e=AF1_(x);return pow(c,e);}
+  AF2 AFromGammaF2(AF2 c,AF1 x){AF2 e=AF2_(x);return pow(c,e);}
+  AF3 AFromGammaF3(AF3 c,AF1 x){AF3 e=AF3_(x);return pow(c,e);}
+//------------------------------------------------------------------------------------------------------------------------------
+  // PQ encoded -> linear decode: (saturate(p-0.835938)/(18.8516-18.6875*p))^6.27739 with p=x^0.0126833.
+  // All three overloads share the 'F1' suffix; the vector forms are selected by argument type.
+  AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833));
+   AF1 num=ASatF1(p-AF1_(0.835938));AF1 den=AF1_(18.8516)-AF1_(18.6875)*p;
+   return pow(num/den,AF1_(6.27739));}
+  AF2 AFromPqF1(AF2 x){AF2 p=pow(x,AF2_(0.0126833));
+   AF2 num=ASatF2(p-AF2_(0.835938));AF2 den=AF2_(18.8516)-AF2_(18.6875)*p;
+   return pow(num/den,AF2_(6.27739));}
+  AF3 AFromPqF1(AF3 x){AF3 p=pow(x,AF3_(0.0126833));
+   AF3 num=ASatF3(p-AF3_(0.835938));AF3 den=AF3_(18.8516)-AF3_(18.6875)*p;
+   return pow(num/den,AF3_(6.27739));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // sRGB encoded -> linear decode, branch free.
+  //  j = {encoded threshold, 1/slope of the linear segment, curve exponent}, k = {curve scale, curve offset}.
+  // 'c' is the ENCODED value, so the segment test compares it against the encoded threshold 0.04045
+  // (=0.0031308*12.92), not against the linear-space threshold 0.04045/12.92.
+  // Unfortunately median won't work here.
+  AF1 AFromSrgbF1(AF1 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+   return AZolSelF1(AZolSignedF1(c-j.x  ),c*j.y  ,pow(c*k.x  +k.y  ,j.z  ));}
+  AF2 AFromSrgbF2(AF2 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+   return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+  AF3 AFromSrgbF3(AF3 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+   return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Gamma 2.0 encoded -> linear decode: square.
+  AF1 AFromTwoF1(AF1 c){AF1 sq=c*c;return sq;}
+  AF2 AFromTwoF2(AF2 c){AF2 sq=c*c;return sq;}
+  AF3 AFromTwoF3(AF3 c){AF3 sq=c*c;return sq;}
+//------------------------------------------------------------------------------------------------------------------------------
+  // Gamma 3.0 encoded -> linear decode: cube.
+  AF1 AFromThreeF1(AF1 c){AF1 sq=c*c;return sq*c;}
+  AF2 AFromThreeF2(AF2 c){AF2 sq=c*c;return sq*c;}
+  AF3 AFromThreeF3(AF3 c){AF3 sq=c*c;return sq*c;}
+ #endif
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                          CS REMAP
+//==============================================================================================================================
+ // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear.
+ // Source bit of lane index 'a' feeding each output coordinate:
+ //  543210
+ //  ======
+ //  ..xxx.
+ //  yy...y
+ AU2 ARmp8x8(AU1 a)
+ {
+  AU1 px=ABfe(a,1u,3u);              // x
+  AU1 py=ABfiM(ABfe(a,3u,3u),a,1u);  // y
+  return AU2(px,py);
+ }
+//==============================================================================================================================
+ // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions.
+ // Source bit of lane index 'a' feeding each output coordinate:
+ //  543210
+ //  ======
+ //  .xx..x
+ //  y..yy.
+ // Details,
+ //  LANE TO 8x8 MAPPING
+ //  ===================
+ //  00 01 08 09 10 11 18 19
+ //  02 03 0a 0b 12 13 1a 1b
+ //  04 05 0c 0d 14 15 1c 1d
+ //  06 07 0e 0f 16 17 1e 1f
+ //  20 21 28 29 30 31 38 39
+ //  22 23 2a 2b 32 33 3a 3b
+ //  24 25 2c 2d 34 35 3c 3d
+ //  26 27 2e 2f 36 37 3e 3f
+ AU2 ARmpRed8x8(AU1 a)
+ {
+  AU1 px=ABfiM(ABfe(a,2u,3u),a,1u);             // x
+  AU1 py=ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u); // y
+  return AU2(px,py);
+ }
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                                          REFERENCE
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// IEEE FLOAT RULES
+// ================
+//  - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1
+//  - {+/-}0 * {+/-}INF = NaN
+//  - -INF + (+INF) = NaN
+//  - {+/-}0 / {+/-}0 = NaN
+//  - {+/-}INF / {+/-}INF = NaN
+//  - a<(-0) := sqrt(a) = NaN (a=-0.0 won't NaN)
+//  - 0 == -0
+//  - 4/0 = +INF
+//  - 4/-0 = -INF
+//  - 4+INF = +INF
+//  - 4-INF = -INF
+//  - 4*(+INF) = +INF
+//  - 4*(-INF) = -INF
+//  - -4*(+INF) = -INF
+//  - sqrt(+INF) = +INF
+//------------------------------------------------------------------------------------------------------------------------------
+// FP16 ENCODING
+// =============
+// fedcba9876543210
+// ----------------
+// ......mmmmmmmmmm  10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals)
+// .eeeee..........  5-bit exponent
+// .00000..........  denormals
+// .00001..........  -14 exponent
+// .11110..........   15 exponent
+// .111110000000000  infinity
+// .11111nnnnnnnnnn  NaN with n!=0
+// s...............  sign
+//------------------------------------------------------------------------------------------------------------------------------
+// FP16/INT16 ALIASING DENORMAL
+// ============================
+// 11-bit unsigned integers alias with half float denormal/normal values,
+//     1 = 2^(-24) = 1/16777216 ....................... first denormal value
+//     2 = 2^(-23)
+//   ...
+//  1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value
+//  1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers
+//  2047 .............................................. last normal value that still maps to integers
+// Scaling limits,
+//  2^15 = 32768 ...................................... largest power of 2 scaling
+// Largest pow2 conversion mapping is at *32768,
+//     1 : 2^(-9) = 1/512
+//     2 : 1/256
+//     4 : 1/128
+//     8 : 1/64
+//    16 : 1/32
+//    32 : 1/16
+//    64 : 1/8
+//   128 : 1/4
+//   256 : 1/2
+//   512 : 1
+//  1024 : 2
+//  2047 : a little less than 4
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+//                                                     GPU/CPU PORTABILITY
+//
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This is the GPU implementation.
+// See the CPU implementation for docs.
+//==============================================================================================================================
+#ifdef A_GPU
+ // GPU build: boolean constants map to the native literals, and A_STATIC expands to nothing.
+ #define A_TRUE true
+ #define A_FALSE false
+ #define A_STATIC
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                     VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
+//==============================================================================================================================
+ // Vector return types: on the GPU each ret* macro expands to the plain vector type.
+ #define retAD2 AD2
+ #define retAD3 AD3
+ #define retAD4 AD4
+ #define retAF2 AF2
+ #define retAF3 AF3
+ #define retAF4 AF4
+ #define retAL2 AL2
+ #define retAL3 AL3
+ #define retAL4 AL4
+ #define retAU2 AU2
+ #define retAU3 AU3
+ #define retAU4 AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Input vector parameters: each in* macro expands to the vector type with the 'in' qualifier.
+ #define inAD2 in AD2
+ #define inAD3 in AD3
+ #define inAD4 in AD4
+ #define inAF2 in AF2
+ #define inAF3 in AF3
+ #define inAF4 in AF4
+ #define inAL2 in AL2
+ #define inAL3 in AL3
+ #define inAL4 in AL4
+ #define inAU2 in AU2
+ #define inAU3 in AU3
+ #define inAU4 in AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Read/write vector parameters: each inout* macro expands to the vector type with the 'inout' qualifier.
+ #define inoutAD2 inout AD2
+ #define inoutAD3 inout AD3
+ #define inoutAD4 inout AD4
+ #define inoutAF2 inout AF2
+ #define inoutAF3 inout AF3
+ #define inoutAF4 inout AF4
+ #define inoutAL2 inout AL2
+ #define inoutAL3 inout AL3
+ #define inoutAL4 inout AL4
+ #define inoutAU2 inout AU2
+ #define inoutAU3 inout AU3
+ #define inoutAU4 inout AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Output vector parameters: each out* macro expands to the vector type with the 'out' qualifier.
+ #define outAD2 out AD2
+ #define outAD3 out AD3
+ #define outAD4 out AD4
+ #define outAF2 out AF2
+ #define outAF3 out AF3
+ #define outAF4 out AF4
+ #define outAL2 out AL2
+ #define outAL3 out AL3
+ #define outAL4 out AL4
+ #define outAU2 out AU2
+ #define outAU3 out AU3
+ #define outAU4 out AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Local vector declarations: var*(x) expands to '<vector type> x'.
+ #define varAD2(x) AD2 x
+ #define varAD3(x) AD3 x
+ #define varAD4(x) AD4 x
+ #define varAF2(x) AF2 x
+ #define varAF3(x) AF3 x
+ #define varAF4(x) AF4 x
+ #define varAL2(x) AL2 x
+ #define varAL3(x) AL3 x
+ #define varAL4(x) AL4 x
+ #define varAU2(x) AU2 x
+ #define varAU3(x) AU3 x
+ #define varAU4(x) AU4 x
+//------------------------------------------------------------------------------------------------------------------------------
+ // Vector initializers: init*(...) expands to the matching vector constructor call.
+ #define initAD2(x,y) AD2(x,y)
+ #define initAD3(x,y,z) AD3(x,y,z)
+ #define initAD4(x,y,z,w) AD4(x,y,z,w)
+ #define initAF2(x,y) AF2(x,y)
+ #define initAF3(x,y,z) AF3(x,y,z)
+ #define initAF4(x,y,z,w) AF4(x,y,z,w)
+ #define initAL2(x,y) AL2(x,y)
+ #define initAL3(x,y,z) AL3(x,y,z)
+ #define initAL4(x,y,z,w) AL4(x,y,z,w)
+ #define initAU2(x,y) AU2(x,y)
+ #define initAU3(x,y,z) AU3(x,y,z)
+ #define initAU4(x,y,z,w) AU4(x,y,z,w)
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     SCALAR RETURN OPS
+//==============================================================================================================================
+ // Scalar-returning wrappers over the shading language built-ins.
+ // The suffix names the operand type (D1/F1/L1/U1); unary and dot wrappers cast their arguments to that type first.
+ #define AAbsD1(a) abs(AD1(a))
+ #define AAbsF1(a) abs(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ACosD1(a) cos(AD1(a))
+ #define ACosF1(a) cos(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ADotD2(a,b) dot(AD2(a),AD2(b))
+ #define ADotD3(a,b) dot(AD3(a),AD3(b))
+ #define ADotD4(a,b) dot(AD4(a),AD4(b))
+ #define ADotF2(a,b) dot(AF2(a),AF2(b))
+ #define ADotF3(a,b) dot(AF3(a),AF3(b))
+ #define ADotF4(a,b) dot(AF4(a),AF4(b))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AExp2D1(a) exp2(AD1(a))
+ #define AExp2F1(a) exp2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AFloorD1(a) floor(AD1(a))
+ #define AFloorF1(a) floor(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ALog2D1(a) log2(AD1(a))
+ #define ALog2F1(a) log2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Max/min wrappers forward their arguments without a cast.
+ #define AMaxD1(a,b) max(a,b)
+ #define AMaxF1(a,b) max(a,b)
+ #define AMaxL1(a,b) max(a,b)
+ #define AMaxU1(a,b) max(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AMinD1(a,b) min(a,b)
+ #define AMinF1(a,b) min(a,b)
+ #define AMinL1(a,b) min(a,b)
+ #define AMinU1(a,b) min(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASinD1(a) sin(AD1(a))
+ #define ASinF1(a) sin(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASqrtD1(a) sqrt(AD1(a))
+ #define ASqrtF1(a) sqrt(AF1(a))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                               SCALAR RETURN OPS - DEPENDENT
+//==============================================================================================================================
+ #define APowD1(a,b) pow(AD1(a),AF1(b)) // NOTE(review): the exponent goes through AF1(), not AD1(), unlike the base - confirm this is intended.
+ #define APowF1(a,b) pow(AF1(a),AF1(b)) // a raised to the power b, both passed through AF1().
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                         VECTOR OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are added as needed for production or prototyping, so not necessarily a complete set.
+// They follow a convention of taking in a destination and also returning the destination value to increase utility.
+//==============================================================================================================================
+ #ifdef A_DUBL
+  AD2 opAAbsD2(outAD2 d,inAD2 a){return d=abs(a);} // d = abs(a); every op stores into d and also returns that value.
+  AD3 opAAbsD3(outAD3 d,inAD3 a){return d=abs(a);}
+  AD4 opAAbsD4(outAD4 d,inAD4 a){return d=abs(a);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){return d=a+b;} // d = a + b
+  AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){return d=a+b;}
+  AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){return d=a+b;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){return d=a+AD2_(b);} // d = a + b, scalar b widened with ADn_()
+  AD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){return d=a+AD3_(b);}
+  AD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){return d=a+AD4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opACpyD2(outAD2 d,inAD2 a){return d=a;} // d = a
+  AD3 opACpyD3(outAD3 d,inAD3 a){return d=a;}
+  AD4 opACpyD4(outAD4 d,inAD4 a){return d=a;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){return d=ALerpD2(a,b,c);} // d = ALerpDn(a,b,c), per-component factor c
+  AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){return d=ALerpD3(a,b,c);}
+  AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){return d=ALerpD4(a,b,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){return d=ALerpD2(a,b,AD2_(c));} // as above, scalar c widened with ADn_()
+  AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){return d=ALerpD3(a,b,AD3_(c));}
+  AD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){return d=ALerpD4(a,b,AD4_(c));}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){return d=max(a,b);} // d = max(a,b)
+  AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){return d=max(a,b);}
+  AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){return d=max(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){return d=min(a,b);} // d = min(a,b)
+  AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){return d=min(a,b);}
+  AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){return d=min(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){return d=a*b;} // d = a * b
+  AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){return d=a*b;}
+  AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){return d=a*b;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){return d=a*AD2_(b);} // d = a * b, scalar b widened with ADn_()
+  AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){return d=a*AD3_(b);}
+  AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){return d=a*AD4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opANegD2(outAD2 d,inAD2 a){return d=-a;} // d = -a
+  AD3 opANegD3(outAD3 d,inAD3 a){return d=-a;}
+  AD4 opANegD4(outAD4 d,inAD4 a){return d=-a;}
+//------------------------------------------------------------------------------------------------------------------------------
+  AD2 opARcpD2(outAD2 d,inAD2 a){return d=ARcpD2(a);} // d = ARcpDn(a)
+  AD3 opARcpD3(outAD3 d,inAD3 a){return d=ARcpD3(a);}
+  AD4 opARcpD4(outAD4 d,inAD4 a){return d=ARcpD4(a);}
+ #endif
+//==============================================================================================================================
+ AF2 opAAbsF2(outAF2 d,inAF2 a){return d=abs(a);} // d = abs(a); every op stores into d and also returns that value.
+ AF3 opAAbsF3(outAF3 d,inAF3 a){return d=abs(a);}
+ AF4 opAAbsF4(outAF4 d,inAF4 a){return d=abs(a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){return d=a+b;} // d = a + b
+ AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){return d=a+b;}
+ AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){return d=a+b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){return d=a+AF2_(b);} // d = a + b, scalar b widened with AFn_()
+ AF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){return d=a+AF3_(b);}
+ AF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){return d=a+AF4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opACpyF2(outAF2 d,inAF2 a){return d=a;} // d = a
+ AF3 opACpyF3(outAF3 d,inAF3 a){return d=a;}
+ AF4 opACpyF4(outAF4 d,inAF4 a){return d=a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){return d=ALerpF2(a,b,c);} // d = ALerpFn(a,b,c), per-component factor c
+ AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){return d=ALerpF3(a,b,c);}
+ AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){return d=ALerpF4(a,b,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){return d=ALerpF2(a,b,AF2_(c));} // as above, scalar c widened with AFn_()
+ AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){return d=ALerpF3(a,b,AF3_(c));}
+ AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){return d=ALerpF4(a,b,AF4_(c));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){return d=max(a,b);} // d = max(a,b)
+ AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){return d=max(a,b);}
+ AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){return d=max(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){return d=min(a,b);} // d = min(a,b)
+ AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){return d=min(a,b);}
+ AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){return d=min(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){return d=a*b;} // d = a * b
+ AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){return d=a*b;}
+ AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){return d=a*b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){return d=a*AF2_(b);} // d = a * b, scalar b widened with AFn_()
+ AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){return d=a*AF3_(b);}
+ AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){return d=a*AF4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opANegF2(outAF2 d,inAF2 a){return d=-a;} // d = -a
+ AF3 opANegF3(outAF3 d,inAF3 a){return d=-a;}
+ AF4 opANegF4(outAF4 d,inAF4 a){return d=-a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opARcpF2(outAF2 d,inAF2 a){return d=ARcpF2(a);} // d = ARcpFn(a)
+ AF3 opARcpF3(outAF3 d,inAF3 a){return d=ARcpF3(a);}
+ AF4 opARcpF4(outAF4 d,inAF4 a){return d=ARcpF4(a);}
+#endif
+
+#endif
+
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                 [CAS] FIDELITY FX - CONTRAST ADAPTIVE SHARPENING 1.20190610
+//
+//==============================================================================================================================
+// LICENSE
+// =======
+// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved.
+// -------
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+// -------
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+// Software.
+// -------
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+#define CAS_AREA_LIMIT 4.0 // Largest output-area / input-area ratio accepted by CasSupportScaling().
+//------------------------------------------------------------------------------------------------------------------------------
+// Takes the output resolution and the input resolution, both in pixels.
+// Yields true when (output area / input area) does not exceed CAS_AREA_LIMIT, i.e. CAS supports scaling in that configuration.
+AP1 CasSupportScaling(AF1 outX,AF1 outY,AF1 inX,AF1 inY){return CAS_AREA_LIMIT>=((outX*outY)*ARcpF1(inX*inY));}
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+#ifndef A_GPU
+A_STATIC void CasSetup(
+ outAU4 const0,
+ outAU4 const1,
+ AF1 sharpness, // 0 := default (lower ringing), 1 := maximum (highest ringing)
+ AF1 inputSizeInPixelsX,
+ AF1 inputSizeInPixelsY,
+ AF1 outputSizeInPixelsX,
+ AF1 outputSizeInPixelsY){
+  // Sharpening peak: saturate the control, blend between 8.0 and 5.0 with it, then take the negated reciprocal.
+  AF1 peak=-ARcpF1(ALerpF1(8.0,5.0,ASatF1(sharpness)));
+  varAF2(peakPair)=initAF2(peak,0.0);
+  // const0 = {ratioX, ratioY, 0.5*ratioX-0.5, 0.5*ratioY-0.5} with ratio = input size / output size, stored as float bits.
+  const0[0]=AU1_AF1(inputSizeInPixelsX*ARcpF1(outputSizeInPixelsX));
+  const0[1]=AU1_AF1(inputSizeInPixelsY*ARcpF1(outputSizeInPixelsY));
+  const0[2]=AU1_AF1(AF1_(0.5)*inputSizeInPixelsX*ARcpF1(outputSizeInPixelsX)-AF1_(0.5));
+  const0[3]=AU1_AF1(AF1_(0.5)*inputSizeInPixelsY*ARcpF1(outputSizeInPixelsY)-AF1_(0.5));
+  const1[0]=AU1_AF1(peak); // Peak as 32-bit float bits.
+  const1[1]=AU1_AH2_AF2(peakPair); // {peak, 0.0} packed through AU1_AH2_AF2().
+  const1[2]=AU1_AF1(AF1_(8.0)*inputSizeInPixelsX*ARcpF1(outputSizeInPixelsX)); // 8 * horizontal input/output ratio.
+  const1[3]=0;} // Last slot is always written as zero.
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     NON-PACKED VERSION
+//==============================================================================================================================
+#ifdef A_GPU
+ AF3 CasLoad(ASU2 p) { return texelFetch(diffuseRect, clamp(p, ASU2(0), textureSize(diffuseRect, 0) - ASU2(1)), 0).rgb; } // Fetch RGB at texel p, clamped to the texture edge: CasFilter taps reach -1..+2 around border pixels and texelFetch is undefined out of range.
+ // Input transform applied in place to every tap CasFilter() loads: each channel is run through AFromSrgbF1().
+ void CasInput(inout AF1 r,inout AF1 g,inout AF1 b){
+    b = AFromSrgbF1(b);
+    g = AFromSrgbF1(g);
+    r = AFromSrgbF1(r);
+ }
+
+//------------------------------------------------------------------------------------------------------------------------------
+ void CasFilter( // Filters one output pixel: 3x3 sharpen-only path when noScaling, otherwise a 4x4-tap path blending four such results.
+ out AF1 pixR, // Output values, non-vector so port between CasFilter() and CasFilterH() is easy.
+ out AF1 pixG,
+ out AF1 pixB,
+ AU2 ip, // Integer pixel position in output.
+ AU4 const0, // Constants generated by CasSetup().
+ AU4 const1, // const1.x is read back as the float 'peak' weight scale via AF1_AU1().
+ AP1 noScaling){ // Must be a compile-time literal value, true = sharpen only (no resize).
+//------------------------------------------------------------------------------------------------------------------------------
+  // Debug a checker pattern of on/off tiles for visual inspection.
+  #ifdef CAS_DEBUG_CHECKER
+   if((((ip.x^ip.y)>>8u)&1u)==0u){AF3 pix0=CasLoad(ASU2(ip));
+    pixR=pix0.r;pixG=pix0.g;pixB=pix0.b;CasInput(pixR,pixG,pixB);return;}
+  #endif
+//------------------------------------------------------------------------------------------------------------------------------
+  // No scaling algorithm uses minimal 3x3 pixel neighborhood.
+  if(noScaling){
+   // a b c
+   // d e f
+   // g h i
+   ASU2 sp=ASU2(ip); // Converted once so the negative tap offsets below can be added.
+   AF3 a=CasLoad(sp+ASU2(-1,-1));
+   AF3 b=CasLoad(sp+ASU2( 0,-1));
+   AF3 c=CasLoad(sp+ASU2( 1,-1));
+   AF3 d=CasLoad(sp+ASU2(-1, 0));
+   AF3 e=CasLoad(sp); // Centre tap.
+   AF3 f=CasLoad(sp+ASU2( 1, 0));
+   AF3 g=CasLoad(sp+ASU2(-1, 1));
+   AF3 h=CasLoad(sp+ASU2( 0, 1));
+   AF3 i=CasLoad(sp+ASU2( 1, 1));
+   // Run optional input transform.
+   CasInput(a.r,a.g,a.b);
+   CasInput(b.r,b.g,b.b);
+   CasInput(c.r,c.g,c.b);
+   CasInput(d.r,d.g,d.b);
+   CasInput(e.r,e.g,e.b);
+   CasInput(f.r,f.g,f.b);
+   CasInput(g.r,g.g,g.b);
+   CasInput(h.r,h.g,h.b);
+   CasInput(i.r,i.g,i.b);
+   // Soft min and max.
+   //  a b c             b
+   //  d e f * 0.5  +  d e f * 0.5
+   //  g h i             h
+   // These are 2.0x bigger (factored out the extra multiply).
+   AF1 mnR=AMin3F1(AMin3F1(d.r,e.r,f.r),b.r,h.r);
+   AF1 mnG=AMin3F1(AMin3F1(d.g,e.g,f.g),b.g,h.g);
+   AF1 mnB=AMin3F1(AMin3F1(d.b,e.b,f.b),b.b,h.b);
+   #ifdef CAS_BETTER_DIAGONALS
+    AF1 mnR2=AMin3F1(AMin3F1(mnR,a.r,c.r),g.r,i.r);
+    AF1 mnG2=AMin3F1(AMin3F1(mnG,a.g,c.g),g.g,i.g);
+    AF1 mnB2=AMin3F1(AMin3F1(mnB,a.b,c.b),g.b,i.b);
+    mnR=mnR+mnR2;
+    mnG=mnG+mnG2;
+    mnB=mnB+mnB2;
+   #endif
+   AF1 mxR=AMax3F1(AMax3F1(d.r,e.r,f.r),b.r,h.r);
+   AF1 mxG=AMax3F1(AMax3F1(d.g,e.g,f.g),b.g,h.g);
+   AF1 mxB=AMax3F1(AMax3F1(d.b,e.b,f.b),b.b,h.b);
+   #ifdef CAS_BETTER_DIAGONALS
+    AF1 mxR2=AMax3F1(AMax3F1(mxR,a.r,c.r),g.r,i.r);
+    AF1 mxG2=AMax3F1(AMax3F1(mxG,a.g,c.g),g.g,i.g);
+    AF1 mxB2=AMax3F1(AMax3F1(mxB,a.b,c.b),g.b,i.b);
+    mxR=mxR+mxR2;
+    mxG=mxG+mxG2;
+    mxB=mxB+mxB2;
+   #endif
+   // Smooth minimum distance to signal limit divided by smooth max.
+   #ifdef CAS_GO_SLOWER
+    AF1 rcpMR=ARcpF1(mxR);
+    AF1 rcpMG=ARcpF1(mxG);
+    AF1 rcpMB=ARcpF1(mxB);
+   #else
+    AF1 rcpMR=APrxLoRcpF1(mxR);
+    AF1 rcpMG=APrxLoRcpF1(mxG);
+    AF1 rcpMB=APrxLoRcpF1(mxB);
+   #endif
+   #ifdef CAS_BETTER_DIAGONALS
+    AF1 ampR=ASatF1(min(mnR,AF1_(2.0)-mxR)*rcpMR); // 2.0 rather than 1.0: mn*/mx* were each summed with their diagonal variant above.
+    AF1 ampG=ASatF1(min(mnG,AF1_(2.0)-mxG)*rcpMG);
+    AF1 ampB=ASatF1(min(mnB,AF1_(2.0)-mxB)*rcpMB);
+   #else
+    AF1 ampR=ASatF1(min(mnR,AF1_(1.0)-mxR)*rcpMR);
+    AF1 ampG=ASatF1(min(mnG,AF1_(1.0)-mxG)*rcpMG);
+    AF1 ampB=ASatF1(min(mnB,AF1_(1.0)-mxB)*rcpMB);
+   #endif
+   // Shaping amount of sharpening.
+   #ifdef CAS_GO_SLOWER
+    ampR=sqrt(ampR);
+    ampG=sqrt(ampG);
+    ampB=sqrt(ampB);
+   #else
+    ampR=APrxLoSqrtF1(ampR);
+    ampG=APrxLoSqrtF1(ampG);
+    ampB=APrxLoSqrtF1(ampB);
+   #endif
+   // Filter shape.
+   //  0 w 0
+   //  w 1 w
+   //  0 w 0
+   AF1 peak=AF1_AU1(const1.x); // Sharpening strength produced by CasSetup() (stored in const1[0]).
+   AF1 wR=ampR*peak;
+   AF1 wG=ampG*peak;
+   AF1 wB=ampB*peak;
+   // Filter.
+   #ifndef CAS_SLOW
+    // Using green coef only, depending on dead code removal to strip out the extra overhead.
+    #ifdef CAS_GO_SLOWER
+     AF1 rcpWeight=ARcpF1(AF1_(1.0)+AF1_(4.0)*wG);
+    #else
+     AF1 rcpWeight=APrxMedRcpF1(AF1_(1.0)+AF1_(4.0)*wG);
+    #endif
+    pixR=ASatF1((b.r*wG+d.r*wG+f.r*wG+h.r*wG+e.r)*rcpWeight); // Plus-shaped kernel: b,d,f,h weighted by wG, e by 1, scaled by rcpWeight.
+    pixG=ASatF1((b.g*wG+d.g*wG+f.g*wG+h.g*wG+e.g)*rcpWeight);
+    pixB=ASatF1((b.b*wG+d.b*wG+f.b*wG+h.b*wG+e.b)*rcpWeight);
+   #else
+    #ifdef CAS_GO_SLOWER
+     AF1 rcpWeightR=ARcpF1(AF1_(1.0)+AF1_(4.0)*wR);
+     AF1 rcpWeightG=ARcpF1(AF1_(1.0)+AF1_(4.0)*wG);
+     AF1 rcpWeightB=ARcpF1(AF1_(1.0)+AF1_(4.0)*wB);
+    #else
+     AF1 rcpWeightR=APrxMedRcpF1(AF1_(1.0)+AF1_(4.0)*wR);
+     AF1 rcpWeightG=APrxMedRcpF1(AF1_(1.0)+AF1_(4.0)*wG);
+     AF1 rcpWeightB=APrxMedRcpF1(AF1_(1.0)+AF1_(4.0)*wB);
+    #endif
+    pixR=ASatF1((b.r*wR+d.r*wR+f.r*wR+h.r*wR+e.r)*rcpWeightR);
+    pixG=ASatF1((b.g*wG+d.g*wG+f.g*wG+h.g*wG+e.g)*rcpWeightG);
+    pixB=ASatF1((b.b*wB+d.b*wB+f.b*wB+h.b*wB+e.b)*rcpWeightB);
+   #endif
+   return;} // End of the sharpen-only path.
+//------------------------------------------------------------------------------------------------------------------------------
+  // Scaling algorithm adaptively interpolates between nearest 4 results of the non-scaling algorithm.
+  //  a b c d
+  //  e f g h
+  //  i j k l
+  //  m n o p
+  // Working these 4 results.
+  //  +-----+-----+
+  //  |     |     |
+  //  |  f..|..g  |
+  //  |  .  |  .  |
+  //  +-----+-----+
+  //  |  .  |  .  |
+  //  |  j..|..k  |
+  //  |     |     |
+  //  +-----+-----+
+  AF2 pp=AF2(ip)*AF2_AU2(const0.xy)+AF2_AU2(const0.zw); // Map the output pixel into input space: scale by const0.xy, offset by const0.zw.
+  AF2 fp=floor(pp); // Integer part: position of tap 'f'.
+  pp-=fp; // Fractional part; feeds the s/t/u/v blend weights below.
+  ASU2 sp=ASU2(fp);
+  AF3 a=CasLoad(sp+ASU2(-1,-1));
+  AF3 b=CasLoad(sp+ASU2( 0,-1));
+  AF3 e=CasLoad(sp+ASU2(-1, 0));
+  AF3 f=CasLoad(sp);
+  AF3 c=CasLoad(sp+ASU2( 1,-1));
+  AF3 d=CasLoad(sp+ASU2( 2,-1));
+  AF3 g=CasLoad(sp+ASU2( 1, 0));
+  AF3 h=CasLoad(sp+ASU2( 2, 0));
+  AF3 i=CasLoad(sp+ASU2(-1, 1));
+  AF3 j=CasLoad(sp+ASU2( 0, 1));
+  AF3 m=CasLoad(sp+ASU2(-1, 2));
+  AF3 n=CasLoad(sp+ASU2( 0, 2));
+  AF3 k=CasLoad(sp+ASU2( 1, 1));
+  AF3 l=CasLoad(sp+ASU2( 2, 1));
+  AF3 o=CasLoad(sp+ASU2( 1, 2));
+  AF3 p=CasLoad(sp+ASU2( 2, 2));
+  // Run optional input transform.
+  CasInput(a.r,a.g,a.b);
+  CasInput(b.r,b.g,b.b);
+  CasInput(c.r,c.g,c.b);
+  CasInput(d.r,d.g,d.b);
+  CasInput(e.r,e.g,e.b);
+  CasInput(f.r,f.g,f.b);
+  CasInput(g.r,g.g,g.b);
+  CasInput(h.r,h.g,h.b);
+  CasInput(i.r,i.g,i.b);
+  CasInput(j.r,j.g,j.b);
+  CasInput(k.r,k.g,k.b);
+  CasInput(l.r,l.g,l.b);
+  CasInput(m.r,m.g,m.b);
+  CasInput(n.r,n.g,n.b);
+  CasInput(o.r,o.g,o.b);
+  CasInput(p.r,p.g,p.b);
+  // Soft min and max.
+  // These are 2.0x bigger (factored out the extra multiply).
+  //  a b c             b
+  //  e f g * 0.5  +  e f g * 0.5  [F]
+  //  i j k             j
+  AF1 mnfR=AMin3F1(AMin3F1(b.r,e.r,f.r),g.r,j.r);
+  AF1 mnfG=AMin3F1(AMin3F1(b.g,e.g,f.g),g.g,j.g);
+  AF1 mnfB=AMin3F1(AMin3F1(b.b,e.b,f.b),g.b,j.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mnfR2=AMin3F1(AMin3F1(mnfR,a.r,c.r),i.r,k.r);
+   AF1 mnfG2=AMin3F1(AMin3F1(mnfG,a.g,c.g),i.g,k.g);
+   AF1 mnfB2=AMin3F1(AMin3F1(mnfB,a.b,c.b),i.b,k.b);
+   mnfR=mnfR+mnfR2;
+   mnfG=mnfG+mnfG2;
+   mnfB=mnfB+mnfB2;
+  #endif
+  AF1 mxfR=AMax3F1(AMax3F1(b.r,e.r,f.r),g.r,j.r);
+  AF1 mxfG=AMax3F1(AMax3F1(b.g,e.g,f.g),g.g,j.g);
+  AF1 mxfB=AMax3F1(AMax3F1(b.b,e.b,f.b),g.b,j.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mxfR2=AMax3F1(AMax3F1(mxfR,a.r,c.r),i.r,k.r);
+   AF1 mxfG2=AMax3F1(AMax3F1(mxfG,a.g,c.g),i.g,k.g);
+   AF1 mxfB2=AMax3F1(AMax3F1(mxfB,a.b,c.b),i.b,k.b);
+   mxfR=mxfR+mxfR2;
+   mxfG=mxfG+mxfG2;
+   mxfB=mxfB+mxfB2;
+  #endif
+  //  b c d             c
+  //  f g h * 0.5  +  f g h * 0.5  [G]
+  //  j k l             k
+  AF1 mngR=AMin3F1(AMin3F1(c.r,f.r,g.r),h.r,k.r);
+  AF1 mngG=AMin3F1(AMin3F1(c.g,f.g,g.g),h.g,k.g);
+  AF1 mngB=AMin3F1(AMin3F1(c.b,f.b,g.b),h.b,k.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mngR2=AMin3F1(AMin3F1(mngR,b.r,d.r),j.r,l.r);
+   AF1 mngG2=AMin3F1(AMin3F1(mngG,b.g,d.g),j.g,l.g);
+   AF1 mngB2=AMin3F1(AMin3F1(mngB,b.b,d.b),j.b,l.b);
+   mngR=mngR+mngR2;
+   mngG=mngG+mngG2;
+   mngB=mngB+mngB2;
+  #endif
+  AF1 mxgR=AMax3F1(AMax3F1(c.r,f.r,g.r),h.r,k.r);
+  AF1 mxgG=AMax3F1(AMax3F1(c.g,f.g,g.g),h.g,k.g);
+  AF1 mxgB=AMax3F1(AMax3F1(c.b,f.b,g.b),h.b,k.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mxgR2=AMax3F1(AMax3F1(mxgR,b.r,d.r),j.r,l.r);
+   AF1 mxgG2=AMax3F1(AMax3F1(mxgG,b.g,d.g),j.g,l.g);
+   AF1 mxgB2=AMax3F1(AMax3F1(mxgB,b.b,d.b),j.b,l.b);
+   mxgR=mxgR+mxgR2;
+   mxgG=mxgG+mxgG2;
+   mxgB=mxgB+mxgB2;
+  #endif
+  //  e f g             f
+  //  i j k * 0.5  +  i j k * 0.5  [J]
+  //  m n o             n
+  AF1 mnjR=AMin3F1(AMin3F1(f.r,i.r,j.r),k.r,n.r);
+  AF1 mnjG=AMin3F1(AMin3F1(f.g,i.g,j.g),k.g,n.g);
+  AF1 mnjB=AMin3F1(AMin3F1(f.b,i.b,j.b),k.b,n.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mnjR2=AMin3F1(AMin3F1(mnjR,e.r,g.r),m.r,o.r);
+   AF1 mnjG2=AMin3F1(AMin3F1(mnjG,e.g,g.g),m.g,o.g);
+   AF1 mnjB2=AMin3F1(AMin3F1(mnjB,e.b,g.b),m.b,o.b);
+   mnjR=mnjR+mnjR2;
+   mnjG=mnjG+mnjG2;
+   mnjB=mnjB+mnjB2;
+  #endif
+  AF1 mxjR=AMax3F1(AMax3F1(f.r,i.r,j.r),k.r,n.r);
+  AF1 mxjG=AMax3F1(AMax3F1(f.g,i.g,j.g),k.g,n.g);
+  AF1 mxjB=AMax3F1(AMax3F1(f.b,i.b,j.b),k.b,n.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mxjR2=AMax3F1(AMax3F1(mxjR,e.r,g.r),m.r,o.r);
+   AF1 mxjG2=AMax3F1(AMax3F1(mxjG,e.g,g.g),m.g,o.g);
+   AF1 mxjB2=AMax3F1(AMax3F1(mxjB,e.b,g.b),m.b,o.b);
+   mxjR=mxjR+mxjR2;
+   mxjG=mxjG+mxjG2;
+   mxjB=mxjB+mxjB2;
+  #endif
+  //  f g h             g
+  //  j k l * 0.5  +  j k l * 0.5  [K]
+  //  n o p             o
+  AF1 mnkR=AMin3F1(AMin3F1(g.r,j.r,k.r),l.r,o.r);
+  AF1 mnkG=AMin3F1(AMin3F1(g.g,j.g,k.g),l.g,o.g);
+  AF1 mnkB=AMin3F1(AMin3F1(g.b,j.b,k.b),l.b,o.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mnkR2=AMin3F1(AMin3F1(mnkR,f.r,h.r),n.r,p.r);
+   AF1 mnkG2=AMin3F1(AMin3F1(mnkG,f.g,h.g),n.g,p.g);
+   AF1 mnkB2=AMin3F1(AMin3F1(mnkB,f.b,h.b),n.b,p.b);
+   mnkR=mnkR+mnkR2;
+   mnkG=mnkG+mnkG2;
+   mnkB=mnkB+mnkB2;
+  #endif
+  AF1 mxkR=AMax3F1(AMax3F1(g.r,j.r,k.r),l.r,o.r);
+  AF1 mxkG=AMax3F1(AMax3F1(g.g,j.g,k.g),l.g,o.g);
+  AF1 mxkB=AMax3F1(AMax3F1(g.b,j.b,k.b),l.b,o.b);
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 mxkR2=AMax3F1(AMax3F1(mxkR,f.r,h.r),n.r,p.r);
+   AF1 mxkG2=AMax3F1(AMax3F1(mxkG,f.g,h.g),n.g,p.g);
+   AF1 mxkB2=AMax3F1(AMax3F1(mxkB,f.b,h.b),n.b,p.b);
+   mxkR=mxkR+mxkR2;
+   mxkG=mxkG+mxkG2;
+   mxkB=mxkB+mxkB2;
+  #endif
+  // Smooth minimum distance to signal limit divided by smooth max.
+  #ifdef CAS_GO_SLOWER
+   AF1 rcpMfR=ARcpF1(mxfR);
+   AF1 rcpMfG=ARcpF1(mxfG);
+   AF1 rcpMfB=ARcpF1(mxfB);
+   AF1 rcpMgR=ARcpF1(mxgR);
+   AF1 rcpMgG=ARcpF1(mxgG);
+   AF1 rcpMgB=ARcpF1(mxgB);
+   AF1 rcpMjR=ARcpF1(mxjR);
+   AF1 rcpMjG=ARcpF1(mxjG);
+   AF1 rcpMjB=ARcpF1(mxjB);
+   AF1 rcpMkR=ARcpF1(mxkR);
+   AF1 rcpMkG=ARcpF1(mxkG);
+   AF1 rcpMkB=ARcpF1(mxkB);
+  #else
+   AF1 rcpMfR=APrxLoRcpF1(mxfR);
+   AF1 rcpMfG=APrxLoRcpF1(mxfG);
+   AF1 rcpMfB=APrxLoRcpF1(mxfB);
+   AF1 rcpMgR=APrxLoRcpF1(mxgR);
+   AF1 rcpMgG=APrxLoRcpF1(mxgG);
+   AF1 rcpMgB=APrxLoRcpF1(mxgB);
+   AF1 rcpMjR=APrxLoRcpF1(mxjR);
+   AF1 rcpMjG=APrxLoRcpF1(mxjG);
+   AF1 rcpMjB=APrxLoRcpF1(mxjB);
+   AF1 rcpMkR=APrxLoRcpF1(mxkR);
+   AF1 rcpMkG=APrxLoRcpF1(mxkG);
+   AF1 rcpMkB=APrxLoRcpF1(mxkB);
+  #endif
+  #ifdef CAS_BETTER_DIAGONALS
+   AF1 ampfR=ASatF1(min(mnfR,AF1_(2.0)-mxfR)*rcpMfR);
+   AF1 ampfG=ASatF1(min(mnfG,AF1_(2.0)-mxfG)*rcpMfG);
+   AF1 ampfB=ASatF1(min(mnfB,AF1_(2.0)-mxfB)*rcpMfB);
+   AF1 ampgR=ASatF1(min(mngR,AF1_(2.0)-mxgR)*rcpMgR);
+   AF1 ampgG=ASatF1(min(mngG,AF1_(2.0)-mxgG)*rcpMgG);
+   AF1 ampgB=ASatF1(min(mngB,AF1_(2.0)-mxgB)*rcpMgB);
+   AF1 ampjR=ASatF1(min(mnjR,AF1_(2.0)-mxjR)*rcpMjR);
+   AF1 ampjG=ASatF1(min(mnjG,AF1_(2.0)-mxjG)*rcpMjG);
+   AF1 ampjB=ASatF1(min(mnjB,AF1_(2.0)-mxjB)*rcpMjB);
+   AF1 ampkR=ASatF1(min(mnkR,AF1_(2.0)-mxkR)*rcpMkR);
+   AF1 ampkG=ASatF1(min(mnkG,AF1_(2.0)-mxkG)*rcpMkG);
+   AF1 ampkB=ASatF1(min(mnkB,AF1_(2.0)-mxkB)*rcpMkB);
+  #else
+   AF1 ampfR=ASatF1(min(mnfR,AF1_(1.0)-mxfR)*rcpMfR);
+   AF1 ampfG=ASatF1(min(mnfG,AF1_(1.0)-mxfG)*rcpMfG);
+   AF1 ampfB=ASatF1(min(mnfB,AF1_(1.0)-mxfB)*rcpMfB);
+   AF1 ampgR=ASatF1(min(mngR,AF1_(1.0)-mxgR)*rcpMgR);
+   AF1 ampgG=ASatF1(min(mngG,AF1_(1.0)-mxgG)*rcpMgG);
+   AF1 ampgB=ASatF1(min(mngB,AF1_(1.0)-mxgB)*rcpMgB);
+   AF1 ampjR=ASatF1(min(mnjR,AF1_(1.0)-mxjR)*rcpMjR);
+   AF1 ampjG=ASatF1(min(mnjG,AF1_(1.0)-mxjG)*rcpMjG);
+   AF1 ampjB=ASatF1(min(mnjB,AF1_(1.0)-mxjB)*rcpMjB);
+   AF1 ampkR=ASatF1(min(mnkR,AF1_(1.0)-mxkR)*rcpMkR);
+   AF1 ampkG=ASatF1(min(mnkG,AF1_(1.0)-mxkG)*rcpMkG);
+   AF1 ampkB=ASatF1(min(mnkB,AF1_(1.0)-mxkB)*rcpMkB);
+  #endif
+  // Shaping amount of sharpening.
+  #ifdef CAS_GO_SLOWER
+   ampfR=sqrt(ampfR);
+   ampfG=sqrt(ampfG);
+   ampfB=sqrt(ampfB);
+   ampgR=sqrt(ampgR);
+   ampgG=sqrt(ampgG);
+   ampgB=sqrt(ampgB);
+   ampjR=sqrt(ampjR);
+   ampjG=sqrt(ampjG);
+   ampjB=sqrt(ampjB);
+   ampkR=sqrt(ampkR);
+   ampkG=sqrt(ampkG);
+   ampkB=sqrt(ampkB);
+  #else
+   ampfR=APrxLoSqrtF1(ampfR);
+   ampfG=APrxLoSqrtF1(ampfG);
+   ampfB=APrxLoSqrtF1(ampfB);
+   ampgR=APrxLoSqrtF1(ampgR);
+   ampgG=APrxLoSqrtF1(ampgG);
+   ampgB=APrxLoSqrtF1(ampgB);
+   ampjR=APrxLoSqrtF1(ampjR);
+   ampjG=APrxLoSqrtF1(ampjG);
+   ampjB=APrxLoSqrtF1(ampjB);
+   ampkR=APrxLoSqrtF1(ampkR);
+   ampkG=APrxLoSqrtF1(ampkG);
+   ampkB=APrxLoSqrtF1(ampkB);
+  #endif
+  // Filter shape.
+  //  0 w 0
+  //  w 1 w
+  //  0 w 0
+  AF1 peak=AF1_AU1(const1.x);
+  AF1 wfR=ampfR*peak;
+  AF1 wfG=ampfG*peak;
+  AF1 wfB=ampfB*peak;
+  AF1 wgR=ampgR*peak;
+  AF1 wgG=ampgG*peak;
+  AF1 wgB=ampgB*peak;
+  AF1 wjR=ampjR*peak;
+  AF1 wjG=ampjG*peak;
+  AF1 wjB=ampjB*peak;
+  AF1 wkR=ampkR*peak;
+  AF1 wkG=ampkG*peak;
+  AF1 wkB=ampkB*peak;
+  // Blend between 4 results.
+  //  s t
+  //  u v
+  AF1 s=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y);
+  AF1 t=           pp.x *(AF1_(1.0)-pp.y);
+  AF1 u=(AF1_(1.0)-pp.x)*           pp.y ;
+  AF1 v=           pp.x *           pp.y ;
+  // Thin edges to hide bilinear interpolation (helps diagonals).
+  AF1 thinB=1.0/32.0; // Constant added to each green max-min range before taking the reciprocal.
+  #ifdef CAS_GO_SLOWER
+   s*=ARcpF1(thinB+(mxfG-mnfG));
+   t*=ARcpF1(thinB+(mxgG-mngG));
+   u*=ARcpF1(thinB+(mxjG-mnjG));
+   v*=ARcpF1(thinB+(mxkG-mnkG));
+  #else
+   s*=APrxLoRcpF1(thinB+(mxfG-mnfG));
+   t*=APrxLoRcpF1(thinB+(mxgG-mngG));
+   u*=APrxLoRcpF1(thinB+(mxjG-mnjG));
+   v*=APrxLoRcpF1(thinB+(mxkG-mnkG));
+  #endif
+  // Final weighting.
+  //    b c
+  //  e f g h
+  //  i j k l
+  //    n o
+  //  _____  _____  _____  _____
+  //         fs        gt
+  //
+  //  _____  _____  _____  _____
+  //  fs      s gt  fs  t     gt
+  //         ju        kv
+  //  _____  _____  _____  _____
+  //         fs        gt
+  //  ju      u kv  ju  v     kv
+  //  _____  _____  _____  _____
+  //
+  //         ju        kv
+  AF1 qbeR=wfR*s;
+  AF1 qbeG=wfG*s;
+  AF1 qbeB=wfB*s;
+  AF1 qchR=wgR*t;
+  AF1 qchG=wgG*t;
+  AF1 qchB=wgB*t;
+  AF1 qfR=wgR*t+wjR*u+s;
+  AF1 qfG=wgG*t+wjG*u+s;
+  AF1 qfB=wgB*t+wjB*u+s;
+  AF1 qgR=wfR*s+wkR*v+t;
+  AF1 qgG=wfG*s+wkG*v+t;
+  AF1 qgB=wfB*s+wkB*v+t;
+  AF1 qjR=wfR*s+wkR*v+u;
+  AF1 qjG=wfG*s+wkG*v+u;
+  AF1 qjB=wfB*s+wkB*v+u;
+  AF1 qkR=wgR*t+wjR*u+v;
+  AF1 qkG=wgG*t+wjG*u+v;
+  AF1 qkB=wgB*t+wjB*u+v;
+  AF1 qinR=wjR*u;
+  AF1 qinG=wjG*u;
+  AF1 qinB=wjB*u;
+  AF1 qloR=wkR*v;
+  AF1 qloG=wkG*v;
+  AF1 qloB=wkB*v;
+  // Filter. Twelve taps are summed; the corner taps a, d, m, p get no weight in the sum.
+  #ifndef CAS_SLOW
+   // Using green coef only, depending on dead code removal to strip out the extra overhead.
+   #ifdef CAS_GO_SLOWER
+    AF1 rcpWG=ARcpF1(AF1_(2.0)*qbeG+AF1_(2.0)*qchG+AF1_(2.0)*qinG+AF1_(2.0)*qloG+qfG+qgG+qjG+qkG);
+   #else
+    AF1 rcpWG=APrxMedRcpF1(AF1_(2.0)*qbeG+AF1_(2.0)*qchG+AF1_(2.0)*qinG+AF1_(2.0)*qloG+qfG+qgG+qjG+qkG);
+   #endif
+   pixR=ASatF1((b.r*qbeG+e.r*qbeG+c.r*qchG+h.r*qchG+i.r*qinG+n.r*qinG+l.r*qloG+o.r*qloG+f.r*qfG+g.r*qgG+j.r*qjG+k.r*qkG)*rcpWG);
+   pixG=ASatF1((b.g*qbeG+e.g*qbeG+c.g*qchG+h.g*qchG+i.g*qinG+n.g*qinG+l.g*qloG+o.g*qloG+f.g*qfG+g.g*qgG+j.g*qjG+k.g*qkG)*rcpWG);
+   pixB=ASatF1((b.b*qbeG+e.b*qbeG+c.b*qchG+h.b*qchG+i.b*qinG+n.b*qinG+l.b*qloG+o.b*qloG+f.b*qfG+g.b*qgG+j.b*qjG+k.b*qkG)*rcpWG);
+  #else
+   #ifdef CAS_GO_SLOWER
+    AF1 rcpWR=ARcpF1(AF1_(2.0)*qbeR+AF1_(2.0)*qchR+AF1_(2.0)*qinR+AF1_(2.0)*qloR+qfR+qgR+qjR+qkR);
+    AF1 rcpWG=ARcpF1(AF1_(2.0)*qbeG+AF1_(2.0)*qchG+AF1_(2.0)*qinG+AF1_(2.0)*qloG+qfG+qgG+qjG+qkG);
+    AF1 rcpWB=ARcpF1(AF1_(2.0)*qbeB+AF1_(2.0)*qchB+AF1_(2.0)*qinB+AF1_(2.0)*qloB+qfB+qgB+qjB+qkB);
+   #else
+    AF1 rcpWR=APrxMedRcpF1(AF1_(2.0)*qbeR+AF1_(2.0)*qchR+AF1_(2.0)*qinR+AF1_(2.0)*qloR+qfR+qgR+qjR+qkR);
+    AF1 rcpWG=APrxMedRcpF1(AF1_(2.0)*qbeG+AF1_(2.0)*qchG+AF1_(2.0)*qinG+AF1_(2.0)*qloG+qfG+qgG+qjG+qkG);
+    AF1 rcpWB=APrxMedRcpF1(AF1_(2.0)*qbeB+AF1_(2.0)*qchB+AF1_(2.0)*qinB+AF1_(2.0)*qloB+qfB+qgB+qjB+qkB);
+   #endif
+   pixR=ASatF1((b.r*qbeR+e.r*qbeR+c.r*qchR+h.r*qchR+i.r*qinR+n.r*qinR+l.r*qloR+o.r*qloR+f.r*qfR+g.r*qgR+j.r*qjR+k.r*qkR)*rcpWR);
+   pixG=ASatF1((b.g*qbeG+e.g*qbeG+c.g*qchG+h.g*qchG+i.g*qinG+n.g*qinG+l.g*qloG+o.g*qloG+f.g*qfG+g.g*qgG+j.g*qjG+k.g*qkG)*rcpWG);
+   pixB=ASatF1((b.b*qbeB+e.b*qbeB+c.b*qchB+h.b*qchB+i.b*qinB+n.b*qinB+l.b*qloB+o.b*qloB+f.b*qfB+g.b*qgB+j.b*qjB+k.b*qkB)*rcpWB);
+  #endif
+ }
+#endif
+
+#ifdef A_GPU
+void main()
+{   // Sharpen-only CAS pass (noScaling = true): filter this fragment's pixel, then write it through linear_to_srgb() with alpha 1.
+    float outR = 0.0, outG = 0.0, outB = 0.0;
+    uvec2 pixel = uvec2(vary_fragcoord * out_screen_res.xy);
+    CasFilter(outR, outG, outB, pixel, cas_param_0, cas_param_1, true);
+    frag_color = vec4(linear_to_srgb(vec3(outR, outG, outB)), 1.0);
+}
+#endif
diff --git a/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl b/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl
index 94dac7e5a9..16e23a3da7 100644
--- a/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/fxaaF.glsl
@@ -256,6 +256,10 @@ A. Or use FXAA_GREEN_AS_LUMA.
     #define FXAA_GLSL_130 0
 #endif
 /*--------------------------------------------------------------------------*/
+#ifndef FXAA_GLSL_400
+    #define FXAA_GLSL_400 0
+#endif
+/*--------------------------------------------------------------------------*/
 #ifndef FXAA_HLSL_3
     #define FXAA_HLSL_3 0
 #endif
@@ -342,8 +346,8 @@ A. Or use FXAA_GREEN_AS_LUMA.
     // 1 = API supports gather4 on alpha channel.
     // 0 = API does not support gather4 on alpha channel.
     //
-    #if (FXAA_GLSL_130 == 0)
-        #define FXAA_GATHER4_ALPHA 0
+    #if (FXAA_GLSL_400 == 1)
+        #define FXAA_GATHER4_ALPHA 1
     #endif
     #if (FXAA_HLSL_5 == 1)
         #define FXAA_GATHER4_ALPHA 1
@@ -652,7 +656,7 @@ NOTE the other tuning knobs are now in the shader function inputs!
                                 API PORTING
 
 ============================================================================*/
-#if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1)
+#if (FXAA_GLSL_120 == 1) || (FXAA_GLSL_130 == 1) || (FXAA_GLSL_400 == 1)
     #define FxaaBool bool
     #define FxaaDiscard discard
     #define FxaaFloat float
@@ -714,6 +718,16 @@ NOTE the other tuning knobs are now in the shader function inputs!
     #endif
 #endif
 /*--------------------------------------------------------------------------*/
+#if (FXAA_GLSL_400 == 1)
+    // Requires "#version 400" or better. Gather macros pass a component index (3 = alpha, 1 = green); FxaaTexOff ignores its 'r' argument.
+    #define FxaaTexTop(t, p) textureLod(t, p, 0.0)
+    #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)
+    #define FxaaTexAlpha4(t, p) textureGather(t, p, 3)
+    #define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)
+    #define FxaaTexGreen4(t, p) textureGather(t, p, 1)
+    #define FxaaTexOffGreen4(t, p, o) textureGatherOffset(t, p, o, 1)
+#endif
+/*--------------------------------------------------------------------------*/
 #if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) || (FXAA_PS3 == 1)
     #define FxaaInt2 float2
     #define FxaaTex sampler2D
diff --git a/indra/newview/featuretable.txt b/indra/newview/featuretable.txt
index 213f0ab845..9be7254a7a 100644
--- a/indra/newview/featuretable.txt
+++ b/indra/newview/featuretable.txt
@@ -79,6 +79,7 @@ RenderHeroProbeDistance		1	16
 RenderHeroProbeUpdateRate	1	4
 RenderHeroProbeConservativeUpdateMultiplier 1 16
 RenderDownScaleMethod       1   1
+RenderCASSharpness          1   1
 
 
 //
@@ -115,6 +116,7 @@ RenderHeroProbeResolution	1	256
 RenderHeroProbeDistance		1	4
 RenderHeroProbeUpdateRate	1	6
 RenderHeroProbeConservativeUpdateMultiplier 1 16
+RenderCASSharpness          1   0
 
 //
 // Medium Low Graphics Settings
@@ -150,6 +152,7 @@ RenderHeroProbeResolution	1	256
 RenderHeroProbeDistance		1	6
 RenderHeroProbeUpdateRate	1	3
 RenderHeroProbeConservativeUpdateMultiplier 1 16
+RenderCASSharpness          1   0
 
 //
 // Medium Graphics Settings (standard)
@@ -185,6 +188,7 @@ RenderHeroProbeResolution	1	512
 RenderHeroProbeDistance		1	6
 RenderHeroProbeUpdateRate	1	3
 RenderHeroProbeConservativeUpdateMultiplier 1 16
+RenderCASSharpness          1   0
 
 //
 // Medium High Graphics Settings
@@ -220,6 +224,7 @@ RenderHeroProbeResolution	1	512
 RenderHeroProbeDistance		1	6
 RenderHeroProbeUpdateRate	1	2
 RenderHeroProbeConservativeUpdateMultiplier 1 8
+RenderCASSharpness          1   0
 
 //
 // High Graphics Settings (SSAO + sun shadows)
@@ -255,6 +260,7 @@ RenderHeroProbeResolution	1	512
 RenderHeroProbeDistance		1	8
 RenderHeroProbeUpdateRate	1	2
 RenderHeroProbeConservativeUpdateMultiplier 1 8
+RenderCASSharpness          1   0.4
 
 //
 // High Ultra Graphics Settings (deferred + SSAO + all shadows)
@@ -290,6 +296,7 @@ RenderHeroProbeResolution	1	1024
 RenderHeroProbeDistance		1	16
 RenderHeroProbeUpdateRate	1	1
 RenderHeroProbeConservativeUpdateMultiplier 1 4
+RenderCASSharpness          1   0.4
 
 //
 // Ultra graphics (REALLY PURTY!)
@@ -325,6 +332,7 @@ RenderHeroProbeResolution	1	2048
 RenderHeroProbeDistance		1	16
 RenderHeroProbeUpdateRate	1	1
 RenderHeroProbeConservativeUpdateMultiplier 1 4
+RenderCASSharpness          1   0.4
 
 //
 // Class Unknown Hardware (unknown)
diff --git a/indra/newview/featuretable_mac.txt b/indra/newview/featuretable_mac.txt
index 008e4c8882..8eaf5894ab 100644
--- a/indra/newview/featuretable_mac.txt
+++ b/indra/newview/featuretable_mac.txt
@@ -77,6 +77,7 @@ RenderHeroProbeResolution	1	2048
 RenderHeroProbeDistance		1	16
 RenderHeroProbeUpdateRate	1	4
 RenderHeroProbeConservativeUpdateMultiplier 1 16
+RenderCASSharpness          1   1
 
 //
 // Low Graphics Settings
@@ -112,6 +113,7 @@ RenderHeroProbeResolution	1	256
 RenderHeroProbeDistance		1	4
 RenderHeroProbeUpdateRate	1   6
 RenderHeroProbeConservativeUpdateMultiplier 1 16
+RenderCASSharpness          1   0
 
 //
 // Medium Low Graphics Settings
@@ -147,6 +149,7 @@ RenderHeroProbeResolution	1	256
 RenderHeroProbeDistance		1	6
 RenderHeroProbeUpdateRate	1	3
 RenderHeroProbeConservativeUpdateMultiplier 1 16
+RenderCASSharpness          1   0
 
 //
 // Medium Graphics Settings (standard)
@@ -182,6 +185,7 @@ RenderHeroProbeResolution	1	512
 RenderHeroProbeDistance		1	6
 RenderHeroProbeUpdateRate	1	3
 RenderHeroProbeConservativeUpdateMultiplier 1 16
+RenderCASSharpness          1   0
 
 //
 // Medium High Graphics Settings
@@ -217,6 +221,7 @@ RenderHeroProbeResolution	1	512
 RenderHeroProbeDistance		1	6
 RenderHeroProbeUpdateRate	1	2
 RenderHeroProbeConservativeUpdateMultiplier 1 8
+RenderCASSharpness          1   0
 
 //
 // High Graphics Settings (SSAO + sun shadows)
@@ -252,6 +257,7 @@ RenderHeroProbeResolution	1	512
 RenderHeroProbeDistance		1	8
 RenderHeroProbeUpdateRate	1	2
 RenderHeroProbeConservativeUpdateMultiplier 1 8
+RenderCASSharpness          1   0
 
 //
 // High Ultra Graphics Settings (SSAO + all shadows)
@@ -287,6 +293,7 @@ RenderHeroProbeResolution	1	512
 RenderHeroProbeDistance		1	16
 RenderHeroProbeUpdateRate	1	1
 RenderHeroProbeConservativeUpdateMultiplier 1 4
+RenderCASSharpness          1   0.4
 
 //
 // Ultra graphics (REALLY PURTY!)
@@ -322,6 +329,7 @@ RenderHeroProbeResolution	1	1024
 RenderHeroProbeDistance		1	16
 RenderHeroProbeUpdateRate	1	1
 RenderHeroProbeConservativeUpdateMultiplier 1 4
+RenderCASSharpness          1   0.4
 
 //
 // Class Unknown Hardware (unknown)
diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp
index 739975eab4..7bd5206453 100644
--- a/indra/newview/lldrawpool.cpp
+++ b/indra/newview/lldrawpool.cpp
@@ -407,22 +407,19 @@ void LLRenderPass::renderRiggedGroup(LLSpatialGroup* group, U32 type, bool textu
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
     LLSpatialGroup::drawmap_elem_t& draw_info = group->mDrawMap[type];
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false;
 
     for (LLSpatialGroup::drawmap_elem_t::iterator k = draw_info.begin(); k != draw_info.end(); ++k)
     {
         LLDrawInfo* pparams = *k;
         if (pparams)
         {
-            if (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash)
+            if (uploadMatrixPalette(pparams->mAvatar, pparams->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
             {
-                uploadMatrixPalette(*pparams);
-                lastAvatar = pparams->mAvatar;
-                lastMeshId = pparams->mSkinInfo->mHash;
+                pushBatch(*pparams, texture);
             }
-
-            pushBatch(*pparams, texture);
         }
     }
 }
@@ -468,8 +465,9 @@ void LLRenderPass::pushRiggedBatches(U32 type, bool texture, bool batch_textures
 
     if (texture)
     {
-        LLVOAvatar* lastAvatar = nullptr;
+        const LLVOAvatar* lastAvatar = nullptr;
         U64 lastMeshId = 0;
+        bool skipLastSkin = false;
         auto* begin = gPipeline.beginRenderMap(type);
         auto* end = gPipeline.endRenderMap(type);
         for (LLCullResult::drawinfo_iterator i = begin; i != end; )
@@ -477,14 +475,10 @@ void LLRenderPass::pushRiggedBatches(U32 type, bool texture, bool batch_textures
             LLDrawInfo* pparams = *i;
             LLCullResult::increment_iterator(i, end);
 
-            if (pparams->mAvatar.notNull() && (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash))
+            if (uploadMatrixPalette(pparams->mAvatar, pparams->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
             {
-                uploadMatrixPalette(*pparams);
-                lastAvatar = pparams->mAvatar;
-                lastMeshId = pparams->mSkinInfo->mHash;
+                pushBatch(*pparams, texture, batch_textures);
             }
-
-            pushBatch(*pparams, texture, batch_textures);
         }
     }
     else
@@ -496,8 +490,9 @@ void LLRenderPass::pushRiggedBatches(U32 type, bool texture, bool batch_textures
 void LLRenderPass::pushUntexturedRiggedBatches(U32 type)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false;
     auto* begin = gPipeline.beginRenderMap(type);
     auto* end = gPipeline.endRenderMap(type);
     for (LLCullResult::drawinfo_iterator i = begin; i != end; )
@@ -505,14 +500,10 @@ void LLRenderPass::pushUntexturedRiggedBatches(U32 type)
         LLDrawInfo* pparams = *i;
         LLCullResult::increment_iterator(i, end);
 
-        if (pparams->mAvatar.notNull() && (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash))
+        if (uploadMatrixPalette(pparams->mAvatar, pparams->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
         {
-            uploadMatrixPalette(*pparams);
-            lastAvatar = pparams->mAvatar;
-            lastMeshId = pparams->mSkinInfo->mHash;
+            pushUntexturedBatch(*pparams);
         }
-
-        pushUntexturedBatch(*pparams);
     }
 }
 
@@ -533,8 +524,9 @@ void LLRenderPass::pushMaskBatches(U32 type, bool texture, bool batch_textures)
 void LLRenderPass::pushRiggedMaskBatches(U32 type, bool texture, bool batch_textures)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false;
     auto* begin = gPipeline.beginRenderMap(type);
     auto* end = gPipeline.endRenderMap(type);
     for (LLCullResult::drawinfo_iterator i = begin; i != end; )
@@ -545,23 +537,12 @@ void LLRenderPass::pushRiggedMaskBatches(U32 type, bool texture, bool batch_text
 
         llassert(pparams);
 
-        if (LLGLSLShader::sCurBoundShaderPtr)
-        {
-            LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
-        }
-        else
-        {
-            gGL.flush();
-        }
+        LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(pparams->mAlphaMaskCutoff);
 
-        if (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash)
+        if (uploadMatrixPalette(pparams->mAvatar, pparams->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
         {
-            uploadMatrixPalette(*pparams);
-            lastAvatar = pparams->mAvatar;
-            lastMeshId = pparams->mSkinInfo->mHash;
+            pushBatch(*pparams, texture, batch_textures);
         }
-
-        pushBatch(*pparams, texture, batch_textures);
     }
 }
 
@@ -667,6 +648,8 @@ bool LLRenderPass::uploadMatrixPalette(LLDrawInfo& params)
 //static
 bool LLRenderPass::uploadMatrixPalette(LLVOAvatar* avatar, LLMeshSkinInfo* skinInfo)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
+
     if (!avatar)
     {
         return false;
@@ -688,6 +671,81 @@ bool LLRenderPass::uploadMatrixPalette(LLVOAvatar* avatar, LLMeshSkinInfo* skinI
     return true;
 }
 
+// Uploads avatar's matrix palette for skinInfo to the bound shader unless (avatar, skin hash) matches the previous call; returns true if rendering should proceed
+//static
+bool LLRenderPass::uploadMatrixPalette(LLVOAvatar* avatar, LLMeshSkinInfo* skinInfo, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
+
+    llassert(skinInfo); // dereferenced below via skinInfo->mHash
+    llassert(LLGLSLShader::sCurBoundShaderPtr); // the palette is uploaded through the currently bound shader
+
+    if (!avatar)
+    {
+        return false; // no avatar: nothing is uploaded and lastAvatar/lastMeshId/skipLastSkin are left untouched
+    }
+
+    if (avatar == lastAvatar && skinInfo->mHash == lastMeshId)
+    {
+        return !skipLastSkin; // same avatar and skin as the previous call: reuse its outcome without re-uploading
+    }
+
+    const LLVOAvatar::MatrixPaletteCache& mpc = avatar->updateSkinInfoMatrixPalette(skinInfo);
+    U32 count = static_cast<U32>(mpc.mMatrixPalette.size());
+    // An empty palette means the skin info is not loaded yet; record that in skipLastSkin so callers do not render
+    skipLastSkin = !bool(count);
+    lastAvatar = avatar; // the (avatar, hash) cache key is updated even when the palette is empty
+    lastMeshId = skinInfo->mHash;
+
+    if (!skipLastSkin)
+    {
+        LLGLSLShader::sCurBoundShaderPtr->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
+            count,
+            false,
+            (GLfloat*)&(mpc.mGLMp[0]));
+    }
+
+    return !skipLastSkin;
+}
+
+// Same as the 5-argument overload, but the cached result is also invalidated when the bound shader changes; returns true if rendering should proceed
+//static
+bool LLRenderPass::uploadMatrixPalette(LLVOAvatar* avatar, LLMeshSkinInfo* skinInfo, const LLVOAvatar*& lastAvatar, U64& lastMeshId, const LLGLSLShader*& lastAvatarShader, bool& skipLastSkin)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
+
+    llassert(skinInfo); // dereferenced below via skinInfo->mHash
+    llassert(LLGLSLShader::sCurBoundShaderPtr); // the palette is uploaded through the currently bound shader
+
+    if (!avatar)
+    {
+        return false; // no avatar: nothing is uploaded and the last* / skipLastSkin state is left untouched
+    }
+
+    if (avatar == lastAvatar && skinInfo->mHash == lastMeshId && lastAvatarShader == LLGLSLShader::sCurBoundShaderPtr)
+    {
+        return !skipLastSkin; // same avatar, skin and shader as the previous call: reuse its outcome without re-uploading
+    }
+
+    const LLVOAvatar::MatrixPaletteCache& mpc = avatar->updateSkinInfoMatrixPalette(skinInfo);
+    U32 count = static_cast<U32>(mpc.mMatrixPalette.size());
+    // An empty palette means the skin info is not loaded yet; record that in skipLastSkin so callers do not render
+    skipLastSkin = !bool(count);
+    lastAvatar = avatar; // the (avatar, hash, shader) cache key is updated even when the palette is empty
+    lastMeshId = skinInfo->mHash;
+    lastAvatarShader = LLGLSLShader::sCurBoundShaderPtr;
+
+    if (!skipLastSkin)
+    {
+        LLGLSLShader::sCurBoundShaderPtr->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
+            count,
+            false,
+            (GLfloat*)&(mpc.mGLMp[0]));
+    }
+
+    return !skipLastSkin;
+}
+
 void setup_texture_matrix(LLDrawInfo& params)
 {
     if (params.mTextureMatrix)
@@ -717,7 +775,7 @@ void LLRenderPass::pushGLTFBatches(U32 type, bool textured)
     }
     else
     {
-        pushRiggedGLTFBatches(type);
+        pushUntexturedGLTFBatches(type);
     }
 }
 
@@ -801,8 +859,9 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type, bool textured)
 void LLRenderPass::pushRiggedGLTFBatches(U32 type)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false;
 
     auto* begin = gPipeline.beginRenderMap(type);
     auto* end = gPipeline.endRenderMap(type);
@@ -812,15 +871,16 @@ void LLRenderPass::pushRiggedGLTFBatches(U32 type)
         LLDrawInfo& params = **i;
         LLCullResult::increment_iterator(i, end);
 
-        pushRiggedGLTFBatch(params, lastAvatar, lastMeshId);
+        pushRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin);
     }
 }
 
 void LLRenderPass::pushUntexturedRiggedGLTFBatches(U32 type)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL;
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false;
 
     auto* begin = gPipeline.beginRenderMap(type);
     auto* end = gPipeline.endRenderMap(type);
@@ -830,34 +890,26 @@ void LLRenderPass::pushUntexturedRiggedGLTFBatches(U32 type)
         LLDrawInfo& params = **i;
         LLCullResult::increment_iterator(i, end);
 
-        pushUntexturedRiggedGLTFBatch(params, lastAvatar, lastMeshId);
+        pushUntexturedRiggedGLTFBatch(params, lastAvatar, lastMeshId, skipLastSkin);
     }
 }
 
 
 // static
-void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, LLVOAvatar*& lastAvatar, U64& lastMeshId)
+void LLRenderPass::pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin)
 {
-    if (params.mAvatar.notNull() && (lastAvatar != params.mAvatar || lastMeshId != params.mSkinInfo->mHash))
+    if (uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin)) // false for a null avatar or an empty matrix palette: the batch is skipped
     {
-        uploadMatrixPalette(params);
-        lastAvatar = params.mAvatar;
-        lastMeshId = params.mSkinInfo->mHash;
+        pushGLTFBatch(params);
     }
-
-    pushGLTFBatch(params);
 }
 
 // static
-void LLRenderPass::pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, LLVOAvatar*& lastAvatar, U64& lastMeshId)
+void LLRenderPass::pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin)
 {
-    if (params.mAvatar.notNull() && (lastAvatar != params.mAvatar || lastMeshId != params.mSkinInfo->mHash))
+    if (uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin)) // false for a null avatar or an empty matrix palette: the batch is skipped
     {
-        uploadMatrixPalette(params);
-        lastAvatar = params.mAvatar;
-        lastMeshId = params.mSkinInfo->mHash;
+        pushUntexturedGLTFBatch(params);
     }
-
-    pushUntexturedGLTFBatch(params);
 }
 
diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h
index 01a29f71a0..bc412214c7 100644
--- a/indra/newview/lldrawpool.h
+++ b/indra/newview/lldrawpool.h
@@ -38,6 +38,7 @@ class LLViewerFetchedTexture;
 class LLSpatialGroup;
 class LLDrawInfo;
 class LLVOAvatar;
+class LLGLSLShader;
 class LLMeshSkinInfo;
 
 class LLDrawPool
@@ -375,9 +376,9 @@ public:
 
     // push a single GLTF draw call
     static void pushGLTFBatch(LLDrawInfo& params);
-    static void pushRiggedGLTFBatch(LLDrawInfo& params, LLVOAvatar*& lastAvatar, U64& lastMeshId);
+    static void pushRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin);
     static void pushUntexturedGLTFBatch(LLDrawInfo& params);
-    static void pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, LLVOAvatar*& lastAvatar, U64& lastMeshId);
+    static void pushUntexturedRiggedGLTFBatch(LLDrawInfo& params, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin);
 
     void pushMaskBatches(U32 type, bool texture = true, bool batch_textures = false);
     void pushRiggedMaskBatches(U32 type, bool texture = true, bool batch_textures = false);
@@ -386,6 +387,8 @@ public:
     void pushBumpBatch(LLDrawInfo& params, bool texture, bool batch_textures = false);
     static bool uploadMatrixPalette(LLDrawInfo& params);
     static bool uploadMatrixPalette(LLVOAvatar* avatar, LLMeshSkinInfo* skinInfo);
+    static bool uploadMatrixPalette(LLVOAvatar* avatar, LLMeshSkinInfo* skinInfo, const LLVOAvatar*& lastAvatar, U64& lastMeshId, bool& skipLastSkin);
+    static bool uploadMatrixPalette(LLVOAvatar* avatar, LLMeshSkinInfo* skinInfo, const LLVOAvatar*& lastAvatar, U64& lastMeshId, const LLGLSLShader*& lastAvatarShader, bool& skipLastSkin);
     virtual void renderGroup(LLSpatialGroup* group, U32 type, bool texture = true);
     virtual void renderRiggedGroup(LLSpatialGroup* group, U32 type, bool texture = true);
 };
diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp
index cb1439b403..b4d14e22f3 100644
--- a/indra/newview/lldrawpoolalpha.cpp
+++ b/indra/newview/lldrawpoolalpha.cpp
@@ -327,8 +327,9 @@ void LLDrawPoolAlpha::renderAlphaHighlight()
 {
     for (int pass = 0; pass < 2; ++pass)
     { //two passes, one rigged and one not
-        LLVOAvatar* lastAvatar = nullptr;
+        const LLVOAvatar* lastAvatar = nullptr;
         U64 lastMeshId = 0;
+        bool skipLastSkin = false;
 
         LLCullResult::sg_iterator begin = pass == 0 ? gPipeline.beginAlphaGroups() : gPipeline.beginRiggedAlphaGroups();
         LLCullResult::sg_iterator end = pass == 0 ? gPipeline.endAlphaGroups() : gPipeline.endRiggedAlphaGroups();
@@ -347,22 +348,16 @@ void LLDrawPoolAlpha::renderAlphaHighlight()
 
                     bool rigged = (params.mAvatar != nullptr);
                     gHighlightProgram.bind(rigged);
-                    gGL.diffuseColor4f(1, 0, 0, 1);
 
                     if (rigged)
                     {
-                        if (lastAvatar != params.mAvatar ||
-                            lastMeshId != params.mSkinInfo->mHash)
-                        {
-                            if (!uploadMatrixPalette(params))
-                            {
-                                continue;
-                            }
-                            lastAvatar = params.mAvatar;
-                            lastMeshId = params.mSkinInfo->mHash;
+                        if (!uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
+                        { // failed to upload matrix palette, skip rendering
+                            continue;
                         }
                     }
 
+                    gGL.diffuseColor4f(1, 0, 0, 1);
                     LLRenderPass::applyModelMatrix(params);
                     params.mVertexBuffer->setBuffer();
                     params.mVertexBuffer->drawRange(LLRender::TRIANGLES, params.mStart, params.mEnd, params.mCount, params.mOffset);
@@ -526,25 +521,20 @@ void LLDrawPoolAlpha::renderRiggedEmissives(std::vector<LLDrawInfo*>& emissives)
     shader->bind();
     shader->uniform1f(LLShaderMgr::EMISSIVE_BRIGHTNESS, 1.f);
 
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false;
 
     for (LLDrawInfo* draw : emissives)
     {
         LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL("Emissives");
 
-        bool tex_setup = TexSetup(draw, false);
-        if (lastAvatar != draw->mAvatar || lastMeshId != draw->mSkinInfo->mHash)
+        if (uploadMatrixPalette(draw->mAvatar, draw->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
         {
-            if (!uploadMatrixPalette(*draw))
-            { // failed to upload matrix palette, skip rendering
-                continue;
-            }
-            lastAvatar = draw->mAvatar;
-            lastMeshId = draw->mSkinInfo->mHash;
+            bool tex_setup = TexSetup(draw, false);
+            drawEmissive(draw);
+            RestoreTexSetup(tex_setup);
         }
-        drawEmissive(draw);
-        RestoreTexSetup(tex_setup);
     }
 }
 
@@ -553,19 +543,15 @@ void LLDrawPoolAlpha::renderRiggedPbrEmissives(std::vector<LLDrawInfo*>& emissiv
     LLGLDepthTest depth(GL_TRUE, GL_FALSE); //disable depth writes since "emissive" is additive so sorting doesn't matter
     pbr_emissive_shader->bind(true);
 
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false;
 
     for (LLDrawInfo* draw : emissives)
     {
-        if (lastAvatar != draw->mAvatar || lastMeshId != draw->mSkinInfo->mHash)
-        {
-            if (!uploadMatrixPalette(*draw))
-            { // failed to upload matrix palette, skip rendering
-                continue;
-            }
-            lastAvatar = draw->mAvatar;
-            lastMeshId = draw->mSkinInfo->mHash;
+        if (!uploadMatrixPalette(draw->mAvatar, draw->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
+        { // failed to upload matrix palette, skip rendering
+            continue;
         }
 
         LLGLDisable cull_face(draw->mGLTFMaterial->mDoubleSided ? GL_CULL_FACE : 0);
@@ -581,9 +567,10 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, bool depth_only, bool rigged)
     bool initialized_lighting = false;
     bool light_enabled = true;
 
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
-    LLGLSLShader* lastAvatarShader = nullptr;
+    const LLGLSLShader* lastAvatarShader = nullptr;
+    bool skipLastSkin = false;
 
     LLCullResult::sg_iterator begin;
     LLCullResult::sg_iterator end;
@@ -776,20 +763,9 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, bool depth_only, bool rigged)
                     }
                 }
 
-                if (params.mAvatar != nullptr)
+                if (params.mAvatar && !uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, lastAvatarShader, skipLastSkin))
                 {
-                    if (lastAvatar != params.mAvatar ||
-                        lastMeshId != params.mSkinInfo->mHash ||
-                        lastAvatarShader != LLGLSLShader::sCurBoundShaderPtr)
-                    {
-                        if (!uploadMatrixPalette(params))
-                        {
-                            continue;
-                        }
-                        lastAvatar = params.mAvatar;
-                        lastMeshId = params.mSkinInfo->mHash;
-                        lastAvatarShader = LLGLSLShader::sCurBoundShaderPtr;
-                    }
+                    continue;
                 }
 
                 bool tex_setup = TexSetup(&params, (mat != nullptr));
@@ -912,26 +888,3 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, bool depth_only, bool rigged)
         gPipeline.enableLightsDynamic();
     }
 }
-
-bool LLDrawPoolAlpha::uploadMatrixPalette(const LLDrawInfo& params)
-{
-    if (params.mAvatar.isNull())
-    {
-        return false;
-    }
-    const LLVOAvatar::MatrixPaletteCache& mpc = params.mAvatar.get()->updateSkinInfoMatrixPalette(params.mSkinInfo);
-    U32 count = static_cast<U32>(mpc.mMatrixPalette.size());
-
-    if (count == 0)
-    {
-        //skin info not loaded yet, don't render
-        return false;
-    }
-
-    LLGLSLShader::sCurBoundShaderPtr->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
-        count,
-        false,
-        (GLfloat*)&(mpc.mGLMp[0]));
-
-    return true;
-}
diff --git a/indra/newview/lldrawpoolalpha.h b/indra/newview/lldrawpoolalpha.h
index 0abe001714..25044beda0 100644
--- a/indra/newview/lldrawpoolalpha.h
+++ b/indra/newview/lldrawpoolalpha.h
@@ -66,7 +66,6 @@ public:
     void renderGroupAlpha(LLSpatialGroup* group, U32 type, U32 mask, bool texture = true);
     void renderAlpha(U32 mask, bool depth_only = false, bool rigged = false);
     void renderAlphaHighlight();
-    bool uploadMatrixPalette(const LLDrawInfo& params);
 
     static bool sShowDebugAlpha;
 
diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp
index 7289e95b6e..bf593bff07 100644
--- a/indra/newview/lldrawpoolbump.cpp
+++ b/indra/newview/lldrawpoolbump.cpp
@@ -558,8 +558,9 @@ void LLDrawPoolBump::renderDeferred(S32 pass)
         LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type);
         LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type);
 
-        LLVOAvatar* avatar = nullptr;
-        U64 skin = 0;
+        const LLVOAvatar* lastAvatar = nullptr;
+        U64 lastMeshId = 0;
+        bool skipLastSkin = false;
 
         for (LLCullResult::drawinfo_iterator i = begin; i != end; )
         {
@@ -572,13 +573,10 @@ void LLDrawPoolBump::renderDeferred(S32 pass)
 
             if (rigged)
             {
-                if (avatar != params.mAvatar || skin != params.mSkinInfo->mHash)
+                if (uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
                 {
-                    uploadMatrixPalette(params);
-                    avatar = params.mAvatar;
-                    skin = params.mSkinInfo->mHash;
+                    pushBumpBatch(params, true, false);
                 }
-                pushBumpBatch(params, true, false);
             }
             else
             {
@@ -974,8 +972,9 @@ void LLBumpImageList::onSourceUpdated(LLViewerTexture* src, EBumpEffect bump_cod
 
 void LLDrawPoolBump::pushBumpBatches(U32 type)
 {
-    LLVOAvatar* avatar = nullptr;
-    U64 skin = 0;
+    const LLVOAvatar* lastAvatar = nullptr;
+    U64 lastMeshId = 0;
+    bool skipLastSkin = false;
 
     if (mRigged)
     { // nudge type enum and include skinweights for rigged pass
@@ -993,17 +992,9 @@ void LLDrawPoolBump::pushBumpBatches(U32 type)
         {
             if (mRigged)
             {
-                if (avatar != params.mAvatar || skin != params.mSkinInfo->mHash)
+                if (!uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
                 {
-                    if (uploadMatrixPalette(params))
-                    {
-                        avatar = params.mAvatar;
-                        skin = params.mSkinInfo->mHash;
-                    }
-                    else
-                    {
-                        continue;
-                    }
+                    continue;
                 }
             }
             pushBumpBatch(params, false);
diff --git a/indra/newview/lldrawpoolmaterials.cpp b/indra/newview/lldrawpoolmaterials.cpp
index bbc93c2af9..e7ec2022d2 100644
--- a/indra/newview/lldrawpoolmaterials.cpp
+++ b/indra/newview/lldrawpoolmaterials.cpp
@@ -183,7 +183,9 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass)
         glUniform4fv(specular, 1, lastSpecular.mV);
     }
 
-    LLVOAvatar* lastAvatar = nullptr;
+    const LLVOAvatar* lastAvatar = nullptr;
+    U64 lastMeshId = 0;
+    bool skipLastSkin = false;
 
     for (LLCullResult::drawinfo_iterator i = begin; i != end; )
     {
@@ -245,23 +247,11 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass)
         }
 
         // upload matrix palette to shader
-        if (rigged && params.mAvatar.notNull())
+        if (rigged)
         {
-            if (params.mAvatar != lastAvatar)
+            if (!uploadMatrixPalette(params.mAvatar, params.mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
             {
-                const LLVOAvatar::MatrixPaletteCache& mpc = params.mAvatar->updateSkinInfoMatrixPalette(params.mSkinInfo);
-                U32 count = static_cast<U32>(mpc.mMatrixPalette.size());
-
-                if (count == 0)
-                {
-                    //skin info not loaded yet, don't render
-                    return;
-                }
-
-                mShader->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
-                    count,
-                    false,
-                    (GLfloat*)&(mpc.mGLMp[0]));
+                continue;
             }
         }
 
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 581328b3cb..ccfef09b09 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -2147,7 +2147,7 @@ F32 LLFace::getTextureVirtualSize()
         face_area =  mPixelArea / llclamp(texel_area, 0.015625f, 128.f);
     }
 
-    face_area = LLFace::adjustPixelArea(mImportanceToCamera, face_area) ;
+    face_area = LLFace::adjustPixelArea(mImportanceToCamera, face_area);
     if(face_area > LLViewerTexture::sMinLargeImageSize) //if is large image, shrink face_area by considering the partial overlapping.
     {
         if(mImportanceToCamera > LEAST_IMPORTANCE_FOR_LARGE_IMAGE && mTexture[LLRender::DIFFUSE_MAP].notNull() && mTexture[LLRender::DIFFUSE_MAP]->isLargeImage())
@@ -2165,7 +2165,6 @@ bool LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_FACE;
 
-    //VECTORIZE THIS
     //get area of circle around face
 
     LLVector4a center;
@@ -2290,6 +2289,7 @@ const F32 FACE_IMPORTANCE_TO_CAMERA_OVER_ANGLE[FACE_IMPORTANCE_LEVEL][2] =    //
 //static
 F32 LLFace::calcImportanceToCamera(F32 cos_angle_to_view_dir, F32 dist)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_FACE;
     F32 importance = 0.f ;
 
     if(cos_angle_to_view_dir > LLViewerCamera::getInstance()->getCosHalfFov() &&
diff --git a/indra/newview/llface.h b/indra/newview/llface.h
index 7cf256f731..99642016f7 100644
--- a/indra/newview/llface.h
+++ b/indra/newview/llface.h
@@ -209,7 +209,6 @@ public:
     void        setDrawInfo(LLDrawInfo* draw_info);
 
     F32         getTextureVirtualSize() ;
-    F32         getImportanceToCamera()const {return mImportanceToCamera ;}
     void        resetVirtualSize();
 
     void        setHasMedia(bool has_media)  { mHasMedia = has_media ;}
@@ -265,6 +264,11 @@ public:
     // return mSkinInfo->mHash or 0 if mSkinInfo is null
     U64 getSkinHash();
 
+    // true if face was recently in the main camera frustum according to LLViewerTextureList updates
+    bool mInFrustum = false;
+    // value of gFrameCount the last time the face was touched by LLViewerTextureList::updateImageDecodePriority
+    U32 mLastTextureUpdate = 0;
+
 private:
     LLPointer<LLVertexBuffer> mVertexBuffer;
     LLPointer<LLVertexBuffer> mVertexBufferGLTF;
diff --git a/indra/newview/llfloatereditextdaycycle.cpp b/indra/newview/llfloatereditextdaycycle.cpp
index 60343a4e2a..d6e7cd97f3 100644
--- a/indra/newview/llfloatereditextdaycycle.cpp
+++ b/indra/newview/llfloatereditextdaycycle.cpp
@@ -184,7 +184,6 @@ LLFloaterEditExtDayCycle::LLFloaterEditExtDayCycle(const LLSD &key) :
     mLoadTrack(nullptr),
     mClearTrack(nullptr)
 {
-
     mCommitCallbackRegistrar.add(EVNT_DAYTRACK, [this](LLUICtrl *ctrl, const LLSD &data) { onTrackSelectionCallback(data); });
     mCommitCallbackRegistrar.add(EVNT_PLAY, [this](LLUICtrl *ctrl, const LLSD &data) { onPlayActionCallback(data); });
 
@@ -205,8 +204,8 @@ LLFloaterEditExtDayCycle::~LLFloaterEditExtDayCycle()
 // virtual
 bool LLFloaterEditExtDayCycle::postBuild()
 {
-    getChild<LLLineEditor>(TXT_DAY_NAME)->setKeystrokeCallback(boost::bind(&LLFloaterEditExtDayCycle::onCommitName, this, _1, _2), NULL);
-
+    mNameEditor = getChild<LLLineEditor>(TXT_DAY_NAME, true);
+    mCancelButton = getChild<LLButton>(BTN_CANCEL, true);
     mAddFrameButton = getChild<LLButton>(BTN_ADDFRAME, true);
     mDeleteFrameButton = getChild<LLButton>(BTN_DELFRAME, true);
     mTimeSlider = getChild<LLMultiSliderCtrl>(SLDR_TIME);
@@ -221,24 +220,24 @@ bool LLFloaterEditExtDayCycle::postBuild()
     mClearTrack = getChild<LLButton>(BTN_CLEARTRACK, true);
 
     mFlyoutControl = new LLFlyoutComboBtnCtrl(this, BTN_SAVE, BTN_FLYOUT, XML_FLYOUTMENU_FILE, false);
-    mFlyoutControl->setAction([this](LLUICtrl *ctrl, const LLSD &data) { onButtonApply(ctrl, data); });
+    mFlyoutControl->setAction([this](LLUICtrl *ctrl, const LLSD&) { onButtonApply(ctrl); });
 
-    getChild<LLButton>(BTN_CANCEL, true)->setCommitCallback([this](LLUICtrl *ctrl, const LLSD &data) { onClickCloseBtn(); });
-    mTimeSlider->setCommitCallback([this](LLUICtrl *ctrl, const LLSD &data) { onTimeSliderCallback(); });
-    mAddFrameButton->setCommitCallback([this](LLUICtrl *ctrl, const LLSD &data) { onAddFrame(); });
-    mDeleteFrameButton->setCommitCallback([this](LLUICtrl *ctrl, const LLSD &data) { onRemoveFrame(); });
-    mImportButton->setCommitCallback([this](LLUICtrl *, const LLSD &){ onButtonImport(); });
-    mLoadFrame->setCommitCallback([this](LLUICtrl *, const LLSD &){ onButtonLoadFrame(); });
+    mNameEditor->setKeystrokeCallback([this](LLLineEditor*, void*) { onNameKeystroke(); }, NULL);
+    mCancelButton->setCommitCallback([this](LLUICtrl*, const LLSD&) { onClickCloseBtn(); });
+    mTimeSlider->setCommitCallback([this](LLUICtrl*, const LLSD&) { onTimeSliderCallback(); });
+    mAddFrameButton->setCommitCallback([this](LLUICtrl*, const LLSD&) { onAddFrame(); });
+    mDeleteFrameButton->setCommitCallback([this](LLUICtrl*, const LLSD&) { onRemoveFrame(); });
+    mImportButton->setCommitCallback([this](LLUICtrl*, const LLSD&) { onButtonImport(); });
+    mLoadFrame->setCommitCallback([this](LLUICtrl*, const LLSD&) { onButtonLoadFrame(); });
 
-    mCloneTrack->setCommitCallback([this](LLUICtrl *, const LLSD&){ onCloneTrack(); });
-    mLoadTrack->setCommitCallback([this](LLUICtrl *, const LLSD&){  onLoadTrack();});
-    mClearTrack->setCommitCallback([this](LLUICtrl *, const LLSD&){ onClearTrack(); });
+    mCloneTrack->setCommitCallback([this](LLUICtrl*, const LLSD&) { onCloneTrack(); });
+    mLoadTrack->setCommitCallback([this](LLUICtrl*, const LLSD&) { onLoadTrack();});
+    mClearTrack->setCommitCallback([this](LLUICtrl*, const LLSD&) { onClearTrack(); });
 
-
-    mFramesSlider->setCommitCallback([this](LLUICtrl *, const LLSD &data) { onFrameSliderCallback(data); });
-    mFramesSlider->setDoubleClickCallback([this](LLUICtrl*, S32 x, S32 y, MASK mask){ onFrameSliderDoubleClick(x, y, mask); });
-    mFramesSlider->setMouseDownCallback([this](LLUICtrl*, S32 x, S32 y, MASK mask){ onFrameSliderMouseDown(x, y, mask); });
-    mFramesSlider->setMouseUpCallback([this](LLUICtrl*, S32 x, S32 y, MASK mask){ onFrameSliderMouseUp(x, y, mask); });
+    mFramesSlider->setCommitCallback([this](LLUICtrl*, const LLSD &data) { onFrameSliderCallback(data); });
+    mFramesSlider->setDoubleClickCallback([this](LLUICtrl*, S32 x, S32 y, MASK mask) { onFrameSliderDoubleClick(x, y, mask); });
+    mFramesSlider->setMouseDownCallback([this](LLUICtrl*, S32 x, S32 y, MASK mask) { onFrameSliderMouseDown(x, y, mask); });
+    mFramesSlider->setMouseUpCallback([this](LLUICtrl*, S32 x, S32 y, MASK mask) { onFrameSliderMouseUp(x, y, mask); });
 
     mTimeSlider->addSlider(0);
 
@@ -431,12 +430,10 @@ void LLFloaterEditExtDayCycle::refresh()
 {
     if (mEditDay)
     {
-        LLLineEditor* name_field = getChild<LLLineEditor>(TXT_DAY_NAME);
-        name_field->setText(mEditDay->getName());
-        name_field->setEnabled(mCanMod);
+        mNameEditor->setText(mEditDay->getName());
+        mNameEditor->setEnabled(mCanMod && mCanSave && mInventoryId.notNull());
     }
 
-
     bool is_inventory_avail = canUseInventory();
 
     bool show_commit = ((mEditContext == CONTEXT_PARCEL) || (mEditContext == CONTEXT_REGION));
@@ -461,7 +458,7 @@ void LLFloaterEditExtDayCycle::refresh()
     mFlyoutControl->setMenuItemVisible(ACTION_APPLY_REGION, show_apply);
 
     mFlyoutControl->setMenuItemEnabled(ACTION_COMMIT, show_commit && !mCommitSignal.empty());
-    mFlyoutControl->setMenuItemEnabled(ACTION_SAVE, is_inventory_avail && mCanMod && !mInventoryId.isNull() && mCanSave);
+    mFlyoutControl->setMenuItemEnabled(ACTION_SAVE, is_inventory_avail && mCanMod && mCanSave && mInventoryId.notNull());
     mFlyoutControl->setMenuItemEnabled(ACTION_SAVEAS, is_inventory_avail && mCanCopy && mCanSave);
     mFlyoutControl->setMenuItemEnabled(ACTION_APPLY_LOCAL, true);
     mFlyoutControl->setMenuItemEnabled(ACTION_APPLY_PARCEL, canApplyParcel() && show_apply);
@@ -569,7 +566,7 @@ bool LLFloaterEditExtDayCycle::handleKeyUp(KEY key, MASK mask, bool called_from_
     return LLFloater::handleKeyUp(key, mask, called_from_parent);
 }
 
-void LLFloaterEditExtDayCycle::onButtonApply(LLUICtrl *ctrl, const LLSD &data)
+void LLFloaterEditExtDayCycle::onButtonApply(LLUICtrl *ctrl)
 {
     std::string ctrl_action = ctrl->getName();
 
@@ -832,7 +829,7 @@ void LLFloaterEditExtDayCycle::onClearTrack()
     refresh();
 }
 
-void LLFloaterEditExtDayCycle::onCommitName(class LLLineEditor* caller, void* user_data)
+void LLFloaterEditExtDayCycle::onNameKeystroke()
 {
     if (!mEditDay)
     {
@@ -840,7 +837,7 @@ void LLFloaterEditExtDayCycle::onCommitName(class LLLineEditor* caller, void* us
         return;
     }
 
-    mEditDay->setName(caller->getText());
+    mEditDay->setName(mNameEditor->getText());
 }
 
 void LLFloaterEditExtDayCycle::onTrackSelectionCallback(const LLSD& user_data)
diff --git a/indra/newview/llfloatereditextdaycycle.h b/indra/newview/llfloatereditextdaycycle.h
index 655915b6e8..926a24f8a4 100644
--- a/indra/newview/llfloatereditextdaycycle.h
+++ b/indra/newview/llfloatereditextdaycycle.h
@@ -110,7 +110,7 @@ private:
     F32 getCurrentFrame() const;
 
     // flyout response/click
-    void                        onButtonApply(LLUICtrl *ctrl, const LLSD &data);
+    void                        onButtonApply(LLUICtrl *ctrl);
     //virtual void                onClickCloseBtn(bool app_quitting = false) override;
     //void                        onButtonImport();
     void                        onButtonLoadFrame();
@@ -119,7 +119,7 @@ private:
     void                        onCloneTrack();
     void                        onLoadTrack();
     void                        onClearTrack();
-    void                        onCommitName(class LLLineEditor* caller, void* user_data);
+    void                        onNameKeystroke();
     void                        onTrackSelectionCallback(const LLSD& user_data);
     void                        onPlayActionCallback(const LLSD& user_data);
     // time slider clicked
@@ -196,6 +196,8 @@ private:
     std::string                 mLastFrameSlider;
     bool                        mShiftCopyEnabled;
 
+    LLLineEditor*               mNameEditor { nullptr };   // day cycle name field, looked up in postBuild()
+    LLButton*                   mCancelButton { nullptr }; // cancel button, looked up in postBuild()
     LLButton*                   mAddFrameButton;
     LLButton*                   mDeleteFrameButton;
     LLButton*                   mImportButton;
diff --git a/indra/newview/llfloaterimsession.cpp b/indra/newview/llfloaterimsession.cpp
index 557b3f27c5..97e0d01b52 100644
--- a/indra/newview/llfloaterimsession.cpp
+++ b/indra/newview/llfloaterimsession.cpp
@@ -368,8 +368,6 @@ bool LLFloaterIMSession::postBuild()
     add_btn->setEnabled(isInviteAllowed());
     add_btn->setClickedCallback(boost::bind(&LLFloaterIMSession::onAddButtonClicked, this));
 
-    childSetAction("voice_call_btn", boost::bind(&LLFloaterIMSession::onCallButtonClicked, this));
-
     LLVoiceClient::addObserver(this);
 
     //*TODO if session is not initialized yet, add some sort of a warning message like "starting session...blablabla"
@@ -551,23 +549,6 @@ void LLFloaterIMSession::boundVoiceChannel()
     }
 }
 
-void LLFloaterIMSession::onCallButtonClicked()
-{
-    LLVoiceChannel* voice_channel = LLIMModel::getInstance()->getVoiceChannel(mSessionID);
-    if (voice_channel)
-    {
-        bool is_call_active = voice_channel->getState() >= LLVoiceChannel::STATE_CALL_STARTED;
-        if (is_call_active)
-        {
-            gIMMgr->endCall(mSessionID);
-        }
-        else
-        {
-            gIMMgr->startCall(mSessionID);
-        }
-    }
-}
-
 void LLFloaterIMSession::onChange(EStatusType status, const LLSD& channelInfo, bool proximal)
 {
     if(status != STATUS_JOINING && status != STATUS_LEFT_CHANNEL)
diff --git a/indra/newview/llfloaterimsession.h b/indra/newview/llfloaterimsession.h
index 7bc54cf3da..a442099ad1 100644
--- a/indra/newview/llfloaterimsession.h
+++ b/indra/newview/llfloaterimsession.h
@@ -159,8 +159,6 @@ private:
     void sendParticipantsAddedNotification(const uuid_vec_t& uuids);
     bool canAddSelectedToChat(const uuid_vec_t& uuids);
 
-    void onCallButtonClicked();
-
     void onVoiceChannelChanged(const LLUUID &session_id);
 
     void boundVoiceChannel();
diff --git a/indra/newview/llfloaterimsessiontab.cpp b/indra/newview/llfloaterimsessiontab.cpp
index 2621ce576c..00e7dae657 100644
--- a/indra/newview/llfloaterimsessiontab.cpp
+++ b/indra/newview/llfloaterimsessiontab.cpp
@@ -302,6 +302,7 @@ bool LLFloaterIMSessionTab::postBuild()
     mGearBtn = getChild<LLButton>("gear_btn");
     mAddBtn = getChild<LLButton>("add_btn");
     mVoiceButton = getChild<LLButton>("voice_call_btn");
+    mVoiceButton->setClickedCallback([this](LLUICtrl*, const LLSD&) { onCallButtonClicked(); });
 
     mParticipantListPanel = getChild<LLLayoutPanel>("speakers_list_panel");
     mRightPartPanel = getChild<LLLayoutPanel>("right_part_holder");
@@ -434,16 +435,34 @@ void LLFloaterIMSessionTab::draw()
 
 void LLFloaterIMSessionTab::enableDisableCallBtn()
 {
-    if (LLVoiceClient::instanceExists() && mVoiceButton)
+    if (!mVoiceButton)
+        return;
+
+    bool enable = false;
+
+    if (mSessionID.notNull() && mSession && mSession->mSessionInitialized && mSession->mCallBackEnabled)
     {
-        mVoiceButton->setEnabled(
-            mSessionID.notNull()
-            && mSession
-            && mSession->mSessionInitialized
-            && LLVoiceClient::getInstance()->voiceEnabled()
-            && LLVoiceClient::getInstance()->isVoiceWorking()
-            && mSession->mCallBackEnabled);
+        if (mVoiceButtonHangUpMode)
+        {
+            // Hanging up is allowed from any channel state
+            enable = true;
+        }
+        else
+        {
+            // A call may be started only from STATE_NO_CHANNEL_INFO; a missing channel keeps the button disabled
+            if (mSession->mVoiceChannel && LLVoiceClient::instanceExists() &&
+                mSession->mVoiceChannel->getState() == LLVoiceChannel::STATE_NO_CHANNEL_INFO)
+            {
+                LLVoiceClient* client = LLVoiceClient::getInstance();
+                if (client->voiceEnabled() && client->isVoiceWorking())
+                {
+                    enable = true;
+                }
+            }
+        }
     }
+
+    mVoiceButton->setEnabled(enable);
 }
 
 // virtual
@@ -466,6 +485,25 @@ void LLFloaterIMSessionTab::onFocusLost()
     super::onFocusLost();
 }
 
+void LLFloaterIMSessionTab::onCallButtonClicked()
+{
+    // In "Hang up" mode a click always ends the call, whatever the channel state is
+    if (mVoiceButtonHangUpMode)
+    {
+        gIMMgr->endCall(mSessionID);
+        return;
+    }
+    // "Start call" mode: a missing session or voice channel counts as "no channel info"
+    const bool has_channel = mSession && mSession->mVoiceChannel;
+    const LLVoiceChannel::EState current_state =
+        has_channel ? mSession->mVoiceChannel->getState() : LLVoiceChannel::STATE_NO_CHANNEL_INFO;
+    // Starting a call is permitted from this state only
+    if (current_state == LLVoiceChannel::STATE_NO_CHANNEL_INFO)
+    {
+        gIMMgr->startCall(mSessionID);
+    }
+}
+
 void LLFloaterIMSessionTab::onInputEditorClicked()
 {
     LLFloaterIMContainer* im_box = LLFloaterIMContainer::findInstance();
@@ -1040,6 +1078,7 @@ void LLFloaterIMSessionTab::updateCallBtnState(bool callIsActive)
 {
     mVoiceButton->setImageOverlay(callIsActive? getString("call_btn_stop") : getString("call_btn_start"));
     mVoiceButton->setToolTip(callIsActive? getString("end_call_button_tooltip") : getString("start_call_button_tooltip"));
+    mVoiceButtonHangUpMode = callIsActive;
 
     enableDisableCallBtn();
 }
diff --git a/indra/newview/llfloaterimsessiontab.h b/indra/newview/llfloaterimsessiontab.h
index 6dd8e62482..bee5c8c2c4 100644
--- a/indra/newview/llfloaterimsessiontab.h
+++ b/indra/newview/llfloaterimsessiontab.h
@@ -198,6 +198,11 @@ protected:
     LLButton* mAddBtn;
     LLButton* mVoiceButton;
 
+    // mVoiceButton works in one of two modes, "Start call" or "Hang up"
+    // (each with its own overlay image and tooltip, see updateCallBtnState()),
+    // so we track the mode currently in use to react to clicks accordingly
+    bool mVoiceButtonHangUpMode { false };
+
 private:
     // Handling selection and contextual menu
     void doToSelected(const LLSD& userdata);
@@ -216,6 +221,8 @@ private:
      */
     void reshapeChatLayoutPanel();
 
+    void onCallButtonClicked();
+
     void onInputEditorClicked();
 
     void onEmojiRecentPanelToggleBtnClicked();
diff --git a/indra/newview/llhudnametag.cpp b/indra/newview/llhudnametag.cpp
index 205089c662..11f049564a 100644
--- a/indra/newview/llhudnametag.cpp
+++ b/indra/newview/llhudnametag.cpp
@@ -229,35 +229,20 @@ void LLHUDNameTag::render()
     if (sDisplayText)
     {
         LLGLDepthTest gls_depth(GL_TRUE, GL_FALSE);
-        //LLGLDisable gls_stencil(GL_STENCIL_TEST);
-        renderText(false);
+        renderText();
     }
 }
 
-void LLHUDNameTag::renderText(bool for_select)
+void LLHUDNameTag::renderText()
 {
     if (!mVisible || mHidden)
     {
         return;
     }
 
-    // don't pick text that isn't bound to a viewerobject
-    if (for_select &&
-        (!mSourceObject || mSourceObject->mDrawable.isNull()))
-    {
-        return;
-    }
-
-    if (for_select)
-    {
-        gGL.getTexUnit(0)->disable();
-    }
-    else
-    {
-        gGL.getTexUnit(0)->enable(LLTexUnit::TT_TEXTURE);
-    }
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
 
-    LLGLState gls_blend(GL_BLEND, !for_select);
+    gGL.getTexUnit(0)->enable(LLTexUnit::TT_TEXTURE);
 
     LLColor4 shadow_color(0.f, 0.f, 0.f, 1.f);
     F32 alpha_factor = 1.f;
@@ -393,10 +378,6 @@ void LLHUDNameTag::renderText(bool for_select)
     }
     /// Reset the default color to white.  The renderer expects this to be the default.
     gGL.color4f(1.0f, 1.0f, 1.0f, 1.0f);
-    if (for_select)
-    {
-        gGL.getTexUnit(0)->enable(LLTexUnit::TT_TEXTURE);
-    }
 }
 
 void LLHUDNameTag::setString(const std::string &text_utf8)
diff --git a/indra/newview/llhudnametag.h b/indra/newview/llhudnametag.h
index 9abd8f33cc..5cb7fa877e 100644
--- a/indra/newview/llhudnametag.h
+++ b/indra/newview/llhudnametag.h
@@ -145,7 +145,7 @@ protected:
     LLHUDNameTag(const U8 type);
 
     /*virtual*/ void render();
-    void renderText(bool for_select);
+    void renderText();
     static void updateAll();
     void setLOD(S32 lod);
     S32 getMaxLines();
diff --git a/indra/newview/llpanelenvironment.cpp b/indra/newview/llpanelenvironment.cpp
index 51e2c05070..c1d8ce50ff 100644
--- a/indra/newview/llpanelenvironment.cpp
+++ b/indra/newview/llpanelenvironment.cpp
@@ -58,14 +58,6 @@ namespace
     const std::string FLOATER_DAY_CYCLE_EDIT("env_edit_extdaycycle");
     const std::string STRING_REGION_ENV("str_region_env");
     const std::string STRING_EMPTY_NAME("str_empty");
-
-    inline bool ends_with(std::string const & value, std::string const & ending)
-    {
-        if (ending.size() > value.size())
-            return false;
-        return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
-    }
-
 }
 
 //=========================================================================
@@ -296,7 +288,7 @@ void LLPanelEnvironmentInfo::refresh()
     F32Hours dayoffset(mCurrentEnvironment->mDayOffset);
 
     if (dayoffset.value() > 12.0f)
-        dayoffset -= F32Hours(24.0);
+        dayoffset -= daylength;
 
     mSliderDayLength->setValue(daylength.value());
     mSliderDayOffset->setValue(dayoffset.value());
@@ -723,6 +715,11 @@ void LLPanelEnvironmentInfo::onSldDayLengthChanged(F32 value)
         F32Hours daylength(value);
 
         mCurrentEnvironment->mDayLength = daylength;
+        F32 offset = (F32)mSliderDayOffset->getValue().asReal();
+        if (offset <= 0.0f)
+        {
+            onSldDayOffsetChanged(offset);
+        }
         setDirtyFlag(DIRTY_FLAG_DAYLENGTH);
 
         udpateApparentTimeOfDay();
@@ -736,7 +733,8 @@ void LLPanelEnvironmentInfo::onSldDayOffsetChanged(F32 value)
         F32Hours dayoffset(value);
 
         if (dayoffset.value() <= 0.0f)
-            dayoffset += F32Hours(24.0);
+            // if day cycle is 5 hours long, we want -1h offset to result in 4h
+            dayoffset += mCurrentEnvironment->mDayLength;
 
         mCurrentEnvironment->mDayOffset = dayoffset;
         setDirtyFlag(DIRTY_FLAG_DAYOFFSET);
@@ -870,16 +868,16 @@ void LLPanelEnvironmentInfo::onBtnEdit()
             (LLFloaterEditExtDayCycle::KEY_CANMOD,      LLSD::Boolean(true)));
 
     dayeditor->openFloater(params);
+
     if (mCurrentEnvironment && mCurrentEnvironment->mDayCycle)
     {
         dayeditor->setEditDayCycle(mCurrentEnvironment->mDayCycle);
-        if (!ends_with(mCurrentEnvironment->mDayCycle->getName(), "(customized)"))
-        {
-            dayeditor->setEditName(mCurrentEnvironment->mDayCycle->getName() + "(customized)");
-        }
+        dayeditor->setEditName(mCurrentEnvironment->mDayCycleName);
     }
     else
+    {
         dayeditor->setEditDefaultDayCycle();
+    }
 }
 
 void LLPanelEnvironmentInfo::onBtnSelect()
@@ -929,7 +927,7 @@ void LLPanelEnvironmentInfo::udpateApparentTimeOfDay()
 {
     static const F32 SECONDSINDAY(24.0 * 60.0 * 60.0);
 
-    if ((!mCurrentEnvironment) || (mCurrentEnvironment->mDayLength.value() < 1.0) || (mCurrentEnvironment->mDayOffset.value() < 1.0))
+    if ((!mCurrentEnvironment) || (mCurrentEnvironment->mDayLength.value() < 1.0))
     {
         mLabelApparentTime->setVisible(false);
         return;
diff --git a/indra/newview/llpanelprofile.cpp b/indra/newview/llpanelprofile.cpp
index 9711729498..08605f7cf4 100644
--- a/indra/newview/llpanelprofile.cpp
+++ b/indra/newview/llpanelprofile.cpp
@@ -1017,7 +1017,7 @@ void LLPanelProfileSecondLife::fillCommonData(const LLAvatarData* avatar_data)
 
     setDescriptionText(avatar_data->about_text);
 
-        mSecondLifePic->setValue(avatar_data->image_id);
+    mSecondLifePic->setValue(avatar_data->image_id);
 
     if (getSelfProfile())
     {
@@ -1052,7 +1052,7 @@ void LLPanelProfileSecondLife::fillAccountStatus(const LLAvatarData* avatar_data
     std::string caption_text = getString("CaptionTextAcctInfo", args);
     getChild<LLUICtrl>("account_info")->setValue(caption_text);
 
-    const S32 LINDEN_EMPLOYEE_INDEX = 3;
+    constexpr S32 LINDEN_EMPLOYEE_INDEX = 3;
     LLDate sl_release;
     sl_release.fromYMDHMS(2003, 6, 23, 0, 0, 0);
     std::string customer_lower = avatar_data->customer_type;
@@ -1117,7 +1117,7 @@ void LLPanelProfileSecondLife::fillRightsData()
     // If true - we are viewing friend's profile, enable check boxes and set values.
     if (relation)
     {
-        S32 rights = relation->getRightsGrantedTo();
+        const S32 rights = relation->getRightsGrantedTo();
         bool can_see_online = LLRelationship::GRANT_ONLINE_STATUS & rights;
         bool can_see_on_map = LLRelationship::GRANT_MAP_LOCATION & rights;
         bool can_edit_objects = LLRelationship::GRANT_MODIFY_OBJECTS & rights;
@@ -1165,10 +1165,10 @@ void LLPanelProfileSecondLife::fillAgeData(const LLAvatarData* avatar_data)
     }
     else
     {
-    std::string register_date = getString("age_format");
-    LLSD args_age;
+        std::string register_date = getString("age_format");
+        LLSD args_age;
         args_age["[AGE]"] = LLDateUtil::ageFromDate(avatar_data->born_on, LLDate::now());
-    LLStringUtil::format(register_date, args_age);
+        LLStringUtil::format(register_date, args_age);
         userAgeCtrl->setValue(register_date);
     }
 
@@ -1247,7 +1247,7 @@ void LLPanelProfileSecondLife::setAvatarId(const LLUUID& avatar_id)
 void LLPanelProfileSecondLife::updateOnlineStatus()
 {
     const LLRelationship* relationship = LLAvatarTracker::instance().getBuddyInfo(getAvatarId());
-    if (relationship != NULL)
+    if (relationship)
     {
         // For friend let check if he allowed me to see his status
         bool online = relationship->isOnline();
@@ -1328,7 +1328,7 @@ void LLProfileImagePicker::notify(const std::vector<std::string>& filenames)
     // generate a temp texture file for coroutine
     std::string temp_file = gDirUtilp->getTempFilename();
     U32 codec = LLImageBase::getCodecFromExtension(gDirUtilp->getExtension(file_path));
-    const S32 MAX_DIM = 256;
+    constexpr S32 MAX_DIM = 256;
     if (!LLViewerTextureList::createUploadFile(file_path, temp_file, codec, MAX_DIM))
     {
         LLSD notif_args;
@@ -1611,12 +1611,12 @@ void LLPanelProfileSecondLife::onShowInSearchCallback()
     if (value == mAllowPublish)
         return;
 
-        mAllowPublish = value;
+    mAllowPublish = value;
     saveAgentUserInfoCoro("allow_publish", value);
-    }
+}
 
 void LLPanelProfileSecondLife::onHideAgeCallback()
-    {
+{
     bool value = mHideAgeCombo->getValue().asInteger();
     if (value == mHideAge)
         return;
@@ -1642,7 +1642,7 @@ void LLPanelProfileSecondLife::onDiscardDescriptionChanges()
 
 void LLPanelProfileSecondLife::onShowAgentPermissionsDialog()
 {
-    LLFloater *floater = mFloaterPermissionsHandle.get();
+    LLFloater* floater = mFloaterPermissionsHandle.get();
     if (!floater)
     {
         LLFloater* parent_floater = gFloaterView->getParentFloater(this);
@@ -1670,7 +1670,7 @@ void LLPanelProfileSecondLife::onShowAgentProfileTexture()
         return;
     }
 
-    LLFloater *floater = mFloaterProfileTextureHandle.get();
+    LLFloater* floater = mFloaterProfileTextureHandle.get();
     if (!floater)
     {
         LLFloater* parent_floater = gFloaterView->getParentFloater(this);
@@ -1765,43 +1765,47 @@ void LLPanelProfileSecondLife::onCommitProfileImage(const LLUUID& id)
     if (mSecondLifePic->getImageAssetId() == id)
         return;
 
-        std::function<void(bool)> callback = [id](bool result)
+    std::function<void(bool)> callback = [id](bool result)
+    {
+        if (result)
         {
-            if (result)
-            {
-                LLAvatarIconIDCache::getInstance()->add(gAgentID, id);
+            LLAvatarIconIDCache::getInstance()->add(gAgentID, id);
             // Should trigger callbacks in icon controls (or request Legacy)
-                LLAvatarPropertiesProcessor::getInstance()->sendAvatarPropertiesRequest(gAgentID);
-            }
-        };
+            LLAvatarPropertiesProcessor::getInstance()->sendAvatarPropertiesRequest(gAgentID);
+        }
+    };
 
     if (!saveAgentUserInfoCoro("sl_image_id", id, callback))
         return;
 
     mSecondLifePic->setValue(id);
 
-        LLFloater *floater = mFloaterProfileTextureHandle.get();
-        if (floater)
+    LLFloater* floater = mFloaterProfileTextureHandle.get();
+    if (floater)
+    {
+        LLFloaterProfileTexture* texture_view = dynamic_cast<LLFloaterProfileTexture*>(floater);
+        if (texture_view)
         {
-            LLFloaterProfileTexture * texture_view = dynamic_cast<LLFloaterProfileTexture*>(floater);
-        if (id == LLUUID::null)
+            if (id.isNull())
             {
                 texture_view->resetAsset();
             }
             else
             {
-            texture_view->loadAsset(id);
+                texture_view->loadAsset(id);
             }
         }
     }
+}
 
 //////////////////////////////////////////////////////////////////////////
 // LLPanelProfileWeb
 
 LLPanelProfileWeb::LLPanelProfileWeb()
  : LLPanelProfileTab()
- , mWebBrowser(NULL)
+ , mWebBrowser(nullptr)
  , mAvatarNameCacheConnection()
+ , mFirstNavigate(false)
 {
 }
 
diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp
index 3526004071..f083747bfe 100644
--- a/indra/newview/llreflectionmapmanager.cpp
+++ b/indra/newview/llreflectionmapmanager.cpp
@@ -80,6 +80,9 @@ void load_exr(const std::string& filename)
         gGL.getTexUnit(0)->bind(gEXRImage);
 
         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB16F, width, height, 0, GL_RGBA, GL_FLOAT, out);
+
+        LLImageGLMemory::alloc_tex_image(width, height, GL_RGB16F, 1);
+
         free(out); // release memory of image data
 
         glGenerateMipmap(GL_TEXTURE_2D);
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index c2cce8c5ec..e517d009f5 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -1667,8 +1667,9 @@ void renderOctree(LLSpatialGroup* group)
             glLineWidth(1.f);
             gGL.flush();
 
-            LLVOAvatar* lastAvatar = nullptr;
+            const LLVOAvatar* lastAvatar = nullptr;
             U64 lastMeshId = 0;
+            bool skipLastSkin = false;
 
             for (LLSpatialGroup::element_iter i = group->getDataBegin(); i != group->getDataEnd(); ++i)
             {
@@ -1697,15 +1698,9 @@ void renderOctree(LLSpatialGroup* group)
                 {
                     gGL.pushMatrix();
                     gGL.loadMatrix(gGLModelView);
-                    if (lastAvatar != face->mAvatar ||
-                        lastMeshId != face->mSkinInfo->mHash)
+                    if (!LLRenderPass::uploadMatrixPalette(face->mAvatar, face->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
                     {
-                        if (!LLRenderPass::uploadMatrixPalette(face->mAvatar, face->mSkinInfo))
-                        {
-                            continue;
-                        }
-                        lastAvatar = face->mAvatar;
-                        lastMeshId = face->mSkinInfo->mHash;
+                        continue;
                     }
                 }
                 for (S32 j = 0; j < drawable->getNumFaces(); j++)
diff --git a/indra/newview/lltextureview.cpp b/indra/newview/lltextureview.cpp
index ca3386948d..bda53f66eb 100644
--- a/indra/newview/lltextureview.cpp
+++ b/indra/newview/lltextureview.cpp
@@ -525,9 +525,9 @@ void LLGLTexMemBar::draw()
    F64 raw_image_bytes_MB = raw_image_bytes / (1024.0 * 1024.0);
    F64 saved_raw_image_bytes_MB = saved_raw_image_bytes / (1024.0 * 1024.0);
    F64 aux_raw_image_bytes_MB = aux_raw_image_bytes / (1024.0 * 1024.0);
-   F64 texture_bytes_alloc = LLImageGL::getTextureBytesAllocated() / 1024.0 / 1024.0 * 1.3333f; // add 33% for mipmaps
-   F64 vertex_bytes_alloc = LLVertexBuffer::getBytesAllocated() / 1024.0 / 1024.0;
-   F64 render_bytes_alloc = LLRenderTarget::sBytesAllocated / 1024.0 / 1024.0;
+   F64 texture_bytes_alloc = LLImageGL::getTextureBytesAllocated() / 1024.0 / 512.0;
+   F64 vertex_bytes_alloc = LLVertexBuffer::getBytesAllocated() / 1024.0 / 512.0;
+   F64 render_bytes_alloc = LLRenderTarget::sBytesAllocated / 1024.0 / 512.0;
 
     //----------------------------------------------------------------------------
     LLGLSUIDefault gls_ui;
@@ -579,7 +579,7 @@ void LLGLTexMemBar::draw()
                     texture_bytes_alloc,
                     vertex_bytes_alloc,
                     render_bytes_alloc,
-        texture_bytes_alloc+vertex_bytes_alloc+render_bytes_alloc);
+        texture_bytes_alloc+vertex_bytes_alloc);
     LLFontGL::getFontMonospace()->renderUTF8(text, 0, 0, v_offset + line_height * 6,
         text_color, LLFontGL::LEFT, LLFontGL::TOP);
 
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 8c6a38876a..9bd0973cc0 100644
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -1502,6 +1502,7 @@ void draw_axes()
 
 void render_ui_3d()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
     LLGLSPipeline gls_pipeline;
 
     //////////////////////////////////////
@@ -1550,6 +1551,7 @@ void render_ui_3d()
 
 void render_ui_2d()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
     LLGLSUIDefault gls_ui;
 
     /////////////////////////////////////////////////////////////
diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp
index 5d33db591a..e36a03a749 100644
--- a/indra/newview/llviewerobject.cpp
+++ b/indra/newview/llviewerobject.cpp
@@ -6020,6 +6020,7 @@ bool LLViewerObject::isParticleSource() const
 
 void LLViewerObject::setParticleSource(const LLPartSysData& particle_parameters, const LLUUID& owner_id)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VIEWER;
     if (mPartSourcep)
     {
         deleteParticleSource();
@@ -6051,6 +6052,7 @@ void LLViewerObject::setParticleSource(const LLPartSysData& particle_parameters,
 
 void LLViewerObject::unpackParticleSource(const S32 block_num, const LLUUID& owner_id)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VIEWER;
     if (!mPartSourcep.isNull() && mPartSourcep->isDead())
     {
         mPartSourcep = NULL;
@@ -6086,7 +6088,7 @@ void LLViewerObject::unpackParticleSource(const S32 block_num, const LLUUID& own
             LLViewerTexture* image;
             if (mPartSourcep->mPartSysData.mPartImageID == LLUUID::null)
             {
-                image = LLViewerTextureManager::getFetchedTextureFromFile("pixiesmall.j2c");
+                image = LLViewerFetchedTexture::sDefaultParticleImagep;
             }
             else
             {
@@ -6099,6 +6101,7 @@ void LLViewerObject::unpackParticleSource(const S32 block_num, const LLUUID& own
 
 void LLViewerObject::unpackParticleSource(LLDataPacker &dp, const LLUUID& owner_id, bool legacy)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VIEWER;
     if (!mPartSourcep.isNull() && mPartSourcep->isDead())
     {
         mPartSourcep = NULL;
@@ -6133,7 +6136,7 @@ void LLViewerObject::unpackParticleSource(LLDataPacker &dp, const LLUUID& owner_
             LLViewerTexture* image;
             if (mPartSourcep->mPartSysData.mPartImageID == LLUUID::null)
             {
-                image = LLViewerTextureManager::getFetchedTextureFromFile("pixiesmall.j2c");
+                image = LLViewerFetchedTexture::sDefaultParticleImagep;
             }
             else
             {
diff --git a/indra/newview/llviewerpartsource.cpp b/indra/newview/llviewerpartsource.cpp
index dd6a404836..54e0470604 100644
--- a/indra/newview/llviewerpartsource.cpp
+++ b/indra/newview/llviewerpartsource.cpp
@@ -114,7 +114,7 @@ LLViewerPartSourceScript::LLViewerPartSourceScript(LLViewerObject *source_objp)
     llassert(source_objp);
     mSourceObjectp = source_objp;
     mPosAgent = mSourceObjectp->getPositionAgent();
-    mImagep = LLViewerTextureManager::getFetchedTextureFromFile("pixiesmall.j2c");
+    mImagep = LLViewerFetchedTexture::sDefaultParticleImagep;
 
     mImagep->setAddressMode(LLTexUnit::TAM_CLAMP);
 }
@@ -619,7 +619,7 @@ void LLViewerPartSourceSpiral::update(const F32 dt)
 {
     if (!mImagep)
     {
-        mImagep = LLViewerTextureManager::getFetchedTextureFromFile("pixiesmall.j2c");
+        mImagep = LLViewerFetchedTexture::sDefaultParticleImagep;
     }
 
     const F32 RATE = 0.025f;
@@ -789,7 +789,7 @@ void LLViewerPartSourceBeam::update(const F32 dt)
 
         if (!mImagep)
         {
-            mImagep = LLViewerTextureManager::getFetchedTextureFromFile("pixiesmall.j2c");
+            mImagep = LLViewerFetchedTexture::sDefaultParticleImagep;
         }
 
         LLViewerPart* part = new LLViewerPart();
@@ -876,7 +876,7 @@ void LLViewerPartSourceChat::update(const F32 dt)
 {
     if (!mImagep)
     {
-        mImagep = LLViewerTextureManager::getFetchedTextureFromFile("pixiesmall.j2c");
+        mImagep = LLViewerFetchedTexture::sDefaultParticleImagep;
     }
 
 
diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp
index c98bd9b7e2..5ddc00f3a0 100644
--- a/indra/newview/llviewershadermgr.cpp
+++ b/indra/newview/llviewershadermgr.cpp
@@ -158,6 +158,7 @@ LLGLSLShader            gDeferredMultiLightProgram[16];
 LLGLSLShader            gDeferredSpotLightProgram;
 LLGLSLShader            gDeferredMultiSpotLightProgram;
 LLGLSLShader            gDeferredSunProgram;
+LLGLSLShader            gDeferredSunProbeProgram;
 LLGLSLShader            gHazeProgram;
 LLGLSLShader            gHazeWaterProgram;
 LLGLSLShader            gDeferredBlurLightProgram;
@@ -200,6 +201,7 @@ LLGLSLShader            gExposureProgram;
 LLGLSLShader            gExposureProgramNoFade;
 LLGLSLShader            gLuminanceProgram;
 LLGLSLShader            gFXAAProgram;
+LLGLSLShader            gCASProgram;
 LLGLSLShader            gDeferredPostNoDoFProgram;
 LLGLSLShader            gDeferredWLSkyProgram;
 LLGLSLShader            gEnvironmentMapProgram;
@@ -400,6 +402,7 @@ void LLViewerShaderMgr::finalizeShaderList()
     mShaderList.push_back(&gObjectAlphaMaskNoColorProgram);
     mShaderList.push_back(&gUnderWaterProgram);
     mShaderList.push_back(&gDeferredSunProgram);
+    mShaderList.push_back(&gDeferredSunProbeProgram);
     mShaderList.push_back(&gHazeProgram);
     mShaderList.push_back(&gHazeWaterProgram);
     mShaderList.push_back(&gDeferredSoftenProgram);
@@ -1607,10 +1610,7 @@ bool LLViewerShaderMgr::loadShadersDeferred()
     if (success)
     {
         std::string fragment;
-        std::string vertex = "deferred/sunLightV.glsl";
-
         bool use_ao = gSavedSettings.getBOOL("RenderDeferredSSAO");
-
         if (use_ao)
         {
             fragment = "deferred/sunLightSSAOF.glsl";
@@ -1618,10 +1618,6 @@ bool LLViewerShaderMgr::loadShadersDeferred()
         else
         {
             fragment = "deferred/sunLightF.glsl";
-            if (mShaderLevel[SHADER_DEFERRED] == 1)
-            { //no shadows, no SSAO, no frag coord
-                vertex = "deferred/sunLightNoFragCoordV.glsl";
-            }
         }
 
         gDeferredSunProgram.mName = "Deferred Sun Shader";
@@ -1630,7 +1626,7 @@ bool LLViewerShaderMgr::loadShadersDeferred()
         gDeferredSunProgram.mFeatures.hasAmbientOcclusion = use_ao;
 
         gDeferredSunProgram.mShaderFiles.clear();
-        gDeferredSunProgram.mShaderFiles.push_back(make_pair(vertex, GL_VERTEX_SHADER));
+        gDeferredSunProgram.mShaderFiles.push_back(make_pair("deferred/sunLightV.glsl", GL_VERTEX_SHADER));
         gDeferredSunProgram.mShaderFiles.push_back(make_pair(fragment, GL_FRAGMENT_SHADER));
         gDeferredSunProgram.mShaderLevel = mShaderLevel[SHADER_DEFERRED];
 
@@ -1640,6 +1636,21 @@ bool LLViewerShaderMgr::loadShadersDeferred()
 
     if (success)
     {
+        gDeferredSunProbeProgram.mName = "Deferred Sun Probe Shader";
+        gDeferredSunProbeProgram.mFeatures.isDeferred = true;
+        gDeferredSunProbeProgram.mFeatures.hasShadows = true;
+
+        gDeferredSunProbeProgram.mShaderFiles.clear();
+        gDeferredSunProbeProgram.mShaderFiles.push_back(make_pair("deferred/sunLightV.glsl", GL_VERTEX_SHADER));
+        gDeferredSunProbeProgram.mShaderFiles.push_back(make_pair("deferred/sunLightF.glsl", GL_FRAGMENT_SHADER));
+        gDeferredSunProbeProgram.mShaderLevel = mShaderLevel[SHADER_DEFERRED];
+
+        success = gDeferredSunProbeProgram.createShader();
+        llassert(success);
+    }
+
+    if (success)
+    {
         gDeferredBlurLightProgram.mName = "Deferred Blur Light Shader";
         gDeferredBlurLightProgram.mFeatures.isDeferred = true;
 
@@ -2341,6 +2352,16 @@ bool LLViewerShaderMgr::loadShadersDeferred()
         gFXAAProgram.mShaderFiles.clear();
         gFXAAProgram.mShaderFiles.push_back(make_pair("deferred/postDeferredV.glsl", GL_VERTEX_SHADER));
         gFXAAProgram.mShaderFiles.push_back(make_pair("deferred/fxaaF.glsl", GL_FRAGMENT_SHADER));
+
+        if (gGLManager.mGLVersion > 3.9)
+        {
+            gFXAAProgram.addPermutation("FXAA_GLSL_400", "1");
+        }
+        else
+        {
+            gFXAAProgram.addPermutation("FXAA_GLSL_130", "1");
+        }
+
         gFXAAProgram.mShaderLevel = mShaderLevel[SHADER_DEFERRED];
         success = gFXAAProgram.createShader();
         llassert(success);
@@ -2348,6 +2369,17 @@ bool LLViewerShaderMgr::loadShadersDeferred()
 
     if (success)
     {
+        gCASProgram.mName = "Contrast Adaptive Sharpening Shader";
+        gCASProgram.mFeatures.hasSrgb = true;
+        gCASProgram.mShaderFiles.clear();
+        gCASProgram.mShaderFiles.push_back(make_pair("deferred/postDeferredNoTCV.glsl", GL_VERTEX_SHADER));
+        gCASProgram.mShaderFiles.push_back(make_pair("deferred/CASF.glsl", GL_FRAGMENT_SHADER));
+        gCASProgram.mShaderLevel = mShaderLevel[SHADER_DEFERRED];
+        gCASProgram.createShader();
+    }
+
+    if (success)
+    {
         gDeferredPostProgram.mName = "Deferred Post Shader";
         gDeferredPostProgram.mFeatures.isDeferred = true;
         gDeferredPostProgram.mShaderFiles.clear();
diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h
index af47014a43..b858530c3e 100644
--- a/indra/newview/llviewershadermgr.h
+++ b/indra/newview/llviewershadermgr.h
@@ -230,6 +230,7 @@ extern LLGLSLShader         gDeferredMultiLightProgram[LL_DEFERRED_MULTI_LIGHT_C
 extern LLGLSLShader         gDeferredSpotLightProgram;
 extern LLGLSLShader         gDeferredMultiSpotLightProgram;
 extern LLGLSLShader         gDeferredSunProgram;
+extern LLGLSLShader         gDeferredSunProbeProgram;
 extern LLGLSLShader         gHazeProgram;
 extern LLGLSLShader         gHazeWaterProgram;
 extern LLGLSLShader         gDeferredBlurLightProgram;
@@ -245,6 +246,7 @@ extern LLGLSLShader         gDeferredPostProgram;
 extern LLGLSLShader         gDeferredCoFProgram;
 extern LLGLSLShader         gDeferredDoFCombineProgram;
 extern LLGLSLShader         gFXAAProgram;
+extern LLGLSLShader         gCASProgram;
 extern LLGLSLShader         gDeferredPostNoDoFProgram;
 extern LLGLSLShader         gDeferredPostGammaCorrectProgram;
 extern LLGLSLShader         gNoPostGammaCorrectProgram;
diff --git a/indra/newview/llviewerstats.cpp b/indra/newview/llviewerstats.cpp
index d1ee9ea17c..3641738d9d 100644
--- a/indra/newview/llviewerstats.cpp
+++ b/indra/newview/llviewerstats.cpp
@@ -137,7 +137,6 @@ LLTrace::CountStatHandle<>  FPS("FPS", "Frames rendered"),
                             UPLOAD_TEXTURE("uploadtexture", "Textures uploaded"),
                             EDIT_TEXTURE("edittexture", "Changes to textures on objects"),
                             KILLED("killed", "Number of times killed"),
-                            FRAMETIME_DOUBLED("frametimedoubled", "Ratio of frames 2x longer than previous"),
                             TEX_BAKES("texbakes", "Number of times avatar textures have been baked"),
                             TEX_REBAKES("texrebakes", "Number of times avatar textures have been forced to rebake"),
                             NUM_NEW_OBJECTS("numnewobjectsstat", "Number of objects in scene that were not previously in cache");
@@ -157,11 +156,6 @@ LLTrace::CountStatHandle<F64Kilobytes >
                             MESSAGE_SYSTEM_DATA_IN("messagedatain", "Incoming message system network data"),
                             MESSAGE_SYSTEM_DATA_OUT("messagedataout", "Outgoing message system network data");
 
-LLTrace::CountStatHandle<F64Seconds >
-                            SIM_20_FPS_TIME("sim20fpstime", "Seconds with sim FPS below 20"),
-                            SIM_PHYSICS_20_FPS_TIME("simphysics20fpstime", "Seconds with physics FPS below 20"),
-                            LOSS_5_PERCENT_TIME("loss5percenttime", "Seconds with packet loss > 5%");
-
 SimMeasurement<>            SIM_TIME_DILATION("simtimedilation", "Simulator time scale", LL_SIM_STAT_TIME_DILATION),
                             SIM_FPS("simfps", "Simulator framerate", LL_SIM_STAT_FPS),
                             SIM_PHYSICS_FPS("simphysicsfps", "Simulator physics framerate", LL_SIM_STAT_PHYSFPS),
@@ -205,9 +199,6 @@ static LLTrace::SampleStatHandle<bool>
                             CHAT_BUBBLES("chatbubbles", "Chat Bubbles Enabled");
 
 LLTrace::SampleStatHandle<F64Megabytes > FORMATTED_MEM("formattedmemstat");
-LLTrace::SampleStatHandle<F64Kilobytes >    DELTA_BANDWIDTH("deltabandwidth", "Increase/Decrease in bandwidth based on packet loss"),
-                                                            MAX_BANDWIDTH("maxbandwidth", "Max bandwidth setting");
-
 
 SimMeasurement<F64Milliseconds >    SIM_FRAME_TIME("simframemsec", "", LL_SIM_STAT_FRAMEMS),
                                                     SIM_NET_TIME("simnetmsec", "", LL_SIM_STAT_NETMS),
@@ -228,7 +219,6 @@ SimMeasurement<F64Kilobytes >   SIM_UNACKED_BYTES("simtotalunackedbytes", "", LL
 SimMeasurement<F64Megabytes >   SIM_PHYSICS_MEM("physicsmemoryallocated", "", LL_SIM_STAT_SIMPHYSICSMEMORY);
 
 LLTrace::SampleStatHandle<F64Milliseconds > FRAMETIME_JITTER("frametimejitter", "Average delta between successive frame times"),
-                                            FRAMETIME_SLEW("frametimeslew", "Average delta between frame time and mean"),
                                             FRAMETIME("frametime", "Measured frame time"),
                                             SIM_PING("simpingstat");
 
@@ -246,10 +236,7 @@ LLTrace::EventStatHandle<F64Milliseconds >  REGION_CROSSING_TIME("regioncrossing
 
 LLTrace::EventStatHandle<F64Seconds >   AVATAR_EDIT_TIME("avataredittime", "Seconds in Edit Appearance"),
                                                             TOOLBOX_TIME("toolboxtime", "Seconds using Toolbox"),
-                                                            MOUSELOOK_TIME("mouselooktime", "Seconds in Mouselook"),
-                                                            FPS_10_TIME("fps10time", "Seconds below 10 FPS"),
-                                                            FPS_8_TIME("fps8time", "Seconds below 8 FPS"),
-                                                            FPS_2_TIME("fps2time", "Seconds below 2 FPS");
+                                                            MOUSELOOK_TIME("mouselooktime", "Seconds in Mouselook");
 
 LLTrace::EventStatHandle<LLUnit<F32, LLUnits::Percent> > OBJECT_CACHE_HIT_RATE("object_cache_hits");
 
@@ -279,55 +266,12 @@ void LLViewerStats::resetStats()
 
 void LLViewerStats::updateFrameStats(const F64Seconds time_diff)
 {
-    if (getRecording().getLastValue(LLStatViewer::PACKETS_LOST_PERCENT) > F32Percent(5.0))
-    {
-        add(LLStatViewer::LOSS_5_PERCENT_TIME, time_diff);
-    }
-
-    F32 sim_fps = (F32)getRecording().getLastValue(LLStatViewer::SIM_FPS);
-    if (0.f < sim_fps && sim_fps < 20.f)
-    {
-        add(LLStatViewer::SIM_20_FPS_TIME, time_diff);
-    }
-
-    F32 sim_physics_fps = (F32)getRecording().getLastValue(LLStatViewer::SIM_PHYSICS_FPS);
-
-    if (0.f < sim_physics_fps && sim_physics_fps < 20.f)
-    {
-        add(LLStatViewer::SIM_PHYSICS_20_FPS_TIME, time_diff);
-    }
-
-    if (time_diff >= (F64Seconds)0.5)
-    {
-        record(LLStatViewer::FPS_2_TIME, time_diff);
-    }
-    if (time_diff >= (F64Seconds)0.125)
-    {
-        record(LLStatViewer::FPS_8_TIME, time_diff);
-    }
-    if (time_diff >= (F64Seconds)0.1)
-    {
-        record(LLStatViewer::FPS_10_TIME, time_diff);
-    }
-
     if (gFrameCount && mLastTimeDiff > (F64Seconds)0.0)
     {
-        // new "stutter" meter
-        add(LLStatViewer::FRAMETIME_DOUBLED, time_diff >= 2.0 * mLastTimeDiff ? 1 : 0);
-
         sample(LLStatViewer::FRAMETIME, time_diff);
-
         // old stats that were never really used
-        F64Seconds jit = (F64Seconds) std::fabs((mLastTimeDiff - time_diff));
+        F64Seconds jit = (F64Seconds)std::fabs((mLastTimeDiff - time_diff));
         sample(LLStatViewer::FRAMETIME_JITTER, jit);
-
-        F32Seconds average_frametime = gRenderStartTime.getElapsedTimeF32() / (F32)gFrameCount;
-        sample(LLStatViewer::FRAMETIME_SLEW, F64Milliseconds (average_frametime - time_diff));
-
-        F32 max_bandwidth = gViewerThrottle.getMaxBandwidth();
-        F32 delta_bandwidth = gViewerThrottle.getCurrentBandwidth() - max_bandwidth;
-        sample(LLStatViewer::DELTA_BANDWIDTH, F64Bits(delta_bandwidth));
-        sample(LLStatViewer::MAX_BANDWIDTH, F64Bits(max_bandwidth));
     }
 
     mLastTimeDiff = time_diff;
diff --git a/indra/newview/llviewerstats.h b/indra/newview/llviewerstats.h
index a857af53cc..8aed1c537e 100644
--- a/indra/newview/llviewerstats.h
+++ b/indra/newview/llviewerstats.h
@@ -135,7 +135,6 @@ extern LLTrace::CountStatHandle<>           FPS,
                                             UPLOAD_TEXTURE,
                                             EDIT_TEXTURE,
                                             KILLED,
-                                            FRAMETIME_DOUBLED,
                                             TEX_BAKES,
                                             TEX_REBAKES,
                                             NUM_NEW_OBJECTS;
@@ -150,10 +149,6 @@ extern LLTrace::CountStatHandle<F64Kilobytes >  ACTIVE_MESSAGE_DATA_RECEIVED,
                                                                     MESSAGE_SYSTEM_DATA_IN,
                                                                     MESSAGE_SYSTEM_DATA_OUT;
 
-extern LLTrace::CountStatHandle<F64Seconds >        SIM_20_FPS_TIME,
-                                                                    SIM_PHYSICS_20_FPS_TIME,
-                                                                    LOSS_5_PERCENT_TIME;
-
 extern SimMeasurement<>                     SIM_TIME_DILATION,
                                             SIM_FPS,
                                             SIM_PHYSICS_FPS,
@@ -194,8 +189,6 @@ extern LLTrace::SampleStatHandle<LLUnit<F32, LLUnits::Percent> > PACKETS_LOST_PE
 
 extern LLTrace::SampleStatHandle<F64Megabytes > FORMATTED_MEM;
 
-extern LLTrace::SampleStatHandle<F64Kilobytes > DELTA_BANDWIDTH,
-                                                                    MAX_BANDWIDTH;
 extern SimMeasurement<F64Milliseconds > SIM_FRAME_TIME,
                                                             SIM_NET_TIME,
                                                             SIM_OTHER_TIME,
@@ -216,7 +209,6 @@ extern SimMeasurement<F64Megabytes >    SIM_PHYSICS_MEM;
 
 
 extern LLTrace::SampleStatHandle<F64Milliseconds >  FRAMETIME_JITTER,
-                                                    FRAMETIME_SLEW,
                                                     SIM_PING;
 
 extern LLTrace::EventStatHandle<LLUnit<F64, LLUnits::Meters> > AGENT_POSITION_SNAP;
@@ -233,10 +225,7 @@ extern LLTrace::EventStatHandle<F64Milliseconds >   REGION_CROSSING_TIME,
 
 extern LLTrace::EventStatHandle<F64Seconds >    AVATAR_EDIT_TIME,
                                                                 TOOLBOX_TIME,
-                                                                MOUSELOOK_TIME,
-                                                                FPS_10_TIME,
-                                                                FPS_8_TIME,
-                                                                FPS_2_TIME;
+                                                                MOUSELOOK_TIME;
 
 extern LLTrace::EventStatHandle<LLUnit<F32, LLUnits::Percent> > OBJECT_CACHE_HIT_RATE;
 
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index 452d6f2c04..bb16fec32a 100644
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -70,6 +70,7 @@ LLPointer<LLViewerTexture>        LLViewerTexture::sBlackImagep = nullptr;
 LLPointer<LLViewerTexture>        LLViewerTexture::sCheckerBoardImagep = nullptr;
 LLPointer<LLViewerFetchedTexture> LLViewerFetchedTexture::sMissingAssetImagep = nullptr;
 LLPointer<LLViewerFetchedTexture> LLViewerFetchedTexture::sWhiteImagep = nullptr;
+LLPointer<LLViewerFetchedTexture> LLViewerFetchedTexture::sDefaultParticleImagep = nullptr;
 LLPointer<LLViewerFetchedTexture> LLViewerFetchedTexture::sDefaultImagep = nullptr;
 LLPointer<LLViewerFetchedTexture> LLViewerFetchedTexture::sSmokeImagep = nullptr;
 LLPointer<LLViewerFetchedTexture> LLViewerFetchedTexture::sFlatNormalImagep = nullptr;
@@ -497,11 +498,10 @@ void LLViewerTexture::updateClass()
 
     F64 texture_bytes_alloc = LLImageGL::getTextureBytesAllocated() / 1024.0 / 512.0;
     F64 vertex_bytes_alloc = LLVertexBuffer::getBytesAllocated() / 1024.0 / 512.0;
-    F64 render_bytes_alloc = LLRenderTarget::sBytesAllocated / 1024.0 / 512.0;
 
     // get an estimate of how much video memory we're using
     // NOTE: our metrics miss about half the vram we use, so this biases high but turns out to typically be within 5% of the real number
-    F32 used = (F32)ll_round(texture_bytes_alloc + vertex_bytes_alloc + render_bytes_alloc);
+    F32 used = (F32)ll_round(texture_bytes_alloc + vertex_bytes_alloc);
 
     F32 budget = max_vram_budget == 0 ? (F32)gGLManager.mVRAM : (F32)max_vram_budget;
 
diff --git a/indra/newview/llviewertexture.h b/indra/newview/llviewertexture.h
index 3866c898a6..65fa633f81 100644
--- a/indra/newview/llviewertexture.h
+++ b/indra/newview/llviewertexture.h
@@ -507,6 +507,7 @@ public:
     static LLPointer<LLViewerFetchedTexture> sDefaultImagep; // "Default" texture for error cases, the only case of fetched texture which is generated in local.
     static LLPointer<LLViewerFetchedTexture> sFlatNormalImagep; // Flat normal map denoting no bumpiness on a surface
     static LLPointer<LLViewerFetchedTexture> sDefaultIrradiancePBRp; // PBR: irradiance
+    static LLPointer<LLViewerFetchedTexture> sDefaultParticleImagep; // Default particle texture
 
     // not sure why, but something is iffy about the loading of this particular texture, use the accessor instead of accessing directly
     static LLPointer<LLViewerFetchedTexture> sSmokeImagep; // Old "Default" translucent texture
diff --git a/indra/newview/llviewertexturelist.cpp b/indra/newview/llviewertexturelist.cpp
index a5700ab264..2df60dbf6c 100644
--- a/indra/newview/llviewertexturelist.cpp
+++ b/indra/newview/llviewertexturelist.cpp
@@ -123,6 +123,9 @@ void LLViewerTextureList::doPreloadImages()
     LLTexUnit::sWhiteTexture = LLViewerFetchedTexture::sWhiteImagep->getTexName();
     LLUIImageList* image_list = LLUIImageList::getInstance();
 
+    // Set default particle texture
+    LLViewerFetchedTexture::sDefaultParticleImagep = LLViewerTextureManager::getFetchedTextureFromFile("pixiesmall.j2c");
+
     // Set the default flat normal map
     // BLANK_OBJECT_NORMAL has a version on dataserver, but it has compression artifacts
     LLViewerFetchedTexture::sFlatNormalImagep =
@@ -403,6 +406,7 @@ LLViewerFetchedTexture* LLViewerTextureList::getImageFromFile(const std::string&
                                                    const LLUUID& force_id)
 {
     LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
+    LL_PROFILE_ZONE_TEXT(filename.c_str(), filename.size());
     if(!mInitialized)
     {
         return NULL ;
@@ -885,79 +889,90 @@ void LLViewerTextureList::updateImageDecodePriority(LLViewerFetchedTexture* imag
 {
     llassert(!gCubeSnapshot);
 
-    static LLCachedControl<F32> bias_distance_scale(gSavedSettings, "TextureBiasDistanceScale", 1.f);
-    static LLCachedControl<F32> texture_scale_min(gSavedSettings, "TextureScaleMinAreaFactor", 0.04f);
-    static LLCachedControl<F32> texture_scale_max(gSavedSettings, "TextureScaleMaxAreaFactor", 25.f);
+    if (imagep->getBoostLevel() < LLViewerFetchedTexture::BOOST_HIGH)  // don't bother checking face list for boosted textures
+    {
+        static LLCachedControl<F32> bias_distance_scale(gSavedSettings, "TextureBiasDistanceScale", 1.f);
+        static LLCachedControl<F32> texture_scale_min(gSavedSettings, "TextureScaleMinAreaFactor", 0.04f);
+        static LLCachedControl<F32> texture_scale_max(gSavedSettings, "TextureScaleMaxAreaFactor", 25.f);
 
+        F32 max_vsize = 0.f;
+        bool on_screen = false;
 
-    F32 max_vsize = 0.f;
-    bool on_screen = false;
+        U32 face_count = 0;
 
-    LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
-    for (U32 i = 0; i < LLRender::NUM_TEXTURE_CHANNELS; ++i)
-    {
-        for (S32 fi = 0; fi < imagep->getNumFaces(i); ++fi)
-        {
-            LLFace* face = (*(imagep->getFaceList(i)))[fi];
+        F32 bias = (F32) llroundf(powf(4, LLViewerTexture::sDesiredDiscardBias - 1.f));
 
-            if (face && face->getViewerObject())
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
+        for (U32 i = 0; i < LLRender::NUM_TEXTURE_CHANNELS; ++i)
+        {
+            for (S32 fi = 0; fi < imagep->getNumFaces(i); ++fi)
             {
-                F32 radius;
-                F32 cos_angle_to_view_dir;
-                static LLCachedControl<F32> bias_unimportant_threshold(gSavedSettings, "TextureBiasUnimportantFactor", 0.25f);
-                F32 vsize = face->getPixelArea();
-                bool in_frustum = face->calcPixelArea(cos_angle_to_view_dir, radius);
-
-                on_screen = in_frustum;
-
-                // Scale desired texture resolution higher or lower depending on texture scale
-                //
-                // Minimum usage examples: a 1024x1024 texture with aplhabet, runing string
-                // shows one letter at a time
-                //
-                // Maximum usage examples: huge chunk of terrain repeats texture
-                S32 te_offset = face->getTEOffset();  // offset is -1 if not inited
-                LLViewerObject* objp = face->getViewerObject();
-                const LLTextureEntry* te = (te_offset < 0 || te_offset >= objp->getNumTEs()) ? nullptr : objp->getTE(te_offset);
-                F32 min_scale = te ? llmin(fabsf(te->getScaleS()), fabsf(te->getScaleT())) : 1.f;
-                min_scale = llclamp(min_scale * min_scale, texture_scale_min(), texture_scale_max());
-                vsize /= min_scale;
-
-                // if bias is > 2, apply to on-screen textures as well
-                bool apply_bias = LLViewerTexture::sDesiredDiscardBias > 2.f;
-
-                // apply bias to off screen objects or objects that are small on screen all the time
-                if (!in_frustum || !face->getDrawable()->isVisible() || face->getImportanceToCamera() < bias_unimportant_threshold)
-                { // further reduce by discard bias when off screen or occluded
-                    apply_bias = true;
-                }
+                LLFace* face = (*(imagep->getFaceList(i)))[fi];
 
-                if (apply_bias)
+                if (face && face->getViewerObject())
                 {
-                    F32 bias = powf(4, LLViewerTexture::sDesiredDiscardBias - 1.f);
-                    bias = (F32) llround(bias);
-                    vsize /= bias;
+                    ++face_count;
+                    F32 radius;
+                    F32 cos_angle_to_view_dir;
+                    static LLCachedControl<F32> bias_unimportant_threshold(gSavedSettings, "TextureBiasUnimportantFactor", 0.25f);
+
+                    if ((gFrameCount - face->mLastTextureUpdate) > 10)
+                    { // only call calcPixelArea at most once every 10 frames for a given face
+                        // this helps eliminate redundant calls to calcPixelArea for faces that have multiple textures
+                        // assigned to them, such as is the case with GLTF materials or Blinn-Phong materials
+                        face->mInFrustum = face->calcPixelArea(cos_angle_to_view_dir, radius);
+                        face->mLastTextureUpdate = gFrameCount;
+                    }
+
+                    F32 vsize = face->getPixelArea();
+
+                    on_screen = face->mInFrustum;
+
+                    // Scale desired texture resolution higher or lower depending on texture scale
+                    //
+                    // Minimum usage examples: a 1024x1024 texture with alphabet, running string
+                    // shows one letter at a time
+                    //
+                    // Maximum usage examples: huge chunk of terrain repeats texture
+                    // TODO: make this work with the GLTF texture transforms
+                    S32 te_offset = face->getTEOffset();  // offset is -1 if not inited
+                    LLViewerObject* objp = face->getViewerObject();
+                    const LLTextureEntry* te = (te_offset < 0 || te_offset >= objp->getNumTEs()) ? nullptr : objp->getTE(te_offset);
+                    F32 min_scale = te ? llmin(fabsf(te->getScaleS()), fabsf(te->getScaleT())) : 1.f;
+                    min_scale = llclamp(min_scale * min_scale, texture_scale_min(), texture_scale_max());
+                    vsize /= min_scale;
+
+                    // apply bias to offscreen faces all the time, but only to onscreen faces when bias is large
+                    if (!face->mInFrustum || LLViewerTexture::sDesiredDiscardBias > 2.f)
+                    {
+                        vsize /= bias;
+                    }
+
+                    max_vsize = llmax(max_vsize, vsize);
                 }
-
-                max_vsize = llmax(max_vsize, vsize);
             }
         }
-    }
 
-    if (imagep->getType() == LLViewerTexture::LOD_TEXTURE && imagep->getBoostLevel() == LLViewerTexture::BOOST_NONE)
-    { // conditionally reset max virtual size for unboosted LOD_TEXTURES
-      // this is an alternative to decaying mMaxVirtualSize over time
-      // that keeps textures from continously downrezzing and uprezzing in the background
-
-        if (LLViewerTexture::sDesiredDiscardBias > 2.f ||
-            (!on_screen && LLViewerTexture::sDesiredDiscardBias > 1.f))
-        {
-            imagep->mMaxVirtualSize = 0.f;
+        if (face_count > 1024)
+        { // this texture is used in so many places we should just boost it and not bother checking its vsize
+            // this is especially important because the above is not time sliced and can hit multiple ms for a single texture
+            imagep->setBoostLevel(LLViewerFetchedTexture::BOOST_HIGH);
         }
-    }
 
+        if (imagep->getType() == LLViewerTexture::LOD_TEXTURE && imagep->getBoostLevel() == LLViewerTexture::BOOST_NONE)
+        { // conditionally reset max virtual size for unboosted LOD_TEXTURES
+          // this is an alternative to decaying mMaxVirtualSize over time
+          // that keeps textures from continuously downrezzing and uprezzing in the background
+
+            if (LLViewerTexture::sDesiredDiscardBias > 2.f ||
+                (!on_screen && LLViewerTexture::sDesiredDiscardBias > 1.f))
+            {
+                imagep->mMaxVirtualSize = 0.f;
+            }
+        }
 
-    imagep->addTextureStats(max_vsize);
+        imagep->addTextureStats(max_vsize);
+    }
 
 #if 0
     imagep->setDebugText(llformat("%d/%d - %d/%d -- %d/%d",
diff --git a/indra/newview/llvoavatarself.cpp b/indra/newview/llvoavatarself.cpp
index 49859bb585..746ef7cacb 100644
--- a/indra/newview/llvoavatarself.cpp
+++ b/indra/newview/llvoavatarself.cpp
@@ -775,6 +775,7 @@ void LLVOAvatarSelf::updateVisualParams()
 
 void LLVOAvatarSelf::writeWearablesToAvatar()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
     for (U32 type = 0; type < LLWearableType::WT_COUNT; type++)
     {
         LLWearable *wearable = gAgentWearables.getTopWearable((LLWearableType::EType)type);
diff --git a/indra/newview/llvoicechannel.cpp b/indra/newview/llvoicechannel.cpp
index eb1cd00940..55769f567b 100644
--- a/indra/newview/llvoicechannel.cpp
+++ b/indra/newview/llvoicechannel.cpp
@@ -328,6 +328,16 @@ void LLVoiceChannel::setState(EState state)
 
 void LLVoiceChannel::doSetState(const EState& new_state)
 {
+    LL_DEBUGS("Voice") << "session '" << mSessionName << "' state " << mState << ", new_state " << new_state << ": "
+        << (new_state == STATE_ERROR ? "ERROR" :
+            new_state == STATE_HUNG_UP ? "HUNG_UP" :
+            new_state == STATE_READY ? "READY" :
+            new_state == STATE_CALL_STARTED ? "CALL_STARTED" :
+            new_state == STATE_RINGING ? "RINGING" :
+            new_state == STATE_CONNECTED ? "CONNECTED" :
+            "NO_INFO")
+        << LL_ENDL;
+
     EState old_state = mState;
     mState = new_state;
 
diff --git a/indra/newview/llvoicevisualizer.cpp b/indra/newview/llvoicevisualizer.cpp
index 305fd77126..9412136272 100644
--- a/indra/newview/llvoicevisualizer.cpp
+++ b/indra/newview/llvoicevisualizer.cpp
@@ -342,6 +342,8 @@ void LLVoiceVisualizer::render()
         return;
     }
 
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
+
     if ( mSoundSymbol.mActive )
     {
         mPreviousTime = mCurrentTime;
diff --git a/indra/newview/llvoicevivox.cpp b/indra/newview/llvoicevivox.cpp
index 2358c04236..d2a8b4e5cf 100644
--- a/indra/newview/llvoicevivox.cpp
+++ b/indra/newview/llvoicevivox.cpp
@@ -6459,7 +6459,6 @@ LLVivoxVoiceClient::voiceFontEntry::voiceFontEntry(LLUUID& id) :
     mIsNew(false)
 {
     mExpiryTimer.stop();
-    mExpiryWarningTimer.stop();
 }
 
 LLVivoxVoiceClient::voiceFontEntry::~voiceFontEntry()
@@ -6570,20 +6569,6 @@ void LLVivoxVoiceClient::addVoiceFont(const S32 font_index,
             font->mExpiryTimer.start();
             font->mExpiryTimer.setExpiryAt(expiration_date.secondsSinceEpoch() - VOICE_FONT_EXPIRY_INTERVAL);
 
-            // Set the warning timer to some interval before actual expiry.
-            S32 warning_time = gSavedSettings.getS32("VoiceEffectExpiryWarningTime");
-            if (warning_time != 0)
-            {
-                font->mExpiryWarningTimer.start();
-                F64 expiry_time = (expiration_date.secondsSinceEpoch() - (F64)warning_time);
-                font->mExpiryWarningTimer.setExpiryAt(expiry_time - VOICE_FONT_EXPIRY_INTERVAL);
-            }
-            else
-            {
-                // Disable the warning timer.
-                font->mExpiryWarningTimer.stop();
-            }
-
              // Only flag new session fonts after the first time we have fetched the list.
             if (mVoiceFontsReceived)
             {
@@ -6625,7 +6610,6 @@ void LLVivoxVoiceClient::expireVoiceFonts()
     // than checking each font individually.
 
     bool have_expired = false;
-    bool will_expire = false;
     bool expired_in_use = false;
 
     LLUUID current_effect = LLVoiceClient::instance().getVoiceEffectDefault();
@@ -6635,7 +6619,6 @@ void LLVivoxVoiceClient::expireVoiceFonts()
     {
         voiceFontEntry* voice_font = iter->second;
         LLFrameTimer& expiry_timer  = voice_font->mExpiryTimer;
-        LLFrameTimer& warning_timer = voice_font->mExpiryWarningTimer;
 
         // Check for expired voice fonts
         if (expiry_timer.getStarted() && expiry_timer.hasExpired())
@@ -6652,14 +6635,6 @@ void LLVivoxVoiceClient::expireVoiceFonts()
             deleteVoiceFont(voice_font->mID);
             have_expired = true;
         }
-
-        // Check for voice fonts that will expire in less that the warning time
-        if (warning_timer.getStarted() && warning_timer.hasExpired())
-        {
-            LL_DEBUGS("VoiceFont") << "Voice Font " << voice_font->mName << " will expire soon." << LL_ENDL;
-            will_expire = true;
-            warning_timer.stop();
-        }
     }
 
     LLSD args;
@@ -6681,15 +6656,6 @@ void LLVivoxVoiceClient::expireVoiceFonts()
         // Refresh voice font lists in the UI.
         notifyVoiceFontObservers();
     }
-
-    // Give a warning notification if any voice fonts are due to expire.
-    if (will_expire)
-    {
-        S32Seconds seconds(gSavedSettings.getS32("VoiceEffectExpiryWarningTime"));
-        args["INTERVAL"] = llformat("%d", LLUnit<S32, LLUnits::Days>(seconds).value());
-
-        LLNotificationsUtil::add("VoiceEffectsWillExpire", args);
-    }
 }
 
 void LLVivoxVoiceClient::deleteVoiceFont(const LLUUID& id)
diff --git a/indra/newview/llvoicevivox.h b/indra/newview/llvoicevivox.h
index 7862e492b2..3167705528 100644
--- a/indra/newview/llvoicevivox.h
+++ b/indra/newview/llvoicevivox.h
@@ -880,7 +880,6 @@ private:
         bool        mIsNew;
 
         LLFrameTimer    mExpiryTimer;
-        LLFrameTimer    mExpiryWarningTimer;
     };
 
     bool mVoiceFontsReceived;
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 669ccb0924..7da4358f86 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -3219,6 +3219,7 @@ void LLVOVolume::updateSpotLightPriority()
     {
         return;
     }
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME;
 
     F32 r = getLightRadius();
     LLVector3 pos = mDrawable->getPositionAgent();
diff --git a/indra/newview/llworld.cpp b/indra/newview/llworld.cpp
index 40bfa8ec83..e0d8c25731 100644
--- a/indra/newview/llworld.cpp
+++ b/indra/newview/llworld.cpp
@@ -768,6 +768,7 @@ void LLWorld::updateParticles()
 
 void LLWorld::renderPropertyLines()
 {
+    LL_PROFILE_ZONE_SCOPED;
     for (region_list_t::iterator iter = mVisibleRegionList.begin();
          iter != mVisibleRegionList.end(); ++iter)
     {
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 98fdb68222..ce43982f35 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -117,6 +117,17 @@
 #include "llenvironment.h"
 #include "llsettingsvo.h"
 
+#ifndef LL_WINDOWS // non-Windows builds only
+#define A_GCC 1 // set before the CASF.glsl include below; presumably selects its GCC code path (confirm in the header)
+#pragma GCC diagnostic ignored "-Wunused-function" // NOTE(review): no diagnostic push/pop here, so these stay off for the rest of this file
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#if LL_LINUX
+#pragma GCC diagnostic ignored "-Wrestrict" // only silenced on Linux builds
+#endif
+#endif
+#define A_CPU 1 // defined on every platform; presumably makes the shared header compile its CPU-side helpers (confirm)
+#include "app_settings/shaders/class1/deferred/CASF.glsl" // This is also C++: the shader source is included here as a C++ header
+
 extern bool gSnapshot;
 bool gShiftFrame = false;
 
@@ -4521,33 +4532,51 @@ void LLPipeline::renderDebug()
     }
 
     // Debug stuff.
-    for (LLWorld::region_list_t::const_iterator iter = LLWorld::getInstance()->getRegionList().begin();
-            iter != LLWorld::getInstance()->getRegionList().end(); ++iter)
-    {
-        LLViewerRegion* region = *iter;
-        for (U32 i = 0; i < LLViewerRegion::NUM_PARTITIONS; i++)
+    if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_OCTREE |
+        LLPipeline::RENDER_DEBUG_OCCLUSION |
+        LLPipeline::RENDER_DEBUG_LIGHTS |
+        LLPipeline::RENDER_DEBUG_BATCH_SIZE |
+        LLPipeline::RENDER_DEBUG_UPDATE_TYPE |
+        LLPipeline::RENDER_DEBUG_BBOXES |
+        LLPipeline::RENDER_DEBUG_NORMALS |
+        LLPipeline::RENDER_DEBUG_POINTS |
+        LLPipeline::RENDER_DEBUG_TEXTURE_AREA |
+        LLPipeline::RENDER_DEBUG_TEXTURE_ANIM |
+        LLPipeline::RENDER_DEBUG_RAYCAST |
+        LLPipeline::RENDER_DEBUG_AVATAR_VOLUME |
+        LLPipeline::RENDER_DEBUG_AVATAR_JOINTS |
+        LLPipeline::RENDER_DEBUG_AGENT_TARGET |
+        LLPipeline::RENDER_DEBUG_SHADOW_FRUSTA |
+        LLPipeline::RENDER_DEBUG_TEXEL_DENSITY))
+    {
+        LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("render debug bridges");
+
+        for (LLViewerRegion* region : LLWorld::getInstance()->getRegionList())
         {
-            LLSpatialPartition* part = region->getSpatialPartition(i);
-            if (part)
+            for (U32 i = 0; i < LLViewerRegion::NUM_PARTITIONS; i++)
             {
-                if ( (hud_only && (part->mDrawableType == RENDER_TYPE_HUD || part->mDrawableType == RENDER_TYPE_HUD_PARTICLES)) ||
-                     (!hud_only && hasRenderType(part->mDrawableType)) )
+                LLSpatialPartition* part = region->getSpatialPartition(i);
+                if (part)
                 {
-                    part->renderDebug();
+                    if ((hud_only && (part->mDrawableType == RENDER_TYPE_HUD || part->mDrawableType == RENDER_TYPE_HUD_PARTICLES)) ||
+                        (!hud_only && hasRenderType(part->mDrawableType)))
+                    {
+                        part->renderDebug();
+                    }
                 }
             }
         }
-    }
 
-    for (LLCullResult::bridge_iterator i = sCull->beginVisibleBridge(); i != sCull->endVisibleBridge(); ++i)
-    {
-        LLSpatialBridge* bridge = *i;
-        if (!bridge->isDead() && hasRenderType(bridge->mDrawableType))
+        for (LLCullResult::bridge_iterator i = sCull->beginVisibleBridge(); i != sCull->endVisibleBridge(); ++i)
         {
-            gGL.pushMatrix();
-            gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix);
-            bridge->renderDebug();
-            gGL.popMatrix();
+            LLSpatialBridge* bridge = *i;
+            if (!bridge->isDead() && hasRenderType(bridge->mDrawableType))
+            {
+                gGL.pushMatrix();
+                gGL.multMatrix((F32*)bridge->mDrawable->getRenderMatrix().mMatrix);
+                bridge->renderDebug();
+                gGL.popMatrix();
+            }
         }
     }
 
@@ -6624,8 +6653,14 @@ void LLPipeline::renderAlphaObjects(bool rigged)
     S32 sun_up = LLEnvironment::instance().getIsSunUp() ? 1 : 0;
     U32 target_width = LLRenderTarget::sCurResX;
     U32 type = LLRenderPass::PASS_ALPHA;
-    LLVOAvatar* lastAvatar = nullptr;
+    // Skin-upload tracking for the gDeferredShadowAlphaMaskProgram path (handed to uploadMatrixPalette below)
+    const LLVOAvatar* lastAvatar = nullptr;
     U64 lastMeshId = 0;
+    bool skipLastSkin = false; // initialized so the callee can never observe an indeterminate value
+    // Separate tracking for the gDeferredShadowGLTFAlphaBlendProgram path (handed to pushRiggedGLTFBatch below)
+    const LLVOAvatar* lastAvatarGLTF = nullptr;
+    U64 lastMeshIdGLTF = 0;
+    bool skipLastSkinGLTF = false; // initialized for the same reason as skipLastSkin
     auto* begin = gPipeline.beginRenderMap(type);
     auto* end = gPipeline.endRenderMap(type);
 
@@ -6649,7 +6684,7 @@ void LLPipeline::renderAlphaObjects(bool rigged)
                 LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up);
                 LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width);
                 LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF);
-                LLRenderPass::pushRiggedGLTFBatch(*pparams, lastAvatar, lastMeshId);
+                LLRenderPass::pushRiggedGLTFBatch(*pparams, lastAvatarGLTF, lastMeshIdGLTF, skipLastSkinGLTF);
             }
             else
             {
@@ -6657,14 +6692,10 @@ void LLPipeline::renderAlphaObjects(bool rigged)
                 LLGLSLShader::sCurBoundShaderPtr->uniform1i(LLShaderMgr::SUN_UP_FACTOR, sun_up);
                 LLGLSLShader::sCurBoundShaderPtr->uniform1f(LLShaderMgr::DEFERRED_SHADOW_TARGET_WIDTH, (float)target_width);
                 LLGLSLShader::sCurBoundShaderPtr->setMinimumAlpha(ALPHA_BLEND_CUTOFF);
-                if (lastAvatar != pparams->mAvatar || lastMeshId != pparams->mSkinInfo->mHash)
+                if (mSimplePool->uploadMatrixPalette(pparams->mAvatar, pparams->mSkinInfo, lastAvatar, lastMeshId, skipLastSkin))
                 {
-                    mSimplePool->uploadMatrixPalette(*pparams);
-                    lastAvatar = pparams->mAvatar;
-                    lastMeshId = pparams->mSkinInfo->mHash;
+                    mSimplePool->pushBatch(*pparams, true, true);
                 }
-
-                mSimplePool->pushBatch(*pparams, true, true);
             }
         }
         else
@@ -7131,6 +7162,51 @@ void LLPipeline::generateGlow(LLRenderTarget* src)
     }
 }
 
+void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst) // Sharpening pass: draws src into dst through gCASProgram.
+{
+    static LLCachedControl<F32> cas_sharpness(gSavedSettings, "RenderCASSharpness", 0.4f);
+    if (cas_sharpness == 0.0f) // sharpening disabled by the setting
+    {
+        gPipeline.copyRenderTarget(src, dst); // still populate dst, just without running the shader
+        return;
+    }
+    // Sharpening path: bind dst and the shader, upload constants, sample src, draw, then unbind and flush.
+    LLGLSLShader* sharpen_shader = &gCASProgram;
+
+    // Bind setup: dst becomes the render target and the sharpening shader is made current.
+    dst->bindTarget();
+
+    sharpen_shader->bind();
+    // Uniform upload; the braces keep the constant temporaries local to this step.
+    {
+        static LLStaticHashedString cas_param_0("cas_param_0");
+        static LLStaticHashedString cas_param_1("cas_param_1");
+        static LLStaticHashedString out_screen_res("out_screen_res");
+        // const0/const1 are handed to CasSetup and then uploaded as cas_param_0/cas_param_1.
+        varAU4(const0);
+        varAU4(const1);
+        CasSetup(const0, const1,
+            cas_sharpness(),             // Sharpness tuning knob (0.0 to 1.0).
+            (AF1)src->getWidth(), (AF1)src->getHeight(),  // Input size.
+            (AF1)dst->getWidth(), (AF1)dst->getHeight()); // Output size.
+
+        sharpen_shader->uniform4uiv(cas_param_0, 1, const0);
+        sharpen_shader->uniform4uiv(cas_param_1, 1, const1);
+        // Output resolution, taken from dst.
+        sharpen_shader->uniform2f(out_screen_res, (AF1)dst->getWidth(), (AF1)dst->getHeight());
+    }
+    // src is the texture being sharpened; it is bound to DEFERRED_DIFFUSE with LLTexUnit::TFO_POINT.
+    sharpen_shader->bindTexture(LLShaderMgr::DEFERRED_DIFFUSE, src, false, LLTexUnit::TFO_POINT);
+
+    // Draw: three vertices from the shared screen-triangle buffer (presumably one triangle covering the whole target).
+    gPipeline.mScreenTriangleVB->setBuffer();
+    gPipeline.mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3);
+
+    sharpen_shader->unbind();
+
+    dst->flush();
+}
+
 void LLPipeline::applyFXAA(LLRenderTarget* src, LLRenderTarget* dst)
 {
     {
@@ -7500,13 +7576,15 @@ void LLPipeline::renderFinalize()
     gGLViewport[3] = gViewerWindow->getWorldViewRectRaw().getHeight();
     glViewport(gGLViewport[0], gGLViewport[1], gGLViewport[2], gGLViewport[3]);
 
-    renderDoF(&mRT->screen, &mPostMap);
+    applyCAS(&mRT->screen, &mPostMap);
 
-    applyFXAA(&mPostMap, &mRT->screen);
-    LLRenderTarget* finalBuffer = &mRT->screen;
+    renderDoF(&mPostMap, &mRT->screen);
+
+    applyFXAA(&mRT->screen, &mPostMap);
+    LLRenderTarget* finalBuffer = &mPostMap;
     if (RenderBufferVisualization > -1)
     {
-        finalBuffer = &mPostMap;
+        finalBuffer = &mRT->screen;
         switch (RenderBufferVisualization)
         {
         case 0:
@@ -7928,13 +8006,15 @@ void LLPipeline::renderDeferredLighting()
         mat.mult_matrix_vec(tc_moon);
         mTransformedMoonDir.set(tc_moon.v);
 
-        if (RenderDeferredSSAO || RenderShadowDetail > 0)
+        if ((RenderDeferredSSAO && !gCubeSnapshot) || RenderShadowDetail > 0)
         {
             LL_PROFILE_GPU_ZONE("sun program");
             deferred_light_target->bindTarget();
             {  // paint shadow/SSAO light map (direct lighting lightmap)
                 LL_PROFILE_ZONE_NAMED_CATEGORY_PIPELINE("renderDeferredLighting - sun shadow");
-                bindDeferredShader(gDeferredSunProgram, deferred_light_target);
+
+                LLGLSLShader& sun_shader = gCubeSnapshot ? gDeferredSunProbeProgram : gDeferredSunProgram;
+                bindDeferredShader(sun_shader, deferred_light_target);
                 mScreenTriangleVB->setBuffer();
                 glClearColor(1, 1, 1, 1);
                 deferred_light_target->clear(GL_COLOR_BUFFER_BIT);
@@ -7959,8 +8039,8 @@ void LLPipeline::renderDeferredLighting()
                     }
                 }
 
-                gDeferredSunProgram.uniform3fv(sOffset, slice, offset);
-                gDeferredSunProgram.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES,
+                sun_shader.uniform3fv(sOffset, slice, offset);
+                sun_shader.uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES,
                                               (GLfloat)deferred_light_target->getWidth(),
                                               (GLfloat)deferred_light_target->getHeight());
 
@@ -7970,12 +8050,12 @@ void LLPipeline::renderDeferredLighting()
                     mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3);
                 }
 
-                unbindDeferredShader(gDeferredSunProgram);
+                unbindDeferredShader(sun_shader);
             }
             deferred_light_target->flush();
         }
 
-        if (RenderDeferredSSAO)
+        if (RenderDeferredSSAO && !gCubeSnapshot)
         {
             // soften direct lighting lightmap
             LL_PROFILE_ZONE_NAMED_CATEGORY_PIPELINE("renderDeferredLighting - soften shadow");
diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h
index 024303d5f0..1c87b71d06 100644
--- a/indra/newview/pipeline.h
+++ b/indra/newview/pipeline.h
@@ -157,6 +157,7 @@ public:
     void generateExposure(LLRenderTarget* src, LLRenderTarget* dst, bool use_history = true);
     void gammaCorrect(LLRenderTarget* src, LLRenderTarget* dst);
     void generateGlow(LLRenderTarget* src);
+    void applyCAS(LLRenderTarget* src, LLRenderTarget* dst);
     void applyFXAA(LLRenderTarget* src, LLRenderTarget* dst);
     void renderDoF(LLRenderTarget* src, LLRenderTarget* dst);
     void copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst);
diff --git a/indra/newview/skins/default/xui/da/notifications.xml b/indra/newview/skins/default/xui/da/notifications.xml
index 283a7b2a43..4a4b7269dc 100644
--- a/indra/newview/skins/default/xui/da/notifications.xml
+++ b/indra/newview/skins/default/xui/da/notifications.xml
@@ -1574,10 +1574,6 @@ Klik på Acceptér for at deltage eller Afvis for at afvise invitationen. Klik p
 		Den aktive stemme &quot;morph&quot; er udløbet og din normale stemme opsætning er genaktiveret.
 [[URL] Click here] for at forny dit abbonnement.
 	</notification>
-	<notification name="VoiceEffectsWillExpire">
-		En eller flere af dine stemme &quot;morphs&quot; vil udløbe om mindre end [INTERVAL] dage.
-[[URL] Click here] for at forny dit abbonnement.
-	</notification>
 	<notification name="VoiceEffectsNew">
 		Nye stemme &quot;morphs&quot; er tilgængelige!
 	</notification>
diff --git a/indra/newview/skins/default/xui/de/notifications.xml b/indra/newview/skins/default/xui/de/notifications.xml
index 6ad71e0ad1..76bebedeec 100644
--- a/indra/newview/skins/default/xui/de/notifications.xml
+++ b/indra/newview/skins/default/xui/de/notifications.xml
@@ -2466,10 +2466,6 @@ Wenn Sie Premium-Mitglied sind, [[PREMIUM_URL] klicken Sie hier], um Ihren Voice
 [[URL] Klicken Sie hier], um Ihr Abo zu erneuern. 
 
 Wenn Sie Premium-Mitglied sind, [[PREMIUM_URL] klicken Sie hier], um Ihren Voice-Morphing-Vorteil zu nutzen.</notification>
-	<notification name="VoiceEffectsWillExpire">Ein oder mehrere Ihrer Voice-Morph-Abos laufen in weniger als [INTERVAL] Tagen ab. 
-[[URL] Klicken Sie hier], um Ihr Abo zu erneuern. 
-
-Wenn Sie Premium-Mitglied sind, [[PREMIUM_URL] klicken Sie hier], um Ihren Voice-Morphing-Vorteil zu nutzen.</notification>
 	<notification name="VoiceEffectsNew">Neue Voice-Morph-Effekte sind erhältlich!</notification>
 	<notification name="Cannot enter parcel: not a group member">Nur Mitglieder einer bestimmten Gruppe dürfen diesen Bereich betreten.</notification>
 	<notification name="Cannot enter parcel: banned">Zugang zur Parzelle verweigert. Sie wurden verbannt.</notification>
diff --git a/indra/newview/skins/default/xui/en/floater_edit_ext_day_cycle.xml b/indra/newview/skins/default/xui/en/floater_edit_ext_day_cycle.xml
index 1500c96b8d..787b2f8be4 100644
--- a/indra/newview/skins/default/xui/en/floater_edit_ext_day_cycle.xml
+++ b/indra/newview/skins/default/xui/en/floater_edit_ext_day_cycle.xml
@@ -50,7 +50,7 @@
                     layout="topleft"
                     name="label"
                     left="15"
-                    top="5"
+                    top="8"
                     width="105">
                 Day Cycle Name:
             </text>
diff --git a/indra/newview/skins/default/xui/en/floater_preferences_graphics_advanced.xml b/indra/newview/skins/default/xui/en/floater_preferences_graphics_advanced.xml
index 7bc81a1f79..3e578e94f4 100644
--- a/indra/newview/skins/default/xui/en/floater_preferences_graphics_advanced.xml
+++ b/indra/newview/skins/default/xui/en/floater_preferences_graphics_advanced.xml
@@ -881,7 +881,26 @@
       value="1"/>
   </combo_box>
   <!-- End of mirror settings -->
-
+  <!-- Sharpening Settings: slider bound to the RenderCASSharpness setting, range 0.0 to 1.0 in steps of 0.1 -->
+  <slider
+    control_name="RenderCASSharpness"
+    decimal_digits="1"
+    follows="left|top"
+    height="16"
+    increment="0.1"
+    initial_value="0.4"
+    label="Sharpening:"
+    label_width="145"
+    layout="topleft"
+    left="420"
+    min_val="0.0"
+    max_val="1.0"
+    name="RenderSharpness"
+    show_text="true"
+    top_delta="24"
+    width="260">
+  </slider>
+  <!-- End of Sharpening Settings -->
   <!-- End of Advanced Settings block -->
 	<view_border
       bevel_style="in"
diff --git a/indra/newview/skins/default/xui/en/notifications.xml b/indra/newview/skins/default/xui/en/notifications.xml
index 5ce73b2cfa..848d9aca7c 100644
--- a/indra/newview/skins/default/xui/en/notifications.xml
+++ b/indra/newview/skins/default/xui/en/notifications.xml
@@ -8860,21 +8860,6 @@ If you are a Premium Member, [[PREMIUM_URL] click here] to receive your voice mo
 
   <notification
    icon="notify.tga"
-   name="VoiceEffectsWillExpire"
-   sound="UISndAlert"
-   persist="true"
-   type="notify">
-    <unique/>    
-One or more of your Voice Morphs will expire in less than [INTERVAL] days.
-[[URL] Click here] to renew your subscription.
-
-If you are a Premium Member, [[PREMIUM_URL] click here] to receive your voice morphing perk.
-  <tag>fail</tag>
-    <tag>voice</tag>
-  </notification>
-
-  <notification
-   icon="notify.tga"
    name="VoiceEffectsNew"
    sound="UISndAlert"
    persist="true"
diff --git a/indra/newview/skins/default/xui/es/notifications.xml b/indra/newview/skins/default/xui/es/notifications.xml
index 739391b965..bf55e2c443 100644
--- a/indra/newview/skins/default/xui/es/notifications.xml
+++ b/indra/newview/skins/default/xui/es/notifications.xml
@@ -2452,10 +2452,6 @@ Si eres un miembro Premium [[PREMIUM_URL] pulsa aquí] para recibir tu beneficio
 [[URL] Pulsa aquí] para renovar la suscripción. 
 
 Si eres un miembro Premium [[PREMIUM_URL] pulsa aquí] para recibir tu beneficio de transformación de voz.</notification>
-	<notification name="VoiceEffectsWillExpire">Una o más de tus transformaciones de voz caducarán en menos de [INTERVAL] días. 
-[[URL] Pulsa aquí] para renovar la suscripción 
-
-Si eres un miembro Premium [[PREMIUM_URL] pulsa aquí] para recibir tu beneficio de transformación de voz.</notification>
 	<notification name="VoiceEffectsNew">Están disponibles nuevas transformaciones de voz.</notification>
 	<notification name="Cannot enter parcel: not a group member">Sólo los miembros de un grupo determinado pueden visitar esta zona.</notification>
 	<notification name="Cannot enter parcel: banned">No puedes entrar en esta parcela, se te ha prohibido el acceso.</notification>
diff --git a/indra/newview/skins/default/xui/fr/notifications.xml b/indra/newview/skins/default/xui/fr/notifications.xml
index 587c88faad..17cf18633f 100644
--- a/indra/newview/skins/default/xui/fr/notifications.xml
+++ b/indra/newview/skins/default/xui/fr/notifications.xml
@@ -2451,10 +2451,6 @@ Si vous êtes un membre Premium,  [[PREMIUM_URL] cliquez ici] pour recevoir votr
 [[URL] Cliquez ici] pour renouveler votre abonnement. 
 
 Si vous êtes un membre Premium, [[PREMIUM_URL] cliquez ici] pour recevoir votre effet de voix.</notification>
-	<notification name="VoiceEffectsWillExpire">Au moins l'un de vos effets de voix expirera dans moins de [INTERVAL] jours.  
-[[URL] Cliquez ici] pour renouveler votre abonnement. 
-
-Si vous êtes un membre Premium,  [[PREMIUM_URL] cliquez ici] pour recevoir votre effet de voix.</notification>
 	<notification name="VoiceEffectsNew">De nouveaux effets de voix sont disponibles !</notification>
 	<notification name="Cannot enter parcel: not a group member">Seuls les membres d'un certain groupe peuvent visiter cette zone.</notification>
 	<notification name="Cannot enter parcel: banned">Vous ne pouvez pas pénétrer sur ce terrain car l'accès vous y est interdit.</notification>
diff --git a/indra/newview/skins/default/xui/it/notifications.xml b/indra/newview/skins/default/xui/it/notifications.xml
index f79cc1515b..1c40e7304a 100644
--- a/indra/newview/skins/default/xui/it/notifications.xml
+++ b/indra/newview/skins/default/xui/it/notifications.xml
@@ -2454,10 +2454,6 @@ Se sei un membro Premium, [[PREMIUM_URL] fai clic qui] per ricevere in regalo la
 [[URL] Fai clic qui] per rinnovare l'abbonamento. 
 
 Se sei un membro Premium, [[PREMIUM_URL] fai clic qui] per ricevere in regalo la manipolazione vocale.</notification>
-	<notification name="VoiceEffectsWillExpire">Almeno una delle tue manipolazioni vocali scadrà tra meno di [INTERVAL] giorni. 
-[[URL] Fai clic qui] per rinnovare l'abbonamento. 
-
-Se sei un membro Premium, [[PREMIUM_URL] fai clic qui] per ricevere in regalo la manipolazione vocale.</notification>
 	<notification name="VoiceEffectsNew">Sono disponibili nuove manipolazioni vocali.</notification>
 	<notification name="Cannot enter parcel: not a group member">Soltanto i membri di un determinato gruppo possono visitare questa zona.</notification>
 	<notification name="Cannot enter parcel: banned">Non puoi entrare nel terreno, sei stato bloccato.</notification>
diff --git a/indra/newview/skins/default/xui/ja/notifications.xml b/indra/newview/skins/default/xui/ja/notifications.xml
index 123e95df04..fbd56e118c 100644
--- a/indra/newview/skins/default/xui/ja/notifications.xml
+++ b/indra/newview/skins/default/xui/ja/notifications.xml
@@ -4663,17 +4663,6 @@ Webページにリンクすると、他人がこの場所に簡単にアクセ�
 			voice
 		</tag>
 	</notification>
-	<notification name="VoiceEffectsWillExpire">ボイスモーフィング効果の１つ、または複数の有効期限が[INTERVAL]日以内に終了します。
-期限を延長・更新するには[[URL] ここ]をクリックしてください。
-
-プレミアム会員の方は、[[PREMIUM_URL] ここ]をクリックしてボイスモーフィング特典をお受け取りください。
-    <tag>
-			fail
-		</tag>
-		<tag>
-			voice
-		</tag>
-	</notification>
 	<notification name="VoiceEffectsNew">新しいボイスモーフィング効果が登場！
     <tag>
 			voice
diff --git a/indra/newview/skins/default/xui/pl/notifications.xml b/indra/newview/skins/default/xui/pl/notifications.xml
index e668c6cc20..17c11bc75f 100644
--- a/indra/newview/skins/default/xui/pl/notifications.xml
+++ b/indra/newview/skins/default/xui/pl/notifications.xml
@@ -3118,11 +3118,6 @@ Jeśli jesteś użytkownikiem premium, to [[PREMIUM_URL] kliknij tutaj] aby otrz
 [[URL] Kliknij tutaj] aby odnowić subskrypcję.
 Jeśli jesteś użytkownikiem premium, to [[PREMIUM_URL] kliknij tutaj] aby otrzymać swój perk Przekształceń Głosu.
 	</notification>
-	<notification name="VoiceEffectsWillExpire">
-		Jedno lub więcej z Twoich Przekształceń Głosu wygaśnie za mniej niż [INTERVAL] dni.
-[[URL] Kliknij tutaj] aby odnowić subskrypcję.
-Jeśli jesteś użytkownikiem premium, to [[PREMIUM_URL] kliknij tutaj] aby otrzymać swój perk Przekształceń Głosu.
-	</notification>
 	<notification name="VoiceEffectsNew">
 		Nowe Przekształcenia Głosu są dostępne!
 	</notification>
diff --git a/indra/newview/skins/default/xui/pt/notifications.xml b/indra/newview/skins/default/xui/pt/notifications.xml
index a3220bca54..0390239669 100644
--- a/indra/newview/skins/default/xui/pt/notifications.xml
+++ b/indra/newview/skins/default/xui/pt/notifications.xml
@@ -2441,10 +2441,6 @@ Se você é um Membro Premium, [[PREMIUM_URL] clique aqui] para receber o seu ap
 [[URL] Clique aqui] para renovar o serviço. 
 
 Se você é um Membro Premium, [[PREMIUM_URL] clique aqui] para receber o seu app de distorção de voz.</notification>
-	<notification name="VoiceEffectsWillExpire">Uma ou mais das suas distorções de voz tem vencimento em menos de [INTERVAL] dias. 
-[[URL] Clique aqui] para renovar o serviço. 
-
-Se você é um Membro Premium, [[PREMIUM_URL] clique aqui] para receber o seu app de distorção de voz.</notification>
 	<notification name="VoiceEffectsNew">Novas Distorções de voz!</notification>
 	<notification name="Cannot enter parcel: not a group member">Só membros de um grupo podem acessar esta área.</notification>
 	<notification name="Cannot enter parcel: banned">Você não pode entrar nessa terra, você foi banido.</notification>
diff --git a/indra/newview/skins/default/xui/ru/notifications.xml b/indra/newview/skins/default/xui/ru/notifications.xml
index e75fd1fd82..bde18edc23 100644
--- a/indra/newview/skins/default/xui/ru/notifications.xml
+++ b/indra/newview/skins/default/xui/ru/notifications.xml
@@ -3232,12 +3232,6 @@
 
 Если вы - владелец премиум-аккаунта, [[PREMIUM_URL] щелкните здесь], чтобы получить право на анимационное изменение голоса.
 	</notification>
-	<notification name="VoiceEffectsWillExpire">
-		Срок действия одного или нескольких ваших типов анимационного изменения голоса истекает через [INTERVAL] дней или раньше. 
-[[URL] Щелкните здесь], чтобы обновить подписку. 
-
-Если вы - владелец премиум-аккаунта, [[PREMIUM_URL] щелкните здесь], чтобы получить право на анимационное изменение голоса.
-	</notification>
 	<notification name="VoiceEffectsNew">
 		Появились новые типы изменения голоса!
 	</notification>
diff --git a/indra/newview/skins/default/xui/tr/notifications.xml b/indra/newview/skins/default/xui/tr/notifications.xml
index 17d2969d19..30aa0c0342 100644
--- a/indra/newview/skins/default/xui/tr/notifications.xml
+++ b/indra/newview/skins/default/xui/tr/notifications.xml
@@ -3232,12 +3232,6 @@ Aboneliğinizi yenilemek için [[URL] buraya tıklayın].
 
 Özel Üye iseniz, ses dönüştürme özelliğini almak için [[PREMIUM_URL] buraya tıklayın].
 	</notification>
-	<notification name="VoiceEffectsWillExpire">
-		Ses Dönüşümlerinizden birinin ya da daha fazlasının süresi [INTERVAL] günden daha az bir zamanda dolacak. 
-Aboneliğinizi yenilemek için [[URL] buraya tıklayın]. 
-
-Özel Üye iseniz, ses dönüştürme özelliğini almak için [[PREMIUM_URL] buraya tıklayın].
-	</notification>
 	<notification name="VoiceEffectsNew">
 		Yeni Ses Şekilleri kullanılabilir!
 	</notification>
diff --git a/indra/newview/skins/default/xui/zh/notifications.xml b/indra/newview/skins/default/xui/zh/notifications.xml
index 4d0f1cb85b..3ebea7dc27 100644
--- a/indra/newview/skins/default/xui/zh/notifications.xml
+++ b/indra/newview/skins/default/xui/zh/notifications.xml
@@ -3216,12 +3216,6 @@ SHA1 指紋：[MD5_DIGEST]
 
 付費用戶請[[PREMIUM_URL] 點按這裡]領取免費變聲工具。
 	</notification>
-	<notification name="VoiceEffectsWillExpire">
-		至少一個你訂用的變聲效果將在 [INTERVAL] 天後到期。
-[[URL] 點按這裡]繼續訂用。
-
-付費用戶請[[PREMIUM_URL] 點按這裡]領取免費變聲工具。
-	</notification>
 	<notification name="VoiceEffectsNew">
 		新的變聲效果上市了！
 	</notification>