1 files changed, 197 insertions, 82 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 6512bbc392..e38622b43b 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -31,10 +31,10 @@
 #include "llfasttimer.h"
 #include "llsd.h"
 #include <vector>
+#include <sstream>
 
 #if LL_WINDOWS
-#include "llwin32headerslean.h"
-#include <winnls.h> // for WideCharToMultiByte
+#include "llwin32headers.h"
 #endif
 
 std::string ll_safe_string(const char* in)
@@ -141,10 +141,10 @@ std::string rawstr_to_utf8(const std::string& raw)
     return wstring_to_utf8str(wstr);
 }
 
-std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars)
+std::string wchar_to_utf8chars(llwchar in_char)
 {
-    U32 cur_char = (U32)in_char;
-    char* base = outchars;
+    U32 cur_char(in_char);
+    char buff[8], *outchars = buff;
     if (cur_char < 0x80)
     {
         *outchars++ = (U8)cur_char;
@@ -189,7 +189,7 @@ std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars)
         LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL;
         *outchars++ = LL_UNKNOWN_CHAR;
     }
-    return outchars - base;
+    return { buff, std::string::size_type(outchars - buff) };
 }
 
 auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
@@ -214,7 +214,8 @@ auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
 
 llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
 {
-    llutf16string out;
+    // ostringstream for llutf16string
+    std::basic_ostringstream<U16> out;
 
     S32 i = 0;
     while (i < len)
@@ -222,16 +223,16 @@ llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
         U32 cur_char = utf32str[i];
         if (cur_char > 0xFFFF)
         {
-            out += (0xD7C0 + (cur_char >> 10));
-            out += (0xDC00 | (cur_char & 0x3FF));
+            out.put(U16(0xD7C0 + (cur_char >> 10)));
+            out.put(U16(0xDC00 | (cur_char & 0x3FF)));
         }
         else
         {
-            out += cur_char;
+            out.put(U16(cur_char));
         }
         i++;
     }
-    return out;
+    return out.str();
 }
 
 llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
@@ -242,18 +243,26 @@ llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
 
 LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
 {
-    LLWString wout;
-    if (len == 0) return wout;
+    if (len == 0) return {};
+
+    // MS doesn't support std::basic_ostringstream<llwchar>; have to work
+    // around it.
+    std::vector<llwchar> wout;
+    // We want to minimize allocations. We don't know how many llwchars we'll
+    // generate from this utf16str, but we do know the length should be at
+    // most len. So if we reserve 'len' llwchars, we shouldn't need to expand
+    // wout incrementally.
+    wout.reserve(len);
 
     S32 i = 0;
     const U16* chars16 = utf16str;
     while (i < len)
     {
         llwchar cur_char;
-        i += utf16chars_to_wchar(chars16+i, &cur_char);
-        wout += cur_char;
+        i += (S32)utf16chars_to_wchar(chars16+i, &cur_char);
+        wout.push_back(cur_char);
     }
-    return wout;
+    return { wout.begin(), wout.end() };
 }
 
 // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
@@ -308,10 +317,10 @@ S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wle
 // Given a wstring and an offset in it, returns the length as wstring (i.e.,
 // number of llwchars) of the longest substring that starts at the offset
 // and whose equivalent utf-16 string does not exceeds the given utf16_length.
-S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned)
+S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, bool *unaligned)
 {
     const auto end = wstr.length();
-    BOOL u = FALSE;
+    bool u{ false };
     S32 n = woffset + utf16_length;
     S32 i = woffset;
     while (i < end)
@@ -367,13 +376,12 @@ std::string wchar_utf8_preview(const llwchar wc)
     std::ostringstream oss;
     oss << std::hex << std::uppercase << (U32)wc;
 
-    U8 out_bytes[8];
-    U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes);
+    auto out_bytes = wchar_to_utf8chars(wc);
 
-    if (size > 1)
+    if (out_bytes.length() > 1)
     {
         oss << " [";
-        for (U32 i = 0; i < size; ++i)
+        for (U32 i = 0; i < out_bytes.length(); ++i)
         {
             if (i)
             {
@@ -399,7 +407,14 @@ S32 wstring_utf8_length(const LLWString& wstr)
 
 LLWString utf8str_to_wstring(const char* utf8str, size_t len)
 {
-    LLWString wout;
+    // MS doesn't support std::basic_ostringstream<llwchar>; have to work
+    // around it.
+    std::vector<llwchar> wout;
+    // We want to minimize allocations. We don't know how many llwchars we'll
+    // generate from this utf8str, but we do know the length should be at most
+    // len. So if we reserve 'len' llwchars, we shouldn't need to expand wout
+    // incrementally.
+    wout.reserve(len);
 
     S32 i = 0;
     while (i < len)
@@ -442,7 +457,7 @@ LLWString utf8str_to_wstring(const char* utf8str, size_t len)
             }
             else
             {
-                wout += LL_UNKNOWN_CHAR;
+                wout.push_back(LL_UNKNOWN_CHAR);
                 ++i;
                 continue;
             }
@@ -479,26 +494,21 @@ LLWString utf8str_to_wstring(const char* utf8str, size_t len)
             }
         }
 
-        wout += unichar;
+        wout.push_back(unichar);
         ++i;
     }
-    return wout;
+    return { wout.begin(), wout.end() };
 }
 
 std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
 {
-    std::string out;
+    std::ostringstream out;
 
-    S32 i = 0;
-    while (i < len)
+    for (size_t i = 0; i < len; ++i)
     {
-        char tchars[8];     /* Flawfinder: ignore */
-        auto n = wchar_to_utf8chars(utf32str[i], tchars);
-        tchars[n] = 0;
-        out += tchars;
-        i++;
+        out << wchar_to_utf8chars(utf32str[i]);
     }
-    return out;
+    return out.str();
 }
 
 std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
@@ -686,7 +696,21 @@ llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t lengt
 
 std::string utf8str_showBytesUTF8(const std::string& utf8str)
 {
-    std::string result;
+    std::ostringstream result;
+    char lastchar = '\0';
+    auto append = [&result, &lastchar](char c)
+    {
+        lastchar = c;
+        result << c;
+    };
+    auto appends = [&result, &lastchar](const std::string& s)
+    {
+        if (! s.empty())
+        {
+            lastchar = s.back();
+            result << s;
+        }
+    };
 
     bool in_sequence = false;
     size_t sequence_size = 0;
@@ -695,9 +719,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str)
 
     auto open_sequence = [&]()
         {
-            if (!result.empty() && result.back() != '\n')
-                result += '\n'; // Use LF as a separator before new UTF-8 sequence
-            result += '[';
+            if (lastchar != '\0' && lastchar != '\n')
+                append('\n'); // Use LF as a separator before new UTF-8 sequence
+            append('[');
             in_sequence = true;
         };
 
@@ -706,9 +730,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str)
             llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size);
             if (unicode != LL_UNKNOWN_CHAR)
             {
-                result += llformat("+%04X", unicode);
+                appends(llformat("+%04X", unicode));
             }
-            result += ']';
+            append(']');
             in_sequence = false;
             sequence_size = 0;
         };
@@ -729,9 +753,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str)
             }
             else // Continue the same UTF-8 sequence
             {
-                result += '.';
+                append('.');
             }
-            result += llformat("%02X", byte); // The byte is represented in hexadecimal form
+            appends(llformat("%02X", byte)); // The byte is represented in hexadecimal form
             ++sequence_size;
         }
         else // ASCII symbol is represented as a character
@@ -741,10 +765,10 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str)
                 close_sequence();
                 if (byte != '\n')
                 {
-                    result += '\n'; // Use LF as a separator between UTF-8 and ASCII
+                    append('\n'); // Use LF as a separator between UTF-8 and ASCII
                 }
             }
-            result += byte;
+            append(byte);
         }
         ++byte_index;
     }
@@ -754,11 +778,11 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str)
         close_sequence();
     }
 
-    return result;
+    return result.str();
 }
 
 // Search for any emoji symbol, return true if found
-bool wstring_has_emoji(const LLWString& wstr)
+bool wstring_has_emoji(LLWStringView wstr)
 {
     for (const llwchar& wch : wstr)
     {
@@ -809,7 +833,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned
             code_page,
             0,
             in,
-            len_in,
+            static_cast<int>(len_in),
             NULL,
             0,
             0,
@@ -824,7 +848,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned
                 code_page,
                 0,
                 in,
-                len_in,
+                static_cast<int>(len_in),
                 pout,
                 len_out,
                 0,
@@ -851,8 +875,8 @@ std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int
     std::vector<wchar_t> w_out(len + 1);
 
     memset(&w_out[0], 0, w_out.size());
-    int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,
-                                                  &w_out[0], w_out.size() - 1);
+    int real_output_str_len = MultiByteToWideChar(code_page, 0, in, static_cast<int>(len),
+                                                  &w_out[0], static_cast<int>(w_out.size() - 1));
 
     //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
     w_out[real_output_str_len] = 0;
@@ -900,6 +924,11 @@ void HeapFree_deleter(void* ptr)
 
 } // anonymous namespace
 
+unsigned long windows_get_last_error()
+{
+    return GetLastError();
+}
+
 template<>
 std::wstring windows_message<std::wstring>(DWORD error)
 {
@@ -938,12 +967,12 @@ std::wstring windows_message<std::wstring>(DWORD error)
     return out.str();
 }
 
-boost::optional<std::wstring> llstring_getoptenv(const std::string& key)
+std::optional<std::wstring> llstring_getoptenv(const std::string& key)
 {
     auto wkey = ll_convert_string_to_wide(key);
     // Take a wild guess as to how big the buffer should be.
     std::vector<wchar_t> buffer(1024);
-    auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
+    auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], static_cast<DWORD>(buffer.size()));
     // If our initial guess was too short, n will indicate the size (in
     // wchar_t's) that buffer should have been, including the terminating nul.
     if (n > (buffer.size() - 1))
@@ -951,13 +980,13 @@ boost::optional<std::wstring> llstring_getoptenv(const std::string& key)
         // make it big enough
         buffer.resize(n);
         // and try again
-        n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
+        n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], static_cast<DWORD>(buffer.size()));
     }
     // did that (ultimately) succeed?
     if (n)
     {
-        // great, return populated boost::optional
-        return boost::optional<std::wstring>(&buffer[0]);
+        // great, return populated std::optional
+        return std::make_optional<std::wstring>(&buffer[0]);
     }
 
     // not successful
@@ -968,23 +997,23 @@ boost::optional<std::wstring> llstring_getoptenv(const std::string& key)
         LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: "
                    << windows_message<std::string>(last_error) << LL_ENDL;
     }
-    // return empty boost::optional
+    // return empty std::optional
     return {};
 }
 
 #else  // ! LL_WINDOWS
 
-boost::optional<std::string> llstring_getoptenv(const std::string& key)
+std::optional<std::string> llstring_getoptenv(const std::string& key)
 {
     auto found = getenv(key.c_str());
     if (found)
     {
-        // return populated boost::optional
-        return boost::optional<std::string>(found);
+        // return populated std::optional
+        return std::make_optional<std::string>(found);
     }
     else
     {
-        // return empty boost::optional
+        // return empty std::optional
         return {};
     }
 }
@@ -1017,7 +1046,7 @@ bool LLStringOps::isEmoji(llwchar a)
     // These are indeed "genuine" emojis, we *do want* rendered as such. HB
     return a >= 0x1f000 && a < 0x20000;
 #endif
-}
+    }
 
 S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
 {
@@ -1115,18 +1144,27 @@ void LLStringOps::setupDayFormat(const std::string& data)
 }
 
 
-std::string LLStringOps::getDatetimeCode (std::string key)
+std::string LLStringOps::getDatetimeCode(std::string key)
 {
-    std::map<std::string, std::string>::iterator iter;
+    std::map<std::string, std::string>::iterator iter = datetimeToCodes.find(key);
+    return iter == datetimeToCodes.end() ? LLStringUtil::null : iter->second;
+}
 
-    iter = datetimeToCodes.find (key);
-    if (iter != datetimeToCodes.end())
+void LLStringOps::splitString(const std::string& text, char delimiter,
+    std::function<void(const std::string&)> handler)
+{
+    std::size_t from = 0;
+    for (std::size_t i = 0; i < text.size(); ++i)
     {
-        return iter->second;
+        if (text[i] == delimiter)
+        {
+            handler(text.substr(from, i - from));
+            from = i + 1;
+        }
     }
-    else
+    if (from <= text.size())
     {
-        return std::string("");
+        handler(text.substr(from));
     }
 }
 
@@ -1208,6 +1246,75 @@ namespace LLStringFn
         return output;
     }
 
+    using literals_t = std::map<char, std::string>;
+    static const literals_t xml_elem_literals =
+    {
+        { '<', "&lt;" },
+        { '>', "&gt;" },
+        { '&', "&amp;" }
+    };
+    static const literals_t xml_attr_literals =
+    {
+        { '"', "&quot;" },
+        { '\'', "&apos;" }
+    };
+
+    static void literals_encode(std::string& text, const literals_t& literals)
+    {
+        for (const std::pair<char, std::string> it : literals)
+        {
+            std::string::size_type pos = 0;
+            while ((pos = text.find(it.first, pos)) != std::string::npos)
+            {
+                text.replace(pos, 1, it.second);
+                pos += it.second.size();
+            }
+        }
+    }
+
+    static void literals_decode(std::string& text, const literals_t& literals)
+    {
+        for (const std::pair<char, std::string> it : literals)
+        {
+            std::string::size_type pos = 0;
+            while ((pos = text.find(it.second, pos)) != std::string::npos)
+            {
+                text[pos++] = it.first;
+                text.erase(pos, it.second.size() - 1);
+            }
+        }
+    }
+
+    /**
+     * @brief Replace all characters that are not allowed in XML 1.0
+     * with corresponding literals: [ < > & ] => [ &lt; &gt; &amp; ]
+     */
+    std::string xml_encode(const std::string& input, bool for_attribute)
+    {
+        std::string result(input);
+        literals_encode(result, xml_elem_literals);
+        if (for_attribute)
+        {
+            literals_encode(result, xml_attr_literals);
+        }
+        return result;
+    }
+
+    /**
+     * @brief Replace some of XML literals that are defined in XML 1.0
+     * with corresponding characters: [ &lt; &gt; &amp; ] => [ < > & ]
+     */
+    std::string xml_decode(const std::string& input, bool for_attribute)
+    {
+        std::string result(input);
+        literals_decode(result, xml_elem_literals);
+        if (for_attribute)
+        {
+            literals_decode(result, xml_attr_literals);
+        }
+        return result;
+    }
+
     /**
      * @brief Replace all control characters (c < 0x20) with replacement in
      * string.
@@ -1336,6 +1443,14 @@ bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token
 template<>
 void LLStringUtil::setLocale(std::string inLocale)
 {
+    if(startsWith(inLocale, "MissingString"))
+    {
+        // it seems this hasn't been working for some time, and I'm not sure how it is intentded to
+        // properly discover the correct locale.  early out now to avoid failures later in
+        // formatNumber()
+        LL_WARNS() << "Failed attempting to set invalid locale: " << inLocale << LL_ENDL;
+        return;
+    }
     sLocale = inLocale;
 };
 
@@ -1513,7 +1628,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
     LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;
     S32 res = 0;
 
-    std::string output;
+    std::ostringstream output;
     std::vector<std::string> tokens;
 
     std::string::size_type start = 0;
@@ -1521,7 +1636,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
     std::string::size_type key_start = 0;
     while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
     {
-        output += std::string(s, prev_start, key_start-prev_start);
+        output << std::string(s, prev_start, key_start-prev_start);
         prev_start = start;
 
         bool found_replacement = false;
@@ -1552,7 +1667,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
             if (iter != substitutions.end())
             {
                 S32 secFromEpoch = 0;
-                BOOL r = LLStringUtil::convertToS32(iter->second, secFromEpoch);
+                bool r = LLStringUtil::convertToS32(iter->second, secFromEpoch);
                 if (r)
                 {
                     found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch);
@@ -1562,20 +1677,20 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
 
         if (found_replacement)
         {
-            output += replacement;
+            output << replacement;
             res++;
         }
         else
         {
             // we had no replacement, use the string as is
             // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
-            output += std::string(s, key_start, start-key_start);
+            output << std::string(s, key_start, start-key_start);
         }
         tokens.clear();
     }
     // send the remainder of the string (with no further matches for bracketed names)
-    output += std::string(s, start);
-    s = output;
+    output << std::string(s, start);
+    s = output.str();
     return res;
 }
 
@@ -1591,7 +1706,7 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
         return res;
     }
 
-    std::string output;
+    std::ostringstream output;
     std::vector<std::string> tokens;
 
     std::string::size_type start = 0;
@@ -1599,7 +1714,7 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
     std::string::size_type key_start = 0;
     while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
     {
-        output += std::string(s, prev_start, key_start-prev_start);
+        output << std::string(s, prev_start, key_start-prev_start);
         prev_start = start;
 
         bool found_replacement = false;
@@ -1632,20 +1747,20 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
 
         if (found_replacement)
         {
-            output += replacement;
+            output << replacement;
             res++;
         }
         else
         {
             // we had no replacement, use the string as is
             // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
-            output += std::string(s, key_start, start-key_start);
+            output << std::string(s, key_start, start-key_start);
         }
         tokens.clear();
     }
     // send the remainder of the string (with no further matches for bracketed names)
-    output += std::string(s, start);
-    s = output;
+    output << std::string(s, start);
+    s = output.str();
     return res;
 }