/**
 * @file llstring.cpp
 * @brief String utility functions and the std::string class.
 *
 * $LicenseInfo:firstyear=2001&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
 * $/LicenseInfo$
 */

#include "linden_common.h"

#include "llstring.h"
#include "llerror.h"
#include "llfasttimer.h"
#include "llsd.h"
#include <vector>
#include <sstream>

#if LL_WINDOWS
#include "llwin32headers.h"
#endif

// Builds a std::string from a NUL-terminated C string.
// A null pointer yields an empty string instead of being dereferenced.
std::string ll_safe_string(const char* in)
{
    return in ? std::string(in) : std::string();
}

// Builds a std::string from the first maxlen bytes at 'in'.
// Returns an empty string when 'in' is null or maxlen is not positive.
// The bytes are copied by count: a NUL inside the range does not end the
// copy, so the caller must guarantee that maxlen bytes are readable.
std::string ll_safe_string(const char* in, S32 maxlen)
{
    if (!in || maxlen <= 0)
    {
        return std::string();
    }

    return std::string(in, maxlen);
}

// Returns true when 'hex' is one of 0-9, a-f or A-F.
bool is_char_hex(char hex)
{
    const bool is_digit = (hex >= '0') && (hex <= '9');
    const bool is_lower = (hex >= 'a') && (hex <= 'f');
    const bool is_upper = (hex >= 'A') && (hex <= 'F');
    return is_digit || is_lower || is_upper;
}

// Converts one hexadecimal digit (0-9, a-f, A-F) to its value 0..15.
// Any other character maps to 0, so callers cannot tell '0' from invalid
// input by the result alone.
U8 hex_as_nybble(char hex)
{
    if ((hex >= '0') && (hex <= '9'))
    {
        return (U8)(hex - '0');
    }
    if ((hex >= 'a') && (hex <= 'f'))
    {
        return (U8)(10 + hex - 'a');
    }
    if ((hex >= 'A') && (hex <= 'F'))
    {
        return (U8)(10 + hex - 'A');
    }
    // not a hex digit
    return 0;
}

// Returns true when the code point lies in one of three fixed ranges:
// 0x2E80-0x9FFF, 0xAC00-0xD7A0 or 0xF900-0xFA60 (inclusive).
// NOTE(review): these ranges look like the CJK ideograph, Hangul syllable
// and CJK compatibility ideograph areas of Unicode -- confirm intent.
bool iswindividual(llwchar elem)
{
    const U32 code = (U32)elem;
    return (0x2E80 <= code && code <= 0x9FFF)
        || (0xAC00 <= code && code <= 0xD7A0)
        || (0xF900 <= code && code <= 0xFA60);
}

bool _read_file_into_string(std::string& str, const std::string& filename)
{
    llifstream ifs(filename.c_str(), llifstream::binary);
    if (!ifs.is_open())
    {
        LL_INFOS() << "Unable to open file " << filename << LL_ENDL;
        return false;
    }

    std::ostringstream oss;

    oss << ifs.rdbuf();
    str = oss.str();
    ifs.close();
    return true;
}




// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
// for the Unicode reference implementation. The code below does not match it
// because it was written before that reference was found.


// Streams a wide (UTF-32) string by converting it to UTF-8 first.
std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
{
    return s << wstring_to_utf8str(wstr);
}

std::string rawstr_to_utf8(const std::string& raw)
{
    LLWString wstr(utf8str_to_wstring(raw));
    return wstring_to_utf8str(wstr);
}

// Encodes a single code point as UTF-8 and returns the bytes as a string.
// Produces 1 to 6 bytes (the historical 5- and 6-byte forms are emitted for
// values of 0x200000 and above). Values of 0x80000000 and above are logged
// as invalid and replaced by the single character LL_UNKNOWN_CHAR.
std::string wchar_to_utf8chars(llwchar in_char)
{
    const U32 code(in_char);
    char encoded[8];
    std::string::size_type count = 0;

    if (code < 0x80)
    {
        // 1 byte: plain ASCII
        encoded[count++] = (U8)code;
    }
    else if (code < 0x800)
    {
        // 2 bytes: 110xxxxx 10xxxxxx
        encoded[count++] = 0xC0 | (code >> 6);
        encoded[count++] = 0x80 | (code & 0x3F);
    }
    else if (code < 0x10000)
    {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        encoded[count++] = 0xE0 | (code >> 12);
        encoded[count++] = 0x80 | ((code >> 6) & 0x3F);
        encoded[count++] = 0x80 | (code & 0x3F);
    }
    else if (code < 0x200000)
    {
        // 4 bytes
        encoded[count++] = 0xF0 | (code >> 18);
        encoded[count++] = 0x80 | ((code >> 12) & 0x3F);
        encoded[count++] = 0x80 | ((code >> 6) & 0x3F);
        encoded[count++] = 0x80 | (code & 0x3F);
    }
    else if (code < 0x4000000)
    {
        // 5 bytes
        encoded[count++] = 0xF8 | (code >> 24);
        encoded[count++] = 0x80 | ((code >> 18) & 0x3F);
        encoded[count++] = 0x80 | ((code >> 12) & 0x3F);
        encoded[count++] = 0x80 | ((code >> 6) & 0x3F);
        encoded[count++] = 0x80 | (code & 0x3F);
    }
    else if (code < 0x80000000)
    {
        // 6 bytes
        encoded[count++] = 0xFC | (code >> 30);
        encoded[count++] = 0x80 | ((code >> 24) & 0x3F);
        encoded[count++] = 0x80 | ((code >> 18) & 0x3F);
        encoded[count++] = 0x80 | ((code >> 12) & 0x3F);
        encoded[count++] = 0x80 | ((code >> 6) & 0x3F);
        encoded[count++] = 0x80 | (code & 0x3F);
    }
    else
    {
        LL_WARNS() << "Invalid Unicode character " << code << "!" << LL_ENDL;
        encoded[count++] = LL_UNKNOWN_CHAR;
    }

    return std::string(encoded, count);
}

auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
{
    const U16* base = inchars;
    U16 cur_char = *inchars++;
    llwchar char32 = cur_char;
    if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
    {
        // Surrogates
        char32 = ((llwchar)(cur_char - 0xD800)) << 10;
        cur_char = *inchars++;
        char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
    }
    else
    {
        char32 = (llwchar)cur_char;
    }
    *outchar = char32;
    return inchars - base;
}

// Converts 'len' UTF-32 code points starting at 'utf32str' to UTF-16.
// Code points above 0xFFFF become a surrogate pair; everything else is
// copied as a single 16-bit unit (no validation is performed).
llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
{
    llutf16string out;
    // At least one unit per input character; avoids most reallocations.
    out.reserve(len);

    // Index with size_t so the comparison against 'len' is not signed/unsigned.
    for (size_t i = 0; i < len; ++i)
    {
        const U32 cur_char = utf32str[i];
        if (cur_char > 0xFFFF)
        {
            // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the 0x10000 bias
            // into the lead surrogate.
            out.push_back(U16(0xD7C0 + (cur_char >> 10)));
            out.push_back(U16(0xDC00 | (cur_char & 0x3FF)));
        }
        else
        {
            out.push_back(U16(cur_char));
        }
    }
    return out;
}

// Converts 'len' bytes of UTF-8 to UTF-16 by going through the wide
// (UTF-32) representation.
llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
{
    const LLWString wide = utf8str_to_wstring(utf8str, len);
    return wstring_to_utf16str(wide);
}

// Converts 'len' UTF-16 units starting at 'utf16str' to a wide (UTF-32)
// string. Surrogate pairs are combined by utf16chars_to_wchar().
// A surrogate unit in the very last position has no partner inside the
// buffer; it is replaced by LL_UNKNOWN_CHAR rather than decoded with a unit
// read from beyond the end of the input.
LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
{
    if (len == 0) return {};

    // MS doesn't support std::basic_ostringstream<llwchar>; have to work
    // around it.
    std::vector<llwchar> wout;
    // We want to minimize allocations. We don't know how many llwchars we'll
    // generate from this utf16str, but we do know the length should be at
    // most len. So if we reserve 'len' llwchars, we shouldn't need to expand
    // wout incrementally.
    wout.reserve(len);

    size_t i = 0;
    while (i < len)
    {
        const U16 unit = utf16str[i];
        // utf16chars_to_wchar() consumes two units for anything in the
        // surrogate range, so make sure the second one is inside the buffer.
        if ((unit >= 0xD800) && (unit <= 0xDFFF) && (i + 1 >= len))
        {
            wout.push_back(LL_UNKNOWN_CHAR);
            break;
        }

        llwchar cur_char;
        i += (size_t)utf16chars_to_wchar(utf16str + i, &cur_char);
        wout.push_back(cur_char);
    }
    return { wout.begin(), wout.end() };
}

// Length in llwchar (UTF-32) of the first utf16_len units (16 bits each) of
// the given UTF-16 string: every well-formed surrogate pair counts once.
// A lead surrogate that is the last unit in range ends the scan; a lead
// surrogate not followed by a trail surrogate counts as one character.
S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
{
    // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
    const U16 *const units = &(*(utf16str.begin()));
    S32 pairs = 0;
    for (S32 pos = 0; pos < utf16_len; )
    {
        const U16 lead = units[pos++];
        if (lead < 0xD800 || lead > 0xDBFF)     // See http://en.wikipedia.org/wiki/UTF-16
        {
            continue;   // not the first half of a surrogate pair
        }
        if (pos >= utf16_len)
        {
            break;      // pair would extend past the requested range
        }
        const U16 trail = units[pos];
        if (trail >= 0xDC00 && trail <= 0xDFFF)
        {
            // valid second half: the pair is a single character
            ++pairs;
            ++pos;
        }
    }
    return utf16_len - pairs;
}

// Length in UTF-16 units of (at most) wlen wchars beginning at woffset.
// The range is clipped to the end of wstr; characters of 0x10000 and above
// need two units. Returns 0 when the clipped range is empty or inverted.
S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
{
    const S32 stop = llmin((S32)wstr.length(), woffset + wlen);
    if (stop < woffset)
    {
        return 0;
    }

    // One unit per character, plus one more for each supplementary character.
    S32 units = stop - woffset;
    for (S32 pos = woffset; pos < stop; ++pos)
    {
        if (wstr[pos] >= 0x10000)
        {
            ++units;
        }
    }
    return units;
}

// Given a wstring and an offset in it, returns the length as wstring (i.e.,
// number of llwchars) of the longest substring that starts at the offset
// and whose equivalent utf-16 string does not exceed the given utf16_length.
//
// If 'unaligned' is non-null it receives a flag: it is set to (i > n) at the
// point where the scan stops on the limit, and to false when the scan runs
// off the end of wstr instead.
// NOTE(review): the exact meaning callers attach to 'unaligned' is not
// visible here -- confirm against the call sites before relying on it.
S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, bool *unaligned)
{
    const auto end = wstr.length();
    bool u{ false };
    // n is the wstring index at which the UTF-16 budget runs out; it starts
    // as if every character took one unit and is pulled back by one for each
    // character that needs two.
    S32 n = woffset + utf16_length;
    S32 i = woffset;
    // Note: i is signed while end is unsigned, so the comparison is done in
    // the unsigned type.
    while (i < end)
    {
        if (wstr[i] >= 0x10000)
        {
            // supplementary character: costs one extra UTF-16 unit
            --n;
        }
        if (i >= n)
        {
            // budget reached (i == n) or overshot by this character (i > n)
            u = (i > n);
            break;
        }
        i++;
    }
    if (unaligned)
    {
        *unaligned = u;
    }
    return i - woffset;
}

// Number of bytes (1..6) wchar_to_utf8chars()-style UTF-8 encoding needs
// for the code point 'wc'. Thresholds are 0x80, 0x800, 0x10000, 0x200000
// and 0x4000000; anything at or above the last one reports 6.
S32 wchar_utf8_length(const llwchar wc)
{
    if (wc < 0x80)      return 1;
    if (wc < 0x800)     return 2;
    if (wc < 0x10000)   return 3;
    if (wc < 0x200000)  return 4;
    if (wc < 0x4000000) return 5;
    return 6;
}

std::string wchar_utf8_preview(const llwchar wc)
{
    std::ostringstream oss;
    oss << std::hex << std::uppercase << (U32)wc;

    auto out_bytes = wchar_to_utf8chars(wc);

    if (out_bytes.length() > 1)
    {
        oss << " [";
        for (U32 i = 0; i < out_bytes.length(); ++i)
        {
            if (i)
            {
                oss << ", ";
            }
            oss << (int)out_bytes[i];
        }
        oss << "]";
    }

    return oss.str();
}

// Total number of bytes needed to encode 'wstr' as UTF-8: the sum of
// wchar_utf8_length() over all of its characters.
S32 wstring_utf8_length(const LLWString& wstr)
{
    S32 total = 0;
    S32 pos = 0;
    while (pos < (S32)wstr.length())
    {
        total += wchar_utf8_length(wstr[pos]);
        ++pos;
    }
    return total;
}

// Decodes 'len' bytes of UTF-8 starting at 'utf8str' into a wide (UTF-32)
// string. Sequences of up to six bytes are accepted. Each invalid lead
// byte, malformed or truncated sequence, and overlong encoding produces one
// LL_UNKNOWN_CHAR in the output. Bytes at index 'len' and beyond are never
// read.
LLWString utf8str_to_wstring(const char* utf8str, size_t len)
{
    // MS doesn't support std::basic_ostringstream<llwchar>; have to work
    // around it.
    std::vector<llwchar> wout;
    // We want to minimize allocations. We don't know how many llwchars we'll
    // generate from this utf8str, but we do know the length should be at most
    // len. So if we reserve 'len' llwchars, we shouldn't need to expand wout
    // incrementally.
    wout.reserve(len);

    // size_t index: no signed/unsigned mismatch against 'len'.
    size_t i = 0;
    while (i < len)
    {
        llwchar unichar;
        U8 cur_char = utf8str[i];

        if (cur_char < 0x80)
        {
            // Ascii character, just add it
            unichar = cur_char;
        }
        else
        {
            // Classify the lead byte: it fixes the number of continuation
            // bytes and contributes the high bits of the code point.
            S32 cont_bytes = 0;
            if ((cur_char >> 5) == 0x6)         // Two byte UTF8 -> 1 UTF32
            {
                unichar = (0x1F&cur_char);
                cont_bytes = 1;
            }
            else if ((cur_char >> 4) == 0xe)    // Three byte UTF8 -> 1 UTF32
            {
                unichar = (0x0F&cur_char);
                cont_bytes = 2;
            }
            else if ((cur_char >> 3) == 0x1e)   // Four byte UTF8 -> 1 UTF32
            {
                unichar = (0x07&cur_char);
                cont_bytes = 3;
            }
            else if ((cur_char >> 2) == 0x3e)   // Five byte UTF8 -> 1 UTF32
            {
                unichar = (0x03&cur_char);
                cont_bytes = 4;
            }
            else if ((cur_char >> 1) == 0x7e)   // Six byte UTF8 -> 1 UTF32
            {
                unichar = (0x01&cur_char);
                cont_bytes = 5;
            }
            else
            {
                // Stray continuation byte or 0xFE/0xFF
                wout.push_back(LL_UNKNOWN_CHAR);
                ++i;
                continue;
            }

            // Check that this character doesn't go past the end of the string
            const size_t end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
            do
            {
                ++i;

                if (i >= len)
                {
                    // Sequence is cut off by the end of the input. Treat it
                    // like any other malformed sequence instead of reading
                    // the byte one past the end of the buffer.
                    unichar = LL_UNKNOWN_CHAR;
                    --i;
                    break;
                }

                cur_char = utf8str[i];
                if ( (cur_char >> 6) == 0x2 )
                {
                    unichar <<= 6;
                    unichar += (0x3F&cur_char);
                }
                else
                {
                    // Malformed sequence - roll back to look at this as a new char
                    unichar = LL_UNKNOWN_CHAR;
                    --i;
                    break;
                }
            } while(i < end);

            // Handle overlong characters and NULL characters
            if ( ((cont_bytes == 1) && (unichar < 0x80))
                || ((cont_bytes == 2) && (unichar < 0x800))
                || ((cont_bytes == 3) && (unichar < 0x10000))
                || ((cont_bytes == 4) && (unichar < 0x200000))
                || ((cont_bytes == 5) && (unichar < 0x4000000)) )
            {
                unichar = LL_UNKNOWN_CHAR;
            }
        }

        wout.push_back(unichar);
        ++i;
    }
    return { wout.begin(), wout.end() };
}

std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
{
    std::ostringstream out;

    for (size_t i = 0; i < len; ++i)
    {
        out << wchar_to_utf8chars(utf32str[i]);
    }
    return out.str();
}

std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
{
    return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
}

std::string utf8str_trim(const std::string& utf8str)
{
    LLWString wstr = utf8str_to_wstring(utf8str);
    LLWStringUtil::trim(wstr);
    return wstring_to_utf8str(wstr);
}


std::string utf8str_tolower(const std::string& utf8str)
{
    LLWString out_str = utf8str_to_wstring(utf8str);
    LLWStringUtil::toLower(out_str);
    return wstring_to_utf8str(out_str);
}


// Compares two UTF-8 strings with LLWStringUtil::compareInsensitive() after
// converting both to wide strings; returns whatever that comparison returns.
S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
{
    const LLWString wide_lhs = utf8str_to_wstring(lhs);
    const LLWString wide_rhs = utf8str_to_wstring(rhs);
    return LLWStringUtil::compareInsensitive(wide_lhs, wide_rhs);
}

std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
{
    if (0 == max_len)
    {
        return std::string();
    }
    if ((S32)utf8str.length() <= max_len)
    {
        return utf8str;
    }
    else
    {
        S32 cur_char = max_len;

        // If we're ASCII, we don't need to do anything
        if ((U8)utf8str[cur_char] > 0x7f)
        {
            // If first two bits are (10), it's the tail end of a multibyte char.  We need to shift back
            // to the first character
            while (0x80 == (0xc0 & utf8str[cur_char]))
            {
                cur_char--;
                // Keep moving forward until we hit the first char;
                if (cur_char == 0)
                {
                    // Make sure we don't trash memory if we've got a bogus string.
                    break;
                }
            }
        }
        // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
        return utf8str.substr(0, cur_char);
    }
}

std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len)
{
    if (0 == symbol_len)
    {
        return std::string();
    }
    if ((S32)utf8str.length() <= symbol_len)
    {
        return utf8str;
    }
    else
    {
        int len = 0, byteIndex = 0;
        const char* aStr = utf8str.c_str();
        size_t origSize = utf8str.size();

        for (byteIndex = 0; len < symbol_len && byteIndex < origSize; byteIndex++)
        {
            if ((aStr[byteIndex] & 0xc0) != 0x80)
            {
                len += 1;
            }
        }
        return utf8str.substr(0, byteIndex);
    }
}

std::string utf8str_substChar(
    const std::string& utf8str,
    const llwchar target_char,
    const llwchar replace_char)
{
    LLWString wstr = utf8str_to_wstring(utf8str);
    LLWStringUtil::replaceChar(wstr, target_char, replace_char);
    //wstr = wstring_substChar(wstr, target_char, replace_char);
    return wstring_to_utf8str(wstr);
}

std::string utf8str_makeASCII(const std::string& utf8str)
{
    LLWString wstr = utf8str_to_wstring(utf8str);
    LLWStringUtil::_makeASCII(wstr);
    return wstring_to_utf8str(wstr);
}

std::string mbcsstring_makeASCII(const std::string& wstr)
{
    // Replace non-ASCII chars with replace_char
    std::string out_str = wstr;
    for (S32 i = 0; i < (S32)out_str.length(); i++)
    {
        if ((U8)out_str[i] > 0x7f)
        {
            out_str[i] = LL_UNKNOWN_CHAR;
        }
    }
    return out_str;
}

std::string utf8str_removeCRLF(const std::string& utf8str)
{
    if (0 == utf8str.length())
    {
        return std::string();
    }
    const char CR = 13;

    std::string out;
    out.reserve(utf8str.length());
    const S32 len = (S32)utf8str.length();
    for( S32 i = 0; i < len; i++ )
    {
        if( utf8str[i] != CR )
        {
            out.push_back(utf8str[i]);
        }
    }
    return out;
}

// Decodes the UTF-8 sequence of 'length' bytes that starts at byte 'offset'
// of utf8str and returns the resulting code point.
// Only lengths 2 through 7 are decoded; any other length (including 1)
// returns LL_UNKNOWN_CHAR.
// No validation is done here: the bytes are not checked to be lead or
// continuation bytes, and offset + length is not checked against the size
// of the string -- the caller must pass a range that lies inside utf8str.
llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length)
{
    switch (length)
    {
    case 2:
        // 110xxxxx 10xxxxxx
        return ((utf8str[offset] & 0x1F) << 6) +
                (utf8str[offset + 1] & 0x3F);
    case 3:
        // 1110xxxx 10xxxxxx 10xxxxxx
        return ((utf8str[offset] & 0x0F) << 12) +
                ((utf8str[offset + 1] & 0x3F) << 6) +
                (utf8str[offset + 2] & 0x3F);
    case 4:
        return ((utf8str[offset] & 0x07) << 18) +
                ((utf8str[offset + 1] & 0x3F) << 12) +
                ((utf8str[offset + 2] & 0x3F) << 6) +
                (utf8str[offset + 3] & 0x3F);
    case 5:
        return ((utf8str[offset] & 0x03) << 24) +
                ((utf8str[offset + 1] & 0x3F) << 18) +
                ((utf8str[offset + 2] & 0x3F) << 12) +
                ((utf8str[offset + 3] & 0x3F) << 6) +
                (utf8str[offset + 4] & 0x3F);
    case 6:
        return ((utf8str[offset] & 0x01) << 30) +
                ((utf8str[offset + 1] & 0x3F) << 24) +
                ((utf8str[offset + 2] & 0x3F) << 18) +
                ((utf8str[offset + 3] & 0x3F) << 12) +
                ((utf8str[offset + 4] & 0x3F) << 6) +
                (utf8str[offset + 5] & 0x3F);
    case 7:
        // The byte at 'offset' itself contributes nothing here; only two
        // bits of the second byte are used for the top of the value.
        // NOTE(review): (x & 0x03) << 30 can exceed the range of a signed
        // int -- confirm this case is reachable and behaves as intended.
        return ((utf8str[offset + 1] & 0x03) << 30) +
                ((utf8str[offset + 2] & 0x3F) << 24) +
                ((utf8str[offset + 3] & 0x3F) << 18) +
                ((utf8str[offset + 4] & 0x3F) << 12) +
                ((utf8str[offset + 5] & 0x3F) << 6) +
                (utf8str[offset + 6] & 0x3F);
    }
    return LL_UNKNOWN_CHAR;
}

// Produces a debugging view of a UTF-8 string in which ASCII bytes are
// copied through unchanged and every run of non-ASCII bytes is rendered as
// a bracketed list of hexadecimal byte values, e.g. "[C3.A9+00E9]":
//  - bytes of one sequence are separated by '.';
//  - if utf8str_to_wchar() decodes the sequence to something other than
//    LL_UNKNOWN_CHAR, "+" and the code point in hex (at least 4 digits) are
//    appended before the closing ']';
//  - a line feed is inserted before a bracketed sequence and between a
//    sequence and following ASCII text, unless one is already there.
// A byte >= 0xC0 seen while a sequence is open closes it and starts a new
// one; a byte in 0x80..0xBF extends the current sequence.
std::string utf8str_showBytesUTF8(const std::string& utf8str)
{
    std::ostringstream result;
    // Last character written to 'result'; '\0' means nothing written yet.
    char lastchar = '\0';
    // Writes one character and remembers it.
    auto append = [&result, &lastchar](char c)
    {
        lastchar = c;
        result << c;
    };
    // Writes a string and remembers its last character (no-op when empty).
    auto appends = [&result, &lastchar](const std::string& s)
    {
        if (! s.empty())
        {
            lastchar = s.back();
            result << s;
        }
    };

    bool in_sequence = false;       // currently inside a bracketed sequence
    size_t sequence_size = 0;       // number of bytes in the open sequence
    size_t byte_index = 0;          // index of the byte being examined
    size_t source_length = utf8str.size();

    auto open_sequence = [&]()
        {
            if (lastchar != '\0' && lastchar != '\n')
                append('\n'); // Use LF as a separator before new UTF-8 sequence
            append('[');
            in_sequence = true;
        };

    // Called with byte_index one past the last byte of the open sequence, so
    // the sequence occupies [byte_index - sequence_size, byte_index).
    auto close_sequence = [&]()
        {
            llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size);
            if (unicode != LL_UNKNOWN_CHAR)
            {
                appends(llformat("+%04X", unicode));
            }
            append(']');
            in_sequence = false;
            sequence_size = 0;
        };

    while (byte_index < source_length)
    {
        U8 byte = utf8str[byte_index];
        if (byte >= 0x80) // Part of an UTF-8 sequence
        {
            if (!in_sequence) // Start new UTF-8 sequence
            {
                open_sequence();
            }
            else if (byte >= 0xC0) // Start another UTF-8 sequence
            {
                close_sequence();
                open_sequence();
            }
            else // Continue the same UTF-8 sequence
            {
                append('.');
            }
            appends(llformat("%02X", byte)); // The byte is represented in hexadecimal form
            ++sequence_size;
        }
        else // ASCII symbol is represented as a character
        {
            if (in_sequence) // End of UTF-8 sequence
            {
                close_sequence();
                if (byte != '\n')
                {
                    append('\n'); // Use LF as a separator between UTF-8 and ASCII
                }
            }
            append(byte);
        }
        ++byte_index;
    }

    // Input ended while a sequence was still open.
    if (in_sequence) // End of UTF-8 sequence
    {
        close_sequence();
    }

    return result.str();
}

// Returns true as soon as any character of 'wstr' satisfies
// LLStringOps::isEmoji(); false when none does (including an empty view).
bool wstring_has_emoji(LLWStringView wstr)
{
    for (auto it = wstr.begin(); it != wstr.end(); ++it)
    {
        if (LLStringOps::isEmoji(*it))
        {
            return true;
        }
    }
    return false;
}

// Removes, in place, every character of 'wstr' for which
// LLStringOps::isEmoji() is true. Returns true when at least one character
// was removed, false when the string is left unchanged.
bool wstring_remove_emojis(LLWString& wstr)
{
    // Single-pass compaction: copy each kept character down to the next free
    // slot, then drop the tail. This avoids the quadratic cost of erasing
    // characters one by one from the middle of the string.
    const size_t size = wstr.size();
    size_t kept = 0;
    for (size_t i = 0; i < size; ++i)
    {
        const llwchar wch = wstr[i];
        if (!LLStringOps::isEmoji(wch))
        {
            wstr[kept++] = wch;
        }
    }

    const bool found = (kept != size);
    if (found)
    {
        wstr.resize(kept);
    }
    return found;
}

// Cut emoji symbols if exist
bool utf8str_remove_emojis(std::string& utf8str)
{
    LLWString wstr = utf8str_to_wstring(utf8str);
    if (!wstring_remove_emojis(wstr))
        return false;
    utf8str = wstring_to_utf8str(wstr);
    return true;
}

#if LL_WINDOWS
// Returns the code page used as the default for wide-string conversions:
// CP_UTF8.
unsigned int ll_wstring_default_code_page()
{
    const unsigned int default_code_page = CP_UTF8;
    return default_code_page;
}

// Converts 'len_in' wide characters at 'in' to a narrow string in the given
// code page using WideCharToMultiByte().
// Returns an empty string when 'in' is null or when the conversion reports
// a zero output size. The result ends at the first NUL byte produced by the
// conversion (it is built from a NUL-terminated buffer).
std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)
{
    std::string out;
    if (!in)
    {
        return out;
    }

    // First call: ask how many bytes the converted text needs.
    const int len_out = WideCharToMultiByte(
        code_page,
        0,
        in,
        static_cast<int>(len_in),
        NULL,
        0,
        0,
        0);
    if (len_out <= 0)
    {
        // Nothing to convert, or the conversion failed.
        return out;
    }

    // Zero-filled buffer with room for a terminating NUL; owned by the
    // vector so it is released on every exit path.
    std::vector<char> buffer(static_cast<size_t>(len_out) + 1, '\0');

    // Second call: perform the conversion into the buffer.
    WideCharToMultiByte(
        code_page,
        0,
        in,
        static_cast<int>(len_in),
        buffer.data(),
        len_out,
        0,
        0);

    out.assign(buffer.data());
    return out;
}

// Converts 'len' bytes at 'in', interpreted in the given code page, to a
// wide string using MultiByteToWideChar().
// The result ends at the first NUL wide character in the converted text
// (it is built from a NUL-terminated buffer).
std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)
{
    // From review:
    // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
    // plus one for a null terminator, and be guaranteed to not overflow.

    //  Normally, I'd call that sort of thing premature optimization,
    // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets.
//  int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0);

    // reserve an output buffer that will be destroyed on exit, with a place
    // to put NULL terminator. The vector value-initializes every element to
    // L'\0', so no separate clearing pass is needed.
    std::vector<wchar_t> w_out(len + 1);

    const int real_output_str_len = MultiByteToWideChar(code_page, 0, in, static_cast<int>(len),
                                                        &w_out[0], static_cast<int>(w_out.size() - 1));

    //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
    w_out[real_output_str_len] = 0;

    // construct string<wchar_t> from our temporary output buffer
    return {&w_out[0]};
}

// Converts 'len' wchar_t units at 'in' to a wide (UTF-32) LLWString.
LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)
{
    // Whether or not std::wstring and llutf16string are distinct types, they
    // both hold UTF-16LE characters. (See header file comments.) Pretend this
    // wchar_t* sequence is really a U16* sequence and use the conversion we
    // define above.
    const U16* const utf16_units = reinterpret_cast<const U16*>(in);
    return utf16str_to_wstring(utf16_units, len);
}

// Converts 'len' UTF-32 code points at 'in' to a std::wstring holding the
// UTF-16 encoding of the same text.
std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)
{
    // first, convert to llutf16string, for which we have a real implementation
    const auto utf16 = wstring_to_utf16str(in, len);
    // then, because each U16 char must be UTF-16LE encoded, pretend the U16*
    // string pointer is a wchar_t* and instantiate a std::wstring of the same
    // length.
    const wchar_t* const wide_units = reinterpret_cast<const wchar_t*>(utf16.c_str());
    return std::wstring(wide_units, utf16.length());
}

// Re-encodes a string from the system ANSI code page (CP_ACP) to UTF-8 by
// converting it to wide characters and back.
std::string ll_convert_string_to_utf8_string(const std::string& in)
{
    // If you pass code_page, you must also pass length, otherwise the code
    // page parameter will be mistaken for length.
    const auto wide_text = ll_convert_string_to_wide(in, in.length(), CP_ACP);
    // CP_UTF8 is default -- see ll_wstring_default_code_page() above.
    return ll_convert_wide_to_string(wide_text);
}

namespace
{

void HeapFree_deleter(void* ptr)
{
    // instead of LocalFree(), per https://stackoverflow.com/a/31541205
    HeapFree(GetProcessHeap(), NULL, ptr);
}

} // anonymous namespace

unsigned long windows_get_last_error()
{
    return GetLastError();
}

// Returns the system message text for a Win32 error code as a std::wstring.
// If FormatMessageW() cannot produce a message, returns a synthesized
// string of the form
// "GetLastError() <error> (FormatMessageW() failed with <code>)".
template<>
std::wstring windows_message<std::wstring>(DWORD error)
{
    // derived from https://stackoverflow.com/a/455533
    wchar_t* rawptr = nullptr;
    auto okay = FormatMessageW(
        // use system message tables for GetLastError() codes
        FORMAT_MESSAGE_FROM_SYSTEM |
        // internally allocate buffer and return its pointer
        FORMAT_MESSAGE_ALLOCATE_BUFFER |
        // you cannot pass insertion parameters (thanks Gandalf)
        FORMAT_MESSAGE_IGNORE_INSERTS |
        // ignore line breaks in message definition text
        FORMAT_MESSAGE_MAX_WIDTH_MASK,
        NULL,                       // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM
        error,                      // dwMessageId
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId
        (LPWSTR)&rawptr,         // lpBuffer: force-cast wchar_t** to wchar_t*
        0,                // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER
        NULL);            // Arguments, unused

    // make a unique_ptr from rawptr so it gets cleaned up properly
    // (the deleter is not invoked when rawptr is still null)
    std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter);

    if (okay && bufferptr)
    {
        // got the message, return it ('okay' is length in characters)
        return { bufferptr.get(), okay };
    }

    // did not get the message, synthesize one
    // (GetLastError() is read here before any other API call can change it)
    auto format_message_error = GetLastError();
    std::wostringstream out;
    out << L"GetLastError() " << error << L" (FormatMessageW() failed with "
        << format_message_error << L")";
    return out.str();
}

// Looks up environment variable 'key' with GetEnvironmentVariableW().
// Returns the value as a populated std::optional when the call reports a
// non-zero length, otherwise an empty std::optional. A failure other than
// ERROR_ENVVAR_NOT_FOUND is logged as a warning.
// NOTE(review): a zero return value is always treated as "not found /
// failed"; whether a variable with an empty value can reach this path, and
// what GetLastError() holds then, should be confirmed.
std::optional<std::wstring> llstring_getoptenv(const std::string& key)
{
    auto wkey = ll_convert_string_to_wide(key);
    // Take a wild guess as to how big the buffer should be.
    std::vector<wchar_t> buffer(1024);
    auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], static_cast<DWORD>(buffer.size()));
    // If our initial guess was too short, n will indicate the size (in
    // wchar_t's) that buffer should have been, including the terminating nul.
    if (n > (buffer.size() - 1))
    {
        // make it big enough
        buffer.resize(n);
        // and try again
        n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], static_cast<DWORD>(buffer.size()));
    }
    // did that (ultimately) succeed?
    if (n)
    {
        // great, return populated std::optional
        // (the value is read up to the first nul in the buffer)
        return std::make_optional<std::wstring>(&buffer[0]);
    }

    // not successful
    auto last_error = GetLastError();
    // Don't bother warning for NOT_FOUND; that's an expected case
    if (last_error != ERROR_ENVVAR_NOT_FOUND)
    {
        LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: "
                   << windows_message<std::string>(last_error) << LL_ENDL;
    }
    // return empty std::optional
    return {};
}

#else  // ! LL_WINDOWS

// Looks up environment variable 'key' with getenv().
// Returns its value in a populated std::optional when the variable exists,
// otherwise an empty std::optional.
std::optional<std::string> llstring_getoptenv(const std::string& key)
{
    const char* const value = getenv(key.c_str());
    if (!value)
    {
        // variable is not set
        return {};
    }
    return std::make_optional<std::string>(value);
}

#endif // ! LL_WINDOWS

// Definitions of the LLStringOps static data members.

// Time offsets in seconds; both are (re)computed by setupDatetimeInfo().
long LLStringOps::sPacificTimeOffset = 0;
long LLStringOps::sLocalTimeOffset = 0;
// Copy of the 'daylight' flag last passed to setupDatetimeInfo().
bool LLStringOps::sPacificDaylightTime = 0;
// Maps date/time keys such as "year" or "hour12" to format codes such as
// "%Y" or "%I"; filled by setupDatetimeInfo().
std::map<std::string, std::string> LLStringOps::datetimeToCodes;

// Name lists, each filled from a ':'-separated string by the matching
// setupWeekDaysNames() / setupWeekDaysShortNames() / setupMonthNames() /
// setupMonthShortNames() call.
std::vector<std::string> LLStringOps::sWeekDayList;
std::vector<std::string> LLStringOps::sWeekDayShortList;
std::vector<std::string> LLStringOps::sMonthList;
std::vector<std::string> LLStringOps::sMonthShortList;


// Set by setupDayFormat().
std::string LLStringOps::sDayFormat;
// NOTE(review): sAM / sPM are not assigned in this part of the file;
// presumably the AM/PM labels, set elsewhere -- confirm.
std::string LLStringOps::sAM;
std::string LLStringOps::sPM;

// static
// Returns true when code point 'a' lies in the range [0x1f000, 0x20000).
// The disabled (#if 0) branch shows a wider definition that also counted
// 0xa9, 0xae and the range [0x2000, 0x3300) as emojis.
bool LLStringOps::isEmoji(llwchar a)
{
#if 0   // Do not consider special characters that might have a corresponding
        // glyph in the monochrome fallback fonts as a "genuine" emoji. HB
    return a == 0xa9 || a == 0xae || (a >= 0x2000 && a < 0x3300) ||
           (a >= 0x1f000 && a < 0x20000);
#else
    // These are indeed "genuine" emojis, we *do want* rendered as such. HB
    return a >= 0x1f000 && a < 0x20000;
#endif
    }

// Three-way comparison of two NUL-terminated wide strings.
// On Windows both strings are converted to UTF-8 and compared with
// strcmp(); elsewhere the comparison is delegated to wcscoll().
S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
{
#if LL_WINDOWS
    // in Windows, wide string functions operate on 16-bit strings,
    // not the proper 32 bit wide string
    const std::string utf8_a = wstring_to_utf8str(LLWString(a));
    const std::string utf8_b = wstring_to_utf8str(LLWString(b));
    return strcmp(utf8_a.c_str(), utf8_b.c_str());
#else
    return wcscoll(a, b);
#endif
}

// Initializes the date/time state of LLStringOps:
//  - sLocalTimeOffset: difference in seconds between the current time
//    broken down with gmtime() and with localtime(), each passed back
//    through mktime(); reduced by one hour when tm_isdst is set;
//  - sPacificDaylightTime / sPacificTimeOffset: the 'daylight' flag and
//    7 hours (daylight) or 8 hours (otherwise) expressed in seconds;
//  - datetimeToCodes: the key -> format-code table listed below.
void LLStringOps::setupDatetimeInfo (bool daylight)
{
    time_t nowT, localT, gmtT;
    struct tm * tmpT;

    nowT = time (NULL);

    // The struct returned by gmtime()/localtime() may be shared storage, so
    // each result is consumed by mktime() before the next call is made.
    tmpT = gmtime (&nowT);
    gmtT = mktime (tmpT);

    tmpT = localtime (&nowT);
    localT = mktime (tmpT);

    sLocalTimeOffset = (long) (gmtT - localT);
    // tmpT still refers to the local-time breakdown here.
    if (tmpT->tm_isdst)
    {
        sLocalTimeOffset -= 60 * 60;    // 1 hour
    }

    sPacificDaylightTime = daylight;
    sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60;

    // Key -> format code. The trailing comments show sample output.
    datetimeToCodes["wkday"]    = "%a";     // Thu
    datetimeToCodes["weekday"]  = "%A";     // Thursday
    datetimeToCodes["year4"]    = "%Y";     // 2009
    datetimeToCodes["year"]     = "%Y";     // 2009
    datetimeToCodes["year2"]    = "%y";     // 09
    datetimeToCodes["mth"]      = "%b";     // Aug
    datetimeToCodes["month"]    = "%B";     // August
    datetimeToCodes["mthnum"]   = "%m";     // 08
    datetimeToCodes["day"]      = "%d";     // 31
    // NOTE(review): "%-d" is not a standard strftime() conversion;
    // presumably it is interpreted by the code consuming this table -- confirm.
    datetimeToCodes["sday"]     = "%-d";    // 9
    datetimeToCodes["hour24"]   = "%H";     // 14
    datetimeToCodes["hour"]     = "%H";     // 14
    datetimeToCodes["hour12"]   = "%I";     // 02
    datetimeToCodes["min"]      = "%M";     // 59
    datetimeToCodes["ampm"]     = "%p";     // AM
    datetimeToCodes["second"]   = "%S";     // 59
    datetimeToCodes["timezone"] = "%Z";     // PST
}

// Splits 'data' on ':' and stores the pieces in 'output', replacing its
// previous contents. Empty pieces are kept, so the result always holds
// (number of ':' in data) + 1 entries -- a single empty entry for "".
void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output)
{
    output.clear();

    std::string token;
    for (const char ch : data)
    {
        if (ch != ':')
        {
            token.push_back(ch);
            continue;
        }
        // separator: finish the current token and start a new one
        output.push_back(token);
        token.clear();
    }
    // whatever follows the last separator (possibly nothing)
    output.push_back(token);
}

// Replaces sWeekDayList with the ':'-separated entries of 'data'.
void LLStringOps::setupWeekDaysNames(const std::string& data)
{
    std::vector<std::string>& names = sWeekDayList;
    tokenizeStringToArray(data, names);
}
// Replaces sWeekDayShortList with the ':'-separated entries of 'data'.
void LLStringOps::setupWeekDaysShortNames(const std::string& data)
{
    std::vector<std::string>& names = sWeekDayShortList;
    tokenizeStringToArray(data, names);
}
void LLStringOps::setupMonthNames(const std::string& data)
{
    tokenizeStringToArray(data,sMonthList);
}
void LLStringOps::setupMonthShortNames(const std::string& data)
{
    tokenizeStringToArray(data,sMonthShortList);
}
void LLStringOps::setupDayFormat(const std::string& data)
{
    sDayFormat = data;
}


// Look up the format code registered for a datetime token name
// (e.g. "weekday" -> "%A"). Returns LLStringUtil::null for unknown keys.
std::string LLStringOps::getDatetimeCode(std::string key)
{
    const auto found = datetimeToCodes.find(key);
    if (found == datetimeToCodes.end())
    {
        return LLStringUtil::null;
    }
    return found->second;
}

// Invoke 'handler' once for every piece of 'text' separated by 'delimiter',
// in order. Empty pieces are reported too, so the handler is always called
// (number of delimiters + 1) times -- once with an empty string when 'text'
// is empty.
void LLStringOps::splitString(const std::string& text, char delimiter,
    std::function<void(const std::string&)> handler)
{
    std::size_t piece_start = 0;
    std::size_t hit = text.find(delimiter, piece_start);
    while (hit != std::string::npos)
    {
        handler(text.substr(piece_start, hit - piece_start));
        piece_start = hit + 1;
        hit = text.find(delimiter, piece_start);
    }

    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    handler(text.substr(piece_start));
}

// Format 'num' with two decimals, scaled to billions ("B"), millions ("M")
// or thousands ("K") according to its magnitude; values below 1000 in
// magnitude are printed unscaled with no suffix.
std::string LLStringOps::getReadableNumber(F64 num)
{
    const F64 magnitude = fabs(num);

    if (magnitude >= 1e9)
    {
        return llformat("%.2lfB", num / 1e9);
    }
    if (magnitude >= 1e6)
    {
        return llformat("%.2lfM", num / 1e6);
    }
    if (magnitude >= 1e3)
    {
        return llformat("%.2lfK", num / 1e3);
    }
    return llformat("%.2lf", num);
}

namespace LLStringFn
{
    // True for bytes that are either ASCII control characters (< 0x20) or
    // outside the 7-bit ASCII range (>= 0x80). The comparison is done on the
    // unsigned byte value so the result does not depend on whether plain
    // 'char' is signed on the target platform.
    static bool is_control_or_non_ascii(char c)
    {
        const unsigned char byte = static_cast<unsigned char>(c);
        return byte < 0x20 || byte >= 0x80;
    }

    /**
     * @brief Replace every control character (< 0x20) and every non-ASCII
     * byte (>= 0x80) in string with replacement.
     *
     * NOTE - this restricts output to ascii
     */
    void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement)
    {
        for (char& ch : string)
        {
            if (is_control_or_non_ascii(ch))
            {
                ch = replacement;
            }
        }
    }


    /**
     * @brief Same as replace_nonprintable_in_ascii(), and additionally
     * replaces the pipe character '|' (0x7c).
     *
     * NOTE - this restricts output to ascii
     */
    void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
                                       char replacement)
    {
        const char PIPE = 0x7c;
        for (char& ch : str)
        {
            if (is_control_or_non_ascii(ch) || ch == PIPE)
            {
                ch = replacement;
            }
        }
    }

    // https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
    // allowable code points for XML. Specifically, they are:
    // 0x09, 0x0a, 0x0d, and 0x20 on up.  JC
    /**
     * @brief Return a copy of instr with every byte below 0x20 removed,
     * except TAB, LINE_FEED and CARRIAGE_RETURN.
     */
    std::string strip_invalid_xml(const std::string& instr)
    {
        std::string output;
        output.reserve( instr.size() );
        for (const char raw : instr)
        {
            // Must compare as unsigned for >=
            // Test most likely match first
            const unsigned char c = static_cast<unsigned char>(raw);
            if (   c >= 0x20   // SPACE
                || c == 0x09   // TAB
                || c == 0x0a   // LINE_FEED
                || c == 0x0d ) // CARRIAGE_RETURN
            {
                output.push_back(raw);
            }
        }
        return output;
    }

    using literals_t = std::map<char, std::string>;
    static const literals_t xml_elem_literals =
    {
        { '<', "&lt;" },
        { '>', "&gt;" },
        { '&', "&amp;" }
    };
    static const literals_t xml_attr_literals =
    {
        { '"', "&quot;" },
        { '\'', "&apos;" }
    };
    // Union of the two tables above, used to decode attribute values in a
    // single pass.
    static const literals_t xml_elem_and_attr_literals =
    {
        { '<', "&lt;" },
        { '>', "&gt;" },
        { '&', "&amp;" },
        { '"', "&quot;" },
        { '\'', "&apos;" }
    };

    // Replace every character of 'text' that has an entry in 'literals' with
    // its literal. Done in one left-to-right pass so that text produced by a
    // replacement is never examined again (the result does not depend on the
    // iteration order of the table).
    static void literals_encode(std::string& text, const literals_t& literals)
    {
        std::string encoded;
        encoded.reserve(text.size());
        for (const char c : text)
        {
            const auto found = literals.find(c);
            if (found == literals.end())
            {
                encoded.push_back(c);
            }
            else
            {
                encoded.append(found->second);
            }
        }
        text.swap(encoded);
    }

    // Replace every occurrence of a literal from 'literals' in 'text' with the
    // character it stands for. Done in one left-to-right pass: a character
    // produced by a replacement is never combined with the following text to
    // form another literal, so "&amp;lt;" decodes to "&lt;" and not to "<".
    static void literals_decode(std::string& text, const literals_t& literals)
    {
        std::string decoded;
        decoded.reserve(text.size());

        std::string::size_type pos = 0;
        while (pos < text.size())
        {
            const literals_t::value_type* hit = nullptr;
            for (const auto& entry : literals)
            {
                if (!entry.second.empty()
                    && text.compare(pos, entry.second.size(), entry.second) == 0)
                {
                    hit = &entry;
                    break;
                }
            }

            if (hit)
            {
                decoded.push_back(hit->first);
                pos += hit->second.size();
            }
            else
            {
                decoded.push_back(text[pos]);
                ++pos;
            }
        }
        text.swap(decoded);
    }

    /**
     * @brief Replace the characters that must be escaped in XML 1.0 text
     * with the corresponding literals: [ < > & ] => [ &lt; &gt; &amp; ]
     *
     * @param input text to encode
     * @param for_attribute when true, quotes are escaped as well:
     * [ " ' ] => [ &quot; &apos; ]
     * @return the encoded copy of input
     */
    std::string xml_encode(const std::string& input, bool for_attribute)
    {
        std::string result(input);
        literals_encode(result, xml_elem_literals);
        if (for_attribute)
        {
            // The element pass never produces or consumes quotes, so a second
            // pass over its output is safe.
            literals_encode(result, xml_attr_literals);
        }
        return result;
    }

    /**
     * @brief Replace some of XML literals that are defined in XML 1.0
     * with corresponding characters: [ &lt; &gt; &amp; ] => [ < > & ]
     *
     * This is the inverse of xml_encode() called with the same for_attribute
     * value: every literal is decoded exactly once.
     *
     * @param input text to decode
     * @param for_attribute when true, [ &quot; &apos; ] => [ " ' ] as well
     * @return the decoded copy of input
     */
    std::string xml_decode(const std::string& input, bool for_attribute)
    {
        std::string result(input);
        literals_decode(result, for_attribute ? xml_elem_and_attr_literals
                                              : xml_elem_literals);
        return result;
    }

    /**
     * @brief Replace all control characters (c < 0x20) with replacement in
     * string. Bytes >= 0x80 are left untouched.
     */
    void replace_ascii_controlchars(std::basic_string<char>& string, char replacement)
    {
        const unsigned char MIN = 0x20;
        for (char& ch : string)
        {
            if (static_cast<unsigned char>(ch) < MIN)
            {
                ch = replacement;
            }
        }
    }
}

////////////////////////////////////////////////////////////

// Forward-declare the std::string / format_map_t specialization of
// LLStringUtil::format: formatDatetime() below calls it, but the
// specialization itself is only defined further down in this file.
template<>
S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);

//static
// Split 'instr' at any character contained in 'delims', trim each piece with
// LLStringUtil::trim() and append it to 'tokens' (existing entries are kept).
// Runs of delimiters are skipped, so no token is produced between two
// adjacent delimiters; a piece that consists only of trimmable characters is
// still appended, as an empty string.
template<>
void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)
{
    std::string::size_type scan_from = 0;
    while (true)
    {
        // Skip delimiters; stop once nothing but delimiters is left.
        const std::string::size_type token_begin = instr.find_first_not_of(delims, scan_from);
        if (token_begin == std::string::npos)
        {
            break;
        }

        // The token runs up to the next delimiter, or to the end of the input.
        std::string::size_type token_end = instr.find_first_of(delims, token_begin);
        if (token_end == std::string::npos)
        {
            token_end = instr.length();
        }

        std::string piece(instr, token_begin, token_end - token_begin);
        LLStringUtil::trim(piece);
        tokens.push_back(piece);

        scan_from = token_end;
    }
}

// Locate the next bracketed "[...]" expression in 'instr' at or after 'start'.
// On success the comma-separated pieces between the brackets are appended to
// 'tokens', 'start' is advanced to just past the closing ']' and the index of
// the opening '[' is returned. For nested openers such as "[[FOO]" the
// innermost '[' (the last one before the first ']') is used.
// Returns std::string::npos, leaving 'start' and 'tokens' untouched, when no
// complete bracketed expression is found.
template<>
LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens)
{
    const size_type first_open = instr.find('[', start);
    if (first_open == std::string::npos)
    {
        return std::string::npos;
    }

    const size_type close = instr.find(']', first_open);
    if (close == std::string::npos)
    {
        return std::string::npos;
    }

    // Walk back from the ']' to the nearest '[' in case of nested [[]]
    const size_type open = instr.rfind('[', close - 1);
    if (open == std::string::npos || open < start)
    {
        return std::string::npos;
    }

    const std::string separators(",");
    getTokens(instr.substr(open + 1, close - open - 1), tokens, separators);
    start = close + 1;

    return open;
}

// static
// Look up the substitution for 'token' in a format_map_t. The key is tried
// first as given and then wrapped in brackets ("[token]"). An explicit find()
// is used so that a missing key can be told apart from a key that maps to a
// deliberately empty string.
// Returns true and stores the value in 'replacement' when a key matched;
// otherwise returns false and leaves 'replacement' untouched.
template<>
bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions)
{
    auto found = substitutions.find(token);
    if (found == substitutions.end())
    {
        // not stored under the bare name, see if there's one WITH brackets
        found = substitutions.find(std::string("[" + token + "]"));
    }

    if (found == substitutions.end())
    {
        return false;
    }

    replacement = found->second;
    return true;
}

// static
// Look up the substitution for 'token' in an LLSD. The key is tried first as
// given and then wrapped in brackets ("[token]"). has() is consulted before
// operator[] so that a missing key can be told apart from a key that holds a
// deliberately empty string.
// Returns true and stores the value (as a string) in 'replacement' when a key
// matched; otherwise returns false and leaves 'replacement' untouched.
template<>
bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions)
{
    if (substitutions.has(token))
    {
        replacement = substitutions[token].asString();
        return true;
    }

    // if not, see if there's one WITH brackets
    const std::string bracketed("[" + token + "]");
    if (!substitutions.has(bracketed))
    {
        return false;
    }

    replacement = substitutions[bracketed].asString();
    return true;
}

//static
// Remember the locale name that formatNumber() imbues its stream with.
// Names starting with "MissingString" are rejected with a warning and the
// previously stored locale is kept.
template<>
void LLStringUtil::setLocale(std::string inLocale)
{
    if (!startsWith(inLocale, "MissingString"))
    {
        sLocale = inLocale;
        return;
    }

    // It seems this hasn't been working for some time, and it is unclear how
    // the correct locale is intended to be discovered. Bail out now to avoid
    // failures later in formatNumber().
    LL_WARNS() << "Failed attempting to set invalid locale: " << inLocale << LL_ENDL;
}

//static
// Return the locale name last stored in sLocale.
template<>
std::string LLStringUtil::getLocale(void)
{
    return sLocale;
};

// static
// Re-format the number held in 'numStr' in place, using the locale stored in
// sLocale when one is set.
// 'decimals' is parsed as an integer precision: when it is 0 (or cannot be
// parsed) 'numStr' is treated as an S32 and printed as an integer, otherwise
// it is treated as an F32 and printed in fixed notation with that many
// decimals. If 'numStr' cannot be converted it is left unchanged.
template<>
void LLStringUtil::formatNumber(std::string& numStr, std::string decimals)
{
    S32 precision = 0;
    convertToS32 (decimals, precision);

    std::stringstream formatted;
    if (!sLocale.empty())
    {
        // std::locale() throws if the locale is unknown! (EXT-7926)
        try
        {
            formatted.imbue(std::locale(sLocale.c_str()));
        }
        catch (const std::exception &)
        {
            LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL;
        }
    }

    if (precision == 0)
    {
        S32 as_integer;
        if (!convertToS32(numStr, as_integer))
        {
            return;
        }
        formatted << as_integer;
    }
    else
    {
        F32 as_float;
        if (!convertToF32(numStr, as_float))
        {
            return;
        }
        formatted << std::fixed << std::showpoint << std::setprecision(precision) << as_float;
    }

    numStr = formatted.str();
}

// static
// Produce the text for one datetime token.
//
// replacement  - receives the formatted text
// token        - token name as registered with LLStringOps (e.g. "weekday",
//                "hour12", "timezone"); it is mapped to a format code with
//                LLStringOps::getDatetimeCode()
// param        - "utc", "local", or anything else for Pacific time ("slt")
// secFromEpoch - time to format, in seconds since the epoch (UTC); values
//                that end up negative after the zone shift are clamped to 0
//
// Returns true when 'token' maps to a known format code, false otherwise.
template<>
bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,
                                  std::string param, S32 secFromEpoch)
{
    if (param == "local")   // local
    {
        secFromEpoch -= LLStringOps::getLocalTimeOffset();
    }
    else if (param != "utc") // slt
    {
        secFromEpoch -= LLStringOps::getPacificTimeOffset();
    }

    // if never fell into those two ifs above, param must be utc
    if (secFromEpoch < 0) secFromEpoch = 0;

    LLDate datetime((F64)secFromEpoch);
    std::string code = LLStringOps::getDatetimeCode (token);

    // special case to handle timezone
    if (code == "%Z") {
        if (param == "utc")
        {
            replacement = "GMT";
        }
        else if (param == "local")
        {
            replacement = "";       // user knows their own timezone
        }
        else
        {
#if 0
            // EXT-1565 : Zai Lynch, James Linden : 15/Oct/09
            // [BSI] Feedback: Viewer clock mentions SLT, but would prefer it to show PST/PDT
            // "slt" = Second Life Time, which is deprecated.
            // If not utc or user local time, fallback to Pacific time
            replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST";
#else
            // SL-20370 : Steeltoe Linden : 29/Sep/23
            // Change "PDT" to "SLT" on menu bar
            replacement = "SLT";
#endif
        }
        return true;
    }

    // EXT-7013
    // A few codes are not supported by the strftime function for every
    // language (for example weekday names for Japanese), so the predefined
    // names set up through LLStringOps::setup*() are used when available.

    // A name list is only trusted when it has the expected number of entries
    // (7 weekdays / 12 months); otherwise the generic formatter is used.
    // The time was already shifted to the requested zone above, so it is
    // broken down with gmtime() rather than localtime().
    time_t loc_seconds = (time_t) secFromEpoch;
    if(LLStringOps::sWeekDayList.size() == 7 && code == "%A")
    {
        struct tm * gmt = gmtime (&loc_seconds);
        replacement = LLStringOps::sWeekDayList[gmt->tm_wday];
    }
    else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a")
    {
        struct tm * gmt = gmtime (&loc_seconds);
        replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday];
    }
    else if(LLStringOps::sMonthList.size() == 12 && code == "%B")
    {
        struct tm * gmt = gmtime (&loc_seconds);
        replacement = LLStringOps::sMonthList[gmt->tm_mon];
    }
    else if(LLStringOps::sMonthShortList.size() == 12 && code == "%b")
    {
        // Abbreviated month names configured via setupMonthShortNames()
        struct tm * gmt = gmtime (&loc_seconds);
        replacement = LLStringOps::sMonthShortList[gmt->tm_mon];
    }
    else if( !LLStringOps::sDayFormat.empty() && code == "%d" )
    {
        struct tm * gmt = gmtime (&loc_seconds);
        LLStringUtil::format_map_t args;
        args["[MDAY]"] = llformat ("%d", gmt->tm_mday);
        replacement = LLStringOps::sDayFormat;
        LLStringUtil::format(replacement, args);
    }
    else if (code == "%-d")
    {
        struct tm * gmt = gmtime (&loc_seconds);
        replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero
    }
    else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" )
    {
        struct tm * gmt = gmtime (&loc_seconds);
        if(gmt->tm_hour<12)
        {
            replacement = LLStringOps::sAM;
        }
        else
        {
            replacement = LLStringOps::sPM;
        }
    }
    else
    {
        replacement = datetime.toHTTPDateString(code);
    }

    // *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format
    // to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738).
    // We could have used '%l' format instead, but it's not supported by Windows.
    // The length check keeps an empty or one-character result (formatter
    // failure) from throwing std::out_of_range.
    if(code == "%I" && token == "hour12" && replacement.size() > 1 && replacement[0] == '0')
    {
        replacement.erase(0, 1);
    }

    return !code.empty();
}

// LLStringUtil::format recognizes the following patterns.
// All substitutions *must* be encased in []'s in the input string.
// The []'s are optional in the substitution map.
// [FOO_123]
// [FOO,number,precision]
// [FOO,datetime,format]


// static
// Expand every bracketed expression in 's' in place, looking values up in a
// format_map_t. Supported forms:
//   [KEY]                   - plain lookup of KEY (or "[KEY]")
//   [KEY,number,precision]  - lookup, then formatNumber() (precision defaults to "0")
//   [TOKEN,datetime,zone]   - formatDatetime() of the "datetime" entry, which
//                             must hold an integer number of seconds
// Expressions without a replacement are copied through unchanged.
// Returns the number of expressions that were replaced.
template<>
S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
{
    LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;

    S32 replaced_count = 0;
    std::string result;
    std::vector<std::string> tokens;

    std::string::size_type scan_pos = 0;    // where the next search begins
    std::string::size_type copied_to = 0;   // end of the input consumed so far
    std::string::size_type key_start = 0;   // position of the current '['
    while ((key_start = getSubstitution(s, scan_pos, tokens)) != std::string::npos)
    {
        // literal text between the previous expression and this one
        result.append(s, copied_to, key_start - copied_to);
        copied_to = scan_pos;

        std::string replacement;
        bool found_replacement = false;
        const std::vector<std::string>::size_type token_count = tokens.size();

        if (token_count == 1)
        {
            found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
        }
        else if (token_count >= 2 && tokens[1] == "number")
        {
            const std::string param = (token_count > 2) ? tokens[2] : std::string("0");
            found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
            if (found_replacement)
            {
                formatNumber (replacement, param);
            }
        }
        else if (token_count >= 2 && tokens[1] == "datetime")
        {
            const std::string param = (token_count > 2) ? tokens[2] : std::string();

            const auto when = substitutions.find("datetime");
            if (when != substitutions.end())
            {
                S32 secFromEpoch = 0;
                if (LLStringUtil::convertToS32(when->second, secFromEpoch))
                {
                    found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch);
                }
            }
        }

        if (found_replacement)
        {
            result.append(replacement);
            ++replaced_count;
        }
        else
        {
            // we had no replacement, use the string as is
            // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
            result.append(s, key_start, scan_pos - key_start);
        }
        tokens.clear();
    }

    // whatever follows the last bracketed expression is copied verbatim
    result.append(s, scan_pos, std::string::npos);
    s = result;
    return replaced_count;
}

//static
// Expand every bracketed expression in 's' in place, looking values up in an
// LLSD map. Supported forms:
//   [KEY]                   - plain lookup of KEY (or "[KEY]")
//   [KEY,number,precision]  - lookup, then formatNumber() (precision defaults to "0")
//   [TOKEN,datetime,zone]   - formatDatetime() of substitutions["datetime"]
//                             read as an integer number of seconds
// Expressions without a replacement are copied through unchanged.
// Returns the number of expressions that were replaced; when 'substitutions'
// is not a map, 's' is left untouched and 0 is returned.
template<>
S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
{
    LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;

    S32 replaced_count = 0;
    if (!substitutions.isMap())
    {
        return replaced_count;
    }

    std::string result;
    std::vector<std::string> tokens;

    std::string::size_type scan_pos = 0;    // where the next search begins
    std::string::size_type copied_to = 0;   // end of the input consumed so far
    std::string::size_type key_start = 0;   // position of the current '['
    while ((key_start = getSubstitution(s, scan_pos, tokens)) != std::string::npos)
    {
        // literal text between the previous expression and this one
        result.append(s, copied_to, key_start - copied_to);
        copied_to = scan_pos;

        std::string replacement;
        bool found_replacement = false;
        const std::vector<std::string>::size_type token_count = tokens.size();

        if (token_count == 1)
        {
            found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
        }
        else if (token_count >= 2 && tokens[1] == "number")
        {
            const std::string param = (token_count > 2) ? tokens[2] : std::string("0");
            found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
            if (found_replacement)
            {
                formatNumber (replacement, param);
            }
        }
        else if (token_count >= 2 && tokens[1] == "datetime")
        {
            const std::string param = (token_count > 2) ? tokens[2] : std::string();

            S32 secFromEpoch = (S32) substitutions["datetime"].asInteger();
            found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch);
        }

        if (found_replacement)
        {
            result.append(replacement);
            ++replaced_count;
        }
        else
        {
            // we had no replacement, use the string as is
            // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
            result.append(s, key_start, scan_pos - key_start);
        }
        tokens.clear();
    }

    // whatever follows the last bracketed expression is copied verbatim
    result.append(s, scan_pos, std::string::npos);
    s = result;
    return replaced_count;
}

////////////////////////////////////////////////////////////
// Testing

#ifdef _DEBUG

// Debug-only self test: exercises basic std::string behaviour together with
// the trim(), toUpper(), toLower() and isValidIndex() helpers of this class,
// reporting failures through llassert().
template<class T>
void LLStringUtilBase<T>::testHarness()
{
    std::string s1;

    // std::string::c_str() never returns NULL; an empty string yields a
    // pointer to a lone terminator.
    llassert( s1.c_str() != NULL );
    llassert( s1.c_str()[0] == '\0' );
    llassert( s1.size() == 0 );
    llassert( s1.empty() );

    std::string s2( "hello");
    llassert( !strcmp( s2.c_str(), "hello" ) );
    llassert( s2.size() == 5 );
    llassert( !s2.empty() );
    std::string s3( s2 );

    llassert( "hello" == s2 );
    llassert( s2 == "hello" );
    llassert( s2 > "gello" );
    llassert( "gello" < s2 );
    llassert( "gello" != s2 );
    llassert( s2 != "gello" );

    std::string s4 = s2;
    llassert( !s4.empty() );
    s4.clear();     // empty() only queries; clear() is what empties the string
    llassert( s4.empty() );

    std::string s5("");
    llassert( s5.empty() );

    // NOTE(review): these two depend on how isValidIndex() treats an empty
    // string; its definition is not in this file -- confirm the expectations.
    llassert( isValidIndex(s5, 0) );
    llassert( !isValidIndex(s5, 1) );

    s3 = s2;
    s4 = "hello again";

    s4 += "!";
    s4 += s4;
    llassert( s4 == "hello again!hello again!" );


    std::string s6 = s2 + " " + s2;
    std::string s7 = s6;
    llassert( s6 == s7 );
    llassert( !( s6 != s7) );
    llassert( !(s6 < s7) );
    llassert( !(s6 > s7) );

    llassert( !(s6 == "hi"));
    llassert( s6 == "hello hello");
    llassert( s6 < "hi");

    llassert( s6[1] == 'e' );
    s6[1] = 'f';
    llassert( s6[1] == 'f' );

    s2.erase( 4, 1 );
    llassert( s2 == "hell");
    s2.insert( 0, "y" );
    llassert( s2 == "yhell");
    s2.erase( 1, 3 );
    llassert( s2 == "yl");
    s2.insert( 1, "awn, don't yel");
    llassert( s2 == "yawn, don't yell");

    std::string s8 = s2.substr( 6, 5 );
    llassert( s8 == "don't"  );

    std::string s9 = "   \t\ntest  \t\t\n  ";
    trim(s9);
    llassert( s9 == "test"  );

    s8 = "abc123&*(ABC";

    s9 = s8;
    toUpper(s9);
    llassert( s9 == "ABC123&*(ABC"  );

    s9 = s8;
    toLower(s9);
    llassert( s9 == "abc123&*(abc"  );


    std::string s10( 10, 'x' );
    llassert( s10 == "xxxxxxxxxx" );

    std::string s11( "monkey in the middle", 7, 2 );
    llassert( s11 == "in" );

    std::string s12;  //empty
    s12 += "foo";
    llassert( s12 == "foo" );

    std::string s13;  //empty
    s13 += 'f';
    llassert( s13 == "f" );
}


#endif  // _DEBUG