diff options
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r-- | indra/llcommon/llstring.cpp | 3512 |
1 files changed, 1756 insertions, 1756 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 6512bbc392..b3fc9b484a 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -1,1756 +1,1756 @@ -/** - * @file llstring.cpp - * @brief String utility functions and the std::string class. - * - * $LicenseInfo:firstyear=2001&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA - * $/LicenseInfo$ - */ - -#include "linden_common.h" - -#include "llstring.h" -#include "llerror.h" -#include "llfasttimer.h" -#include "llsd.h" -#include <vector> - -#if LL_WINDOWS -#include "llwin32headerslean.h" -#include <winnls.h> // for WideCharToMultiByte -#endif - -std::string ll_safe_string(const char* in) -{ - if(in) return std::string(in); - return std::string(); -} - -std::string ll_safe_string(const char* in, S32 maxlen) -{ - if(in && maxlen > 0 ) return std::string(in, maxlen); - - return std::string(); -} - -bool is_char_hex(char hex) -{ - if((hex >= '0') && (hex <= '9')) - { - return true; - } - else if((hex >= 'a') && (hex <='f')) - { - return true; - } - else if((hex >= 'A') && (hex <='F')) - { - return true; - } - return false; // uh - oh, not hex any more... -} - -U8 hex_as_nybble(char hex) -{ - if((hex >= '0') && (hex <= '9')) - { - return (U8)(hex - '0'); - } - else if((hex >= 'a') && (hex <='f')) - { - return (U8)(10 + hex - 'a'); - } - else if((hex >= 'A') && (hex <='F')) - { - return (U8)(10 + hex - 'A'); - } - return 0; // uh - oh, not hex any more... -} - -bool iswindividual(llwchar elem) -{ - U32 cur_char = (U32)elem; - bool result = false; - if (0x2E80<= cur_char && cur_char <= 0x9FFF) - { - result = true; - } - else if (0xAC00<= cur_char && cur_char <= 0xD7A0 ) - { - result = true; - } - else if (0xF900<= cur_char && cur_char <= 0xFA60 ) - { - result = true; - } - return result; -} - -bool _read_file_into_string(std::string& str, const std::string& filename) -{ - llifstream ifs(filename.c_str(), llifstream::binary); - if (!ifs.is_open()) - { - LL_INFOS() << "Unable to open file " << filename << LL_ENDL; - return false; - } - - std::ostringstream oss; - - oss << ifs.rdbuf(); - str = oss.str(); - ifs.close(); - return true; -} - - - - -// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c -// for the Unicode implementation - this doesn't match because it was written before finding -// it. - - -std::ostream& operator<<(std::ostream &s, const LLWString &wstr) -{ - std::string utf8_str = wstring_to_utf8str(wstr); - s << utf8_str; - return s; -} - -std::string rawstr_to_utf8(const std::string& raw) -{ - LLWString wstr(utf8str_to_wstring(raw)); - return wstring_to_utf8str(wstr); -} - -std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars) -{ - U32 cur_char = (U32)in_char; - char* base = outchars; - if (cur_char < 0x80) - { - *outchars++ = (U8)cur_char; - } - else if (cur_char < 0x800) - { - *outchars++ = 0xC0 | (cur_char >> 6); - *outchars++ = 0x80 | (cur_char & 0x3F); - } - else if (cur_char < 0x10000) - { - *outchars++ = 0xE0 | (cur_char >> 12); - *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); - *outchars++ = 0x80 | (cur_char & 0x3F); - } - else if (cur_char < 0x200000) - { - *outchars++ = 0xF0 | (cur_char >> 18); - *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); - *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); - *outchars++ = 0x80 | (cur_char & 0x3F); - } - else if (cur_char < 0x4000000) - { - *outchars++ = 0xF8 | (cur_char >> 24); - *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F); - *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); - *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); - *outchars++ = 0x80 | (cur_char & 0x3F); - } - else if (cur_char < 0x80000000) - { - *outchars++ = 0xFC | (cur_char >> 30); - *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F); - *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F); - *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); - *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); - *outchars++ = 0x80 | (cur_char & 0x3F); - } - else - { - LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL; - *outchars++ = LL_UNKNOWN_CHAR; - } - return outchars - base; -} - -auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar) -{ - const U16* base = inchars; - U16 cur_char = *inchars++; - llwchar char32 = cur_char; - if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF)) - { - // Surrogates - char32 = ((llwchar)(cur_char - 0xD800)) << 10; - cur_char = *inchars++; - char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL; - } - else - { - char32 = (llwchar)cur_char; - } - *outchar = char32; - return inchars - base; -} - -llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) -{ - llutf16string out; - - S32 i = 0; - while (i < len) - { - U32 cur_char = utf32str[i]; - if (cur_char > 0xFFFF) - { - out += (0xD7C0 + (cur_char >> 10)); - out += (0xDC00 | (cur_char & 0x3FF)); - } - else - { - out += cur_char; - } - i++; - } - return out; -} - -llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) -{ - LLWString wstr = utf8str_to_wstring ( utf8str, len ); - return wstring_to_utf16str ( wstr ); -} - -LLWString utf16str_to_wstring(const U16* utf16str, size_t len) -{ - LLWString wout; - if (len == 0) return wout; - - S32 i = 0; - const U16* chars16 = utf16str; - while (i < len) - { - llwchar cur_char; - i += utf16chars_to_wchar(chars16+i, &cur_char); - wout += cur_char; - } - return wout; -} - -// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. -S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len) -{ - S32 surrogate_pairs = 0; - // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): - const U16 *const utf16_chars = &(*(utf16str.begin())); - S32 i = 0; - while (i < utf16_len) - { - const U16 c = utf16_chars[i++]; - if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16 - { // Have first byte of a surrogate pair - if (i >= utf16_len) - { - break; - } - const U16 d = utf16_chars[i]; - if (d >= 0xDC00 && d <= 0xDFFF) - { // Have valid second byte of a surrogate pair - surrogate_pairs++; - i++; - } - } - } - return utf16_len - surrogate_pairs; -} - -// Length in utf16string (UTF-16) of wlen wchars beginning at woffset. -S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen) -{ - const S32 end = llmin((S32)wstr.length(), woffset + wlen); - if (end < woffset) - { - return 0; - } - else - { - S32 length = end - woffset; - for (S32 i = woffset; i < end; i++) - { - if (wstr[i] >= 0x10000) - { - length++; - } - } - return length; - } -} - -// Given a wstring and an offset in it, returns the length as wstring (i.e., -// number of llwchars) of the longest substring that starts at the offset -// and whose equivalent utf-16 string does not exceeds the given utf16_length. -S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned) -{ - const auto end = wstr.length(); - BOOL u = FALSE; - S32 n = woffset + utf16_length; - S32 i = woffset; - while (i < end) - { - if (wstr[i] >= 0x10000) - { - --n; - } - if (i >= n) - { - u = (i > n); - break; - } - i++; - } - if (unaligned) - { - *unaligned = u; - } - return i - woffset; -} - -S32 wchar_utf8_length(const llwchar wc) -{ - if (wc < 0x80) - { - return 1; - } - else if (wc < 0x800) - { - return 2; - } - else if (wc < 0x10000) - { - return 3; - } - else if (wc < 0x200000) - { - return 4; - } - else if (wc < 0x4000000) - { - return 5; - } - else - { - return 6; - } -} - -std::string wchar_utf8_preview(const llwchar wc) -{ - std::ostringstream oss; - oss << std::hex << std::uppercase << (U32)wc; - - U8 out_bytes[8]; - U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes); - - if (size > 1) - { - oss << " ["; - for (U32 i = 0; i < size; ++i) - { - if (i) - { - oss << ", "; - } - oss << (int)out_bytes[i]; - } - oss << "]"; - } - - return oss.str(); -} - -S32 wstring_utf8_length(const LLWString& wstr) -{ - S32 len = 0; - for (S32 i = 0; i < (S32)wstr.length(); i++) - { - len += wchar_utf8_length(wstr[i]); - } - return len; -} - -LLWString utf8str_to_wstring(const char* utf8str, size_t len) -{ - LLWString wout; - - S32 i = 0; - while (i < len) - { - llwchar unichar; - U8 cur_char = utf8str[i]; - - if (cur_char < 0x80) - { - // Ascii character, just add it - unichar = cur_char; - } - else - { - S32 cont_bytes = 0; - if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32 - { - unichar = (0x1F&cur_char); - cont_bytes = 1; - } - else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32 - { - unichar = (0x0F&cur_char); - cont_bytes = 2; - } - else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32 - { - unichar = (0x07&cur_char); - cont_bytes = 3; - } - else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32 - { - unichar = (0x03&cur_char); - cont_bytes = 4; - } - else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32 - { - unichar = (0x01&cur_char); - cont_bytes = 5; - } - else - { - wout += LL_UNKNOWN_CHAR; - ++i; - continue; - } - - // Check that this character doesn't go past the end of the string - auto end = (len < (i + cont_bytes)) ? len : (i + cont_bytes); - do - { - ++i; - - cur_char = utf8str[i]; - if ( (cur_char >> 6) == 0x2 ) - { - unichar <<= 6; - unichar += (0x3F&cur_char); - } - else - { - // Malformed sequence - roll back to look at this as a new char - unichar = LL_UNKNOWN_CHAR; - --i; - break; - } - } while(i < end); - - // Handle overlong characters and NULL characters - if ( ((cont_bytes == 1) && (unichar < 0x80)) - || ((cont_bytes == 2) && (unichar < 0x800)) - || ((cont_bytes == 3) && (unichar < 0x10000)) - || ((cont_bytes == 4) && (unichar < 0x200000)) - || ((cont_bytes == 5) && (unichar < 0x4000000)) ) - { - unichar = LL_UNKNOWN_CHAR; - } - } - - wout += unichar; - ++i; - } - return wout; -} - -std::string wstring_to_utf8str(const llwchar* utf32str, size_t len) -{ - std::string out; - - S32 i = 0; - while (i < len) - { - char tchars[8]; /* Flawfinder: ignore */ - auto n = wchar_to_utf8chars(utf32str[i], tchars); - tchars[n] = 0; - out += tchars; - i++; - } - return out; -} - -std::string utf16str_to_utf8str(const U16* utf16str, size_t len) -{ - return wstring_to_utf8str(utf16str_to_wstring(utf16str, len)); -} - -std::string utf8str_trim(const std::string& utf8str) -{ - LLWString wstr = utf8str_to_wstring(utf8str); - LLWStringUtil::trim(wstr); - return wstring_to_utf8str(wstr); -} - - -std::string utf8str_tolower(const std::string& utf8str) -{ - LLWString out_str = utf8str_to_wstring(utf8str); - LLWStringUtil::toLower(out_str); - return wstring_to_utf8str(out_str); -} - - -S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs) -{ - LLWString wlhs = utf8str_to_wstring(lhs); - LLWString wrhs = utf8str_to_wstring(rhs); - return LLWStringUtil::compareInsensitive(wlhs, wrhs); -} - -std::string utf8str_truncate(const std::string& utf8str, const S32 max_len) -{ - if (0 == max_len) - { - return std::string(); - } - if ((S32)utf8str.length() <= max_len) - { - return utf8str; - } - else - { - S32 cur_char = max_len; - - // If we're ASCII, we don't need to do anything - if ((U8)utf8str[cur_char] > 0x7f) - { - // If first two bits are (10), it's the tail end of a multibyte char. We need to shift back - // to the first character - while (0x80 == (0xc0 & utf8str[cur_char])) - { - cur_char--; - // Keep moving forward until we hit the first char; - if (cur_char == 0) - { - // Make sure we don't trash memory if we've got a bogus string. - break; - } - } - } - // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars - return utf8str.substr(0, cur_char); - } -} - -std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len) -{ - if (0 == symbol_len) - { - return std::string(); - } - if ((S32)utf8str.length() <= symbol_len) - { - return utf8str; - } - else - { - int len = 0, byteIndex = 0; - const char* aStr = utf8str.c_str(); - size_t origSize = utf8str.size(); - - for (byteIndex = 0; len < symbol_len && byteIndex < origSize; byteIndex++) - { - if ((aStr[byteIndex] & 0xc0) != 0x80) - { - len += 1; - } - } - return utf8str.substr(0, byteIndex); - } -} - -std::string utf8str_substChar( - const std::string& utf8str, - const llwchar target_char, - const llwchar replace_char) -{ - LLWString wstr = utf8str_to_wstring(utf8str); - LLWStringUtil::replaceChar(wstr, target_char, replace_char); - //wstr = wstring_substChar(wstr, target_char, replace_char); - return wstring_to_utf8str(wstr); -} - -std::string utf8str_makeASCII(const std::string& utf8str) -{ - LLWString wstr = utf8str_to_wstring(utf8str); - LLWStringUtil::_makeASCII(wstr); - return wstring_to_utf8str(wstr); -} - -std::string mbcsstring_makeASCII(const std::string& wstr) -{ - // Replace non-ASCII chars with replace_char - std::string out_str = wstr; - for (S32 i = 0; i < (S32)out_str.length(); i++) - { - if ((U8)out_str[i] > 0x7f) - { - out_str[i] = LL_UNKNOWN_CHAR; - } - } - return out_str; -} - -std::string utf8str_removeCRLF(const std::string& utf8str) -{ - if (0 == utf8str.length()) - { - return std::string(); - } - const char CR = 13; - - std::string out; - out.reserve(utf8str.length()); - const S32 len = (S32)utf8str.length(); - for( S32 i = 0; i < len; i++ ) - { - if( utf8str[i] != CR ) - { - out.push_back(utf8str[i]); - } - } - return out; -} - -llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length) -{ - switch (length) - { - case 2: - return ((utf8str[offset] & 0x1F) << 6) + - (utf8str[offset + 1] & 0x3F); - case 3: - return ((utf8str[offset] & 0x0F) << 12) + - ((utf8str[offset + 1] & 0x3F) << 6) + - (utf8str[offset + 2] & 0x3F); - case 4: - return ((utf8str[offset] & 0x07) << 18) + - ((utf8str[offset + 1] & 0x3F) << 12) + - ((utf8str[offset + 2] & 0x3F) << 6) + - (utf8str[offset + 3] & 0x3F); - case 5: - return ((utf8str[offset] & 0x03) << 24) + - ((utf8str[offset + 1] & 0x3F) << 18) + - ((utf8str[offset + 2] & 0x3F) << 12) + - ((utf8str[offset + 3] & 0x3F) << 6) + - (utf8str[offset + 4] & 0x3F); - case 6: - return ((utf8str[offset] & 0x01) << 30) + - ((utf8str[offset + 1] & 0x3F) << 24) + - ((utf8str[offset + 2] & 0x3F) << 18) + - ((utf8str[offset + 3] & 0x3F) << 12) + - ((utf8str[offset + 4] & 0x3F) << 6) + - (utf8str[offset + 5] & 0x3F); - case 7: - return ((utf8str[offset + 1] & 0x03) << 30) + - ((utf8str[offset + 2] & 0x3F) << 24) + - ((utf8str[offset + 3] & 0x3F) << 18) + - ((utf8str[offset + 4] & 0x3F) << 12) + - ((utf8str[offset + 5] & 0x3F) << 6) + - (utf8str[offset + 6] & 0x3F); - } - return LL_UNKNOWN_CHAR; -} - -std::string utf8str_showBytesUTF8(const std::string& utf8str) -{ - std::string result; - - bool in_sequence = false; - size_t sequence_size = 0; - size_t byte_index = 0; - size_t source_length = utf8str.size(); - - auto open_sequence = [&]() - { - if (!result.empty() && result.back() != '\n') - result += '\n'; // Use LF as a separator before new UTF-8 sequence - result += '['; - in_sequence = true; - }; - - auto close_sequence = [&]() - { - llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size); - if (unicode != LL_UNKNOWN_CHAR) - { - result += llformat("+%04X", unicode); - } - result += ']'; - in_sequence = false; - sequence_size = 0; - }; - - while (byte_index < source_length) - { - U8 byte = utf8str[byte_index]; - if (byte >= 0x80) // Part of an UTF-8 sequence - { - if (!in_sequence) // Start new UTF-8 sequence - { - open_sequence(); - } - else if (byte >= 0xC0) // Start another UTF-8 sequence - { - close_sequence(); - open_sequence(); - } - else // Continue the same UTF-8 sequence - { - result += '.'; - } - result += llformat("%02X", byte); // The byte is represented in hexadecimal form - ++sequence_size; - } - else // ASCII symbol is represented as a character - { - if (in_sequence) // End of UTF-8 sequence - { - close_sequence(); - if (byte != '\n') - { - result += '\n'; // Use LF as a separator between UTF-8 and ASCII - } - } - result += byte; - } - ++byte_index; - } - - if (in_sequence) // End of UTF-8 sequence - { - close_sequence(); - } - - return result; -} - -// Search for any emoji symbol, return true if found -bool wstring_has_emoji(const LLWString& wstr) -{ - for (const llwchar& wch : wstr) - { - if (LLStringOps::isEmoji(wch)) - return true; - } - - return false; -} - -// Cut emoji symbols if exist -bool wstring_remove_emojis(LLWString& wstr) -{ - bool found = false; - for (size_t i = 0; i < wstr.size(); ++i) - { - if (LLStringOps::isEmoji(wstr[i])) - { - wstr.erase(i--, 1); - found = true; - } - } - return found; -} - -// Cut emoji symbols if exist -bool utf8str_remove_emojis(std::string& utf8str) -{ - LLWString wstr = utf8str_to_wstring(utf8str); - if (!wstring_remove_emojis(wstr)) - return false; - utf8str = wstring_to_utf8str(wstr); - return true; -} - -#if LL_WINDOWS -unsigned int ll_wstring_default_code_page() -{ - return CP_UTF8; -} - -std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page) -{ - std::string out; - if(in) - { - int len_out = WideCharToMultiByte( - code_page, - 0, - in, - len_in, - NULL, - 0, - 0, - 0); - // We will need two more bytes for the double NULL ending - // created in WideCharToMultiByte(). - char* pout = new char [len_out + 2]; - memset(pout, 0, len_out + 2); - if(pout) - { - WideCharToMultiByte( - code_page, - 0, - in, - len_in, - pout, - len_out, - 0, - 0); - out.assign(pout); - delete[] pout; - } - } - return out; -} - -std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page) -{ - // From review: - // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, - // plus one for a null terminator, and be guaranteed to not overflow. - - // Normally, I'd call that sort of thing premature optimization, - // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets. -// int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0); - - // reserve an output buffer that will be destroyed on exit, with a place - // to put NULL terminator - std::vector<wchar_t> w_out(len + 1); - - memset(&w_out[0], 0, w_out.size()); - int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len, - &w_out[0], w_out.size() - 1); - - //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. - w_out[real_output_str_len] = 0; - - // construct string<wchar_t> from our temporary output buffer - return {&w_out[0]}; -} - -LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len) -{ - // Whether or not std::wstring and llutf16string are distinct types, they - // both hold UTF-16LE characters. (See header file comments.) Pretend this - // wchar_t* sequence is really a U16* sequence and use the conversion we - // define above. - return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len); -} - -std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len) -{ - // first, convert to llutf16string, for which we have a real implementation - auto utf16str{ wstring_to_utf16str(in, len) }; - // then, because each U16 char must be UTF-16LE encoded, pretend the U16* - // string pointer is a wchar_t* and instantiate a std::wstring of the same - // length. - return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() }; -} - -std::string ll_convert_string_to_utf8_string(const std::string& in) -{ - // If you pass code_page, you must also pass length, otherwise the code - // page parameter will be mistaken for length. - auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP); - // CP_UTF8 is default -- see ll_wstring_default_code_page() above. - return ll_convert_wide_to_string(w_mesg); -} - -namespace -{ - -void HeapFree_deleter(void* ptr) -{ - // instead of LocalFree(), per https://stackoverflow.com/a/31541205 - HeapFree(GetProcessHeap(), NULL, ptr); -} - -} // anonymous namespace - -template<> -std::wstring windows_message<std::wstring>(DWORD error) -{ - // derived from https://stackoverflow.com/a/455533 - wchar_t* rawptr = nullptr; - auto okay = FormatMessageW( - // use system message tables for GetLastError() codes - FORMAT_MESSAGE_FROM_SYSTEM | - // internally allocate buffer and return its pointer - FORMAT_MESSAGE_ALLOCATE_BUFFER | - // you cannot pass insertion parameters (thanks Gandalf) - FORMAT_MESSAGE_IGNORE_INSERTS | - // ignore line breaks in message definition text - FORMAT_MESSAGE_MAX_WIDTH_MASK, - NULL, // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM - error, // dwMessageId - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId - (LPWSTR)&rawptr, // lpBuffer: force-cast wchar_t** to wchar_t* - 0, // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER - NULL); // Arguments, unused - - // make a unique_ptr from rawptr so it gets cleaned up properly - std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter); - - if (okay && bufferptr) - { - // got the message, return it ('okay' is length in characters) - return { bufferptr.get(), okay }; - } - - // did not get the message, synthesize one - auto format_message_error = GetLastError(); - std::wostringstream out; - out << L"GetLastError() " << error << L" (FormatMessageW() failed with " - << format_message_error << L")"; - return out.str(); -} - -boost::optional<std::wstring> llstring_getoptenv(const std::string& key) -{ - auto wkey = ll_convert_string_to_wide(key); - // Take a wild guess as to how big the buffer should be. - std::vector<wchar_t> buffer(1024); - auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); - // If our initial guess was too short, n will indicate the size (in - // wchar_t's) that buffer should have been, including the terminating nul. - if (n > (buffer.size() - 1)) - { - // make it big enough - buffer.resize(n); - // and try again - n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); - } - // did that (ultimately) succeed? - if (n) - { - // great, return populated boost::optional - return boost::optional<std::wstring>(&buffer[0]); - } - - // not successful - auto last_error = GetLastError(); - // Don't bother warning for NOT_FOUND; that's an expected case - if (last_error != ERROR_ENVVAR_NOT_FOUND) - { - LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: " - << windows_message<std::string>(last_error) << LL_ENDL; - } - // return empty boost::optional - return {}; -} - -#else // ! LL_WINDOWS - -boost::optional<std::string> llstring_getoptenv(const std::string& key) -{ - auto found = getenv(key.c_str()); - if (found) - { - // return populated boost::optional - return boost::optional<std::string>(found); - } - else - { - // return empty boost::optional - return {}; - } -} - -#endif // ! LL_WINDOWS - -long LLStringOps::sPacificTimeOffset = 0; -long LLStringOps::sLocalTimeOffset = 0; -bool LLStringOps::sPacificDaylightTime = 0; -std::map<std::string, std::string> LLStringOps::datetimeToCodes; - -std::vector<std::string> LLStringOps::sWeekDayList; -std::vector<std::string> LLStringOps::sWeekDayShortList; -std::vector<std::string> LLStringOps::sMonthList; -std::vector<std::string> LLStringOps::sMonthShortList; - - -std::string LLStringOps::sDayFormat; -std::string LLStringOps::sAM; -std::string LLStringOps::sPM; - -// static -bool LLStringOps::isEmoji(llwchar a) -{ -#if 0 // Do not consider special characters that might have a corresponding - // glyph in the monochorme fallback fonts as a "genuine" emoji. HB - return a == 0xa9 || a == 0xae || (a >= 0x2000 && a < 0x3300) || - (a >= 0x1f000 && a < 0x20000); -#else - // These are indeed "genuine" emojis, we *do want* rendered as such. HB - return a >= 0x1f000 && a < 0x20000; -#endif -} - -S32 LLStringOps::collate(const llwchar* a, const llwchar* b) -{ - #if LL_WINDOWS - // in Windows, wide string functions operator on 16-bit strings, - // not the proper 32 bit wide string - return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str()); - #else - return wcscoll(a, b); - #endif -} - -void LLStringOps::setupDatetimeInfo (bool daylight) -{ - time_t nowT, localT, gmtT; - struct tm * tmpT; - - nowT = time (NULL); - - tmpT = gmtime (&nowT); - gmtT = mktime (tmpT); - - tmpT = localtime (&nowT); - localT = mktime (tmpT); - - sLocalTimeOffset = (long) (gmtT - localT); - if (tmpT->tm_isdst) - { - sLocalTimeOffset -= 60 * 60; // 1 hour - } - - sPacificDaylightTime = daylight; - sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60; - - datetimeToCodes["wkday"] = "%a"; // Thu - datetimeToCodes["weekday"] = "%A"; // Thursday - datetimeToCodes["year4"] = "%Y"; // 2009 - datetimeToCodes["year"] = "%Y"; // 2009 - datetimeToCodes["year2"] = "%y"; // 09 - datetimeToCodes["mth"] = "%b"; // Aug - datetimeToCodes["month"] = "%B"; // August - datetimeToCodes["mthnum"] = "%m"; // 08 - datetimeToCodes["day"] = "%d"; // 31 - datetimeToCodes["sday"] = "%-d"; // 9 - datetimeToCodes["hour24"] = "%H"; // 14 - datetimeToCodes["hour"] = "%H"; // 14 - datetimeToCodes["hour12"] = "%I"; // 02 - datetimeToCodes["min"] = "%M"; // 59 - datetimeToCodes["ampm"] = "%p"; // AM - datetimeToCodes["second"] = "%S"; // 59 - datetimeToCodes["timezone"] = "%Z"; // PST -} - -void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output) -{ - output.clear(); - size_t length = data.size(); - - // tokenize it and put it in the array - std::string cur_word; - for(size_t i = 0; i < length; ++i) - { - if(data[i] == ':') - { - output.push_back(cur_word); - cur_word.clear(); - } - else - { - cur_word.append(1, data[i]); - } - } - output.push_back(cur_word); -} - -void LLStringOps::setupWeekDaysNames(const std::string& data) -{ - tokenizeStringToArray(data,sWeekDayList); -} -void LLStringOps::setupWeekDaysShortNames(const std::string& data) -{ - tokenizeStringToArray(data,sWeekDayShortList); -} -void LLStringOps::setupMonthNames(const std::string& data) -{ - tokenizeStringToArray(data,sMonthList); -} -void LLStringOps::setupMonthShortNames(const std::string& data) -{ - tokenizeStringToArray(data,sMonthShortList); -} -void LLStringOps::setupDayFormat(const std::string& data) -{ - sDayFormat = data; -} - - -std::string LLStringOps::getDatetimeCode (std::string key) -{ - std::map<std::string, std::string>::iterator iter; - - iter = datetimeToCodes.find (key); - if (iter != datetimeToCodes.end()) - { - return iter->second; - } - else - { - return std::string(""); - } -} - -std::string LLStringOps::getReadableNumber(F64 num) -{ - if (fabs(num)>=1e9) - { - return llformat("%.2lfB", num / 1e9); - } - else if (fabs(num)>=1e6) - { - return llformat("%.2lfM", num / 1e6); - } - else if (fabs(num)>=1e3) - { - return llformat("%.2lfK", num / 1e3); - } - else - { - return llformat("%.2lf", num); - } -} - -namespace LLStringFn -{ - // NOTE - this restricts output to ascii - void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement) - { - const char MIN = 0x20; - std::basic_string<char>::size_type len = string.size(); - for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) - { - if(string[ii] < MIN) - { - string[ii] = replacement; - } - } - } - - - // NOTE - this restricts output to ascii - void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str, - char replacement) - { - const char MIN = 0x20; - const char PIPE = 0x7c; - std::basic_string<char>::size_type len = str.size(); - for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) - { - if( (str[ii] < MIN) || (str[ii] == PIPE) ) - { - str[ii] = replacement; - } - } - } - - // https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on - // allowable code points for XML. Specifically, they are: - // 0x09, 0x0a, 0x0d, and 0x20 on up. JC - std::string strip_invalid_xml(const std::string& instr) - { - std::string output; - output.reserve( instr.size() ); - std::string::const_iterator it = instr.begin(); - while (it != instr.end()) - { - // Must compare as unsigned for >= - // Test most likely match first - const unsigned char c = (unsigned char)*it; - if ( c >= (unsigned char)0x20 // SPACE - || c == (unsigned char)0x09 // TAB - || c == (unsigned char)0x0a // LINE_FEED - || c == (unsigned char)0x0d ) // CARRIAGE_RETURN - { - output.push_back(c); - } - ++it; - } - return output; - } - - /** - * @brief Replace all control characters (c < 0x20) with replacement in - * string. - */ - void replace_ascii_controlchars(std::basic_string<char>& string, char replacement) - { - const unsigned char MIN = 0x20; - std::basic_string<char>::size_type len = string.size(); - for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) - { - const unsigned char c = (unsigned char) string[ii]; - if(c < MIN) - { - string[ii] = replacement; - } - } - } -} - -//////////////////////////////////////////////////////////// - -// Forward specialization of LLStringUtil::format before use in LLStringUtil::formatDatetime. -template<> -S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions); - -//static -template<> -void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims) -{ - // Starting at offset 0, scan forward for the next non-delimiter. We're - // done when the only characters left in 'instr' are delimiters. - for (std::string::size_type begIdx, endIdx = 0; - (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; ) - { - // Found a non-delimiter. After that, find the next delimiter. - endIdx = instr.find_first_of (delims, begIdx); - if (endIdx == std::string::npos) - { - // No more delimiters: this token extends to the end of the string. - endIdx = instr.length(); - } - - // extract the token between begIdx and endIdx; substr() needs length - std::string currToken(instr.substr(begIdx, endIdx - begIdx)); - LLStringUtil::trim (currToken); - tokens.push_back(currToken); - // next scan past delimiters starts at endIdx - } -} - -template<> -LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens) -{ - const std::string delims (","); - - // Find the first [ - size_type pos1 = instr.find('[', start); - if (pos1 == std::string::npos) - return std::string::npos; - - //Find the first ] after the initial [ - size_type pos2 = instr.find(']', pos1); - if (pos2 == std::string::npos) - return std::string::npos; - - // Find the last [ before ] in case of nested [[]] - pos1 = instr.find_last_of('[', pos2-1); - if (pos1 == std::string::npos || pos1 < start) - return std::string::npos; - - getTokens(std::string(instr,pos1+1,pos2-pos1-1), tokens, delims); - start = pos2+1; - - return pos1; -} - -// static -template<> -bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions) -{ - // see if we have a replacement for the bracketed string (without the brackets) - // test first using has() because if we just look up with operator[] we get back an - // empty string even if the value is missing. We want to distinguish between - // missing replacements and deliberately empty replacement strings. - format_map_t::const_iterator iter = substitutions.find(token); - if (iter != substitutions.end()) - { - replacement = iter->second; - return true; - } - // if not, see if there's one WITH brackets - iter = substitutions.find(std::string("[" + token + "]")); - if (iter != substitutions.end()) - { - replacement = iter->second; - return true; - } - - return false; -} - -// static -template<> -bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions) -{ - // see if we have a replacement for the bracketed string (without the brackets) - // test first using has() because if we just look up with operator[] we get back an - // empty string even if the value is missing. We want to distinguish between - // missing replacements and deliberately empty replacement strings. - if (substitutions.has(token)) - { - replacement = substitutions[token].asString(); - return true; - } - // if not, see if there's one WITH brackets - else if (substitutions.has(std::string("[" + token + "]"))) - { - replacement = substitutions[std::string("[" + token + "]")].asString(); - return true; - } - - return false; -} - -//static -template<> -void LLStringUtil::setLocale(std::string inLocale) -{ - sLocale = inLocale; -}; - -//static -template<> -std::string LLStringUtil::getLocale(void) -{ - return sLocale; -}; - -// static -template<> -void LLStringUtil::formatNumber(std::string& numStr, std::string decimals) -{ - std::stringstream strStream; - S32 intDecimals = 0; - - convertToS32 (decimals, intDecimals); - if (!sLocale.empty()) - { - // std::locale() throws if the locale is unknown! (EXT-7926) - try - { - strStream.imbue(std::locale(sLocale.c_str())); - } catch (const std::exception &) - { - LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL; - } - } - - if (!intDecimals) - { - S32 intStr; - - if (convertToS32(numStr, intStr)) - { - strStream << intStr; - numStr = strStream.str(); - } - } - else - { - F32 floatStr; - - if (convertToF32(numStr, floatStr)) - { - strStream << std::fixed << std::showpoint << std::setprecision(intDecimals) << floatStr; - numStr = strStream.str(); - } - } -} - -// static -template<> -bool LLStringUtil::formatDatetime(std::string& replacement, std::string token, - std::string param, S32 secFromEpoch) -{ - if (param == "local") // local - { - secFromEpoch -= LLStringOps::getLocalTimeOffset(); - } - else if (param != "utc") // slt - { - secFromEpoch -= LLStringOps::getPacificTimeOffset(); - } - - // if never fell into those two ifs above, param must be utc - if (secFromEpoch < 0) secFromEpoch = 0; - - LLDate datetime((F64)secFromEpoch); - std::string code = LLStringOps::getDatetimeCode (token); - - // special case to handle timezone - if (code == "%Z") { - if (param == "utc") - { - replacement = "GMT"; - } - else if (param == "local") - { - replacement = ""; // user knows their own timezone - } - else - { -#if 0 - // EXT-1565 : Zai Lynch, James Linden : 15/Oct/09 - // [BSI] Feedback: Viewer clock mentions SLT, but would prefer it to show PST/PDT - // "slt" = Second Life Time, which is deprecated. - // If not utc or user local time, fallback to Pacific time - replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST"; -#else - // SL-20370 : Steeltoe Linden : 29/Sep/23 - // Change "PDT" to "SLT" on menu bar - replacement = "SLT"; -#endif - } - return true; - } - - //EXT-7013 - //few codes are not suppotred by strtime function (example - weekdays for Japanise) - //so use predefined ones - - //if sWeekDayList is not empty than current locale doesn't support - //weekday name. - time_t loc_seconds = (time_t) secFromEpoch; - if(LLStringOps::sWeekDayList.size() == 7 && code == "%A") - { - struct tm * gmt = gmtime (&loc_seconds); - replacement = LLStringOps::sWeekDayList[gmt->tm_wday]; - } - else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a") - { - struct tm * gmt = gmtime (&loc_seconds); - replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday]; - } - else if(LLStringOps::sMonthList.size() == 12 && code == "%B") - { - struct tm * gmt = gmtime (&loc_seconds); - replacement = LLStringOps::sMonthList[gmt->tm_mon]; - } - else if( !LLStringOps::sDayFormat.empty() && code == "%d" ) - { - struct tm * gmt = gmtime (&loc_seconds); - LLStringUtil::format_map_t args; - args["[MDAY]"] = llformat ("%d", gmt->tm_mday); - replacement = LLStringOps::sDayFormat; - LLStringUtil::format(replacement, args); - } - else if (code == "%-d") - { - struct tm * gmt = gmtime (&loc_seconds); - replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero - } - else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" ) - { - struct tm * gmt = gmtime (&loc_seconds); - if(gmt->tm_hour<12) - { - replacement = LLStringOps::sAM; - } - else - { - replacement = LLStringOps::sPM; - } - } - else - { - replacement = datetime.toHTTPDateString(code); - } - - // *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format - // to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738). - // We could have used '%l' format instead, but it's not supported by Windows. - if(code == "%I" && token == "hour12" && replacement.at(0) == '0') - { - replacement = replacement.at(1); - } - - return !code.empty(); -} - -// LLStringUtil::format recogizes the following patterns. -// All substitutions *must* be encased in []'s in the input string. -// The []'s are optional in the substitution map. -// [FOO_123] -// [FOO,number,precision] -// [FOO,datetime,format] - - -// static -template<> -S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) -{ - LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING; - S32 res = 0; - - std::string output; - std::vector<std::string> tokens; - - std::string::size_type start = 0; - std::string::size_type prev_start = 0; - std::string::size_type key_start = 0; - while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) - { - output += std::string(s, prev_start, key_start-prev_start); - prev_start = start; - - bool found_replacement = false; - std::string replacement; - - if (tokens.size() == 0) - { - found_replacement = false; - } - else if (tokens.size() == 1) - { - found_replacement = simpleReplacement (replacement, tokens[0], substitutions); - } - else if (tokens[1] == "number") - { - std::string param = "0"; - - if (tokens.size() > 2) param = tokens[2]; - found_replacement = simpleReplacement (replacement, tokens[0], substitutions); - if (found_replacement) formatNumber (replacement, param); - } - else if (tokens[1] == "datetime") - { - std::string param; - if (tokens.size() > 2) param = tokens[2]; - - format_map_t::const_iterator iter = substitutions.find("datetime"); - if (iter != substitutions.end()) - { - S32 secFromEpoch = 0; - BOOL r = LLStringUtil::convertToS32(iter->second, secFromEpoch); - if (r) - { - found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch); - } - } - } - - if (found_replacement) - { - output += replacement; - res++; - } - else - { - // we had no replacement, use the string as is - // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" - output += std::string(s, key_start, start-key_start); - } - tokens.clear(); - } - // send the remainder of the string (with no further matches for bracketed names) - output += std::string(s, start); - s = output; - return res; -} - -//static -template<> -S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) -{ - LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING; - S32 res = 0; - - if (!substitutions.isMap()) - { - return res; - } - - std::string output; - std::vector<std::string> tokens; - - std::string::size_type start = 0; - std::string::size_type prev_start = 0; - std::string::size_type key_start = 0; - while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) - { - output += std::string(s, prev_start, key_start-prev_start); - prev_start = start; - - bool found_replacement = false; - std::string replacement; - - if (tokens.size() == 0) - { - found_replacement = false; - } - else if (tokens.size() == 1) - { - found_replacement = simpleReplacement (replacement, tokens[0], substitutions); - } - else if (tokens[1] == "number") - { - std::string param = "0"; - - if (tokens.size() > 2) param = tokens[2]; - found_replacement = simpleReplacement (replacement, tokens[0], substitutions); - if (found_replacement) formatNumber (replacement, param); - } - else if (tokens[1] == "datetime") - { - std::string param; - if (tokens.size() > 2) param = tokens[2]; - - S32 secFromEpoch = (S32) substitutions["datetime"].asInteger(); - found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch); - } - - if (found_replacement) - { - output += replacement; - res++; - } - else - { - // we had no replacement, use the string as is - // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" - output += std::string(s, key_start, start-key_start); - } - tokens.clear(); - } - // send the remainder of the string (with no further matches for bracketed names) - output += std::string(s, start); - s = output; - return res; -} - -//////////////////////////////////////////////////////////// -// Testing - -#ifdef _DEBUG - -template<class T> -void LLStringUtilBase<T>::testHarness() -{ - std::string s1; - - llassert( s1.c_str() == NULL ); - llassert( s1.size() == 0 ); - llassert( s1.empty() ); - - std::string s2( "hello"); - llassert( !strcmp( s2.c_str(), "hello" ) ); - llassert( s2.size() == 5 ); - llassert( !s2.empty() ); - std::string s3( s2 ); - - llassert( "hello" == s2 ); - llassert( s2 == "hello" ); - llassert( s2 > "gello" ); - llassert( "gello" < s2 ); - llassert( "gello" != s2 ); - llassert( s2 != "gello" ); - - std::string s4 = s2; - llassert( !s4.empty() ); - s4.empty(); - llassert( s4.empty() ); - - std::string s5(""); - llassert( s5.empty() ); - - llassert( isValidIndex(s5, 0) ); - llassert( !isValidIndex(s5, 1) ); - - s3 = s2; - s4 = "hello again"; - - s4 += "!"; - s4 += s4; - llassert( s4 == "hello again!hello again!" ); - - - std::string s6 = s2 + " " + s2; - std::string s7 = s6; - llassert( s6 == s7 ); - llassert( !( s6 != s7) ); - llassert( !(s6 < s7) ); - llassert( !(s6 > s7) ); - - llassert( !(s6 == "hi")); - llassert( s6 == "hello hello"); - llassert( s6 < "hi"); - - llassert( s6[1] == 'e' ); - s6[1] = 'f'; - llassert( s6[1] == 'f' ); - - s2.erase( 4, 1 ); - llassert( s2 == "hell"); - s2.insert( 0, "y" ); - llassert( s2 == "yhell"); - s2.erase( 1, 3 ); - llassert( s2 == "yl"); - s2.insert( 1, "awn, don't yel"); - llassert( s2 == "yawn, don't yell"); - - std::string s8 = s2.substr( 6, 5 ); - llassert( s8 == "don't" ); - - std::string s9 = " \t\ntest \t\t\n "; - trim(s9); - llassert( s9 == "test" ); - - s8 = "abc123&*(ABC"; - - s9 = s8; - toUpper(s9); - llassert( s9 == "ABC123&*(ABC" ); - - s9 = s8; - toLower(s9); - llassert( s9 == "abc123&*(abc" ); - - - std::string s10( 10, 'x' ); - llassert( s10 == "xxxxxxxxxx" ); - - std::string s11( "monkey in the middle", 7, 2 ); - llassert( s11 == "in" ); - - std::string s12; //empty - s12 += "foo"; - llassert( s12 == "foo" ); - - std::string s13; //empty - s13 += 'f'; - llassert( s13 == "f" ); -} - - -#endif // _DEBUG +/**
+ * @file llstring.cpp
+ * @brief String utility functions and the std::string class.
+ *
+ * $LicenseInfo:firstyear=2001&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+
+#include "linden_common.h"
+
+#include "llstring.h"
+#include "llerror.h"
+#include "llfasttimer.h"
+#include "llsd.h"
+#include <vector>
+
+#if LL_WINDOWS
+#include "llwin32headerslean.h"
+#include <winnls.h> // for WideCharToMultiByte
+#endif
+
+std::string ll_safe_string(const char* in)
+{
+ if(in) return std::string(in);
+ return std::string();
+}
+
+std::string ll_safe_string(const char* in, S32 maxlen)
+{
+ if(in && maxlen > 0 ) return std::string(in, maxlen);
+
+ return std::string();
+}
+
+bool is_char_hex(char hex)
+{
+ if((hex >= '0') && (hex <= '9'))
+ {
+ return true;
+ }
+ else if((hex >= 'a') && (hex <='f'))
+ {
+ return true;
+ }
+ else if((hex >= 'A') && (hex <='F'))
+ {
+ return true;
+ }
+ return false; // uh - oh, not hex any more...
+}
+
+U8 hex_as_nybble(char hex)
+{
+ if((hex >= '0') && (hex <= '9'))
+ {
+ return (U8)(hex - '0');
+ }
+ else if((hex >= 'a') && (hex <='f'))
+ {
+ return (U8)(10 + hex - 'a');
+ }
+ else if((hex >= 'A') && (hex <='F'))
+ {
+ return (U8)(10 + hex - 'A');
+ }
+ return 0; // uh - oh, not hex any more...
+}
+
+bool iswindividual(llwchar elem)
+{
+ U32 cur_char = (U32)elem;
+ bool result = false;
+ if (0x2E80<= cur_char && cur_char <= 0x9FFF)
+ {
+ result = true;
+ }
+ else if (0xAC00<= cur_char && cur_char <= 0xD7A0 )
+ {
+ result = true;
+ }
+ else if (0xF900<= cur_char && cur_char <= 0xFA60 )
+ {
+ result = true;
+ }
+ return result;
+}
+
+bool _read_file_into_string(std::string& str, const std::string& filename)
+{
+ llifstream ifs(filename.c_str(), llifstream::binary);
+ if (!ifs.is_open())
+ {
+ LL_INFOS() << "Unable to open file " << filename << LL_ENDL;
+ return false;
+ }
+
+ std::ostringstream oss;
+
+ oss << ifs.rdbuf();
+ str = oss.str();
+ ifs.close();
+ return true;
+}
+
+
+
+
+// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
+// for the Unicode implementation - this doesn't match because it was written before finding
+// it.
+
+
+std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
+{
+ std::string utf8_str = wstring_to_utf8str(wstr);
+ s << utf8_str;
+ return s;
+}
+
+std::string rawstr_to_utf8(const std::string& raw)
+{
+ LLWString wstr(utf8str_to_wstring(raw));
+ return wstring_to_utf8str(wstr);
+}
+
+std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars)
+{
+ U32 cur_char = (U32)in_char;
+ char* base = outchars;
+ if (cur_char < 0x80)
+ {
+ *outchars++ = (U8)cur_char;
+ }
+ else if (cur_char < 0x800)
+ {
+ *outchars++ = 0xC0 | (cur_char >> 6);
+ *outchars++ = 0x80 | (cur_char & 0x3F);
+ }
+ else if (cur_char < 0x10000)
+ {
+ *outchars++ = 0xE0 | (cur_char >> 12);
+ *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
+ *outchars++ = 0x80 | (cur_char & 0x3F);
+ }
+ else if (cur_char < 0x200000)
+ {
+ *outchars++ = 0xF0 | (cur_char >> 18);
+ *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
+ *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
+ *outchars++ = 0x80 | (cur_char & 0x3F);
+ }
+ else if (cur_char < 0x4000000)
+ {
+ *outchars++ = 0xF8 | (cur_char >> 24);
+ *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
+ *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
+ *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
+ *outchars++ = 0x80 | (cur_char & 0x3F);
+ }
+ else if (cur_char < 0x80000000)
+ {
+ *outchars++ = 0xFC | (cur_char >> 30);
+ *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
+ *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
+ *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
+ *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
+ *outchars++ = 0x80 | (cur_char & 0x3F);
+ }
+ else
+ {
+ LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL;
+ *outchars++ = LL_UNKNOWN_CHAR;
+ }
+ return outchars - base;
+}
+
+auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
+{
+ const U16* base = inchars;
+ U16 cur_char = *inchars++;
+ llwchar char32 = cur_char;
+ if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
+ {
+ // Surrogates
+ char32 = ((llwchar)(cur_char - 0xD800)) << 10;
+ cur_char = *inchars++;
+ char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
+ }
+ else
+ {
+ char32 = (llwchar)cur_char;
+ }
+ *outchar = char32;
+ return inchars - base;
+}
+
+llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
+{
+ llutf16string out;
+
+ S32 i = 0;
+ while (i < len)
+ {
+ U32 cur_char = utf32str[i];
+ if (cur_char > 0xFFFF)
+ {
+ out += (0xD7C0 + (cur_char >> 10));
+ out += (0xDC00 | (cur_char & 0x3FF));
+ }
+ else
+ {
+ out += cur_char;
+ }
+ i++;
+ }
+ return out;
+}
+
+llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
+{
+ LLWString wstr = utf8str_to_wstring ( utf8str, len );
+ return wstring_to_utf16str ( wstr );
+}
+
+LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
+{
+ LLWString wout;
+ if (len == 0) return wout;
+
+ S32 i = 0;
+ const U16* chars16 = utf16str;
+ while (i < len)
+ {
+ llwchar cur_char;
+ i += utf16chars_to_wchar(chars16+i, &cur_char);
+ wout += cur_char;
+ }
+ return wout;
+}
+
+// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
+S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
+{
+ S32 surrogate_pairs = 0;
+ // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
+ const U16 *const utf16_chars = &(*(utf16str.begin()));
+ S32 i = 0;
+ while (i < utf16_len)
+ {
+ const U16 c = utf16_chars[i++];
+ if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16
+ { // Have first byte of a surrogate pair
+ if (i >= utf16_len)
+ {
+ break;
+ }
+ const U16 d = utf16_chars[i];
+ if (d >= 0xDC00 && d <= 0xDFFF)
+ { // Have valid second byte of a surrogate pair
+ surrogate_pairs++;
+ i++;
+ }
+ }
+ }
+ return utf16_len - surrogate_pairs;
+}
+
+// Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
+S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
+{
+ const S32 end = llmin((S32)wstr.length(), woffset + wlen);
+ if (end < woffset)
+ {
+ return 0;
+ }
+ else
+ {
+ S32 length = end - woffset;
+ for (S32 i = woffset; i < end; i++)
+ {
+ if (wstr[i] >= 0x10000)
+ {
+ length++;
+ }
+ }
+ return length;
+ }
+}
+
+// Given a wstring and an offset in it, returns the length as wstring (i.e.,
+// number of llwchars) of the longest substring that starts at the offset
+// and whose equivalent utf-16 string does not exceeds the given utf16_length.
+S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, bool *unaligned)
+{
+ const auto end = wstr.length();
+ bool u{ false };
+ S32 n = woffset + utf16_length;
+ S32 i = woffset;
+ while (i < end)
+ {
+ if (wstr[i] >= 0x10000)
+ {
+ --n;
+ }
+ if (i >= n)
+ {
+ u = (i > n);
+ break;
+ }
+ i++;
+ }
+ if (unaligned)
+ {
+ *unaligned = u;
+ }
+ return i - woffset;
+}
+
+S32 wchar_utf8_length(const llwchar wc)
+{
+ if (wc < 0x80)
+ {
+ return 1;
+ }
+ else if (wc < 0x800)
+ {
+ return 2;
+ }
+ else if (wc < 0x10000)
+ {
+ return 3;
+ }
+ else if (wc < 0x200000)
+ {
+ return 4;
+ }
+ else if (wc < 0x4000000)
+ {
+ return 5;
+ }
+ else
+ {
+ return 6;
+ }
+}
+
+std::string wchar_utf8_preview(const llwchar wc)
+{
+ std::ostringstream oss;
+ oss << std::hex << std::uppercase << (U32)wc;
+
+ U8 out_bytes[8];
+ U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes);
+
+ if (size > 1)
+ {
+ oss << " [";
+ for (U32 i = 0; i < size; ++i)
+ {
+ if (i)
+ {
+ oss << ", ";
+ }
+ oss << (int)out_bytes[i];
+ }
+ oss << "]";
+ }
+
+ return oss.str();
+}
+
+S32 wstring_utf8_length(const LLWString& wstr)
+{
+ S32 len = 0;
+ for (S32 i = 0; i < (S32)wstr.length(); i++)
+ {
+ len += wchar_utf8_length(wstr[i]);
+ }
+ return len;
+}
+
+LLWString utf8str_to_wstring(const char* utf8str, size_t len)
+{
+ LLWString wout;
+
+ S32 i = 0;
+ while (i < len)
+ {
+ llwchar unichar;
+ U8 cur_char = utf8str[i];
+
+ if (cur_char < 0x80)
+ {
+ // Ascii character, just add it
+ unichar = cur_char;
+ }
+ else
+ {
+ S32 cont_bytes = 0;
+ if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32
+ {
+ unichar = (0x1F&cur_char);
+ cont_bytes = 1;
+ }
+ else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32
+ {
+ unichar = (0x0F&cur_char);
+ cont_bytes = 2;
+ }
+ else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32
+ {
+ unichar = (0x07&cur_char);
+ cont_bytes = 3;
+ }
+ else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32
+ {
+ unichar = (0x03&cur_char);
+ cont_bytes = 4;
+ }
+ else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32
+ {
+ unichar = (0x01&cur_char);
+ cont_bytes = 5;
+ }
+ else
+ {
+ wout += LL_UNKNOWN_CHAR;
+ ++i;
+ continue;
+ }
+
+ // Check that this character doesn't go past the end of the string
+ auto end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
+ do
+ {
+ ++i;
+
+ cur_char = utf8str[i];
+ if ( (cur_char >> 6) == 0x2 )
+ {
+ unichar <<= 6;
+ unichar += (0x3F&cur_char);
+ }
+ else
+ {
+ // Malformed sequence - roll back to look at this as a new char
+ unichar = LL_UNKNOWN_CHAR;
+ --i;
+ break;
+ }
+ } while(i < end);
+
+ // Handle overlong characters and NULL characters
+ if ( ((cont_bytes == 1) && (unichar < 0x80))
+ || ((cont_bytes == 2) && (unichar < 0x800))
+ || ((cont_bytes == 3) && (unichar < 0x10000))
+ || ((cont_bytes == 4) && (unichar < 0x200000))
+ || ((cont_bytes == 5) && (unichar < 0x4000000)) )
+ {
+ unichar = LL_UNKNOWN_CHAR;
+ }
+ }
+
+ wout += unichar;
+ ++i;
+ }
+ return wout;
+}
+
+std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
+{
+ std::string out;
+
+ S32 i = 0;
+ while (i < len)
+ {
+ char tchars[8]; /* Flawfinder: ignore */
+ auto n = wchar_to_utf8chars(utf32str[i], tchars);
+ tchars[n] = 0;
+ out += tchars;
+ i++;
+ }
+ return out;
+}
+
+std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
+{
+ return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
+}
+
+std::string utf8str_trim(const std::string& utf8str)
+{
+ LLWString wstr = utf8str_to_wstring(utf8str);
+ LLWStringUtil::trim(wstr);
+ return wstring_to_utf8str(wstr);
+}
+
+
+std::string utf8str_tolower(const std::string& utf8str)
+{
+ LLWString out_str = utf8str_to_wstring(utf8str);
+ LLWStringUtil::toLower(out_str);
+ return wstring_to_utf8str(out_str);
+}
+
+
+S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
+{
+ LLWString wlhs = utf8str_to_wstring(lhs);
+ LLWString wrhs = utf8str_to_wstring(rhs);
+ return LLWStringUtil::compareInsensitive(wlhs, wrhs);
+}
+
+std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
+{
+ if (0 == max_len)
+ {
+ return std::string();
+ }
+ if ((S32)utf8str.length() <= max_len)
+ {
+ return utf8str;
+ }
+ else
+ {
+ S32 cur_char = max_len;
+
+ // If we're ASCII, we don't need to do anything
+ if ((U8)utf8str[cur_char] > 0x7f)
+ {
+ // If first two bits are (10), it's the tail end of a multibyte char. We need to shift back
+ // to the first character
+ while (0x80 == (0xc0 & utf8str[cur_char]))
+ {
+ cur_char--;
+ // Keep moving forward until we hit the first char;
+ if (cur_char == 0)
+ {
+ // Make sure we don't trash memory if we've got a bogus string.
+ break;
+ }
+ }
+ }
+ // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
+ return utf8str.substr(0, cur_char);
+ }
+}
+
+std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len)
+{
+ if (0 == symbol_len)
+ {
+ return std::string();
+ }
+ if ((S32)utf8str.length() <= symbol_len)
+ {
+ return utf8str;
+ }
+ else
+ {
+ int len = 0, byteIndex = 0;
+ const char* aStr = utf8str.c_str();
+ size_t origSize = utf8str.size();
+
+ for (byteIndex = 0; len < symbol_len && byteIndex < origSize; byteIndex++)
+ {
+ if ((aStr[byteIndex] & 0xc0) != 0x80)
+ {
+ len += 1;
+ }
+ }
+ return utf8str.substr(0, byteIndex);
+ }
+}
+
+std::string utf8str_substChar(
+ const std::string& utf8str,
+ const llwchar target_char,
+ const llwchar replace_char)
+{
+ LLWString wstr = utf8str_to_wstring(utf8str);
+ LLWStringUtil::replaceChar(wstr, target_char, replace_char);
+ //wstr = wstring_substChar(wstr, target_char, replace_char);
+ return wstring_to_utf8str(wstr);
+}
+
+std::string utf8str_makeASCII(const std::string& utf8str)
+{
+ LLWString wstr = utf8str_to_wstring(utf8str);
+ LLWStringUtil::_makeASCII(wstr);
+ return wstring_to_utf8str(wstr);
+}
+
+std::string mbcsstring_makeASCII(const std::string& wstr)
+{
+ // Replace non-ASCII chars with replace_char
+ std::string out_str = wstr;
+ for (S32 i = 0; i < (S32)out_str.length(); i++)
+ {
+ if ((U8)out_str[i] > 0x7f)
+ {
+ out_str[i] = LL_UNKNOWN_CHAR;
+ }
+ }
+ return out_str;
+}
+
+std::string utf8str_removeCRLF(const std::string& utf8str)
+{
+ if (0 == utf8str.length())
+ {
+ return std::string();
+ }
+ const char CR = 13;
+
+ std::string out;
+ out.reserve(utf8str.length());
+ const S32 len = (S32)utf8str.length();
+ for( S32 i = 0; i < len; i++ )
+ {
+ if( utf8str[i] != CR )
+ {
+ out.push_back(utf8str[i]);
+ }
+ }
+ return out;
+}
+
+llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length)
+{
+ switch (length)
+ {
+ case 2:
+ return ((utf8str[offset] & 0x1F) << 6) +
+ (utf8str[offset + 1] & 0x3F);
+ case 3:
+ return ((utf8str[offset] & 0x0F) << 12) +
+ ((utf8str[offset + 1] & 0x3F) << 6) +
+ (utf8str[offset + 2] & 0x3F);
+ case 4:
+ return ((utf8str[offset] & 0x07) << 18) +
+ ((utf8str[offset + 1] & 0x3F) << 12) +
+ ((utf8str[offset + 2] & 0x3F) << 6) +
+ (utf8str[offset + 3] & 0x3F);
+ case 5:
+ return ((utf8str[offset] & 0x03) << 24) +
+ ((utf8str[offset + 1] & 0x3F) << 18) +
+ ((utf8str[offset + 2] & 0x3F) << 12) +
+ ((utf8str[offset + 3] & 0x3F) << 6) +
+ (utf8str[offset + 4] & 0x3F);
+ case 6:
+ return ((utf8str[offset] & 0x01) << 30) +
+ ((utf8str[offset + 1] & 0x3F) << 24) +
+ ((utf8str[offset + 2] & 0x3F) << 18) +
+ ((utf8str[offset + 3] & 0x3F) << 12) +
+ ((utf8str[offset + 4] & 0x3F) << 6) +
+ (utf8str[offset + 5] & 0x3F);
+ case 7:
+ return ((utf8str[offset + 1] & 0x03) << 30) +
+ ((utf8str[offset + 2] & 0x3F) << 24) +
+ ((utf8str[offset + 3] & 0x3F) << 18) +
+ ((utf8str[offset + 4] & 0x3F) << 12) +
+ ((utf8str[offset + 5] & 0x3F) << 6) +
+ (utf8str[offset + 6] & 0x3F);
+ }
+ return LL_UNKNOWN_CHAR;
+}
+
+std::string utf8str_showBytesUTF8(const std::string& utf8str)
+{
+ std::string result;
+
+ bool in_sequence = false;
+ size_t sequence_size = 0;
+ size_t byte_index = 0;
+ size_t source_length = utf8str.size();
+
+ auto open_sequence = [&]()
+ {
+ if (!result.empty() && result.back() != '\n')
+ result += '\n'; // Use LF as a separator before new UTF-8 sequence
+ result += '[';
+ in_sequence = true;
+ };
+
+ auto close_sequence = [&]()
+ {
+ llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size);
+ if (unicode != LL_UNKNOWN_CHAR)
+ {
+ result += llformat("+%04X", unicode);
+ }
+ result += ']';
+ in_sequence = false;
+ sequence_size = 0;
+ };
+
+ while (byte_index < source_length)
+ {
+ U8 byte = utf8str[byte_index];
+ if (byte >= 0x80) // Part of an UTF-8 sequence
+ {
+ if (!in_sequence) // Start new UTF-8 sequence
+ {
+ open_sequence();
+ }
+ else if (byte >= 0xC0) // Start another UTF-8 sequence
+ {
+ close_sequence();
+ open_sequence();
+ }
+ else // Continue the same UTF-8 sequence
+ {
+ result += '.';
+ }
+ result += llformat("%02X", byte); // The byte is represented in hexadecimal form
+ ++sequence_size;
+ }
+ else // ASCII symbol is represented as a character
+ {
+ if (in_sequence) // End of UTF-8 sequence
+ {
+ close_sequence();
+ if (byte != '\n')
+ {
+ result += '\n'; // Use LF as a separator between UTF-8 and ASCII
+ }
+ }
+ result += byte;
+ }
+ ++byte_index;
+ }
+
+ if (in_sequence) // End of UTF-8 sequence
+ {
+ close_sequence();
+ }
+
+ return result;
+}
+
+// Search for any emoji symbol, return true if found
+bool wstring_has_emoji(const LLWString& wstr)
+{
+ for (const llwchar& wch : wstr)
+ {
+ if (LLStringOps::isEmoji(wch))
+ return true;
+ }
+
+ return false;
+}
+
+// Cut emoji symbols if exist
+bool wstring_remove_emojis(LLWString& wstr)
+{
+ bool found = false;
+ for (size_t i = 0; i < wstr.size(); ++i)
+ {
+ if (LLStringOps::isEmoji(wstr[i]))
+ {
+ wstr.erase(i--, 1);
+ found = true;
+ }
+ }
+ return found;
+}
+
+// Cut emoji symbols if exist
+bool utf8str_remove_emojis(std::string& utf8str)
+{
+ LLWString wstr = utf8str_to_wstring(utf8str);
+ if (!wstring_remove_emojis(wstr))
+ return false;
+ utf8str = wstring_to_utf8str(wstr);
+ return true;
+}
+
+#if LL_WINDOWS
+unsigned int ll_wstring_default_code_page()
+{
+ return CP_UTF8;
+}
+
+std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)
+{
+ std::string out;
+ if(in)
+ {
+ int len_out = WideCharToMultiByte(
+ code_page,
+ 0,
+ in,
+ len_in,
+ NULL,
+ 0,
+ 0,
+ 0);
+ // We will need two more bytes for the double NULL ending
+ // created in WideCharToMultiByte().
+ char* pout = new char [len_out + 2];
+ memset(pout, 0, len_out + 2);
+ if(pout)
+ {
+ WideCharToMultiByte(
+ code_page,
+ 0,
+ in,
+ len_in,
+ pout,
+ len_out,
+ 0,
+ 0);
+ out.assign(pout);
+ delete[] pout;
+ }
+ }
+ return out;
+}
+
+std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)
+{
+ // From review:
+ // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
+ // plus one for a null terminator, and be guaranteed to not overflow.
+
+ // Normally, I'd call that sort of thing premature optimization,
+ // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets.
+// int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0);
+
+ // reserve an output buffer that will be destroyed on exit, with a place
+ // to put NULL terminator
+ std::vector<wchar_t> w_out(len + 1);
+
+ memset(&w_out[0], 0, w_out.size());
+ int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,
+ &w_out[0], w_out.size() - 1);
+
+ //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
+ w_out[real_output_str_len] = 0;
+
+ // construct string<wchar_t> from our temporary output buffer
+ return {&w_out[0]};
+}
+
+LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)
+{
+ // Whether or not std::wstring and llutf16string are distinct types, they
+ // both hold UTF-16LE characters. (See header file comments.) Pretend this
+ // wchar_t* sequence is really a U16* sequence and use the conversion we
+ // define above.
+ return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len);
+}
+
+std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)
+{
+ // first, convert to llutf16string, for which we have a real implementation
+ auto utf16str{ wstring_to_utf16str(in, len) };
+ // then, because each U16 char must be UTF-16LE encoded, pretend the U16*
+ // string pointer is a wchar_t* and instantiate a std::wstring of the same
+ // length.
+ return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() };
+}
+
+std::string ll_convert_string_to_utf8_string(const std::string& in)
+{
+ // If you pass code_page, you must also pass length, otherwise the code
+ // page parameter will be mistaken for length.
+ auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP);
+ // CP_UTF8 is default -- see ll_wstring_default_code_page() above.
+ return ll_convert_wide_to_string(w_mesg);
+}
+
+namespace
+{
+
+void HeapFree_deleter(void* ptr)
+{
+ // instead of LocalFree(), per https://stackoverflow.com/a/31541205
+ HeapFree(GetProcessHeap(), NULL, ptr);
+}
+
+} // anonymous namespace
+
+template<>
+std::wstring windows_message<std::wstring>(DWORD error)
+{
+ // derived from https://stackoverflow.com/a/455533
+ wchar_t* rawptr = nullptr;
+ auto okay = FormatMessageW(
+ // use system message tables for GetLastError() codes
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ // internally allocate buffer and return its pointer
+ FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ // you cannot pass insertion parameters (thanks Gandalf)
+ FORMAT_MESSAGE_IGNORE_INSERTS |
+ // ignore line breaks in message definition text
+ FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM
+ error, // dwMessageId
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId
+ (LPWSTR)&rawptr, // lpBuffer: force-cast wchar_t** to wchar_t*
+ 0, // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER
+ NULL); // Arguments, unused
+
+ // make a unique_ptr from rawptr so it gets cleaned up properly
+ std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter);
+
+ if (okay && bufferptr)
+ {
+ // got the message, return it ('okay' is length in characters)
+ return { bufferptr.get(), okay };
+ }
+
+ // did not get the message, synthesize one
+ auto format_message_error = GetLastError();
+ std::wostringstream out;
+ out << L"GetLastError() " << error << L" (FormatMessageW() failed with "
+ << format_message_error << L")";
+ return out.str();
+}
+
+std::optional<std::wstring> llstring_getoptenv(const std::string& key)
+{
+ auto wkey = ll_convert_string_to_wide(key);
+ // Take a wild guess as to how big the buffer should be.
+ std::vector<wchar_t> buffer(1024);
+ auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
+ // If our initial guess was too short, n will indicate the size (in
+ // wchar_t's) that buffer should have been, including the terminating nul.
+ if (n > (buffer.size() - 1))
+ {
+ // make it big enough
+ buffer.resize(n);
+ // and try again
+ n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
+ }
+ // did that (ultimately) succeed?
+ if (n)
+ {
+ // great, return populated std::optional
+ return std::make_optional<std::wstring>(&buffer[0]);
+ }
+
+ // not successful
+ auto last_error = GetLastError();
+ // Don't bother warning for NOT_FOUND; that's an expected case
+ if (last_error != ERROR_ENVVAR_NOT_FOUND)
+ {
+ LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: "
+ << windows_message<std::string>(last_error) << LL_ENDL;
+ }
+ // return empty std::optional
+ return {};
+}
+
+#else // ! LL_WINDOWS
+
+std::optional<std::string> llstring_getoptenv(const std::string& key)
+{
+ auto found = getenv(key.c_str());
+ if (found)
+ {
+ // return populated std::optional
+ return std::make_optional<std::string>(found);
+ }
+ else
+ {
+ // return empty std::optional
+ return {};
+ }
+}
+
+#endif // ! LL_WINDOWS
+
+long LLStringOps::sPacificTimeOffset = 0;
+long LLStringOps::sLocalTimeOffset = 0;
+bool LLStringOps::sPacificDaylightTime = 0;
+std::map<std::string, std::string> LLStringOps::datetimeToCodes;
+
+std::vector<std::string> LLStringOps::sWeekDayList;
+std::vector<std::string> LLStringOps::sWeekDayShortList;
+std::vector<std::string> LLStringOps::sMonthList;
+std::vector<std::string> LLStringOps::sMonthShortList;
+
+
+std::string LLStringOps::sDayFormat;
+std::string LLStringOps::sAM;
+std::string LLStringOps::sPM;
+
+// static
+bool LLStringOps::isEmoji(llwchar a)
+{
+#if 0 // Do not consider special characters that might have a corresponding
+ // glyph in the monochorme fallback fonts as a "genuine" emoji. HB
+ return a == 0xa9 || a == 0xae || (a >= 0x2000 && a < 0x3300) ||
+ (a >= 0x1f000 && a < 0x20000);
+#else
+ // These are indeed "genuine" emojis, we *do want* rendered as such. HB
+ return a >= 0x1f000 && a < 0x20000;
+#endif
+ }
+
+S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
+{
+ #if LL_WINDOWS
+ // in Windows, wide string functions operator on 16-bit strings,
+ // not the proper 32 bit wide string
+ return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
+ #else
+ return wcscoll(a, b);
+ #endif
+}
+
+void LLStringOps::setupDatetimeInfo (bool daylight)
+{
+ time_t nowT, localT, gmtT;
+ struct tm * tmpT;
+
+ nowT = time (NULL);
+
+ tmpT = gmtime (&nowT);
+ gmtT = mktime (tmpT);
+
+ tmpT = localtime (&nowT);
+ localT = mktime (tmpT);
+
+ sLocalTimeOffset = (long) (gmtT - localT);
+ if (tmpT->tm_isdst)
+ {
+ sLocalTimeOffset -= 60 * 60; // 1 hour
+ }
+
+ sPacificDaylightTime = daylight;
+ sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60;
+
+ datetimeToCodes["wkday"] = "%a"; // Thu
+ datetimeToCodes["weekday"] = "%A"; // Thursday
+ datetimeToCodes["year4"] = "%Y"; // 2009
+ datetimeToCodes["year"] = "%Y"; // 2009
+ datetimeToCodes["year2"] = "%y"; // 09
+ datetimeToCodes["mth"] = "%b"; // Aug
+ datetimeToCodes["month"] = "%B"; // August
+ datetimeToCodes["mthnum"] = "%m"; // 08
+ datetimeToCodes["day"] = "%d"; // 31
+ datetimeToCodes["sday"] = "%-d"; // 9
+ datetimeToCodes["hour24"] = "%H"; // 14
+ datetimeToCodes["hour"] = "%H"; // 14
+ datetimeToCodes["hour12"] = "%I"; // 02
+ datetimeToCodes["min"] = "%M"; // 59
+ datetimeToCodes["ampm"] = "%p"; // AM
+ datetimeToCodes["second"] = "%S"; // 59
+ datetimeToCodes["timezone"] = "%Z"; // PST
+}
+
+void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output)
+{
+ output.clear();
+ size_t length = data.size();
+
+ // tokenize it and put it in the array
+ std::string cur_word;
+ for(size_t i = 0; i < length; ++i)
+ {
+ if(data[i] == ':')
+ {
+ output.push_back(cur_word);
+ cur_word.clear();
+ }
+ else
+ {
+ cur_word.append(1, data[i]);
+ }
+ }
+ output.push_back(cur_word);
+}
+
+void LLStringOps::setupWeekDaysNames(const std::string& data)
+{
+ tokenizeStringToArray(data,sWeekDayList);
+}
+void LLStringOps::setupWeekDaysShortNames(const std::string& data)
+{
+ tokenizeStringToArray(data,sWeekDayShortList);
+}
+void LLStringOps::setupMonthNames(const std::string& data)
+{
+ tokenizeStringToArray(data,sMonthList);
+}
+void LLStringOps::setupMonthShortNames(const std::string& data)
+{
+ tokenizeStringToArray(data,sMonthShortList);
+}
+void LLStringOps::setupDayFormat(const std::string& data)
+{
+ sDayFormat = data;
+}
+
+
+std::string LLStringOps::getDatetimeCode (std::string key)
+{
+ std::map<std::string, std::string>::iterator iter;
+
+ iter = datetimeToCodes.find (key);
+ if (iter != datetimeToCodes.end())
+ {
+ return iter->second;
+ }
+ else
+ {
+ return std::string("");
+ }
+}
+
+std::string LLStringOps::getReadableNumber(F64 num)
+{
+ if (fabs(num)>=1e9)
+ {
+ return llformat("%.2lfB", num / 1e9);
+ }
+ else if (fabs(num)>=1e6)
+ {
+ return llformat("%.2lfM", num / 1e6);
+ }
+ else if (fabs(num)>=1e3)
+ {
+ return llformat("%.2lfK", num / 1e3);
+ }
+ else
+ {
+ return llformat("%.2lf", num);
+ }
+}
+
+namespace LLStringFn
+{
+ // NOTE - this restricts output to ascii
+ void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement)
+ {
+ const char MIN = 0x20;
+ std::basic_string<char>::size_type len = string.size();
+ for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
+ {
+ if(string[ii] < MIN)
+ {
+ string[ii] = replacement;
+ }
+ }
+ }
+
+
+ // NOTE - this restricts output to ascii
+ void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
+ char replacement)
+ {
+ const char MIN = 0x20;
+ const char PIPE = 0x7c;
+ std::basic_string<char>::size_type len = str.size();
+ for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
+ {
+ if( (str[ii] < MIN) || (str[ii] == PIPE) )
+ {
+ str[ii] = replacement;
+ }
+ }
+ }
+
+ // https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
+ // allowable code points for XML. Specifically, they are:
+ // 0x09, 0x0a, 0x0d, and 0x20 on up. JC
+ std::string strip_invalid_xml(const std::string& instr)
+ {
+ std::string output;
+ output.reserve( instr.size() );
+ std::string::const_iterator it = instr.begin();
+ while (it != instr.end())
+ {
+ // Must compare as unsigned for >=
+ // Test most likely match first
+ const unsigned char c = (unsigned char)*it;
+ if ( c >= (unsigned char)0x20 // SPACE
+ || c == (unsigned char)0x09 // TAB
+ || c == (unsigned char)0x0a // LINE_FEED
+ || c == (unsigned char)0x0d ) // CARRIAGE_RETURN
+ {
+ output.push_back(c);
+ }
+ ++it;
+ }
+ return output;
+ }
+
+ /**
+ * @brief Replace all control characters (c < 0x20) with replacement in
+ * string.
+ */
+ void replace_ascii_controlchars(std::basic_string<char>& string, char replacement)
+ {
+ const unsigned char MIN = 0x20;
+ std::basic_string<char>::size_type len = string.size();
+ for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
+ {
+ const unsigned char c = (unsigned char) string[ii];
+ if(c < MIN)
+ {
+ string[ii] = replacement;
+ }
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////
+
+// Forward specialization of LLStringUtil::format before use in LLStringUtil::formatDatetime.
+template<>
+S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
+
+//static
+template<>
+void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)
+{
+ // Starting at offset 0, scan forward for the next non-delimiter. We're
+ // done when the only characters left in 'instr' are delimiters.
+ for (std::string::size_type begIdx, endIdx = 0;
+ (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; )
+ {
+ // Found a non-delimiter. After that, find the next delimiter.
+ endIdx = instr.find_first_of (delims, begIdx);
+ if (endIdx == std::string::npos)
+ {
+ // No more delimiters: this token extends to the end of the string.
+ endIdx = instr.length();
+ }
+
+ // extract the token between begIdx and endIdx; substr() needs length
+ std::string currToken(instr.substr(begIdx, endIdx - begIdx));
+ LLStringUtil::trim (currToken);
+ tokens.push_back(currToken);
+ // next scan past delimiters starts at endIdx
+ }
+}
+
+template<>
+LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens)
+{
+ const std::string delims (",");
+
+ // Find the first [
+ size_type pos1 = instr.find('[', start);
+ if (pos1 == std::string::npos)
+ return std::string::npos;
+
+ //Find the first ] after the initial [
+ size_type pos2 = instr.find(']', pos1);
+ if (pos2 == std::string::npos)
+ return std::string::npos;
+
+ // Find the last [ before ] in case of nested [[]]
+ pos1 = instr.find_last_of('[', pos2-1);
+ if (pos1 == std::string::npos || pos1 < start)
+ return std::string::npos;
+
+ getTokens(std::string(instr,pos1+1,pos2-pos1-1), tokens, delims);
+ start = pos2+1;
+
+ return pos1;
+}
+
+// static
+template<>
+bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions)
+{
+ // see if we have a replacement for the bracketed string (without the brackets)
+ // test first using has() because if we just look up with operator[] we get back an
+ // empty string even if the value is missing. We want to distinguish between
+ // missing replacements and deliberately empty replacement strings.
+ format_map_t::const_iterator iter = substitutions.find(token);
+ if (iter != substitutions.end())
+ {
+ replacement = iter->second;
+ return true;
+ }
+ // if not, see if there's one WITH brackets
+ iter = substitutions.find(std::string("[" + token + "]"));
+ if (iter != substitutions.end())
+ {
+ replacement = iter->second;
+ return true;
+ }
+
+ return false;
+}
+
+// static
+template<>
+bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions)
+{
+ // see if we have a replacement for the bracketed string (without the brackets)
+ // test first using has() because if we just look up with operator[] we get back an
+ // empty string even if the value is missing. We want to distinguish between
+ // missing replacements and deliberately empty replacement strings.
+ if (substitutions.has(token))
+ {
+ replacement = substitutions[token].asString();
+ return true;
+ }
+ // if not, see if there's one WITH brackets
+ else if (substitutions.has(std::string("[" + token + "]")))
+ {
+ replacement = substitutions[std::string("[" + token + "]")].asString();
+ return true;
+ }
+
+ return false;
+}
+
+//static
+template<>
+void LLStringUtil::setLocale(std::string inLocale)
+{
+ sLocale = inLocale;
+};
+
+//static
+template<>
+std::string LLStringUtil::getLocale(void)
+{
+ return sLocale;
+};
+
+// static
+template<>
+void LLStringUtil::formatNumber(std::string& numStr, std::string decimals)
+{
+ std::stringstream strStream;
+ S32 intDecimals = 0;
+
+ convertToS32 (decimals, intDecimals);
+ if (!sLocale.empty())
+ {
+ // std::locale() throws if the locale is unknown! (EXT-7926)
+ try
+ {
+ strStream.imbue(std::locale(sLocale.c_str()));
+ } catch (const std::exception &)
+ {
+ LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL;
+ }
+ }
+
+ if (!intDecimals)
+ {
+ S32 intStr;
+
+ if (convertToS32(numStr, intStr))
+ {
+ strStream << intStr;
+ numStr = strStream.str();
+ }
+ }
+ else
+ {
+ F32 floatStr;
+
+ if (convertToF32(numStr, floatStr))
+ {
+ strStream << std::fixed << std::showpoint << std::setprecision(intDecimals) << floatStr;
+ numStr = strStream.str();
+ }
+ }
+}
+
+// static
+template<>
+bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,
+ std::string param, S32 secFromEpoch)
+{
+ if (param == "local") // local
+ {
+ secFromEpoch -= LLStringOps::getLocalTimeOffset();
+ }
+ else if (param != "utc") // slt
+ {
+ secFromEpoch -= LLStringOps::getPacificTimeOffset();
+ }
+
+ // if never fell into those two ifs above, param must be utc
+ if (secFromEpoch < 0) secFromEpoch = 0;
+
+ LLDate datetime((F64)secFromEpoch);
+ std::string code = LLStringOps::getDatetimeCode (token);
+
+ // special case to handle timezone
+ if (code == "%Z") {
+ if (param == "utc")
+ {
+ replacement = "GMT";
+ }
+ else if (param == "local")
+ {
+ replacement = ""; // user knows their own timezone
+ }
+ else
+ {
+#if 0
+ // EXT-1565 : Zai Lynch, James Linden : 15/Oct/09
+ // [BSI] Feedback: Viewer clock mentions SLT, but would prefer it to show PST/PDT
+ // "slt" = Second Life Time, which is deprecated.
+ // If not utc or user local time, fallback to Pacific time
+ replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST";
+#else
+ // SL-20370 : Steeltoe Linden : 29/Sep/23
+ // Change "PDT" to "SLT" on menu bar
+ replacement = "SLT";
+#endif
+ }
+ return true;
+ }
+
+ //EXT-7013
+ //few codes are not suppotred by strtime function (example - weekdays for Japanise)
+ //so use predefined ones
+
+ //if sWeekDayList is not empty than current locale doesn't support
+ //weekday name.
+ time_t loc_seconds = (time_t) secFromEpoch;
+ if(LLStringOps::sWeekDayList.size() == 7 && code == "%A")
+ {
+ struct tm * gmt = gmtime (&loc_seconds);
+ replacement = LLStringOps::sWeekDayList[gmt->tm_wday];
+ }
+ else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a")
+ {
+ struct tm * gmt = gmtime (&loc_seconds);
+ replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday];
+ }
+ else if(LLStringOps::sMonthList.size() == 12 && code == "%B")
+ {
+ struct tm * gmt = gmtime (&loc_seconds);
+ replacement = LLStringOps::sMonthList[gmt->tm_mon];
+ }
+ else if( !LLStringOps::sDayFormat.empty() && code == "%d" )
+ {
+ struct tm * gmt = gmtime (&loc_seconds);
+ LLStringUtil::format_map_t args;
+ args["[MDAY]"] = llformat ("%d", gmt->tm_mday);
+ replacement = LLStringOps::sDayFormat;
+ LLStringUtil::format(replacement, args);
+ }
+ else if (code == "%-d")
+ {
+ struct tm * gmt = gmtime (&loc_seconds);
+ replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero
+ }
+ else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" )
+ {
+ struct tm * gmt = gmtime (&loc_seconds);
+ if(gmt->tm_hour<12)
+ {
+ replacement = LLStringOps::sAM;
+ }
+ else
+ {
+ replacement = LLStringOps::sPM;
+ }
+ }
+ else
+ {
+ replacement = datetime.toHTTPDateString(code);
+ }
+
+ // *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format
+ // to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738).
+ // We could have used '%l' format instead, but it's not supported by Windows.
+ if(code == "%I" && token == "hour12" && replacement.at(0) == '0')
+ {
+ replacement = replacement.at(1);
+ }
+
+ return !code.empty();
+}
+
+// LLStringUtil::format recogizes the following patterns.
+// All substitutions *must* be encased in []'s in the input string.
+// The []'s are optional in the substitution map.
+// [FOO_123]
+// [FOO,number,precision]
+// [FOO,datetime,format]
+
+
+// static
+template<>
+S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;
+ S32 res = 0;
+
+ std::string output;
+ std::vector<std::string> tokens;
+
+ std::string::size_type start = 0;
+ std::string::size_type prev_start = 0;
+ std::string::size_type key_start = 0;
+ while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
+ {
+ output += std::string(s, prev_start, key_start-prev_start);
+ prev_start = start;
+
+ bool found_replacement = false;
+ std::string replacement;
+
+ if (tokens.size() == 0)
+ {
+ found_replacement = false;
+ }
+ else if (tokens.size() == 1)
+ {
+ found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
+ }
+ else if (tokens[1] == "number")
+ {
+ std::string param = "0";
+
+ if (tokens.size() > 2) param = tokens[2];
+ found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
+ if (found_replacement) formatNumber (replacement, param);
+ }
+ else if (tokens[1] == "datetime")
+ {
+ std::string param;
+ if (tokens.size() > 2) param = tokens[2];
+
+ format_map_t::const_iterator iter = substitutions.find("datetime");
+ if (iter != substitutions.end())
+ {
+ S32 secFromEpoch = 0;
+ bool r = LLStringUtil::convertToS32(iter->second, secFromEpoch);
+ if (r)
+ {
+ found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch);
+ }
+ }
+ }
+
+ if (found_replacement)
+ {
+ output += replacement;
+ res++;
+ }
+ else
+ {
+ // we had no replacement, use the string as is
+ // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
+ output += std::string(s, key_start, start-key_start);
+ }
+ tokens.clear();
+ }
+ // send the remainder of the string (with no further matches for bracketed names)
+ output += std::string(s, start);
+ s = output;
+ return res;
+}
+
+//static
+template<>
+S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;
+ S32 res = 0;
+
+ if (!substitutions.isMap())
+ {
+ return res;
+ }
+
+ std::string output;
+ std::vector<std::string> tokens;
+
+ std::string::size_type start = 0;
+ std::string::size_type prev_start = 0;
+ std::string::size_type key_start = 0;
+ while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
+ {
+ output += std::string(s, prev_start, key_start-prev_start);
+ prev_start = start;
+
+ bool found_replacement = false;
+ std::string replacement;
+
+ if (tokens.size() == 0)
+ {
+ found_replacement = false;
+ }
+ else if (tokens.size() == 1)
+ {
+ found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
+ }
+ else if (tokens[1] == "number")
+ {
+ std::string param = "0";
+
+ if (tokens.size() > 2) param = tokens[2];
+ found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
+ if (found_replacement) formatNumber (replacement, param);
+ }
+ else if (tokens[1] == "datetime")
+ {
+ std::string param;
+ if (tokens.size() > 2) param = tokens[2];
+
+ S32 secFromEpoch = (S32) substitutions["datetime"].asInteger();
+ found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch);
+ }
+
+ if (found_replacement)
+ {
+ output += replacement;
+ res++;
+ }
+ else
+ {
+ // we had no replacement, use the string as is
+ // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
+ output += std::string(s, key_start, start-key_start);
+ }
+ tokens.clear();
+ }
+ // send the remainder of the string (with no further matches for bracketed names)
+ output += std::string(s, start);
+ s = output;
+ return res;
+}
+
+////////////////////////////////////////////////////////////
+// Testing
+
+#ifdef _DEBUG
+
+template<class T>
+void LLStringUtilBase<T>::testHarness()
+{
+ std::string s1;
+
+ llassert( s1.c_str() == NULL );
+ llassert( s1.size() == 0 );
+ llassert( s1.empty() );
+
+ std::string s2( "hello");
+ llassert( !strcmp( s2.c_str(), "hello" ) );
+ llassert( s2.size() == 5 );
+ llassert( !s2.empty() );
+ std::string s3( s2 );
+
+ llassert( "hello" == s2 );
+ llassert( s2 == "hello" );
+ llassert( s2 > "gello" );
+ llassert( "gello" < s2 );
+ llassert( "gello" != s2 );
+ llassert( s2 != "gello" );
+
+ std::string s4 = s2;
+ llassert( !s4.empty() );
+ s4.empty();
+ llassert( s4.empty() );
+
+ std::string s5("");
+ llassert( s5.empty() );
+
+ llassert( isValidIndex(s5, 0) );
+ llassert( !isValidIndex(s5, 1) );
+
+ s3 = s2;
+ s4 = "hello again";
+
+ s4 += "!";
+ s4 += s4;
+ llassert( s4 == "hello again!hello again!" );
+
+
+ std::string s6 = s2 + " " + s2;
+ std::string s7 = s6;
+ llassert( s6 == s7 );
+ llassert( !( s6 != s7) );
+ llassert( !(s6 < s7) );
+ llassert( !(s6 > s7) );
+
+ llassert( !(s6 == "hi"));
+ llassert( s6 == "hello hello");
+ llassert( s6 < "hi");
+
+ llassert( s6[1] == 'e' );
+ s6[1] = 'f';
+ llassert( s6[1] == 'f' );
+
+ s2.erase( 4, 1 );
+ llassert( s2 == "hell");
+ s2.insert( 0, "y" );
+ llassert( s2 == "yhell");
+ s2.erase( 1, 3 );
+ llassert( s2 == "yl");
+ s2.insert( 1, "awn, don't yel");
+ llassert( s2 == "yawn, don't yell");
+
+ std::string s8 = s2.substr( 6, 5 );
+ llassert( s8 == "don't" );
+
+ std::string s9 = " \t\ntest \t\t\n ";
+ trim(s9);
+ llassert( s9 == "test" );
+
+ s8 = "abc123&*(ABC";
+
+ s9 = s8;
+ toUpper(s9);
+ llassert( s9 == "ABC123&*(ABC" );
+
+ s9 = s8;
+ toLower(s9);
+ llassert( s9 == "abc123&*(abc" );
+
+
+ std::string s10( 10, 'x' );
+ llassert( s10 == "xxxxxxxxxx" );
+
+ std::string s11( "monkey in the middle", 7, 2 );
+ llassert( s11 == "in" );
+
+ std::string s12; //empty
+ s12 += "foo";
+ llassert( s12 == "foo" );
+
+ std::string s13; //empty
+ s13 += 'f';
+ llassert( s13 == "f" );
+}
+
+
+#endif // _DEBUG
|