diff options
author | Ansariel <ansariel.hiller@phoenixviewer.com> | 2024-05-22 21:25:21 +0200 |
---|---|---|
committer | Andrey Lihatskiy <alihatskiy@productengine.com> | 2024-05-22 22:40:26 +0300 |
commit | e2e37cced861b98de8c1a7c9c0d3a50d2d90e433 (patch) | |
tree | 1bb897489ce524986f6196201c10ac0d8861aa5f /indra/llcommon/llstring.cpp | |
parent | 069ea06848f766466f1a281144c82a0f2bd79f3a (diff) |
Fix line endlings
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r-- | indra/llcommon/llstring.cpp | 3512 |
1 files changed, 1756 insertions, 1756 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index b3fc9b484a..bbb6aa2c20 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -1,1756 +1,1756 @@ -/**
- * @file llstring.cpp
- * @brief String utility functions and the std::string class.
- *
- * $LicenseInfo:firstyear=2001&license=viewerlgpl$
- * Second Life Viewer Source Code
- * Copyright (C) 2010, Linden Research, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License only.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
- * $/LicenseInfo$
- */
-
-#include "linden_common.h"
-
-#include "llstring.h"
-#include "llerror.h"
-#include "llfasttimer.h"
-#include "llsd.h"
-#include <vector>
-
-#if LL_WINDOWS
-#include "llwin32headerslean.h"
-#include <winnls.h> // for WideCharToMultiByte
-#endif
-
-std::string ll_safe_string(const char* in)
-{
- if(in) return std::string(in);
- return std::string();
-}
-
-std::string ll_safe_string(const char* in, S32 maxlen)
-{
- if(in && maxlen > 0 ) return std::string(in, maxlen);
-
- return std::string();
-}
-
-bool is_char_hex(char hex)
-{
- if((hex >= '0') && (hex <= '9'))
- {
- return true;
- }
- else if((hex >= 'a') && (hex <='f'))
- {
- return true;
- }
- else if((hex >= 'A') && (hex <='F'))
- {
- return true;
- }
- return false; // uh - oh, not hex any more...
-}
-
-U8 hex_as_nybble(char hex)
-{
- if((hex >= '0') && (hex <= '9'))
- {
- return (U8)(hex - '0');
- }
- else if((hex >= 'a') && (hex <='f'))
- {
- return (U8)(10 + hex - 'a');
- }
- else if((hex >= 'A') && (hex <='F'))
- {
- return (U8)(10 + hex - 'A');
- }
- return 0; // uh - oh, not hex any more...
-}
-
-bool iswindividual(llwchar elem)
-{
- U32 cur_char = (U32)elem;
- bool result = false;
- if (0x2E80<= cur_char && cur_char <= 0x9FFF)
- {
- result = true;
- }
- else if (0xAC00<= cur_char && cur_char <= 0xD7A0 )
- {
- result = true;
- }
- else if (0xF900<= cur_char && cur_char <= 0xFA60 )
- {
- result = true;
- }
- return result;
-}
-
-bool _read_file_into_string(std::string& str, const std::string& filename)
-{
- llifstream ifs(filename.c_str(), llifstream::binary);
- if (!ifs.is_open())
- {
- LL_INFOS() << "Unable to open file " << filename << LL_ENDL;
- return false;
- }
-
- std::ostringstream oss;
-
- oss << ifs.rdbuf();
- str = oss.str();
- ifs.close();
- return true;
-}
-
-
-
-
-// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
-// for the Unicode implementation - this doesn't match because it was written before finding
-// it.
-
-
-std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
-{
- std::string utf8_str = wstring_to_utf8str(wstr);
- s << utf8_str;
- return s;
-}
-
-std::string rawstr_to_utf8(const std::string& raw)
-{
- LLWString wstr(utf8str_to_wstring(raw));
- return wstring_to_utf8str(wstr);
-}
-
-std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars)
-{
- U32 cur_char = (U32)in_char;
- char* base = outchars;
- if (cur_char < 0x80)
- {
- *outchars++ = (U8)cur_char;
- }
- else if (cur_char < 0x800)
- {
- *outchars++ = 0xC0 | (cur_char >> 6);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x10000)
- {
- *outchars++ = 0xE0 | (cur_char >> 12);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x200000)
- {
- *outchars++ = 0xF0 | (cur_char >> 18);
- *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x4000000)
- {
- *outchars++ = 0xF8 | (cur_char >> 24);
- *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x80000000)
- {
- *outchars++ = 0xFC | (cur_char >> 30);
- *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else
- {
- LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL;
- *outchars++ = LL_UNKNOWN_CHAR;
- }
- return outchars - base;
-}
-
-auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
-{
- const U16* base = inchars;
- U16 cur_char = *inchars++;
- llwchar char32 = cur_char;
- if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
- {
- // Surrogates
- char32 = ((llwchar)(cur_char - 0xD800)) << 10;
- cur_char = *inchars++;
- char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
- }
- else
- {
- char32 = (llwchar)cur_char;
- }
- *outchar = char32;
- return inchars - base;
-}
-
-llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
-{
- llutf16string out;
-
- S32 i = 0;
- while (i < len)
- {
- U32 cur_char = utf32str[i];
- if (cur_char > 0xFFFF)
- {
- out += (0xD7C0 + (cur_char >> 10));
- out += (0xDC00 | (cur_char & 0x3FF));
- }
- else
- {
- out += cur_char;
- }
- i++;
- }
- return out;
-}
-
-llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
-{
- LLWString wstr = utf8str_to_wstring ( utf8str, len );
- return wstring_to_utf16str ( wstr );
-}
-
-LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
-{
- LLWString wout;
- if (len == 0) return wout;
-
- S32 i = 0;
- const U16* chars16 = utf16str;
- while (i < len)
- {
- llwchar cur_char;
- i += utf16chars_to_wchar(chars16+i, &cur_char);
- wout += cur_char;
- }
- return wout;
-}
-
-// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
-S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
-{
- S32 surrogate_pairs = 0;
- // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
- const U16 *const utf16_chars = &(*(utf16str.begin()));
- S32 i = 0;
- while (i < utf16_len)
- {
- const U16 c = utf16_chars[i++];
- if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16
- { // Have first byte of a surrogate pair
- if (i >= utf16_len)
- {
- break;
- }
- const U16 d = utf16_chars[i];
- if (d >= 0xDC00 && d <= 0xDFFF)
- { // Have valid second byte of a surrogate pair
- surrogate_pairs++;
- i++;
- }
- }
- }
- return utf16_len - surrogate_pairs;
-}
-
-// Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
-S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
-{
- const S32 end = llmin((S32)wstr.length(), woffset + wlen);
- if (end < woffset)
- {
- return 0;
- }
- else
- {
- S32 length = end - woffset;
- for (S32 i = woffset; i < end; i++)
- {
- if (wstr[i] >= 0x10000)
- {
- length++;
- }
- }
- return length;
- }
-}
-
-// Given a wstring and an offset in it, returns the length as wstring (i.e.,
-// number of llwchars) of the longest substring that starts at the offset
-// and whose equivalent utf-16 string does not exceeds the given utf16_length.
-S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, bool *unaligned)
-{
- const auto end = wstr.length();
- bool u{ false };
- S32 n = woffset + utf16_length;
- S32 i = woffset;
- while (i < end)
- {
- if (wstr[i] >= 0x10000)
- {
- --n;
- }
- if (i >= n)
- {
- u = (i > n);
- break;
- }
- i++;
- }
- if (unaligned)
- {
- *unaligned = u;
- }
- return i - woffset;
-}
-
-S32 wchar_utf8_length(const llwchar wc)
-{
- if (wc < 0x80)
- {
- return 1;
- }
- else if (wc < 0x800)
- {
- return 2;
- }
- else if (wc < 0x10000)
- {
- return 3;
- }
- else if (wc < 0x200000)
- {
- return 4;
- }
- else if (wc < 0x4000000)
- {
- return 5;
- }
- else
- {
- return 6;
- }
-}
-
-std::string wchar_utf8_preview(const llwchar wc)
-{
- std::ostringstream oss;
- oss << std::hex << std::uppercase << (U32)wc;
-
- U8 out_bytes[8];
- U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes);
-
- if (size > 1)
- {
- oss << " [";
- for (U32 i = 0; i < size; ++i)
- {
- if (i)
- {
- oss << ", ";
- }
- oss << (int)out_bytes[i];
- }
- oss << "]";
- }
-
- return oss.str();
-}
-
-S32 wstring_utf8_length(const LLWString& wstr)
-{
- S32 len = 0;
- for (S32 i = 0; i < (S32)wstr.length(); i++)
- {
- len += wchar_utf8_length(wstr[i]);
- }
- return len;
-}
-
-LLWString utf8str_to_wstring(const char* utf8str, size_t len)
-{
- LLWString wout;
-
- S32 i = 0;
- while (i < len)
- {
- llwchar unichar;
- U8 cur_char = utf8str[i];
-
- if (cur_char < 0x80)
- {
- // Ascii character, just add it
- unichar = cur_char;
- }
- else
- {
- S32 cont_bytes = 0;
- if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32
- {
- unichar = (0x1F&cur_char);
- cont_bytes = 1;
- }
- else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32
- {
- unichar = (0x0F&cur_char);
- cont_bytes = 2;
- }
- else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32
- {
- unichar = (0x07&cur_char);
- cont_bytes = 3;
- }
- else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32
- {
- unichar = (0x03&cur_char);
- cont_bytes = 4;
- }
- else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32
- {
- unichar = (0x01&cur_char);
- cont_bytes = 5;
- }
- else
- {
- wout += LL_UNKNOWN_CHAR;
- ++i;
- continue;
- }
-
- // Check that this character doesn't go past the end of the string
- auto end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
- do
- {
- ++i;
-
- cur_char = utf8str[i];
- if ( (cur_char >> 6) == 0x2 )
- {
- unichar <<= 6;
- unichar += (0x3F&cur_char);
- }
- else
- {
- // Malformed sequence - roll back to look at this as a new char
- unichar = LL_UNKNOWN_CHAR;
- --i;
- break;
- }
- } while(i < end);
-
- // Handle overlong characters and NULL characters
- if ( ((cont_bytes == 1) && (unichar < 0x80))
- || ((cont_bytes == 2) && (unichar < 0x800))
- || ((cont_bytes == 3) && (unichar < 0x10000))
- || ((cont_bytes == 4) && (unichar < 0x200000))
- || ((cont_bytes == 5) && (unichar < 0x4000000)) )
- {
- unichar = LL_UNKNOWN_CHAR;
- }
- }
-
- wout += unichar;
- ++i;
- }
- return wout;
-}
-
-std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
-{
- std::string out;
-
- S32 i = 0;
- while (i < len)
- {
- char tchars[8]; /* Flawfinder: ignore */
- auto n = wchar_to_utf8chars(utf32str[i], tchars);
- tchars[n] = 0;
- out += tchars;
- i++;
- }
- return out;
-}
-
-std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
-{
- return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
-}
-
-std::string utf8str_trim(const std::string& utf8str)
-{
- LLWString wstr = utf8str_to_wstring(utf8str);
- LLWStringUtil::trim(wstr);
- return wstring_to_utf8str(wstr);
-}
-
-
-std::string utf8str_tolower(const std::string& utf8str)
-{
- LLWString out_str = utf8str_to_wstring(utf8str);
- LLWStringUtil::toLower(out_str);
- return wstring_to_utf8str(out_str);
-}
-
-
-S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
-{
- LLWString wlhs = utf8str_to_wstring(lhs);
- LLWString wrhs = utf8str_to_wstring(rhs);
- return LLWStringUtil::compareInsensitive(wlhs, wrhs);
-}
-
-std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
-{
- if (0 == max_len)
- {
- return std::string();
- }
- if ((S32)utf8str.length() <= max_len)
- {
- return utf8str;
- }
- else
- {
- S32 cur_char = max_len;
-
- // If we're ASCII, we don't need to do anything
- if ((U8)utf8str[cur_char] > 0x7f)
- {
- // If first two bits are (10), it's the tail end of a multibyte char. We need to shift back
- // to the first character
- while (0x80 == (0xc0 & utf8str[cur_char]))
- {
- cur_char--;
- // Keep moving forward until we hit the first char;
- if (cur_char == 0)
- {
- // Make sure we don't trash memory if we've got a bogus string.
- break;
- }
- }
- }
- // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
- return utf8str.substr(0, cur_char);
- }
-}
-
-std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len)
-{
- if (0 == symbol_len)
- {
- return std::string();
- }
- if ((S32)utf8str.length() <= symbol_len)
- {
- return utf8str;
- }
- else
- {
- int len = 0, byteIndex = 0;
- const char* aStr = utf8str.c_str();
- size_t origSize = utf8str.size();
-
- for (byteIndex = 0; len < symbol_len && byteIndex < origSize; byteIndex++)
- {
- if ((aStr[byteIndex] & 0xc0) != 0x80)
- {
- len += 1;
- }
- }
- return utf8str.substr(0, byteIndex);
- }
-}
-
-std::string utf8str_substChar(
- const std::string& utf8str,
- const llwchar target_char,
- const llwchar replace_char)
-{
- LLWString wstr = utf8str_to_wstring(utf8str);
- LLWStringUtil::replaceChar(wstr, target_char, replace_char);
- //wstr = wstring_substChar(wstr, target_char, replace_char);
- return wstring_to_utf8str(wstr);
-}
-
-std::string utf8str_makeASCII(const std::string& utf8str)
-{
- LLWString wstr = utf8str_to_wstring(utf8str);
- LLWStringUtil::_makeASCII(wstr);
- return wstring_to_utf8str(wstr);
-}
-
-std::string mbcsstring_makeASCII(const std::string& wstr)
-{
- // Replace non-ASCII chars with replace_char
- std::string out_str = wstr;
- for (S32 i = 0; i < (S32)out_str.length(); i++)
- {
- if ((U8)out_str[i] > 0x7f)
- {
- out_str[i] = LL_UNKNOWN_CHAR;
- }
- }
- return out_str;
-}
-
-std::string utf8str_removeCRLF(const std::string& utf8str)
-{
- if (0 == utf8str.length())
- {
- return std::string();
- }
- const char CR = 13;
-
- std::string out;
- out.reserve(utf8str.length());
- const S32 len = (S32)utf8str.length();
- for( S32 i = 0; i < len; i++ )
- {
- if( utf8str[i] != CR )
- {
- out.push_back(utf8str[i]);
- }
- }
- return out;
-}
-
-llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length)
-{
- switch (length)
- {
- case 2:
- return ((utf8str[offset] & 0x1F) << 6) +
- (utf8str[offset + 1] & 0x3F);
- case 3:
- return ((utf8str[offset] & 0x0F) << 12) +
- ((utf8str[offset + 1] & 0x3F) << 6) +
- (utf8str[offset + 2] & 0x3F);
- case 4:
- return ((utf8str[offset] & 0x07) << 18) +
- ((utf8str[offset + 1] & 0x3F) << 12) +
- ((utf8str[offset + 2] & 0x3F) << 6) +
- (utf8str[offset + 3] & 0x3F);
- case 5:
- return ((utf8str[offset] & 0x03) << 24) +
- ((utf8str[offset + 1] & 0x3F) << 18) +
- ((utf8str[offset + 2] & 0x3F) << 12) +
- ((utf8str[offset + 3] & 0x3F) << 6) +
- (utf8str[offset + 4] & 0x3F);
- case 6:
- return ((utf8str[offset] & 0x01) << 30) +
- ((utf8str[offset + 1] & 0x3F) << 24) +
- ((utf8str[offset + 2] & 0x3F) << 18) +
- ((utf8str[offset + 3] & 0x3F) << 12) +
- ((utf8str[offset + 4] & 0x3F) << 6) +
- (utf8str[offset + 5] & 0x3F);
- case 7:
- return ((utf8str[offset + 1] & 0x03) << 30) +
- ((utf8str[offset + 2] & 0x3F) << 24) +
- ((utf8str[offset + 3] & 0x3F) << 18) +
- ((utf8str[offset + 4] & 0x3F) << 12) +
- ((utf8str[offset + 5] & 0x3F) << 6) +
- (utf8str[offset + 6] & 0x3F);
- }
- return LL_UNKNOWN_CHAR;
-}
-
-std::string utf8str_showBytesUTF8(const std::string& utf8str)
-{
- std::string result;
-
- bool in_sequence = false;
- size_t sequence_size = 0;
- size_t byte_index = 0;
- size_t source_length = utf8str.size();
-
- auto open_sequence = [&]()
- {
- if (!result.empty() && result.back() != '\n')
- result += '\n'; // Use LF as a separator before new UTF-8 sequence
- result += '[';
- in_sequence = true;
- };
-
- auto close_sequence = [&]()
- {
- llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size);
- if (unicode != LL_UNKNOWN_CHAR)
- {
- result += llformat("+%04X", unicode);
- }
- result += ']';
- in_sequence = false;
- sequence_size = 0;
- };
-
- while (byte_index < source_length)
- {
- U8 byte = utf8str[byte_index];
- if (byte >= 0x80) // Part of an UTF-8 sequence
- {
- if (!in_sequence) // Start new UTF-8 sequence
- {
- open_sequence();
- }
- else if (byte >= 0xC0) // Start another UTF-8 sequence
- {
- close_sequence();
- open_sequence();
- }
- else // Continue the same UTF-8 sequence
- {
- result += '.';
- }
- result += llformat("%02X", byte); // The byte is represented in hexadecimal form
- ++sequence_size;
- }
- else // ASCII symbol is represented as a character
- {
- if (in_sequence) // End of UTF-8 sequence
- {
- close_sequence();
- if (byte != '\n')
- {
- result += '\n'; // Use LF as a separator between UTF-8 and ASCII
- }
- }
- result += byte;
- }
- ++byte_index;
- }
-
- if (in_sequence) // End of UTF-8 sequence
- {
- close_sequence();
- }
-
- return result;
-}
-
-// Search for any emoji symbol, return true if found
-bool wstring_has_emoji(const LLWString& wstr)
-{
- for (const llwchar& wch : wstr)
- {
- if (LLStringOps::isEmoji(wch))
- return true;
- }
-
- return false;
-}
-
-// Cut emoji symbols if exist
-bool wstring_remove_emojis(LLWString& wstr)
-{
- bool found = false;
- for (size_t i = 0; i < wstr.size(); ++i)
- {
- if (LLStringOps::isEmoji(wstr[i]))
- {
- wstr.erase(i--, 1);
- found = true;
- }
- }
- return found;
-}
-
-// Cut emoji symbols if exist
-bool utf8str_remove_emojis(std::string& utf8str)
-{
- LLWString wstr = utf8str_to_wstring(utf8str);
- if (!wstring_remove_emojis(wstr))
- return false;
- utf8str = wstring_to_utf8str(wstr);
- return true;
-}
-
-#if LL_WINDOWS
-unsigned int ll_wstring_default_code_page()
-{
- return CP_UTF8;
-}
-
-std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)
-{
- std::string out;
- if(in)
- {
- int len_out = WideCharToMultiByte(
- code_page,
- 0,
- in,
- len_in,
- NULL,
- 0,
- 0,
- 0);
- // We will need two more bytes for the double NULL ending
- // created in WideCharToMultiByte().
- char* pout = new char [len_out + 2];
- memset(pout, 0, len_out + 2);
- if(pout)
- {
- WideCharToMultiByte(
- code_page,
- 0,
- in,
- len_in,
- pout,
- len_out,
- 0,
- 0);
- out.assign(pout);
- delete[] pout;
- }
- }
- return out;
-}
-
-std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)
-{
- // From review:
- // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
- // plus one for a null terminator, and be guaranteed to not overflow.
-
- // Normally, I'd call that sort of thing premature optimization,
- // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets.
-// int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0);
-
- // reserve an output buffer that will be destroyed on exit, with a place
- // to put NULL terminator
- std::vector<wchar_t> w_out(len + 1);
-
- memset(&w_out[0], 0, w_out.size());
- int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,
- &w_out[0], w_out.size() - 1);
-
- //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
- w_out[real_output_str_len] = 0;
-
- // construct string<wchar_t> from our temporary output buffer
- return {&w_out[0]};
-}
-
-LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)
-{
- // Whether or not std::wstring and llutf16string are distinct types, they
- // both hold UTF-16LE characters. (See header file comments.) Pretend this
- // wchar_t* sequence is really a U16* sequence and use the conversion we
- // define above.
- return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len);
-}
-
-std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)
-{
- // first, convert to llutf16string, for which we have a real implementation
- auto utf16str{ wstring_to_utf16str(in, len) };
- // then, because each U16 char must be UTF-16LE encoded, pretend the U16*
- // string pointer is a wchar_t* and instantiate a std::wstring of the same
- // length.
- return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() };
-}
-
-std::string ll_convert_string_to_utf8_string(const std::string& in)
-{
- // If you pass code_page, you must also pass length, otherwise the code
- // page parameter will be mistaken for length.
- auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP);
- // CP_UTF8 is default -- see ll_wstring_default_code_page() above.
- return ll_convert_wide_to_string(w_mesg);
-}
-
-namespace
-{
-
-void HeapFree_deleter(void* ptr)
-{
- // instead of LocalFree(), per https://stackoverflow.com/a/31541205
- HeapFree(GetProcessHeap(), NULL, ptr);
-}
-
-} // anonymous namespace
-
-template<>
-std::wstring windows_message<std::wstring>(DWORD error)
-{
- // derived from https://stackoverflow.com/a/455533
- wchar_t* rawptr = nullptr;
- auto okay = FormatMessageW(
- // use system message tables for GetLastError() codes
- FORMAT_MESSAGE_FROM_SYSTEM |
- // internally allocate buffer and return its pointer
- FORMAT_MESSAGE_ALLOCATE_BUFFER |
- // you cannot pass insertion parameters (thanks Gandalf)
- FORMAT_MESSAGE_IGNORE_INSERTS |
- // ignore line breaks in message definition text
- FORMAT_MESSAGE_MAX_WIDTH_MASK,
- NULL, // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM
- error, // dwMessageId
- MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId
- (LPWSTR)&rawptr, // lpBuffer: force-cast wchar_t** to wchar_t*
- 0, // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER
- NULL); // Arguments, unused
-
- // make a unique_ptr from rawptr so it gets cleaned up properly
- std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter);
-
- if (okay && bufferptr)
- {
- // got the message, return it ('okay' is length in characters)
- return { bufferptr.get(), okay };
- }
-
- // did not get the message, synthesize one
- auto format_message_error = GetLastError();
- std::wostringstream out;
- out << L"GetLastError() " << error << L" (FormatMessageW() failed with "
- << format_message_error << L")";
- return out.str();
-}
-
-std::optional<std::wstring> llstring_getoptenv(const std::string& key)
-{
- auto wkey = ll_convert_string_to_wide(key);
- // Take a wild guess as to how big the buffer should be.
- std::vector<wchar_t> buffer(1024);
- auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
- // If our initial guess was too short, n will indicate the size (in
- // wchar_t's) that buffer should have been, including the terminating nul.
- if (n > (buffer.size() - 1))
- {
- // make it big enough
- buffer.resize(n);
- // and try again
- n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size());
- }
- // did that (ultimately) succeed?
- if (n)
- {
- // great, return populated std::optional
- return std::make_optional<std::wstring>(&buffer[0]);
- }
-
- // not successful
- auto last_error = GetLastError();
- // Don't bother warning for NOT_FOUND; that's an expected case
- if (last_error != ERROR_ENVVAR_NOT_FOUND)
- {
- LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: "
- << windows_message<std::string>(last_error) << LL_ENDL;
- }
- // return empty std::optional
- return {};
-}
-
-#else // ! LL_WINDOWS
-
-std::optional<std::string> llstring_getoptenv(const std::string& key)
-{
- auto found = getenv(key.c_str());
- if (found)
- {
- // return populated std::optional
- return std::make_optional<std::string>(found);
- }
- else
- {
- // return empty std::optional
- return {};
- }
-}
-
-#endif // ! LL_WINDOWS
-
-long LLStringOps::sPacificTimeOffset = 0;
-long LLStringOps::sLocalTimeOffset = 0;
-bool LLStringOps::sPacificDaylightTime = 0;
-std::map<std::string, std::string> LLStringOps::datetimeToCodes;
-
-std::vector<std::string> LLStringOps::sWeekDayList;
-std::vector<std::string> LLStringOps::sWeekDayShortList;
-std::vector<std::string> LLStringOps::sMonthList;
-std::vector<std::string> LLStringOps::sMonthShortList;
-
-
-std::string LLStringOps::sDayFormat;
-std::string LLStringOps::sAM;
-std::string LLStringOps::sPM;
-
-// static
-bool LLStringOps::isEmoji(llwchar a)
-{
-#if 0 // Do not consider special characters that might have a corresponding
- // glyph in the monochorme fallback fonts as a "genuine" emoji. HB
- return a == 0xa9 || a == 0xae || (a >= 0x2000 && a < 0x3300) ||
- (a >= 0x1f000 && a < 0x20000);
-#else
- // These are indeed "genuine" emojis, we *do want* rendered as such. HB
- return a >= 0x1f000 && a < 0x20000;
-#endif
- }
-
-S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
-{
- #if LL_WINDOWS
- // in Windows, wide string functions operator on 16-bit strings,
- // not the proper 32 bit wide string
- return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
- #else
- return wcscoll(a, b);
- #endif
-}
-
-void LLStringOps::setupDatetimeInfo (bool daylight)
-{
- time_t nowT, localT, gmtT;
- struct tm * tmpT;
-
- nowT = time (NULL);
-
- tmpT = gmtime (&nowT);
- gmtT = mktime (tmpT);
-
- tmpT = localtime (&nowT);
- localT = mktime (tmpT);
-
- sLocalTimeOffset = (long) (gmtT - localT);
- if (tmpT->tm_isdst)
- {
- sLocalTimeOffset -= 60 * 60; // 1 hour
- }
-
- sPacificDaylightTime = daylight;
- sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60;
-
- datetimeToCodes["wkday"] = "%a"; // Thu
- datetimeToCodes["weekday"] = "%A"; // Thursday
- datetimeToCodes["year4"] = "%Y"; // 2009
- datetimeToCodes["year"] = "%Y"; // 2009
- datetimeToCodes["year2"] = "%y"; // 09
- datetimeToCodes["mth"] = "%b"; // Aug
- datetimeToCodes["month"] = "%B"; // August
- datetimeToCodes["mthnum"] = "%m"; // 08
- datetimeToCodes["day"] = "%d"; // 31
- datetimeToCodes["sday"] = "%-d"; // 9
- datetimeToCodes["hour24"] = "%H"; // 14
- datetimeToCodes["hour"] = "%H"; // 14
- datetimeToCodes["hour12"] = "%I"; // 02
- datetimeToCodes["min"] = "%M"; // 59
- datetimeToCodes["ampm"] = "%p"; // AM
- datetimeToCodes["second"] = "%S"; // 59
- datetimeToCodes["timezone"] = "%Z"; // PST
-}
-
-void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output)
-{
- output.clear();
- size_t length = data.size();
-
- // tokenize it and put it in the array
- std::string cur_word;
- for(size_t i = 0; i < length; ++i)
- {
- if(data[i] == ':')
- {
- output.push_back(cur_word);
- cur_word.clear();
- }
- else
- {
- cur_word.append(1, data[i]);
- }
- }
- output.push_back(cur_word);
-}
-
-void LLStringOps::setupWeekDaysNames(const std::string& data)
-{
- tokenizeStringToArray(data,sWeekDayList);
-}
-void LLStringOps::setupWeekDaysShortNames(const std::string& data)
-{
- tokenizeStringToArray(data,sWeekDayShortList);
-}
-void LLStringOps::setupMonthNames(const std::string& data)
-{
- tokenizeStringToArray(data,sMonthList);
-}
-void LLStringOps::setupMonthShortNames(const std::string& data)
-{
- tokenizeStringToArray(data,sMonthShortList);
-}
-void LLStringOps::setupDayFormat(const std::string& data)
-{
- sDayFormat = data;
-}
-
-
-std::string LLStringOps::getDatetimeCode (std::string key)
-{
- std::map<std::string, std::string>::iterator iter;
-
- iter = datetimeToCodes.find (key);
- if (iter != datetimeToCodes.end())
- {
- return iter->second;
- }
- else
- {
- return std::string("");
- }
-}
-
-std::string LLStringOps::getReadableNumber(F64 num)
-{
- if (fabs(num)>=1e9)
- {
- return llformat("%.2lfB", num / 1e9);
- }
- else if (fabs(num)>=1e6)
- {
- return llformat("%.2lfM", num / 1e6);
- }
- else if (fabs(num)>=1e3)
- {
- return llformat("%.2lfK", num / 1e3);
- }
- else
- {
- return llformat("%.2lf", num);
- }
-}
-
-namespace LLStringFn
-{
- // NOTE - this restricts output to ascii
- void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement)
- {
- const char MIN = 0x20;
- std::basic_string<char>::size_type len = string.size();
- for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
- {
- if(string[ii] < MIN)
- {
- string[ii] = replacement;
- }
- }
- }
-
-
- // NOTE - this restricts output to ascii
- void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
- char replacement)
- {
- const char MIN = 0x20;
- const char PIPE = 0x7c;
- std::basic_string<char>::size_type len = str.size();
- for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
- {
- if( (str[ii] < MIN) || (str[ii] == PIPE) )
- {
- str[ii] = replacement;
- }
- }
- }
-
- // https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
- // allowable code points for XML. Specifically, they are:
- // 0x09, 0x0a, 0x0d, and 0x20 on up. JC
- std::string strip_invalid_xml(const std::string& instr)
- {
- std::string output;
- output.reserve( instr.size() );
- std::string::const_iterator it = instr.begin();
- while (it != instr.end())
- {
- // Must compare as unsigned for >=
- // Test most likely match first
- const unsigned char c = (unsigned char)*it;
- if ( c >= (unsigned char)0x20 // SPACE
- || c == (unsigned char)0x09 // TAB
- || c == (unsigned char)0x0a // LINE_FEED
- || c == (unsigned char)0x0d ) // CARRIAGE_RETURN
- {
- output.push_back(c);
- }
- ++it;
- }
- return output;
- }
-
- /**
- * @brief Replace all control characters (c < 0x20) with replacement in
- * string.
- */
- void replace_ascii_controlchars(std::basic_string<char>& string, char replacement)
- {
- const unsigned char MIN = 0x20;
- std::basic_string<char>::size_type len = string.size();
- for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
- {
- const unsigned char c = (unsigned char) string[ii];
- if(c < MIN)
- {
- string[ii] = replacement;
- }
- }
- }
-}
-
-////////////////////////////////////////////////////////////
-
-// Forward specialization of LLStringUtil::format before use in LLStringUtil::formatDatetime.
-template<>
-S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
-
-//static
-template<>
-void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)
-{
- // Starting at offset 0, scan forward for the next non-delimiter. We're
- // done when the only characters left in 'instr' are delimiters.
- for (std::string::size_type begIdx, endIdx = 0;
- (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; )
- {
- // Found a non-delimiter. After that, find the next delimiter.
- endIdx = instr.find_first_of (delims, begIdx);
- if (endIdx == std::string::npos)
- {
- // No more delimiters: this token extends to the end of the string.
- endIdx = instr.length();
- }
-
- // extract the token between begIdx and endIdx; substr() needs length
- std::string currToken(instr.substr(begIdx, endIdx - begIdx));
- LLStringUtil::trim (currToken);
- tokens.push_back(currToken);
- // next scan past delimiters starts at endIdx
- }
-}
-
-template<>
-LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens)
-{
- const std::string delims (",");
-
- // Find the first [
- size_type pos1 = instr.find('[', start);
- if (pos1 == std::string::npos)
- return std::string::npos;
-
- //Find the first ] after the initial [
- size_type pos2 = instr.find(']', pos1);
- if (pos2 == std::string::npos)
- return std::string::npos;
-
- // Find the last [ before ] in case of nested [[]]
- pos1 = instr.find_last_of('[', pos2-1);
- if (pos1 == std::string::npos || pos1 < start)
- return std::string::npos;
-
- getTokens(std::string(instr,pos1+1,pos2-pos1-1), tokens, delims);
- start = pos2+1;
-
- return pos1;
-}
-
-// static
-template<>
-bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions)
-{
- // see if we have a replacement for the bracketed string (without the brackets)
- // test first using has() because if we just look up with operator[] we get back an
- // empty string even if the value is missing. We want to distinguish between
- // missing replacements and deliberately empty replacement strings.
- format_map_t::const_iterator iter = substitutions.find(token);
- if (iter != substitutions.end())
- {
- replacement = iter->second;
- return true;
- }
- // if not, see if there's one WITH brackets
- iter = substitutions.find(std::string("[" + token + "]"));
- if (iter != substitutions.end())
- {
- replacement = iter->second;
- return true;
- }
-
- return false;
-}
-
-// static
-template<>
-bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions)
-{
- // see if we have a replacement for the bracketed string (without the brackets)
- // test first using has() because if we just look up with operator[] we get back an
- // empty string even if the value is missing. We want to distinguish between
- // missing replacements and deliberately empty replacement strings.
- if (substitutions.has(token))
- {
- replacement = substitutions[token].asString();
- return true;
- }
- // if not, see if there's one WITH brackets
- else if (substitutions.has(std::string("[" + token + "]")))
- {
- replacement = substitutions[std::string("[" + token + "]")].asString();
- return true;
- }
-
- return false;
-}
-
-//static
-template<>
-void LLStringUtil::setLocale(std::string inLocale)
-{
- sLocale = inLocale;
-};
-
-//static
-template<>
-std::string LLStringUtil::getLocale(void)
-{
- return sLocale;
-};
-
-// static
-template<>
-void LLStringUtil::formatNumber(std::string& numStr, std::string decimals)
-{
- std::stringstream strStream;
- S32 intDecimals = 0;
-
- convertToS32 (decimals, intDecimals);
- if (!sLocale.empty())
- {
- // std::locale() throws if the locale is unknown! (EXT-7926)
- try
- {
- strStream.imbue(std::locale(sLocale.c_str()));
- } catch (const std::exception &)
- {
- LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL;
- }
- }
-
- if (!intDecimals)
- {
- S32 intStr;
-
- if (convertToS32(numStr, intStr))
- {
- strStream << intStr;
- numStr = strStream.str();
- }
- }
- else
- {
- F32 floatStr;
-
- if (convertToF32(numStr, floatStr))
- {
- strStream << std::fixed << std::showpoint << std::setprecision(intDecimals) << floatStr;
- numStr = strStream.str();
- }
- }
-}
-
-// static
-template<>
-bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,
- std::string param, S32 secFromEpoch)
-{
- if (param == "local") // local
- {
- secFromEpoch -= LLStringOps::getLocalTimeOffset();
- }
- else if (param != "utc") // slt
- {
- secFromEpoch -= LLStringOps::getPacificTimeOffset();
- }
-
- // if never fell into those two ifs above, param must be utc
- if (secFromEpoch < 0) secFromEpoch = 0;
-
- LLDate datetime((F64)secFromEpoch);
- std::string code = LLStringOps::getDatetimeCode (token);
-
- // special case to handle timezone
- if (code == "%Z") {
- if (param == "utc")
- {
- replacement = "GMT";
- }
- else if (param == "local")
- {
- replacement = ""; // user knows their own timezone
- }
- else
- {
-#if 0
- // EXT-1565 : Zai Lynch, James Linden : 15/Oct/09
- // [BSI] Feedback: Viewer clock mentions SLT, but would prefer it to show PST/PDT
- // "slt" = Second Life Time, which is deprecated.
- // If not utc or user local time, fallback to Pacific time
- replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST";
-#else
- // SL-20370 : Steeltoe Linden : 29/Sep/23
- // Change "PDT" to "SLT" on menu bar
- replacement = "SLT";
-#endif
- }
- return true;
- }
-
- //EXT-7013
- //few codes are not suppotred by strtime function (example - weekdays for Japanise)
- //so use predefined ones
-
- //if sWeekDayList is not empty than current locale doesn't support
- //weekday name.
- time_t loc_seconds = (time_t) secFromEpoch;
- if(LLStringOps::sWeekDayList.size() == 7 && code == "%A")
- {
- struct tm * gmt = gmtime (&loc_seconds);
- replacement = LLStringOps::sWeekDayList[gmt->tm_wday];
- }
- else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a")
- {
- struct tm * gmt = gmtime (&loc_seconds);
- replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday];
- }
- else if(LLStringOps::sMonthList.size() == 12 && code == "%B")
- {
- struct tm * gmt = gmtime (&loc_seconds);
- replacement = LLStringOps::sMonthList[gmt->tm_mon];
- }
- else if( !LLStringOps::sDayFormat.empty() && code == "%d" )
- {
- struct tm * gmt = gmtime (&loc_seconds);
- LLStringUtil::format_map_t args;
- args["[MDAY]"] = llformat ("%d", gmt->tm_mday);
- replacement = LLStringOps::sDayFormat;
- LLStringUtil::format(replacement, args);
- }
- else if (code == "%-d")
- {
- struct tm * gmt = gmtime (&loc_seconds);
- replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero
- }
- else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" )
- {
- struct tm * gmt = gmtime (&loc_seconds);
- if(gmt->tm_hour<12)
- {
- replacement = LLStringOps::sAM;
- }
- else
- {
- replacement = LLStringOps::sPM;
- }
- }
- else
- {
- replacement = datetime.toHTTPDateString(code);
- }
-
- // *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format
- // to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738).
- // We could have used '%l' format instead, but it's not supported by Windows.
- if(code == "%I" && token == "hour12" && replacement.at(0) == '0')
- {
- replacement = replacement.at(1);
- }
-
- return !code.empty();
-}
-
-// LLStringUtil::format recogizes the following patterns.
-// All substitutions *must* be encased in []'s in the input string.
-// The []'s are optional in the substitution map.
-// [FOO_123]
-// [FOO,number,precision]
-// [FOO,datetime,format]
-
-
-// static
-template<>
-S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;
- S32 res = 0;
-
- std::string output;
- std::vector<std::string> tokens;
-
- std::string::size_type start = 0;
- std::string::size_type prev_start = 0;
- std::string::size_type key_start = 0;
- while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
- {
- output += std::string(s, prev_start, key_start-prev_start);
- prev_start = start;
-
- bool found_replacement = false;
- std::string replacement;
-
- if (tokens.size() == 0)
- {
- found_replacement = false;
- }
- else if (tokens.size() == 1)
- {
- found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
- }
- else if (tokens[1] == "number")
- {
- std::string param = "0";
-
- if (tokens.size() > 2) param = tokens[2];
- found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
- if (found_replacement) formatNumber (replacement, param);
- }
- else if (tokens[1] == "datetime")
- {
- std::string param;
- if (tokens.size() > 2) param = tokens[2];
-
- format_map_t::const_iterator iter = substitutions.find("datetime");
- if (iter != substitutions.end())
- {
- S32 secFromEpoch = 0;
- bool r = LLStringUtil::convertToS32(iter->second, secFromEpoch);
- if (r)
- {
- found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch);
- }
- }
- }
-
- if (found_replacement)
- {
- output += replacement;
- res++;
- }
- else
- {
- // we had no replacement, use the string as is
- // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
- output += std::string(s, key_start, start-key_start);
- }
- tokens.clear();
- }
- // send the remainder of the string (with no further matches for bracketed names)
- output += std::string(s, start);
- s = output;
- return res;
-}
-
-//static
-template<>
-S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
-{
- LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;
- S32 res = 0;
-
- if (!substitutions.isMap())
- {
- return res;
- }
-
- std::string output;
- std::vector<std::string> tokens;
-
- std::string::size_type start = 0;
- std::string::size_type prev_start = 0;
- std::string::size_type key_start = 0;
- while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
- {
- output += std::string(s, prev_start, key_start-prev_start);
- prev_start = start;
-
- bool found_replacement = false;
- std::string replacement;
-
- if (tokens.size() == 0)
- {
- found_replacement = false;
- }
- else if (tokens.size() == 1)
- {
- found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
- }
- else if (tokens[1] == "number")
- {
- std::string param = "0";
-
- if (tokens.size() > 2) param = tokens[2];
- found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
- if (found_replacement) formatNumber (replacement, param);
- }
- else if (tokens[1] == "datetime")
- {
- std::string param;
- if (tokens.size() > 2) param = tokens[2];
-
- S32 secFromEpoch = (S32) substitutions["datetime"].asInteger();
- found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch);
- }
-
- if (found_replacement)
- {
- output += replacement;
- res++;
- }
- else
- {
- // we had no replacement, use the string as is
- // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
- output += std::string(s, key_start, start-key_start);
- }
- tokens.clear();
- }
- // send the remainder of the string (with no further matches for bracketed names)
- output += std::string(s, start);
- s = output;
- return res;
-}
-
-////////////////////////////////////////////////////////////
-// Testing
-
-#ifdef _DEBUG
-
-template<class T>
-void LLStringUtilBase<T>::testHarness()
-{
- std::string s1;
-
- llassert( s1.c_str() == NULL );
- llassert( s1.size() == 0 );
- llassert( s1.empty() );
-
- std::string s2( "hello");
- llassert( !strcmp( s2.c_str(), "hello" ) );
- llassert( s2.size() == 5 );
- llassert( !s2.empty() );
- std::string s3( s2 );
-
- llassert( "hello" == s2 );
- llassert( s2 == "hello" );
- llassert( s2 > "gello" );
- llassert( "gello" < s2 );
- llassert( "gello" != s2 );
- llassert( s2 != "gello" );
-
- std::string s4 = s2;
- llassert( !s4.empty() );
- s4.empty();
- llassert( s4.empty() );
-
- std::string s5("");
- llassert( s5.empty() );
-
- llassert( isValidIndex(s5, 0) );
- llassert( !isValidIndex(s5, 1) );
-
- s3 = s2;
- s4 = "hello again";
-
- s4 += "!";
- s4 += s4;
- llassert( s4 == "hello again!hello again!" );
-
-
- std::string s6 = s2 + " " + s2;
- std::string s7 = s6;
- llassert( s6 == s7 );
- llassert( !( s6 != s7) );
- llassert( !(s6 < s7) );
- llassert( !(s6 > s7) );
-
- llassert( !(s6 == "hi"));
- llassert( s6 == "hello hello");
- llassert( s6 < "hi");
-
- llassert( s6[1] == 'e' );
- s6[1] = 'f';
- llassert( s6[1] == 'f' );
-
- s2.erase( 4, 1 );
- llassert( s2 == "hell");
- s2.insert( 0, "y" );
- llassert( s2 == "yhell");
- s2.erase( 1, 3 );
- llassert( s2 == "yl");
- s2.insert( 1, "awn, don't yel");
- llassert( s2 == "yawn, don't yell");
-
- std::string s8 = s2.substr( 6, 5 );
- llassert( s8 == "don't" );
-
- std::string s9 = " \t\ntest \t\t\n ";
- trim(s9);
- llassert( s9 == "test" );
-
- s8 = "abc123&*(ABC";
-
- s9 = s8;
- toUpper(s9);
- llassert( s9 == "ABC123&*(ABC" );
-
- s9 = s8;
- toLower(s9);
- llassert( s9 == "abc123&*(abc" );
-
-
- std::string s10( 10, 'x' );
- llassert( s10 == "xxxxxxxxxx" );
-
- std::string s11( "monkey in the middle", 7, 2 );
- llassert( s11 == "in" );
-
- std::string s12; //empty
- s12 += "foo";
- llassert( s12 == "foo" );
-
- std::string s13; //empty
- s13 += 'f';
- llassert( s13 == "f" );
-}
-
-
-#endif // _DEBUG
+/** + * @file llstring.cpp + * @brief String utility functions and the std::string class. + * + * $LicenseInfo:firstyear=2001&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#include "linden_common.h" + +#include "llstring.h" +#include "llerror.h" +#include "llfasttimer.h" +#include "llsd.h" +#include <vector> + +#if LL_WINDOWS +#include "llwin32headerslean.h" +#include <winnls.h> // for WideCharToMultiByte +#endif + +std::string ll_safe_string(const char* in) +{ + if(in) return std::string(in); + return std::string(); +} + +std::string ll_safe_string(const char* in, S32 maxlen) +{ + if(in && maxlen > 0 ) return std::string(in, maxlen); + + return std::string(); +} + +bool is_char_hex(char hex) +{ + if((hex >= '0') && (hex <= '9')) + { + return true; + } + else if((hex >= 'a') && (hex <='f')) + { + return true; + } + else if((hex >= 'A') && (hex <='F')) + { + return true; + } + return false; // uh - oh, not hex any more... +} + +U8 hex_as_nybble(char hex) +{ + if((hex >= '0') && (hex <= '9')) + { + return (U8)(hex - '0'); + } + else if((hex >= 'a') && (hex <='f')) + { + return (U8)(10 + hex - 'a'); + } + else if((hex >= 'A') && (hex <='F')) + { + return (U8)(10 + hex - 'A'); + } + return 0; // uh - oh, not hex any more... +} + +bool iswindividual(llwchar elem) +{ + U32 cur_char = (U32)elem; + bool result = false; + if (0x2E80<= cur_char && cur_char <= 0x9FFF) + { + result = true; + } + else if (0xAC00<= cur_char && cur_char <= 0xD7A0 ) + { + result = true; + } + else if (0xF900<= cur_char && cur_char <= 0xFA60 ) + { + result = true; + } + return result; +} + +bool _read_file_into_string(std::string& str, const std::string& filename) +{ + llifstream ifs(filename.c_str(), llifstream::binary); + if (!ifs.is_open()) + { + LL_INFOS() << "Unable to open file " << filename << LL_ENDL; + return false; + } + + std::ostringstream oss; + + oss << ifs.rdbuf(); + str = oss.str(); + ifs.close(); + return true; +} + + + + +// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c +// for the Unicode implementation - this doesn't match because it was written before finding +// it. + + +std::ostream& operator<<(std::ostream &s, const LLWString &wstr) +{ + std::string utf8_str = wstring_to_utf8str(wstr); + s << utf8_str; + return s; +} + +std::string rawstr_to_utf8(const std::string& raw) +{ + LLWString wstr(utf8str_to_wstring(raw)); + return wstring_to_utf8str(wstr); +} + +std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars) +{ + U32 cur_char = (U32)in_char; + char* base = outchars; + if (cur_char < 0x80) + { + *outchars++ = (U8)cur_char; + } + else if (cur_char < 0x800) + { + *outchars++ = 0xC0 | (cur_char >> 6); + *outchars++ = 0x80 | (cur_char & 0x3F); + } + else if (cur_char < 0x10000) + { + *outchars++ = 0xE0 | (cur_char >> 12); + *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); + *outchars++ = 0x80 | (cur_char & 0x3F); + } + else if (cur_char < 0x200000) + { + *outchars++ = 0xF0 | (cur_char >> 18); + *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); + *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); + *outchars++ = 0x80 | (cur_char & 0x3F); + } + else if (cur_char < 0x4000000) + { + *outchars++ = 0xF8 | (cur_char >> 24); + *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F); + *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); + *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); + *outchars++ = 0x80 | (cur_char & 0x3F); + } + else if (cur_char < 0x80000000) + { + *outchars++ = 0xFC | (cur_char >> 30); + *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F); + *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F); + *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F); + *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F); + *outchars++ = 0x80 | (cur_char & 0x3F); + } + else + { + LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL; + *outchars++ = LL_UNKNOWN_CHAR; + } + return outchars - base; +} + +auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar) +{ + const U16* base = inchars; + U16 cur_char = *inchars++; + llwchar char32 = cur_char; + if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF)) + { + // Surrogates + char32 = ((llwchar)(cur_char - 0xD800)) << 10; + cur_char = *inchars++; + char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL; + } + else + { + char32 = (llwchar)cur_char; + } + *outchar = char32; + return inchars - base; +} + +llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) +{ + llutf16string out; + + S32 i = 0; + while (i < len) + { + U32 cur_char = utf32str[i]; + if (cur_char > 0xFFFF) + { + out += (0xD7C0 + (cur_char >> 10)); + out += (0xDC00 | (cur_char & 0x3FF)); + } + else + { + out += cur_char; + } + i++; + } + return out; +} + +llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) +{ + LLWString wstr = utf8str_to_wstring ( utf8str, len ); + return wstring_to_utf16str ( wstr ); +} + +LLWString utf16str_to_wstring(const U16* utf16str, size_t len) +{ + LLWString wout; + if (len == 0) return wout; + + S32 i = 0; + const U16* chars16 = utf16str; + while (i < len) + { + llwchar cur_char; + i += utf16chars_to_wchar(chars16+i, &cur_char); + wout += cur_char; + } + return wout; +} + +// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. +S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len) +{ + S32 surrogate_pairs = 0; + // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): + const U16 *const utf16_chars = &(*(utf16str.begin())); + S32 i = 0; + while (i < utf16_len) + { + const U16 c = utf16_chars[i++]; + if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16 + { // Have first byte of a surrogate pair + if (i >= utf16_len) + { + break; + } + const U16 d = utf16_chars[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { // Have valid second byte of a surrogate pair + surrogate_pairs++; + i++; + } + } + } + return utf16_len - surrogate_pairs; +} + +// Length in utf16string (UTF-16) of wlen wchars beginning at woffset. +S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen) +{ + const S32 end = llmin((S32)wstr.length(), woffset + wlen); + if (end < woffset) + { + return 0; + } + else + { + S32 length = end - woffset; + for (S32 i = woffset; i < end; i++) + { + if (wstr[i] >= 0x10000) + { + length++; + } + } + return length; + } +} + +// Given a wstring and an offset in it, returns the length as wstring (i.e., +// number of llwchars) of the longest substring that starts at the offset +// and whose equivalent utf-16 string does not exceeds the given utf16_length. +S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, bool *unaligned) +{ + const auto end = wstr.length(); + bool u{ false }; + S32 n = woffset + utf16_length; + S32 i = woffset; + while (i < end) + { + if (wstr[i] >= 0x10000) + { + --n; + } + if (i >= n) + { + u = (i > n); + break; + } + i++; + } + if (unaligned) + { + *unaligned = u; + } + return i - woffset; +} + +S32 wchar_utf8_length(const llwchar wc) +{ + if (wc < 0x80) + { + return 1; + } + else if (wc < 0x800) + { + return 2; + } + else if (wc < 0x10000) + { + return 3; + } + else if (wc < 0x200000) + { + return 4; + } + else if (wc < 0x4000000) + { + return 5; + } + else + { + return 6; + } +} + +std::string wchar_utf8_preview(const llwchar wc) +{ + std::ostringstream oss; + oss << std::hex << std::uppercase << (U32)wc; + + U8 out_bytes[8]; + U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes); + + if (size > 1) + { + oss << " ["; + for (U32 i = 0; i < size; ++i) + { + if (i) + { + oss << ", "; + } + oss << (int)out_bytes[i]; + } + oss << "]"; + } + + return oss.str(); +} + +S32 wstring_utf8_length(const LLWString& wstr) +{ + S32 len = 0; + for (S32 i = 0; i < (S32)wstr.length(); i++) + { + len += wchar_utf8_length(wstr[i]); + } + return len; +} + +LLWString utf8str_to_wstring(const char* utf8str, size_t len) +{ + LLWString wout; + + S32 i = 0; + while (i < len) + { + llwchar unichar; + U8 cur_char = utf8str[i]; + + if (cur_char < 0x80) + { + // Ascii character, just add it + unichar = cur_char; + } + else + { + S32 cont_bytes = 0; + if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32 + { + unichar = (0x1F&cur_char); + cont_bytes = 1; + } + else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32 + { + unichar = (0x0F&cur_char); + cont_bytes = 2; + } + else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32 + { + unichar = (0x07&cur_char); + cont_bytes = 3; + } + else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32 + { + unichar = (0x03&cur_char); + cont_bytes = 4; + } + else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32 + { + unichar = (0x01&cur_char); + cont_bytes = 5; + } + else + { + wout += LL_UNKNOWN_CHAR; + ++i; + continue; + } + + // Check that this character doesn't go past the end of the string + auto end = (len < (i + cont_bytes)) ? len : (i + cont_bytes); + do + { + ++i; + + cur_char = utf8str[i]; + if ( (cur_char >> 6) == 0x2 ) + { + unichar <<= 6; + unichar += (0x3F&cur_char); + } + else + { + // Malformed sequence - roll back to look at this as a new char + unichar = LL_UNKNOWN_CHAR; + --i; + break; + } + } while(i < end); + + // Handle overlong characters and NULL characters + if ( ((cont_bytes == 1) && (unichar < 0x80)) + || ((cont_bytes == 2) && (unichar < 0x800)) + || ((cont_bytes == 3) && (unichar < 0x10000)) + || ((cont_bytes == 4) && (unichar < 0x200000)) + || ((cont_bytes == 5) && (unichar < 0x4000000)) ) + { + unichar = LL_UNKNOWN_CHAR; + } + } + + wout += unichar; + ++i; + } + return wout; +} + +std::string wstring_to_utf8str(const llwchar* utf32str, size_t len) +{ + std::string out; + + S32 i = 0; + while (i < len) + { + char tchars[8]; /* Flawfinder: ignore */ + auto n = wchar_to_utf8chars(utf32str[i], tchars); + tchars[n] = 0; + out += tchars; + i++; + } + return out; +} + +std::string utf16str_to_utf8str(const U16* utf16str, size_t len) +{ + return wstring_to_utf8str(utf16str_to_wstring(utf16str, len)); +} + +std::string utf8str_trim(const std::string& utf8str) +{ + LLWString wstr = utf8str_to_wstring(utf8str); + LLWStringUtil::trim(wstr); + return wstring_to_utf8str(wstr); +} + + +std::string utf8str_tolower(const std::string& utf8str) +{ + LLWString out_str = utf8str_to_wstring(utf8str); + LLWStringUtil::toLower(out_str); + return wstring_to_utf8str(out_str); +} + + +S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs) +{ + LLWString wlhs = utf8str_to_wstring(lhs); + LLWString wrhs = utf8str_to_wstring(rhs); + return LLWStringUtil::compareInsensitive(wlhs, wrhs); +} + +std::string utf8str_truncate(const std::string& utf8str, const S32 max_len) +{ + if (0 == max_len) + { + return std::string(); + } + if ((S32)utf8str.length() <= max_len) + { + return utf8str; + } + else + { + S32 cur_char = max_len; + + // If we're ASCII, we don't need to do anything + if ((U8)utf8str[cur_char] > 0x7f) + { + // If first two bits are (10), it's the tail end of a multibyte char. We need to shift back + // to the first character + while (0x80 == (0xc0 & utf8str[cur_char])) + { + cur_char--; + // Keep moving forward until we hit the first char; + if (cur_char == 0) + { + // Make sure we don't trash memory if we've got a bogus string. + break; + } + } + } + // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars + return utf8str.substr(0, cur_char); + } +} + +std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len) +{ + if (0 == symbol_len) + { + return std::string(); + } + if ((S32)utf8str.length() <= symbol_len) + { + return utf8str; + } + else + { + int len = 0, byteIndex = 0; + const char* aStr = utf8str.c_str(); + size_t origSize = utf8str.size(); + + for (byteIndex = 0; len < symbol_len && byteIndex < origSize; byteIndex++) + { + if ((aStr[byteIndex] & 0xc0) != 0x80) + { + len += 1; + } + } + return utf8str.substr(0, byteIndex); + } +} + +std::string utf8str_substChar( + const std::string& utf8str, + const llwchar target_char, + const llwchar replace_char) +{ + LLWString wstr = utf8str_to_wstring(utf8str); + LLWStringUtil::replaceChar(wstr, target_char, replace_char); + //wstr = wstring_substChar(wstr, target_char, replace_char); + return wstring_to_utf8str(wstr); +} + +std::string utf8str_makeASCII(const std::string& utf8str) +{ + LLWString wstr = utf8str_to_wstring(utf8str); + LLWStringUtil::_makeASCII(wstr); + return wstring_to_utf8str(wstr); +} + +std::string mbcsstring_makeASCII(const std::string& wstr) +{ + // Replace non-ASCII chars with replace_char + std::string out_str = wstr; + for (S32 i = 0; i < (S32)out_str.length(); i++) + { + if ((U8)out_str[i] > 0x7f) + { + out_str[i] = LL_UNKNOWN_CHAR; + } + } + return out_str; +} + +std::string utf8str_removeCRLF(const std::string& utf8str) +{ + if (0 == utf8str.length()) + { + return std::string(); + } + const char CR = 13; + + std::string out; + out.reserve(utf8str.length()); + const S32 len = (S32)utf8str.length(); + for( S32 i = 0; i < len; i++ ) + { + if( utf8str[i] != CR ) + { + out.push_back(utf8str[i]); + } + } + return out; +} + +llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length) +{ + switch (length) + { + case 2: + return ((utf8str[offset] & 0x1F) << 6) + + (utf8str[offset + 1] & 0x3F); + case 3: + return ((utf8str[offset] & 0x0F) << 12) + + ((utf8str[offset + 1] & 0x3F) << 6) + + (utf8str[offset + 2] & 0x3F); + case 4: + return ((utf8str[offset] & 0x07) << 18) + + ((utf8str[offset + 1] & 0x3F) << 12) + + ((utf8str[offset + 2] & 0x3F) << 6) + + (utf8str[offset + 3] & 0x3F); + case 5: + return ((utf8str[offset] & 0x03) << 24) + + ((utf8str[offset + 1] & 0x3F) << 18) + + ((utf8str[offset + 2] & 0x3F) << 12) + + ((utf8str[offset + 3] & 0x3F) << 6) + + (utf8str[offset + 4] & 0x3F); + case 6: + return ((utf8str[offset] & 0x01) << 30) + + ((utf8str[offset + 1] & 0x3F) << 24) + + ((utf8str[offset + 2] & 0x3F) << 18) + + ((utf8str[offset + 3] & 0x3F) << 12) + + ((utf8str[offset + 4] & 0x3F) << 6) + + (utf8str[offset + 5] & 0x3F); + case 7: + return ((utf8str[offset + 1] & 0x03) << 30) + + ((utf8str[offset + 2] & 0x3F) << 24) + + ((utf8str[offset + 3] & 0x3F) << 18) + + ((utf8str[offset + 4] & 0x3F) << 12) + + ((utf8str[offset + 5] & 0x3F) << 6) + + (utf8str[offset + 6] & 0x3F); + } + return LL_UNKNOWN_CHAR; +} + +std::string utf8str_showBytesUTF8(const std::string& utf8str) +{ + std::string result; + + bool in_sequence = false; + size_t sequence_size = 0; + size_t byte_index = 0; + size_t source_length = utf8str.size(); + + auto open_sequence = [&]() + { + if (!result.empty() && result.back() != '\n') + result += '\n'; // Use LF as a separator before new UTF-8 sequence + result += '['; + in_sequence = true; + }; + + auto close_sequence = [&]() + { + llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size); + if (unicode != LL_UNKNOWN_CHAR) + { + result += llformat("+%04X", unicode); + } + result += ']'; + in_sequence = false; + sequence_size = 0; + }; + + while (byte_index < source_length) + { + U8 byte = utf8str[byte_index]; + if (byte >= 0x80) // Part of an UTF-8 sequence + { + if (!in_sequence) // Start new UTF-8 sequence + { + open_sequence(); + } + else if (byte >= 0xC0) // Start another UTF-8 sequence + { + close_sequence(); + open_sequence(); + } + else // Continue the same UTF-8 sequence + { + result += '.'; + } + result += llformat("%02X", byte); // The byte is represented in hexadecimal form + ++sequence_size; + } + else // ASCII symbol is represented as a character + { + if (in_sequence) // End of UTF-8 sequence + { + close_sequence(); + if (byte != '\n') + { + result += '\n'; // Use LF as a separator between UTF-8 and ASCII + } + } + result += byte; + } + ++byte_index; + } + + if (in_sequence) // End of UTF-8 sequence + { + close_sequence(); + } + + return result; +} + +// Search for any emoji symbol, return true if found +bool wstring_has_emoji(const LLWString& wstr) +{ + for (const llwchar& wch : wstr) + { + if (LLStringOps::isEmoji(wch)) + return true; + } + + return false; +} + +// Cut emoji symbols if exist +bool wstring_remove_emojis(LLWString& wstr) +{ + bool found = false; + for (size_t i = 0; i < wstr.size(); ++i) + { + if (LLStringOps::isEmoji(wstr[i])) + { + wstr.erase(i--, 1); + found = true; + } + } + return found; +} + +// Cut emoji symbols if exist +bool utf8str_remove_emojis(std::string& utf8str) +{ + LLWString wstr = utf8str_to_wstring(utf8str); + if (!wstring_remove_emojis(wstr)) + return false; + utf8str = wstring_to_utf8str(wstr); + return true; +} + +#if LL_WINDOWS +unsigned int ll_wstring_default_code_page() +{ + return CP_UTF8; +} + +std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page) +{ + std::string out; + if(in) + { + int len_out = WideCharToMultiByte( + code_page, + 0, + in, + len_in, + NULL, + 0, + 0, + 0); + // We will need two more bytes for the double NULL ending + // created in WideCharToMultiByte(). + char* pout = new char [len_out + 2]; + memset(pout, 0, len_out + 2); + if(pout) + { + WideCharToMultiByte( + code_page, + 0, + in, + len_in, + pout, + len_out, + 0, + 0); + out.assign(pout); + delete[] pout; + } + } + return out; +} + +std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page) +{ + // From review: + // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, + // plus one for a null terminator, and be guaranteed to not overflow. + + // Normally, I'd call that sort of thing premature optimization, + // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets. +// int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0); + + // reserve an output buffer that will be destroyed on exit, with a place + // to put NULL terminator + std::vector<wchar_t> w_out(len + 1); + + memset(&w_out[0], 0, w_out.size()); + int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len, + &w_out[0], w_out.size() - 1); + + //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. + w_out[real_output_str_len] = 0; + + // construct string<wchar_t> from our temporary output buffer + return {&w_out[0]}; +} + +LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len) +{ + // Whether or not std::wstring and llutf16string are distinct types, they + // both hold UTF-16LE characters. (See header file comments.) Pretend this + // wchar_t* sequence is really a U16* sequence and use the conversion we + // define above. + return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len); +} + +std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len) +{ + // first, convert to llutf16string, for which we have a real implementation + auto utf16str{ wstring_to_utf16str(in, len) }; + // then, because each U16 char must be UTF-16LE encoded, pretend the U16* + // string pointer is a wchar_t* and instantiate a std::wstring of the same + // length. + return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() }; +} + +std::string ll_convert_string_to_utf8_string(const std::string& in) +{ + // If you pass code_page, you must also pass length, otherwise the code + // page parameter will be mistaken for length. + auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP); + // CP_UTF8 is default -- see ll_wstring_default_code_page() above. + return ll_convert_wide_to_string(w_mesg); +} + +namespace +{ + +void HeapFree_deleter(void* ptr) +{ + // instead of LocalFree(), per https://stackoverflow.com/a/31541205 + HeapFree(GetProcessHeap(), NULL, ptr); +} + +} // anonymous namespace + +template<> +std::wstring windows_message<std::wstring>(DWORD error) +{ + // derived from https://stackoverflow.com/a/455533 + wchar_t* rawptr = nullptr; + auto okay = FormatMessageW( + // use system message tables for GetLastError() codes + FORMAT_MESSAGE_FROM_SYSTEM | + // internally allocate buffer and return its pointer + FORMAT_MESSAGE_ALLOCATE_BUFFER | + // you cannot pass insertion parameters (thanks Gandalf) + FORMAT_MESSAGE_IGNORE_INSERTS | + // ignore line breaks in message definition text + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM + error, // dwMessageId + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId + (LPWSTR)&rawptr, // lpBuffer: force-cast wchar_t** to wchar_t* + 0, // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER + NULL); // Arguments, unused + + // make a unique_ptr from rawptr so it gets cleaned up properly + std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter); + + if (okay && bufferptr) + { + // got the message, return it ('okay' is length in characters) + return { bufferptr.get(), okay }; + } + + // did not get the message, synthesize one + auto format_message_error = GetLastError(); + std::wostringstream out; + out << L"GetLastError() " << error << L" (FormatMessageW() failed with " + << format_message_error << L")"; + return out.str(); +} + +std::optional<std::wstring> llstring_getoptenv(const std::string& key) +{ + auto wkey = ll_convert_string_to_wide(key); + // Take a wild guess as to how big the buffer should be. + std::vector<wchar_t> buffer(1024); + auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + // If our initial guess was too short, n will indicate the size (in + // wchar_t's) that buffer should have been, including the terminating nul. + if (n > (buffer.size() - 1)) + { + // make it big enough + buffer.resize(n); + // and try again + n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + } + // did that (ultimately) succeed? + if (n) + { + // great, return populated std::optional + return std::make_optional<std::wstring>(&buffer[0]); + } + + // not successful + auto last_error = GetLastError(); + // Don't bother warning for NOT_FOUND; that's an expected case + if (last_error != ERROR_ENVVAR_NOT_FOUND) + { + LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: " + << windows_message<std::string>(last_error) << LL_ENDL; + } + // return empty std::optional + return {}; +} + +#else // ! LL_WINDOWS + +std::optional<std::string> llstring_getoptenv(const std::string& key) +{ + auto found = getenv(key.c_str()); + if (found) + { + // return populated std::optional + return std::make_optional<std::string>(found); + } + else + { + // return empty std::optional + return {}; + } +} + +#endif // ! LL_WINDOWS + +long LLStringOps::sPacificTimeOffset = 0; +long LLStringOps::sLocalTimeOffset = 0; +bool LLStringOps::sPacificDaylightTime = 0; +std::map<std::string, std::string> LLStringOps::datetimeToCodes; + +std::vector<std::string> LLStringOps::sWeekDayList; +std::vector<std::string> LLStringOps::sWeekDayShortList; +std::vector<std::string> LLStringOps::sMonthList; +std::vector<std::string> LLStringOps::sMonthShortList; + + +std::string LLStringOps::sDayFormat; +std::string LLStringOps::sAM; +std::string LLStringOps::sPM; + +// static +bool LLStringOps::isEmoji(llwchar a) +{ +#if 0 // Do not consider special characters that might have a corresponding + // glyph in the monochorme fallback fonts as a "genuine" emoji. HB + return a == 0xa9 || a == 0xae || (a >= 0x2000 && a < 0x3300) || + (a >= 0x1f000 && a < 0x20000); +#else + // These are indeed "genuine" emojis, we *do want* rendered as such. HB + return a >= 0x1f000 && a < 0x20000; +#endif + } + +S32 LLStringOps::collate(const llwchar* a, const llwchar* b) +{ + #if LL_WINDOWS + // in Windows, wide string functions operator on 16-bit strings, + // not the proper 32 bit wide string + return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str()); + #else + return wcscoll(a, b); + #endif +} + +void LLStringOps::setupDatetimeInfo (bool daylight) +{ + time_t nowT, localT, gmtT; + struct tm * tmpT; + + nowT = time (NULL); + + tmpT = gmtime (&nowT); + gmtT = mktime (tmpT); + + tmpT = localtime (&nowT); + localT = mktime (tmpT); + + sLocalTimeOffset = (long) (gmtT - localT); + if (tmpT->tm_isdst) + { + sLocalTimeOffset -= 60 * 60; // 1 hour + } + + sPacificDaylightTime = daylight; + sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60; + + datetimeToCodes["wkday"] = "%a"; // Thu + datetimeToCodes["weekday"] = "%A"; // Thursday + datetimeToCodes["year4"] = "%Y"; // 2009 + datetimeToCodes["year"] = "%Y"; // 2009 + datetimeToCodes["year2"] = "%y"; // 09 + datetimeToCodes["mth"] = "%b"; // Aug + datetimeToCodes["month"] = "%B"; // August + datetimeToCodes["mthnum"] = "%m"; // 08 + datetimeToCodes["day"] = "%d"; // 31 + datetimeToCodes["sday"] = "%-d"; // 9 + datetimeToCodes["hour24"] = "%H"; // 14 + datetimeToCodes["hour"] = "%H"; // 14 + datetimeToCodes["hour12"] = "%I"; // 02 + datetimeToCodes["min"] = "%M"; // 59 + datetimeToCodes["ampm"] = "%p"; // AM + datetimeToCodes["second"] = "%S"; // 59 + datetimeToCodes["timezone"] = "%Z"; // PST +} + +void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output) +{ + output.clear(); + size_t length = data.size(); + + // tokenize it and put it in the array + std::string cur_word; + for(size_t i = 0; i < length; ++i) + { + if(data[i] == ':') + { + output.push_back(cur_word); + cur_word.clear(); + } + else + { + cur_word.append(1, data[i]); + } + } + output.push_back(cur_word); +} + +void LLStringOps::setupWeekDaysNames(const std::string& data) +{ + tokenizeStringToArray(data,sWeekDayList); +} +void LLStringOps::setupWeekDaysShortNames(const std::string& data) +{ + tokenizeStringToArray(data,sWeekDayShortList); +} +void LLStringOps::setupMonthNames(const std::string& data) +{ + tokenizeStringToArray(data,sMonthList); +} +void LLStringOps::setupMonthShortNames(const std::string& data) +{ + tokenizeStringToArray(data,sMonthShortList); +} +void LLStringOps::setupDayFormat(const std::string& data) +{ + sDayFormat = data; +} + + +std::string LLStringOps::getDatetimeCode (std::string key) +{ + std::map<std::string, std::string>::iterator iter; + + iter = datetimeToCodes.find (key); + if (iter != datetimeToCodes.end()) + { + return iter->second; + } + else + { + return std::string(""); + } +} + +std::string LLStringOps::getReadableNumber(F64 num) +{ + if (fabs(num)>=1e9) + { + return llformat("%.2lfB", num / 1e9); + } + else if (fabs(num)>=1e6) + { + return llformat("%.2lfM", num / 1e6); + } + else if (fabs(num)>=1e3) + { + return llformat("%.2lfK", num / 1e3); + } + else + { + return llformat("%.2lf", num); + } +} + +namespace LLStringFn +{ + // NOTE - this restricts output to ascii + void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement) + { + const char MIN = 0x20; + std::basic_string<char>::size_type len = string.size(); + for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) + { + if(string[ii] < MIN) + { + string[ii] = replacement; + } + } + } + + + // NOTE - this restricts output to ascii + void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str, + char replacement) + { + const char MIN = 0x20; + const char PIPE = 0x7c; + std::basic_string<char>::size_type len = str.size(); + for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) + { + if( (str[ii] < MIN) || (str[ii] == PIPE) ) + { + str[ii] = replacement; + } + } + } + + // https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on + // allowable code points for XML. Specifically, they are: + // 0x09, 0x0a, 0x0d, and 0x20 on up. JC + std::string strip_invalid_xml(const std::string& instr) + { + std::string output; + output.reserve( instr.size() ); + std::string::const_iterator it = instr.begin(); + while (it != instr.end()) + { + // Must compare as unsigned for >= + // Test most likely match first + const unsigned char c = (unsigned char)*it; + if ( c >= (unsigned char)0x20 // SPACE + || c == (unsigned char)0x09 // TAB + || c == (unsigned char)0x0a // LINE_FEED + || c == (unsigned char)0x0d ) // CARRIAGE_RETURN + { + output.push_back(c); + } + ++it; + } + return output; + } + + /** + * @brief Replace all control characters (c < 0x20) with replacement in + * string. + */ + void replace_ascii_controlchars(std::basic_string<char>& string, char replacement) + { + const unsigned char MIN = 0x20; + std::basic_string<char>::size_type len = string.size(); + for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii) + { + const unsigned char c = (unsigned char) string[ii]; + if(c < MIN) + { + string[ii] = replacement; + } + } + } +} + +//////////////////////////////////////////////////////////// + +// Forward specialization of LLStringUtil::format before use in LLStringUtil::formatDatetime. +template<> +S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions); + +//static +template<> +void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims) +{ + // Starting at offset 0, scan forward for the next non-delimiter. We're + // done when the only characters left in 'instr' are delimiters. + for (std::string::size_type begIdx, endIdx = 0; + (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; ) + { + // Found a non-delimiter. After that, find the next delimiter. + endIdx = instr.find_first_of (delims, begIdx); + if (endIdx == std::string::npos) + { + // No more delimiters: this token extends to the end of the string. + endIdx = instr.length(); + } + + // extract the token between begIdx and endIdx; substr() needs length + std::string currToken(instr.substr(begIdx, endIdx - begIdx)); + LLStringUtil::trim (currToken); + tokens.push_back(currToken); + // next scan past delimiters starts at endIdx + } +} + +template<> +LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens) +{ + const std::string delims (","); + + // Find the first [ + size_type pos1 = instr.find('[', start); + if (pos1 == std::string::npos) + return std::string::npos; + + //Find the first ] after the initial [ + size_type pos2 = instr.find(']', pos1); + if (pos2 == std::string::npos) + return std::string::npos; + + // Find the last [ before ] in case of nested [[]] + pos1 = instr.find_last_of('[', pos2-1); + if (pos1 == std::string::npos || pos1 < start) + return std::string::npos; + + getTokens(std::string(instr,pos1+1,pos2-pos1-1), tokens, delims); + start = pos2+1; + + return pos1; +} + +// static +template<> +bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions) +{ + // see if we have a replacement for the bracketed string (without the brackets) + // test first using has() because if we just look up with operator[] we get back an + // empty string even if the value is missing. We want to distinguish between + // missing replacements and deliberately empty replacement strings. + format_map_t::const_iterator iter = substitutions.find(token); + if (iter != substitutions.end()) + { + replacement = iter->second; + return true; + } + // if not, see if there's one WITH brackets + iter = substitutions.find(std::string("[" + token + "]")); + if (iter != substitutions.end()) + { + replacement = iter->second; + return true; + } + + return false; +} + +// static +template<> +bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions) +{ + // see if we have a replacement for the bracketed string (without the brackets) + // test first using has() because if we just look up with operator[] we get back an + // empty string even if the value is missing. We want to distinguish between + // missing replacements and deliberately empty replacement strings. + if (substitutions.has(token)) + { + replacement = substitutions[token].asString(); + return true; + } + // if not, see if there's one WITH brackets + else if (substitutions.has(std::string("[" + token + "]"))) + { + replacement = substitutions[std::string("[" + token + "]")].asString(); + return true; + } + + return false; +} + +//static +template<> +void LLStringUtil::setLocale(std::string inLocale) +{ + sLocale = inLocale; +}; + +//static +template<> +std::string LLStringUtil::getLocale(void) +{ + return sLocale; +}; + +// static +template<> +void LLStringUtil::formatNumber(std::string& numStr, std::string decimals) +{ + std::stringstream strStream; + S32 intDecimals = 0; + + convertToS32 (decimals, intDecimals); + if (!sLocale.empty()) + { + // std::locale() throws if the locale is unknown! (EXT-7926) + try + { + strStream.imbue(std::locale(sLocale.c_str())); + } catch (const std::exception &) + { + LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL; + } + } + + if (!intDecimals) + { + S32 intStr; + + if (convertToS32(numStr, intStr)) + { + strStream << intStr; + numStr = strStream.str(); + } + } + else + { + F32 floatStr; + + if (convertToF32(numStr, floatStr)) + { + strStream << std::fixed << std::showpoint << std::setprecision(intDecimals) << floatStr; + numStr = strStream.str(); + } + } +} + +// static +template<> +bool LLStringUtil::formatDatetime(std::string& replacement, std::string token, + std::string param, S32 secFromEpoch) +{ + if (param == "local") // local + { + secFromEpoch -= LLStringOps::getLocalTimeOffset(); + } + else if (param != "utc") // slt + { + secFromEpoch -= LLStringOps::getPacificTimeOffset(); + } + + // if never fell into those two ifs above, param must be utc + if (secFromEpoch < 0) secFromEpoch = 0; + + LLDate datetime((F64)secFromEpoch); + std::string code = LLStringOps::getDatetimeCode (token); + + // special case to handle timezone + if (code == "%Z") { + if (param == "utc") + { + replacement = "GMT"; + } + else if (param == "local") + { + replacement = ""; // user knows their own timezone + } + else + { +#if 0 + // EXT-1565 : Zai Lynch, James Linden : 15/Oct/09 + // [BSI] Feedback: Viewer clock mentions SLT, but would prefer it to show PST/PDT + // "slt" = Second Life Time, which is deprecated. + // If not utc or user local time, fallback to Pacific time + replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST"; +#else + // SL-20370 : Steeltoe Linden : 29/Sep/23 + // Change "PDT" to "SLT" on menu bar + replacement = "SLT"; +#endif + } + return true; + } + + //EXT-7013 + //few codes are not suppotred by strtime function (example - weekdays for Japanise) + //so use predefined ones + + //if sWeekDayList is not empty than current locale doesn't support + //weekday name. + time_t loc_seconds = (time_t) secFromEpoch; + if(LLStringOps::sWeekDayList.size() == 7 && code == "%A") + { + struct tm * gmt = gmtime (&loc_seconds); + replacement = LLStringOps::sWeekDayList[gmt->tm_wday]; + } + else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a") + { + struct tm * gmt = gmtime (&loc_seconds); + replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday]; + } + else if(LLStringOps::sMonthList.size() == 12 && code == "%B") + { + struct tm * gmt = gmtime (&loc_seconds); + replacement = LLStringOps::sMonthList[gmt->tm_mon]; + } + else if( !LLStringOps::sDayFormat.empty() && code == "%d" ) + { + struct tm * gmt = gmtime (&loc_seconds); + LLStringUtil::format_map_t args; + args["[MDAY]"] = llformat ("%d", gmt->tm_mday); + replacement = LLStringOps::sDayFormat; + LLStringUtil::format(replacement, args); + } + else if (code == "%-d") + { + struct tm * gmt = gmtime (&loc_seconds); + replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero + } + else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" ) + { + struct tm * gmt = gmtime (&loc_seconds); + if(gmt->tm_hour<12) + { + replacement = LLStringOps::sAM; + } + else + { + replacement = LLStringOps::sPM; + } + } + else + { + replacement = datetime.toHTTPDateString(code); + } + + // *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format + // to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738). + // We could have used '%l' format instead, but it's not supported by Windows. + if(code == "%I" && token == "hour12" && replacement.at(0) == '0') + { + replacement = replacement.at(1); + } + + return !code.empty(); +} + +// LLStringUtil::format recogizes the following patterns. +// All substitutions *must* be encased in []'s in the input string. +// The []'s are optional in the substitution map. +// [FOO_123] +// [FOO,number,precision] +// [FOO,datetime,format] + + +// static +template<> +S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) +{ + LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING; + S32 res = 0; + + std::string output; + std::vector<std::string> tokens; + + std::string::size_type start = 0; + std::string::size_type prev_start = 0; + std::string::size_type key_start = 0; + while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) + { + output += std::string(s, prev_start, key_start-prev_start); + prev_start = start; + + bool found_replacement = false; + std::string replacement; + + if (tokens.size() == 0) + { + found_replacement = false; + } + else if (tokens.size() == 1) + { + found_replacement = simpleReplacement (replacement, tokens[0], substitutions); + } + else if (tokens[1] == "number") + { + std::string param = "0"; + + if (tokens.size() > 2) param = tokens[2]; + found_replacement = simpleReplacement (replacement, tokens[0], substitutions); + if (found_replacement) formatNumber (replacement, param); + } + else if (tokens[1] == "datetime") + { + std::string param; + if (tokens.size() > 2) param = tokens[2]; + + format_map_t::const_iterator iter = substitutions.find("datetime"); + if (iter != substitutions.end()) + { + S32 secFromEpoch = 0; + bool r = LLStringUtil::convertToS32(iter->second, secFromEpoch); + if (r) + { + found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch); + } + } + } + + if (found_replacement) + { + output += replacement; + res++; + } + else + { + // we had no replacement, use the string as is + // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" + output += std::string(s, key_start, start-key_start); + } + tokens.clear(); + } + // send the remainder of the string (with no further matches for bracketed names) + output += std::string(s, start); + s = output; + return res; +} + +//static +template<> +S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) +{ + LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING; + S32 res = 0; + + if (!substitutions.isMap()) + { + return res; + } + + std::string output; + std::vector<std::string> tokens; + + std::string::size_type start = 0; + std::string::size_type prev_start = 0; + std::string::size_type key_start = 0; + while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) + { + output += std::string(s, prev_start, key_start-prev_start); + prev_start = start; + + bool found_replacement = false; + std::string replacement; + + if (tokens.size() == 0) + { + found_replacement = false; + } + else if (tokens.size() == 1) + { + found_replacement = simpleReplacement (replacement, tokens[0], substitutions); + } + else if (tokens[1] == "number") + { + std::string param = "0"; + + if (tokens.size() > 2) param = tokens[2]; + found_replacement = simpleReplacement (replacement, tokens[0], substitutions); + if (found_replacement) formatNumber (replacement, param); + } + else if (tokens[1] == "datetime") + { + std::string param; + if (tokens.size() > 2) param = tokens[2]; + + S32 secFromEpoch = (S32) substitutions["datetime"].asInteger(); + found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch); + } + + if (found_replacement) + { + output += replacement; + res++; + } + else + { + // we had no replacement, use the string as is + // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" + output += std::string(s, key_start, start-key_start); + } + tokens.clear(); + } + // send the remainder of the string (with no further matches for bracketed names) + output += std::string(s, start); + s = output; + return res; +} + +//////////////////////////////////////////////////////////// +// Testing + +#ifdef _DEBUG + +template<class T> +void LLStringUtilBase<T>::testHarness() +{ + std::string s1; + + llassert( s1.c_str() == NULL ); + llassert( s1.size() == 0 ); + llassert( s1.empty() ); + + std::string s2( "hello"); + llassert( !strcmp( s2.c_str(), "hello" ) ); + llassert( s2.size() == 5 ); + llassert( !s2.empty() ); + std::string s3( s2 ); + + llassert( "hello" == s2 ); + llassert( s2 == "hello" ); + llassert( s2 > "gello" ); + llassert( "gello" < s2 ); + llassert( "gello" != s2 ); + llassert( s2 != "gello" ); + + std::string s4 = s2; + llassert( !s4.empty() ); + s4.empty(); + llassert( s4.empty() ); + + std::string s5(""); + llassert( s5.empty() ); + + llassert( isValidIndex(s5, 0) ); + llassert( !isValidIndex(s5, 1) ); + + s3 = s2; + s4 = "hello again"; + + s4 += "!"; + s4 += s4; + llassert( s4 == "hello again!hello again!" ); + + + std::string s6 = s2 + " " + s2; + std::string s7 = s6; + llassert( s6 == s7 ); + llassert( !( s6 != s7) ); + llassert( !(s6 < s7) ); + llassert( !(s6 > s7) ); + + llassert( !(s6 == "hi")); + llassert( s6 == "hello hello"); + llassert( s6 < "hi"); + + llassert( s6[1] == 'e' ); + s6[1] = 'f'; + llassert( s6[1] == 'f' ); + + s2.erase( 4, 1 ); + llassert( s2 == "hell"); + s2.insert( 0, "y" ); + llassert( s2 == "yhell"); + s2.erase( 1, 3 ); + llassert( s2 == "yl"); + s2.insert( 1, "awn, don't yel"); + llassert( s2 == "yawn, don't yell"); + + std::string s8 = s2.substr( 6, 5 ); + llassert( s8 == "don't" ); + + std::string s9 = " \t\ntest \t\t\n "; + trim(s9); + llassert( s9 == "test" ); + + s8 = "abc123&*(ABC"; + + s9 = s8; + toUpper(s9); + llassert( s9 == "ABC123&*(ABC" ); + + s9 = s8; + toLower(s9); + llassert( s9 == "abc123&*(abc" ); + + + std::string s10( 10, 'x' ); + llassert( s10 == "xxxxxxxxxx" ); + + std::string s11( "monkey in the middle", 7, 2 ); + llassert( s11 == "in" ); + + std::string s12; //empty + s12 += "foo"; + llassert( s12 == "foo" ); + + std::string s13; //empty + s13 += 'f'; + llassert( s13 == "f" ); +} + + +#endif // _DEBUG |