diff options
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r-- | indra/llcommon/llstring.cpp | 279 |
1 files changed, 197 insertions, 82 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 6512bbc392..e38622b43b 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -31,10 +31,10 @@ #include "llfasttimer.h" #include "llsd.h" #include <vector> +#include <sstream> #if LL_WINDOWS -#include "llwin32headerslean.h" -#include <winnls.h> // for WideCharToMultiByte +#include "llwin32headers.h" #endif std::string ll_safe_string(const char* in) @@ -141,10 +141,10 @@ std::string rawstr_to_utf8(const std::string& raw) return wstring_to_utf8str(wstr); } -std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars) +std::string wchar_to_utf8chars(llwchar in_char) { - U32 cur_char = (U32)in_char; - char* base = outchars; + U32 cur_char(in_char); + char buff[8], *outchars = buff; if (cur_char < 0x80) { *outchars++ = (U8)cur_char; @@ -189,7 +189,7 @@ std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars) LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL; *outchars++ = LL_UNKNOWN_CHAR; } - return outchars - base; + return { buff, std::string::size_type(outchars - buff) }; } auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar) @@ -214,7 +214,8 @@ auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar) llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) { - llutf16string out; + // ostringstream for llutf16string + std::basic_ostringstream<U16> out; S32 i = 0; while (i < len) @@ -222,16 +223,16 @@ llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) U32 cur_char = utf32str[i]; if (cur_char > 0xFFFF) { - out += (0xD7C0 + (cur_char >> 10)); - out += (0xDC00 | (cur_char & 0x3FF)); + out.put(U16(0xD7C0 + (cur_char >> 10))); + out.put(U16(0xDC00 | (cur_char & 0x3FF))); } else { - out += cur_char; + out.put(U16(cur_char)); } i++; } - return out; + return out.str(); } llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) @@ -242,18 +243,26 @@ llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) LLWString utf16str_to_wstring(const U16* utf16str, size_t len) { - LLWString wout; - if (len == 0) return wout; + if (len == 0) return {}; + + // MS doesn't support std::basic_ostringstream<llwchar>; have to work + // around it. + std::vector<llwchar> wout; + // We want to minimize allocations. We don't know how many llwchars we'll + // generate from this utf16str, but we do know the length should be at + // most len. So if we reserve 'len' llwchars, we shouldn't need to expand + // wout incrementally. + wout.reserve(len); S32 i = 0; const U16* chars16 = utf16str; while (i < len) { llwchar cur_char; - i += utf16chars_to_wchar(chars16+i, &cur_char); - wout += cur_char; + i += (S32)utf16chars_to_wchar(chars16+i, &cur_char); + wout.push_back(cur_char); } - return wout; + return { wout.begin(), wout.end() }; } // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. @@ -308,10 +317,10 @@ S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wle // Given a wstring and an offset in it, returns the length as wstring (i.e., // number of llwchars) of the longest substring that starts at the offset // and whose equivalent utf-16 string does not exceeds the given utf16_length. -S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned) +S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, bool *unaligned) { const auto end = wstr.length(); - BOOL u = FALSE; + bool u{ false }; S32 n = woffset + utf16_length; S32 i = woffset; while (i < end) @@ -367,13 +376,12 @@ std::string wchar_utf8_preview(const llwchar wc) std::ostringstream oss; oss << std::hex << std::uppercase << (U32)wc; - U8 out_bytes[8]; - U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes); + auto out_bytes = wchar_to_utf8chars(wc); - if (size > 1) + if (out_bytes.length() > 1) { oss << " ["; - for (U32 i = 0; i < size; ++i) + for (U32 i = 0; i < out_bytes.length(); ++i) { if (i) { @@ -399,7 +407,14 @@ S32 wstring_utf8_length(const LLWString& wstr) LLWString utf8str_to_wstring(const char* utf8str, size_t len) { - LLWString wout; + // MS doesn't support std::basic_ostringstream<llwchar>; have to work + // around it. + std::vector<llwchar> wout; + // We want to minimize allocations. We don't know how many llwchars we'll + // generate from this utf8str, but we do know the length should be at most + // len. So if we reserve 'len' llwchars, we shouldn't need to expand wout + // incrementally. + wout.reserve(len); S32 i = 0; while (i < len) @@ -442,7 +457,7 @@ LLWString utf8str_to_wstring(const char* utf8str, size_t len) } else { - wout += LL_UNKNOWN_CHAR; + wout.push_back(LL_UNKNOWN_CHAR); ++i; continue; } @@ -479,26 +494,21 @@ LLWString utf8str_to_wstring(const char* utf8str, size_t len) } } - wout += unichar; + wout.push_back(unichar); ++i; } - return wout; + return { wout.begin(), wout.end() }; } std::string wstring_to_utf8str(const llwchar* utf32str, size_t len) { - std::string out; + std::ostringstream out; - S32 i = 0; - while (i < len) + for (size_t i = 0; i < len; ++i) { - char tchars[8]; /* Flawfinder: ignore */ - auto n = wchar_to_utf8chars(utf32str[i], tchars); - tchars[n] = 0; - out += tchars; - i++; + out << wchar_to_utf8chars(utf32str[i]); } - return out; + return out.str(); } std::string utf16str_to_utf8str(const U16* utf16str, size_t len) @@ -686,7 +696,21 @@ llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t lengt std::string utf8str_showBytesUTF8(const std::string& utf8str) { - std::string result; + std::ostringstream result; + char lastchar = '\0'; + auto append = [&result, &lastchar](char c) + { + lastchar = c; + result << c; + }; + auto appends = [&result, &lastchar](const std::string& s) + { + if (! s.empty()) + { + lastchar = s.back(); + result << s; + } + }; bool in_sequence = false; size_t sequence_size = 0; @@ -695,9 +719,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) auto open_sequence = [&]() { - if (!result.empty() && result.back() != '\n') - result += '\n'; // Use LF as a separator before new UTF-8 sequence - result += '['; + if (lastchar != '\0' && lastchar != '\n') + append('\n'); // Use LF as a separator before new UTF-8 sequence + append('['); in_sequence = true; }; @@ -706,9 +730,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size); if (unicode != LL_UNKNOWN_CHAR) { - result += llformat("+%04X", unicode); + appends(llformat("+%04X", unicode)); } - result += ']'; + append(']'); in_sequence = false; sequence_size = 0; }; @@ -729,9 +753,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) } else // Continue the same UTF-8 sequence { - result += '.'; + append('.'); } - result += llformat("%02X", byte); // The byte is represented in hexadecimal form + appends(llformat("%02X", byte)); // The byte is represented in hexadecimal form ++sequence_size; } else // ASCII symbol is represented as a character @@ -741,10 +765,10 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) close_sequence(); if (byte != '\n') { - result += '\n'; // Use LF as a separator between UTF-8 and ASCII + append('\n'); // Use LF as a separator between UTF-8 and ASCII } } - result += byte; + append(byte); } ++byte_index; } @@ -754,11 +778,11 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) close_sequence(); } - return result; + return result.str(); } // Search for any emoji symbol, return true if found -bool wstring_has_emoji(const LLWString& wstr) +bool wstring_has_emoji(LLWStringView wstr) { for (const llwchar& wch : wstr) { @@ -809,7 +833,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned code_page, 0, in, - len_in, + static_cast<int>(len_in), NULL, 0, 0, @@ -824,7 +848,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned code_page, 0, in, - len_in, + static_cast<int>(len_in), pout, len_out, 0, @@ -851,8 +875,8 @@ std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int std::vector<wchar_t> w_out(len + 1); memset(&w_out[0], 0, w_out.size()); - int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len, - &w_out[0], w_out.size() - 1); + int real_output_str_len = MultiByteToWideChar(code_page, 0, in, static_cast<int>(len), + &w_out[0], static_cast<int>(w_out.size() - 1)); //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. w_out[real_output_str_len] = 0; @@ -900,6 +924,11 @@ void HeapFree_deleter(void* ptr) } // anonymous namespace +unsigned long windows_get_last_error() +{ + return GetLastError(); +} + template<> std::wstring windows_message<std::wstring>(DWORD error) { @@ -938,12 +967,12 @@ std::wstring windows_message<std::wstring>(DWORD error) return out.str(); } -boost::optional<std::wstring> llstring_getoptenv(const std::string& key) +std::optional<std::wstring> llstring_getoptenv(const std::string& key) { auto wkey = ll_convert_string_to_wide(key); // Take a wild guess as to how big the buffer should be. std::vector<wchar_t> buffer(1024); - auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], static_cast<DWORD>(buffer.size())); // If our initial guess was too short, n will indicate the size (in // wchar_t's) that buffer should have been, including the terminating nul. if (n > (buffer.size() - 1)) @@ -951,13 +980,13 @@ boost::optional<std::wstring> llstring_getoptenv(const std::string& key) // make it big enough buffer.resize(n); // and try again - n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], static_cast<DWORD>(buffer.size())); } // did that (ultimately) succeed? if (n) { - // great, return populated boost::optional - return boost::optional<std::wstring>(&buffer[0]); + // great, return populated std::optional + return std::make_optional<std::wstring>(&buffer[0]); } // not successful @@ -968,23 +997,23 @@ boost::optional<std::wstring> llstring_getoptenv(const std::string& key) LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: " << windows_message<std::string>(last_error) << LL_ENDL; } - // return empty boost::optional + // return empty std::optional return {}; } #else // ! LL_WINDOWS -boost::optional<std::string> llstring_getoptenv(const std::string& key) +std::optional<std::string> llstring_getoptenv(const std::string& key) { auto found = getenv(key.c_str()); if (found) { - // return populated boost::optional - return boost::optional<std::string>(found); + // return populated std::optional + return std::make_optional<std::string>(found); } else { - // return empty boost::optional + // return empty std::optional return {}; } } @@ -1017,7 +1046,7 @@ bool LLStringOps::isEmoji(llwchar a) // These are indeed "genuine" emojis, we *do want* rendered as such. HB return a >= 0x1f000 && a < 0x20000; #endif -} + } S32 LLStringOps::collate(const llwchar* a, const llwchar* b) { @@ -1115,18 +1144,27 @@ void LLStringOps::setupDayFormat(const std::string& data) } -std::string LLStringOps::getDatetimeCode (std::string key) +std::string LLStringOps::getDatetimeCode(std::string key) { - std::map<std::string, std::string>::iterator iter; + std::map<std::string, std::string>::iterator iter = datetimeToCodes.find(key); + return iter == datetimeToCodes.end() ? LLStringUtil::null : iter->second; +} - iter = datetimeToCodes.find (key); - if (iter != datetimeToCodes.end()) +void LLStringOps::splitString(const std::string& text, char delimiter, + std::function<void(const std::string&)> handler) +{ + std::size_t from = 0; + for (std::size_t i = 0; i < text.size(); ++i) { - return iter->second; + if (text[i] == delimiter) + { + handler(text.substr(from, i - from)); + from = i + 1; + } } - else + if (from <= text.size()) { - return std::string(""); + handler(text.substr(from)); } } @@ -1208,6 +1246,75 @@ namespace LLStringFn return output; } + using literals_t = std::map<char, std::string>; + static const literals_t xml_elem_literals = + { + { '<', "<" }, + { '>', ">" }, + { '&', "&" } + }; + static const literals_t xml_attr_literals = + { + { '"', """ }, + { '\'', "'" } + }; + + static void literals_encode(std::string& text, const literals_t& literals) + { + for (const std::pair<char, std::string> it : literals) + { + std::string::size_type pos = 0; + while ((pos = text.find(it.first, pos)) != std::string::npos) + { + text.replace(pos, 1, it.second); + pos += it.second.size(); + } + } + } + + static void literals_decode(std::string& text, const literals_t& literals) + { + for (const std::pair<char, std::string> it : literals) + { + std::string::size_type pos = 0; + while ((pos = text.find(it.second, pos)) != std::string::npos) + { + text[pos++] = it.first; + text.erase(pos, it.second.size() - 1); + } + } + } + + /** + * @brief Replace all characters that are not allowed in XML 1.0 + * with corresponding literals: [ < > & ] => [ < > & ] + */ + std::string xml_encode(const std::string& input, bool for_attribute) + { + std::string result(input); + literals_encode(result, xml_elem_literals); + if (for_attribute) + { + literals_encode(result, xml_attr_literals); + } + return result; + } + + /** + * @brief Replace some of XML literals that are defined in XML 1.0 + * with corresponding characters: [ < > & ] => [ < > & ] + */ + std::string xml_decode(const std::string& input, bool for_attribute) + { + std::string result(input); + literals_decode(result, xml_elem_literals); + if (for_attribute) + { + literals_decode(result, xml_attr_literals); + } + return result; + } + /** * @brief Replace all control characters (c < 0x20) with replacement in * string. @@ -1336,6 +1443,14 @@ bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token template<> void LLStringUtil::setLocale(std::string inLocale) { + if(startsWith(inLocale, "MissingString")) + { + // it seems this hasn't been working for some time, and I'm not sure how it is intentded to + // properly discover the correct locale. early out now to avoid failures later in + // formatNumber() + LL_WARNS() << "Failed attempting to set invalid locale: " << inLocale << LL_ENDL; + return; + } sLocale = inLocale; }; @@ -1513,7 +1628,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING; S32 res = 0; - std::string output; + std::ostringstream output; std::vector<std::string> tokens; std::string::size_type start = 0; @@ -1521,7 +1636,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) std::string::size_type key_start = 0; while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) { - output += std::string(s, prev_start, key_start-prev_start); + output << std::string(s, prev_start, key_start-prev_start); prev_start = start; bool found_replacement = false; @@ -1552,7 +1667,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) if (iter != substitutions.end()) { S32 secFromEpoch = 0; - BOOL r = LLStringUtil::convertToS32(iter->second, secFromEpoch); + bool r = LLStringUtil::convertToS32(iter->second, secFromEpoch); if (r) { found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch); @@ -1562,20 +1677,20 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) if (found_replacement) { - output += replacement; + output << replacement; res++; } else { // we had no replacement, use the string as is // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" - output += std::string(s, key_start, start-key_start); + output << std::string(s, key_start, start-key_start); } tokens.clear(); } // send the remainder of the string (with no further matches for bracketed names) - output += std::string(s, start); - s = output; + output << std::string(s, start); + s = output.str(); return res; } @@ -1591,7 +1706,7 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) return res; } - std::string output; + std::ostringstream output; std::vector<std::string> tokens; std::string::size_type start = 0; @@ -1599,7 +1714,7 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) std::string::size_type key_start = 0; while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) { - output += std::string(s, prev_start, key_start-prev_start); + output << std::string(s, prev_start, key_start-prev_start); prev_start = start; bool found_replacement = false; @@ -1632,20 +1747,20 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) if (found_replacement) { - output += replacement; + output << replacement; res++; } else { // we had no replacement, use the string as is // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" - output += std::string(s, key_start, start-key_start); + output << std::string(s, key_start, start-key_start); } tokens.clear(); } // send the remainder of the string (with no further matches for bracketed names) - output += std::string(s, start); - s = output; + output << std::string(s, start); + s = output.str(); return res; } |