diff options
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r-- | indra/llcommon/llstring.cpp | 131 |
1 files changed, 78 insertions, 53 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 505789f9ea..005864a843 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -31,6 +31,7 @@ #include "llfasttimer.h" #include "llsd.h" #include <vector> +#include <sstream> #if LL_WINDOWS #include "llwin32headerslean.h" @@ -141,10 +142,10 @@ std::string rawstr_to_utf8(const std::string& raw) return wstring_to_utf8str(wstr); } -std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars) +std::string wchar_to_utf8chars(llwchar in_char) { - U32 cur_char = (U32)in_char; - char* base = outchars; + U32 cur_char(in_char); + char buff[8], *outchars = buff; if (cur_char < 0x80) { *outchars++ = (U8)cur_char; @@ -189,7 +190,7 @@ std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars) LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL; *outchars++ = LL_UNKNOWN_CHAR; } - return outchars - base; + return { buff, std::string::size_type(outchars - buff) }; } auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar) @@ -214,7 +215,8 @@ auto utf16chars_to_wchar(const U16* inchars, llwchar* outchar) llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) { - llutf16string out; + // ostringstream for llutf16string + std::basic_ostringstream<U16> out; S32 i = 0; while (i < len) @@ -222,16 +224,16 @@ llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) U32 cur_char = utf32str[i]; if (cur_char > 0xFFFF) { - out += (0xD7C0 + (cur_char >> 10)); - out += (0xDC00 | (cur_char & 0x3FF)); + out.put(U16(0xD7C0 + (cur_char >> 10))); + out.put(U16(0xDC00 | (cur_char & 0x3FF))); } else { - out += cur_char; + out.put(U16(cur_char)); } i++; } - return out; + return out.str(); } llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) @@ -242,8 +244,16 @@ llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) LLWString utf16str_to_wstring(const U16* utf16str, size_t len) { - LLWString wout; - if (len == 0) return wout; + if (len == 0) return {}; + + // MS doesn't support std::basic_ostringstream<llwchar>; have to work + // around it. + std::vector<llwchar> wout; + // We want to minimize allocations. We don't know how many llwchars we'll + // generate from this utf16str, but we do know the length should be at + // most len. So if we reserve 'len' llwchars, we shouldn't need to expand + // wout incrementally. + wout.reserve(len); S32 i = 0; const U16* chars16 = utf16str; @@ -251,9 +261,9 @@ LLWString utf16str_to_wstring(const U16* utf16str, size_t len) { llwchar cur_char; i += (S32)utf16chars_to_wchar(chars16+i, &cur_char); - wout += cur_char; + wout.push_back(cur_char); } - return wout; + return { wout.begin(), wout.end() }; } // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. @@ -367,13 +377,12 @@ std::string wchar_utf8_preview(const llwchar wc) std::ostringstream oss; oss << std::hex << std::uppercase << (U32)wc; - U8 out_bytes[8]; - U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes); + auto out_bytes = wchar_to_utf8chars(wc); - if (size > 1) + if (out_bytes.length() > 1) { oss << " ["; - for (U32 i = 0; i < size; ++i) + for (U32 i = 0; i < out_bytes.length(); ++i) { if (i) { @@ -399,7 +408,14 @@ S32 wstring_utf8_length(const LLWString& wstr) LLWString utf8str_to_wstring(const char* utf8str, size_t len) { - LLWString wout; + // MS doesn't support std::basic_ostringstream<llwchar>; have to work + // around it. + std::vector<llwchar> wout; + // We want to minimize allocations. We don't know how many llwchars we'll + // generate from this utf8str, but we do know the length should be at most + // len. So if we reserve 'len' llwchars, we shouldn't need to expand wout + // incrementally. + wout.reserve(len); S32 i = 0; while (i < len) @@ -442,7 +458,7 @@ LLWString utf8str_to_wstring(const char* utf8str, size_t len) } else { - wout += LL_UNKNOWN_CHAR; + wout.push_back(LL_UNKNOWN_CHAR); ++i; continue; } @@ -479,26 +495,21 @@ LLWString utf8str_to_wstring(const char* utf8str, size_t len) } } - wout += unichar; + wout.push_back(unichar); ++i; } - return wout; + return { wout.begin(), wout.end() }; } std::string wstring_to_utf8str(const llwchar* utf32str, size_t len) { - std::string out; + std::ostringstream out; - S32 i = 0; - while (i < len) + for (size_t i = 0; i < len; ++i) { - char tchars[8]; /* Flawfinder: ignore */ - auto n = wchar_to_utf8chars(utf32str[i], tchars); - tchars[n] = 0; - out += tchars; - i++; + out << wchar_to_utf8chars(utf32str[i]); } - return out; + return out.str(); } std::string utf16str_to_utf8str(const U16* utf16str, size_t len) @@ -686,7 +697,21 @@ llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t lengt std::string utf8str_showBytesUTF8(const std::string& utf8str) { - std::string result; + std::ostringstream result; + char lastchar = '\0'; + auto append = [&result, &lastchar](char c) + { + lastchar = c; + result << c; + }; + auto appends = [&result, &lastchar](const std::string& s) + { + if (! s.empty()) + { + lastchar = s.back(); + result << s; + } + }; bool in_sequence = false; size_t sequence_size = 0; @@ -695,9 +720,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) auto open_sequence = [&]() { - if (!result.empty() && result.back() != '\n') - result += '\n'; // Use LF as a separator before new UTF-8 sequence - result += '['; + if (lastchar != '\0' && lastchar != '\n') + append('\n'); // Use LF as a separator before new UTF-8 sequence + append('['); in_sequence = true; }; @@ -706,9 +731,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size); if (unicode != LL_UNKNOWN_CHAR) { - result += llformat("+%04X", unicode); + appends(llformat("+%04X", unicode)); } - result += ']'; + append(']'); in_sequence = false; sequence_size = 0; }; @@ -729,9 +754,9 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) } else // Continue the same UTF-8 sequence { - result += '.'; + append('.'); } - result += llformat("%02X", byte); // The byte is represented in hexadecimal form + appends(llformat("%02X", byte)); // The byte is represented in hexadecimal form ++sequence_size; } else // ASCII symbol is represented as a character @@ -741,10 +766,10 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) close_sequence(); if (byte != '\n') { - result += '\n'; // Use LF as a separator between UTF-8 and ASCII + append('\n'); // Use LF as a separator between UTF-8 and ASCII } } - result += byte; + append(byte); } ++byte_index; } @@ -754,7 +779,7 @@ std::string utf8str_showBytesUTF8(const std::string& utf8str) close_sequence(); } - return result; + return result.str(); } // Search for any emoji symbol, return true if found @@ -1587,7 +1612,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING; S32 res = 0; - std::string output; + std::ostringstream output; std::vector<std::string> tokens; std::string::size_type start = 0; @@ -1595,7 +1620,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) std::string::size_type key_start = 0; while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) { - output += std::string(s, prev_start, key_start-prev_start); + output << std::string(s, prev_start, key_start-prev_start); prev_start = start; bool found_replacement = false; @@ -1636,20 +1661,20 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) if (found_replacement) { - output += replacement; + output << replacement; res++; } else { // we had no replacement, use the string as is // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" - output += std::string(s, key_start, start-key_start); + output << std::string(s, key_start, start-key_start); } tokens.clear(); } // send the remainder of the string (with no further matches for bracketed names) - output += std::string(s, start); - s = output; + output << std::string(s, start); + s = output.str(); return res; } @@ -1665,7 +1690,7 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) return res; } - std::string output; + std::ostringstream output; std::vector<std::string> tokens; std::string::size_type start = 0; @@ -1673,7 +1698,7 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) std::string::size_type key_start = 0; while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos) { - output += std::string(s, prev_start, key_start-prev_start); + output << std::string(s, prev_start, key_start-prev_start); prev_start = start; bool found_replacement = false; @@ -1706,20 +1731,20 @@ S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) if (found_replacement) { - output += replacement; + output << replacement; res++; } else { // we had no replacement, use the string as is // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-" - output += std::string(s, key_start, start-key_start); + output << std::string(s, key_start, start-key_start); } tokens.clear(); } // send the remainder of the string (with no further matches for bracketed names) - output += std::string(s, start); - s = output; + output << std::string(s, start); + s = output.str(); return res; } |