diff options
author | Alexander Gavriliuk <alexandrgproductengine@lindenlab.com> | 2024-02-07 21:26:57 +0100 |
---|---|---|
committer | Guru <alexandrgproductengine@lindenlab.com> | 2024-02-08 13:17:18 +0300 |
commit | 7075717b7c4a57d6bef60697ee506096a7c1b1ab (patch) | |
tree | 4fd55eb13166128aefa4a835a1a7922742922ca9 /indra/llcommon | |
parent | b348366d107a03fcc01397c1b2e9e2a22de48034 (diff) |
SL-20363 Add Advanced option 'Debug Unicode'
Diffstat (limited to 'indra/llcommon')
-rw-r--r-- | indra/llcommon/llstring.cpp | 49 | ||||
-rw-r--r-- | indra/llcommon/llstring.h | 1 |
2 files changed, 50 insertions, 0 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 17d69351ec..ab34262515 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -623,6 +623,7 @@ std::string mbcsstring_makeASCII(const std::string& wstr) } return out_str; } + std::string utf8str_removeCRLF(const std::string& utf8str) { if (0 == utf8str.length()) @@ -644,6 +645,54 @@ std::string utf8str_removeCRLF(const std::string& utf8str) return out; } +std::string utf8str_showBytesUTF8(const std::string& utf8str) +{ + std::string result; + + bool in_sequence = false; + for (U8 byte : utf8str) + { + if (byte >= 0x80) // Part of an UTF-8 sequence + { + if (!in_sequence) // Start new UTF-8 sequence + { + if (!result.empty() && result.back() != ' ') + result += ' '; // Use space as separator between ASCII and UTF-8 + result += '['; + } + else if (byte >= 0xC0) // Start another UTF-8 sequence + { + result += "] ["; // Use space as separator between UTF-8 and UTF-8 + } + else // Continue the same UTF-8 sequence + { + result += '.'; + } + result += llformat("%02X", byte); // The byte is represented in hexadecimal form + in_sequence = true; + } + else // ASCII symbol is represented as a character + { + if (in_sequence) // End of UTF-8 sequence + { + result += ']'; + if (byte != ' ') + { + result += ' '; // Use space as separator between UTF-8 and ASCII + } + } + result += byte; + in_sequence = false; + } + } + if (in_sequence) // End of UTF-8 sequence + { + result += ']'; + } + + return result; +} + #if LL_WINDOWS unsigned int ll_wstring_default_code_page() { diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 8def59ed7f..38b9c3e23c 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -743,6 +743,7 @@ LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str); LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); +LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str); #if LL_WINDOWS /* @name Windows string helpers |