summaryrefslogtreecommitdiff
path: root/indra/llcommon
diff options
context:
space:
mode:
authorAlexander Gavriliuk <alexandrgproductengine@lindenlab.com>2024-02-07 21:26:57 +0100
committerGuru <alexandrgproductengine@lindenlab.com>2024-02-08 13:17:18 +0300
commit7075717b7c4a57d6bef60697ee506096a7c1b1ab (patch)
tree4fd55eb13166128aefa4a835a1a7922742922ca9 /indra/llcommon
parentb348366d107a03fcc01397c1b2e9e2a22de48034 (diff)
SL-20363 Add Advanced option 'Debug Unicode'
Diffstat (limited to 'indra/llcommon')
-rw-r--r--indra/llcommon/llstring.cpp49
-rw-r--r--indra/llcommon/llstring.h1
2 files changed, 50 insertions, 0 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 17d69351ec..ab34262515 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -623,6 +623,7 @@ std::string mbcsstring_makeASCII(const std::string& wstr)
}
return out_str;
}
+
std::string utf8str_removeCRLF(const std::string& utf8str)
{
if (0 == utf8str.length())
@@ -644,6 +645,54 @@ std::string utf8str_removeCRLF(const std::string& utf8str)
return out;
}
+std::string utf8str_showBytesUTF8(const std::string& utf8str) // Returns a copy where ASCII stays as-is and non-ASCII bytes become hex groups, e.g. "a\xC3\xA9b" -> "a [C3.A9] b"
+{
+ std::string result;
+
+ bool in_sequence = false; // True while the previous byte was non-ASCII, i.e. a '[' group is still open
+ for (U8 byte : utf8str)
+ {
+ if (byte >= 0x80) // High bit set: the byte belongs to a multi-byte UTF-8 sequence
+ {
+ if (!in_sequence) // Previous byte was ASCII (or this is the first byte): open a new bracket group
+ {
+ if (!result.empty() && result.back() != ' ')
+ result += ' '; // Use a space as separator between ASCII and UTF-8, unless one is already there
+ result += '[';
+ }
+ else if (byte >= 0xC0) // 0xC0 and above is never a continuation byte, so another sequence starts here
+ {
+ result += "] ["; // Close the current group and open the next one, separated by a space
+ }
+ else // Continuation byte (0x80..0xBF): stay in the same bracket group
+ {
+ result += '.'; // Bytes inside one group are joined with a dot
+ }
+ result += llformat("%02X", byte); // The byte is written as two uppercase hex digits (llformat is presumably printf-style)
+ in_sequence = true;
+ }
+ else // ASCII symbol is represented as the character itself
+ {
+ if (in_sequence) // First ASCII byte after a UTF-8 sequence: close the open bracket group
+ {
+ result += ']';
+ if (byte != ' ') // A space in the input already acts as the separator, so do not double it
+ {
+ result += ' '; // Use a space as separator between UTF-8 and ASCII
+ }
+ }
+ result += byte;
+ in_sequence = false;
+ }
+ }
+ if (in_sequence) // Input ended inside a UTF-8 sequence: close the last bracket group
+ {
+ result += ']';
+ }
+
+ return result;
+}
+
#if LL_WINDOWS
unsigned int ll_wstring_default_code_page()
{
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 8def59ed7f..38b9c3e23c 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -743,6 +743,7 @@ LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str);
LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
+LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str); // Shows non-ASCII bytes as bracketed hex groups such as "[C3.A9]"; ASCII is copied unchanged
#if LL_WINDOWS
/* @name Windows string helpers