summary refs log tree commit diff
path: root/indra/llcommon/llstring.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r-- indra/llcommon/llstring.cpp 49
1 files changed, 49 insertions, 0 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 17d69351ec..ab34262515 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -623,6 +623,7 @@ std::string mbcsstring_makeASCII(const std::string& wstr)
}
return out_str;
}
+
std::string utf8str_removeCRLF(const std::string& utf8str)
{
if (0 == utf8str.length())
@@ -644,6 +645,54 @@ std::string utf8str_removeCRLF(const std::string& utf8str)
return out;
}
+std::string utf8str_showBytesUTF8(const std::string& utf8str) // Returns a copy of utf8str with bytes < 0x80 kept as characters and runs of bytes >= 0x80 shown as bracketed hex
+{ // Example: "a\xC3\xA9 b" is expected to yield "a [C3.A9] b" (assuming llformat is printf-like - TODO confirm)
+ std::string result; // Accumulates the rendered text; returned by value at the end
+ // Bytes are classified by value only (0x80 and 0xC0 thresholds); the encoding itself is not validated
+ bool in_sequence = false; // True while the previous byte was >= 0x80, i.e. a '[' is still open
+ for (U8 byte : utf8str) // U8 is presumably an unsigned 8-bit type, so the >= 0x80 tests see 128..255 - TODO confirm
+ {
+ if (byte >= 0x80) // Part of an UTF-8 sequence
+ {
+ if (!in_sequence) // Start new UTF-8 sequence
+ {
+ if (!result.empty() && result.back() != ' ') // No separator at the very start or right after a space
+ result += ' '; // Use space as separator between ASCII and UTF-8
+ result += '['; // Open the bracket that encloses this sequence's hex bytes
+ }
+ else if (byte >= 0xC0) // Lead byte while a sequence is open: start another UTF-8 sequence
+ {
+ result += "] ["; // Close the previous bracket; use space as separator between UTF-8 and UTF-8
+ }
+ else // Byte in 0x80..0xBF: continue the same UTF-8 sequence
+ {
+ result += '.'; // '.' separates the bytes inside one bracket
+ }
+ result += llformat("%02X", byte); // The byte is represented in hexadecimal form
+ in_sequence = true; // Remember that a bracket is open for the next iteration
+ }
+ else // ASCII symbol is represented as a character
+ {
+ if (in_sequence) // End of UTF-8 sequence
+ {
+ result += ']'; // Close the bracket opened for the preceding sequence
+ if (byte != ' ') // A literal space in the input already acts as the separator
+ {
+ result += ' '; // Use space as separator between UTF-8 and ASCII
+ }
+ }
+ result += byte; // byte < 0x80 here, so it is appended unchanged
+ in_sequence = false; // No bracket is open after an ASCII byte
+ }
+ }
+ if (in_sequence) // Input ended inside a UTF-8 sequence: the bracket is still open
+ {
+ result += ']'; // Close it so the output is always balanced
+ }
+
+ return result; // Empty input falls through the loop and returns an empty string
+}
+
#if LL_WINDOWS
unsigned int ll_wstring_default_code_page()
{