From 5a6ddb2ea666e895890d3cb690cce5101cf12652 Mon Sep 17 00:00:00 2001 From: Kitty Barnett Date: Thu, 7 Nov 2019 17:15:21 +0100 Subject: Fallback fonts can have first crack at adding an unknown character + set Twemoji as the viewer's fallback for all emoji blocks --- indra/llcommon/CMakeLists.txt | 2 ++ indra/llcommon/llstring.cpp | 26 ++++++++++++++++++++++++++ indra/llcommon/llstring.h | 2 ++ 3 files changed, 30 insertions(+) (limited to 'indra/llcommon') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index af41b9e460..ba87d93fec 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -4,6 +4,7 @@ project(llcommon) include(00-Common) +include(ICU4C) include(LLCommon) include(Linking) include(Boost) @@ -288,6 +289,7 @@ target_link_libraries( ${APRUTIL_LIBRARIES} ${APR_LIBRARIES} ${EXPAT_LIBRARIES} + ${ICU4C_LIBRARY} ${JSONCPP_LIBRARIES} ${ZLIB_LIBRARIES} ${WINDOWS_LIBRARIES} diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 0174c411b4..b272728200 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -30,6 +30,7 @@ #include "llerror.h" #include "llfasttimer.h" #include "llsd.h" +#include #include #if LL_WINDOWS @@ -888,6 +889,31 @@ std::string LLStringOps::sDayFormat; std::string LLStringOps::sAM; std::string LLStringOps::sPM; +// static +bool LLStringOps::isEmoji(llwchar wch) +{ + switch (ublock_getCode(wch)) + { + case UBLOCK_MISCELLANEOUS_SYMBOLS: + case UBLOCK_DINGBATS: + case UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS: + case UBLOCK_EMOTICONS: + case UBLOCK_TRANSPORT_AND_MAP_SYMBOLS: +#if U_ICU_VERSION_MAJOR_NUM > 56 + // Boost uses ICU so we can't update it independently + case UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS: +#endif // U_ICU_VERSION_MAJOR_NUM > 56 + return true; + default: +#if U_ICU_VERSION_MAJOR_NUM > 56 + return false; +#else + // See https://en.wikipedia.org/wiki/Supplemental_Symbols_and_Pictographs + return wch >= 0x1F900 && wch <= 0x1F9FF; +#endif // U_ICU_VERSION_MAJOR_NUM > 56 + } +} + S32 LLStringOps::collate(const llwchar* a, const llwchar* b) { diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index b619a9e48c..d31d0cafc7 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -193,6 +193,8 @@ public: static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; } static bool isAlnum(llwchar a) { return iswalnum(a) != 0; } + static bool isEmoji(llwchar wch); + static S32 collate(const char* a, const char* b) { return strcoll(a, b); } static S32 collate(const llwchar* a, const llwchar* b); -- cgit v1.2.3 From 3185bdea27b19e155c2ccc03c80624e113d312a6 Mon Sep 17 00:00:00 2001 From: Callum Prentice Date: Thu, 26 Jan 2023 14:45:45 -0800 Subject: DRTVWR-489-emoji: As part of the work to get macOS version of the Viewer working, the flag was introduced to warn (and therefore error out) when a virtual override was not marked with the 'override' keyword. Fixing this up involved a large number of changes and this commit represents just those changes - nothing specially from the DRTVWR-489 viewer --- indra/llcommon/llcoros.h | 2 +- indra/llcommon/llsingleton.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llcoros.h b/indra/llcommon/llcoros.h index dbff921f16..a6da94005b 100644 --- a/indra/llcommon/llcoros.h +++ b/indra/llcommon/llcoros.h @@ -92,7 +92,7 @@ class LL_COMMON_API LLCoros: public LLSingleton LLSINGLETON(LLCoros); ~LLCoros(); - void cleanupSingleton(); + void cleanupSingleton() override; public: /// The viewer's use of the term "coroutine" became deeply embedded before /// the industry term "fiber" emerged to distinguish userland threads from diff --git a/indra/llcommon/llsingleton.h b/indra/llcommon/llsingleton.h index 51ef514cf7..cbe5ab6406 100644 --- a/indra/llcommon/llsingleton.h +++ b/indra/llcommon/llsingleton.h @@ -802,7 +802,7 @@ public: private: \ /* implement LLSingleton pure virtual method whose sole purpose */ \ /* is to remind people to use this macro */ \ - virtual void you_must_use_LLSINGLETON_macro() {} \ + virtual void you_must_use_LLSINGLETON_macro() override {} \ friend class LLSingleton; \ DERIVED_CLASS(__VA_ARGS__) -- cgit v1.2.3 From 923733e591eb547ad5dfec395ce7d3e8f0468c16 Mon Sep 17 00:00:00 2001 From: Callum Prentice Date: Thu, 26 Jan 2023 18:01:25 -0800 Subject: DRTVWR-489-emoji: missed the override warning/error in the tests... grr... --- indra/llcommon/tests/llsingleton_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/tests/llsingleton_test.cpp b/indra/llcommon/tests/llsingleton_test.cpp index 15ffe68e67..49cf76b07e 100644 --- a/indra/llcommon/tests/llsingleton_test.cpp +++ b/indra/llcommon/tests/llsingleton_test.cpp @@ -47,8 +47,8 @@ public: \ DEP_INIT /* dependency in initSingleton */ \ } sDepFlag; \ \ - void initSingleton(); \ - void cleanupSingleton(); \ + void initSingleton() override; \ + void cleanupSingleton() override; \ }; \ \ CLS::dep_flag CLS::sDepFlag = DEP_NONE -- cgit v1.2.3 From 6f31fabbc2d082b77c8f09bce30234ec9c506e33 Mon Sep 17 00:00:00 2001 From: Callum Prentice Date: Thu, 26 Jan 2023 19:17:21 -0800 Subject: DRTVWR-489-emoji: less haste. more speed. Missed another test issue. Rebuilt locally with tests and confirmed it works now --- indra/llcommon/tests/llsingleton_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/tests/llsingleton_test.cpp b/indra/llcommon/tests/llsingleton_test.cpp index 49cf76b07e..6f8aaaa0cb 100644 --- a/indra/llcommon/tests/llsingleton_test.cpp +++ b/indra/llcommon/tests/llsingleton_test.cpp @@ -300,7 +300,7 @@ namespace tut { LLSINGLETON_EMPTY_CTOR(CircularPInit); public: - virtual void initSingleton() + virtual void initSingleton() override { // never mind indirection, just go straight for the circularity CircularPInit *pt = getInstance(); -- cgit v1.2.3 From 4df2c0b8d67af267d3c9c6d58d63df3ed063a89e Mon Sep 17 00:00:00 2001 From: Kitty Barnett Date: Wed, 8 Feb 2023 17:17:47 +0100 Subject: Post-merge fix for xcode-14.1 branch --- indra/llcommon/llsdserialize.cpp | 2 +- indra/llcommon/llsdserialize.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 73206f3d40..f66c4ff843 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -2388,7 +2388,7 @@ U8* unzip_llsdNavMesh( bool& valid, size_t& outsize, std::istream& is, S32 size return result; } -char* strip_deprecated_header(char* in, U32& cur_size, U32* header_size) +char* strip_deprecated_header(char* in, llssize& cur_size, U32* header_size) { const char* deprecated_header = ""; constexpr size_t deprecated_header_size = 17; diff --git a/indra/llcommon/llsdserialize.h b/indra/llcommon/llsdserialize.h index 2f12c6d1ff..5ddf0ff552 100644 --- a/indra/llcommon/llsdserialize.h +++ b/indra/llcommon/llsdserialize.h @@ -873,5 +873,5 @@ LL_COMMON_API std::string zip_llsd(LLSD& data); LL_COMMON_API U8* unzip_llsdNavMesh( bool& valid, size_t& outsize,std::istream& is, S32 size); // returns a pointer to the array or past the array if the deprecated header exists -LL_COMMON_API char* strip_deprecated_header(char* in, U32& cur_size, U32* header_size = nullptr); +LL_COMMON_API char* strip_deprecated_header(char* in, llssize& cur_size, U32* header_size = nullptr); #endif // LL_LLSDSERIALIZE_H -- cgit v1.2.3 From 337d2f984ff7f448fdbdc8dc2448e2b67cd23572 Mon Sep 17 00:00:00 2001 From: Ansariel Date: Tue, 4 Apr 2023 03:32:40 +0200 Subject: Fix CMake files related to ICU4C --- indra/llcommon/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 620b2c636c..0dcdf7f7cd 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -265,7 +265,6 @@ add_library (llcommon ${llcommon_SOURCE_FILES}) target_link_libraries( llcommon - ${ICU4C_LIBRARY} ll::apr ll::expat ll::jsoncpp @@ -274,6 +273,7 @@ target_link_libraries( ll::uriparser ll::oslibraries ll::tracy + ll::icu4c ) target_include_directories(llcommon INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) -- cgit v1.2.3 From 97b0ba2a6d2596da867043077e32065653d44f6e Mon Sep 17 00:00:00 2001 From: Alexander Gavriliuk Date: Wed, 19 Apr 2023 01:39:42 +0200 Subject: SL-19575 LLFloaterEmojiPicker - Add filter by category --- indra/llcommon/llstring.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 9afbea9afe..bdb90335e1 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -357,6 +357,7 @@ public: static void replaceNonstandardASCII( string_type& string, T replacement ); static void replaceChar( string_type& string, T target, T replacement ); static void replaceString( string_type& string, string_type target, string_type replacement ); + static void capitalize(string_type& str); static BOOL containsNonprintable(const string_type& string); static void stripNonprintable(string_type& string); @@ -1595,6 +1596,20 @@ void LLStringUtilBase::replaceTabsWithSpaces( string_type& str, size_type spa str = out_str; } +//static +template +void LLStringUtilBase::capitalize(string_type& str) +{ + if (str.size()) + { + auto last = str[0] = toupper(str[0]); + for (U32 i = 1; i < str.size(); ++i) + { + last = (last == ' ' || last == '-' || last == '_') ? str[i] = toupper(str[i]) : str[i]; + } + } +} + //static template BOOL LLStringUtilBase::containsNonprintable(const string_type& string) -- cgit v1.2.3 From 671978e3927bc3ba9fc34008bbb7efd6f07b6c81 Mon Sep 17 00:00:00 2001 From: Alexander Gavriliuk Date: Wed, 17 May 2023 14:28:36 +0200 Subject: SL-19575 Create emoji gallery (fix bug with drawing emojis in chat history) --- indra/llcommon/llstring.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index cda1791e45..d68cbaa22c 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -837,10 +837,19 @@ std::string LLStringOps::sPM; // static bool LLStringOps::isEmoji(llwchar wch) { - switch (ublock_getCode(wch)) - { + int ublock = ublock_getCode(wch); + switch (ublock) + { + case UBLOCK_GENERAL_PUNCTUATION: + case UBLOCK_LETTERLIKE_SYMBOLS: + case UBLOCK_ARROWS: + case UBLOCK_MISCELLANEOUS_TECHNICAL: + case UBLOCK_ENCLOSED_ALPHANUMERICS: + case UBLOCK_GEOMETRIC_SHAPES: case UBLOCK_MISCELLANEOUS_SYMBOLS: case UBLOCK_DINGBATS: + case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: + case UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS: case UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS: case UBLOCK_EMOTICONS: case UBLOCK_TRANSPORT_AND_MAP_SYMBOLS: -- cgit v1.2.3 From 16f0329d184f62437c296483143aef72f1aaa284 Mon Sep 17 00:00:00 2001 From: Alexander Gavriliuk Date: Fri, 7 Jul 2023 23:35:01 +0200 Subject: SL-19951 Collect used icons in a special group 'Recently used' --- indra/llcommon/llstring.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index bdb90335e1..62403969e4 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -357,6 +357,7 @@ public: static void replaceNonstandardASCII( string_type& string, T replacement ); static void replaceChar( string_type& string, T target, T replacement ); static void replaceString( string_type& string, string_type target, string_type replacement ); + static string_type capitalize(const string_type& str); static void capitalize(string_type& str); static BOOL containsNonprintable(const string_type& string); @@ -1596,6 +1597,15 @@ void LLStringUtilBase::replaceTabsWithSpaces( string_type& str, size_type spa str = out_str; } +//static +template +std::basic_string LLStringUtilBase::capitalize(const string_type& str) +{ + string_type result(str); + capitalize(result); + return result; +} + //static template void LLStringUtilBase::capitalize(string_type& str) -- cgit v1.2.3 From 2fad5a770b3583e576992d075c24bc0e25443053 Mon Sep 17 00:00:00 2001 From: Alexander Gavriliuk Date: Thu, 30 Nov 2023 13:59:14 +0100 Subject: SL-19801 Log unicode characters for debug --- indra/llcommon/llstring.cpp | 24 ++++++++++++++++++++++++ indra/llcommon/llstring.h | 2 ++ 2 files changed, 26 insertions(+) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index d68cbaa22c..81b0207038 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -365,6 +365,30 @@ S32 wchar_utf8_length(const llwchar wc) } } +std::string wchar_utf8_preview(const llwchar wc) +{ + std::ostringstream oss; + oss << std::hex << std::uppercase << (U32)wc; + + U8 out_bytes[8]; + U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes); + + if (size > 1) + { + oss << " ["; + for (U32 i = 0; i < size; ++i) + { + if (i) + { + oss << ", "; + } + oss << (int)out_bytes[i]; + } + oss << "]"; + } + + return oss.str(); +} S32 wstring_utf8_length(const LLWString& wstr) { diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 62403969e4..8def59ed7f 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -682,6 +682,8 @@ LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); // Length in bytes of this wide char in a UTF8 string LL_COMMON_API S32 wchar_utf8_length(const llwchar wc); +LL_COMMON_API std::string wchar_utf8_preview(const llwchar wc); + LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str); // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. -- cgit v1.2.3 From ae91ae43a51c58cc496f3947921fbf886c6be86e Mon Sep 17 00:00:00 2001 From: Alexander Gavriliuk Date: Mon, 15 Jan 2024 23:20:24 +0100 Subject: SL-20795 Part of previously typed emojis disappear in the 'Save settings as a preset...' option of the 'Preferences' floater --- indra/llcommon/llstring.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 81b0207038..17d69351ec 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -339,8 +339,6 @@ S32 wchar_utf8_length(const llwchar wc) { if (wc < 0x80) { - // This case will also catch negative values which are - // technically invalid. return 1; } else if (wc < 0x800) -- cgit v1.2.3 From 7075717b7c4a57d6bef60697ee506096a7c1b1ab Mon Sep 17 00:00:00 2001 From: Alexander Gavriliuk Date: Wed, 7 Feb 2024 21:26:57 +0100 Subject: SL-20363 Add Advanced option 'Debug Unicode' --- indra/llcommon/llstring.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++ indra/llcommon/llstring.h | 1 + 2 files changed, 50 insertions(+) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 17d69351ec..ab34262515 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -623,6 +623,7 @@ std::string mbcsstring_makeASCII(const std::string& wstr) } return out_str; } + std::string utf8str_removeCRLF(const std::string& utf8str) { if (0 == utf8str.length()) @@ -644,6 +645,54 @@ std::string utf8str_removeCRLF(const std::string& utf8str) return out; } +std::string utf8str_showBytesUTF8(const std::string& utf8str) +{ + std::string result; + + bool in_sequence = false; + for (U8 byte : utf8str) + { + if (byte >= 0x80) // Part of an UTF-8 sequence + { + if (!in_sequence) // Start new UTF-8 sequence + { + if (!result.empty() && result.back() != ' ') + result += ' '; // Use space as separator between ASCII and UTF-8 + result += '['; + } + else if (byte >= 0xC0) // Start another UTF-8 sequence + { + result += "] ["; // Use space as separator between UTF-8 and UTF-8 + } + else // Continue the same UTF-8 sequence + { + result += '.'; + } + result += llformat("%02X", byte); // The byte is represented in hexadecimal form + in_sequence = true; + } + else // ASCII symbol is represented as a character + { + if (in_sequence) // End of UTF-8 sequence + { + result += ']'; + if (byte != ' ') + { + result += ' '; // Use space as separator between UTF-8 and ASCII + } + } + result += byte; + in_sequence = false; + } + } + if (in_sequence) // End of UTF-8 sequence + { + result += ']'; + } + + return result; +} + #if LL_WINDOWS unsigned int ll_wstring_default_code_page() { diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 8def59ed7f..38b9c3e23c 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -743,6 +743,7 @@ LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str); LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); +LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str); #if LL_WINDOWS /* @name Windows string helpers -- cgit v1.2.3 From afc9252372b2b511bb3f7caaaa0856989bbd3f46 Mon Sep 17 00:00:00 2001 From: Alexander Gavriliuk Date: Thu, 8 Feb 2024 21:55:59 +0100 Subject: SL-20363 Option 'Debug Unicode' - show unicode values --- indra/llcommon/llstring.cpp | 87 +++++++++++++++++++++++++++++++++++++++------ indra/llcommon/llstring.h | 2 ++ 2 files changed, 78 insertions(+), 11 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index ab34262515..82dc7c9f80 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -645,49 +645,114 @@ std::string utf8str_removeCRLF(const std::string& utf8str) return out; } +llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length) +{ + switch (length) + { + case 2: + return ((utf8str[offset] & 0x1F) << 6) + + (utf8str[offset + 1] & 0x3F); + case 3: + return ((utf8str[offset] & 0x0F) << 12) + + ((utf8str[offset + 1] & 0x3F) << 6) + + (utf8str[offset + 2] & 0x3F); + case 4: + return ((utf8str[offset] & 0x07) << 18) + + ((utf8str[offset + 1] & 0x3F) << 12) + + ((utf8str[offset + 2] & 0x3F) << 6) + + (utf8str[offset + 3] & 0x3F); + case 5: + return ((utf8str[offset] & 0x03) << 24) + + ((utf8str[offset + 1] & 0x3F) << 18) + + ((utf8str[offset + 2] & 0x3F) << 12) + + ((utf8str[offset + 3] & 0x3F) << 6) + + (utf8str[offset + 4] & 0x3F); + case 6: + return ((utf8str[offset] & 0x01) << 30) + + ((utf8str[offset + 1] & 0x3F) << 24) + + ((utf8str[offset + 2] & 0x3F) << 18) + + ((utf8str[offset + 3] & 0x3F) << 12) + + ((utf8str[offset + 4] & 0x3F) << 6) + + (utf8str[offset + 5] & 0x3F); + case 7: + return ((utf8str[offset + 1] & 0x03) << 30) + + ((utf8str[offset + 2] & 0x3F) << 24) + + ((utf8str[offset + 3] & 0x3F) << 18) + + ((utf8str[offset + 4] & 0x3F) << 12) + + ((utf8str[offset + 5] & 0x3F) << 6) + + (utf8str[offset + 6] & 0x3F); + } + return LL_UNKNOWN_CHAR; +} + std::string utf8str_showBytesUTF8(const std::string& utf8str) { std::string result; bool in_sequence = false; - for (U8 byte : utf8str) + size_t sequence_size = 0; + size_t byte_index = 0; + size_t source_length = utf8str.size(); + + auto open_sequence = [&]() + { + if (!result.empty() && result.back() != '\n') + result += '\n'; // Use LF as a separator before new UTF-8 sequence + result += '['; + in_sequence = true; + }; + + auto close_sequence = [&]() + { + llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size); + if (unicode != LL_UNKNOWN_CHAR) + { + result += llformat("+%04X", unicode); + } + result += ']'; + in_sequence = false; + sequence_size = 0; + }; + + while (byte_index < source_length) { + U8 byte = utf8str[byte_index]; if (byte >= 0x80) // Part of an UTF-8 sequence { if (!in_sequence) // Start new UTF-8 sequence { - if (!result.empty() && result.back() != ' ') - result += ' '; // Use space as separator between ASCII and UTF-8 - result += '['; + open_sequence(); } else if (byte >= 0xC0) // Start another UTF-8 sequence { - result += "] ["; // Use space as separator between UTF-8 and UTF-8 + close_sequence(); + open_sequence(); } else // Continue the same UTF-8 sequence { result += '.'; } result += llformat("%02X", byte); // The byte is represented in hexadecimal form - in_sequence = true; + ++sequence_size; } else // ASCII symbol is represented as a character { if (in_sequence) // End of UTF-8 sequence { - result += ']'; - if (byte != ' ') + close_sequence(); + if (byte != '\n') { - result += ' '; // Use space as separator between UTF-8 and ASCII + result += '\n'; // Use LF as a separator between UTF-8 and ASCII } } result += byte; - in_sequence = false; } + ++byte_index; } + if (in_sequence) // End of UTF-8 sequence { - result += ']'; + close_sequence(); } return result; diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 38b9c3e23c..bfbf25d9ab 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -743,6 +743,8 @@ LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str); LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); +LL_COMMON_API llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length); + LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str); #if LL_WINDOWS -- cgit v1.2.3