diff options
Diffstat (limited to 'indra/llcommon')
| -rw-r--r-- | indra/llcommon/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | indra/llcommon/llcoros.h | 2 | ||||
| -rw-r--r-- | indra/llcommon/llsingleton.h | 2 | ||||
| -rw-r--r-- | indra/llcommon/llstring.cpp | 175 | ||||
| -rw-r--r-- | indra/llcommon/llstring.h | 32 | ||||
| -rw-r--r-- | indra/llcommon/tests/llsingleton_test.cpp | 6 | 
6 files changed, 212 insertions, 7 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 80bc95ffba..26955cfc08 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -3,6 +3,7 @@  project(llcommon)  include(00-Common) +include(ICU4C)  include(LLCommon)  include(bugsplat)  include(Linking) @@ -283,6 +284,7 @@ target_link_libraries(          ll::uriparser          ll::oslibraries          ll::tracy +        ll::icu4c      )  target_include_directories(llcommon INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/indra/llcommon/llcoros.h b/indra/llcommon/llcoros.h index 966ce03296..fd878f20ad 100644 --- a/indra/llcommon/llcoros.h +++ b/indra/llcommon/llcoros.h @@ -92,7 +92,7 @@ class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>      LLSINGLETON(LLCoros);      ~LLCoros(); -    void cleanupSingleton(); +    void cleanupSingleton() override;  public:      /// The viewer's use of the term "coroutine" became deeply embedded before      /// the industry term "fiber" emerged to distinguish userland threads from diff --git a/indra/llcommon/llsingleton.h b/indra/llcommon/llsingleton.h index 51ef514cf7..cbe5ab6406 100644 --- a/indra/llcommon/llsingleton.h +++ b/indra/llcommon/llsingleton.h @@ -802,7 +802,7 @@ public:  private:                                                                \      /* implement LLSingleton pure virtual method whose sole purpose */  \      /* is to remind people to use this macro */                         \ -    virtual void you_must_use_LLSINGLETON_macro() {}                    \ +    virtual void you_must_use_LLSINGLETON_macro() override {}                    \      friend class LLSingleton<DERIVED_CLASS>;                            \      DERIVED_CLASS(__VA_ARGS__) diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 48551ab375..98c9d20cdd 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -30,6 +30,7 @@  #include "llerror.h"  #include "llfasttimer.h"  #include "llsd.h" +#include <unicode/uchar.h>  #include <vector>  #if LL_WINDOWS @@ -338,8 +339,6 @@ S32 wchar_utf8_length(const llwchar wc)  {  	if (wc < 0x80)  	{ -		// This case will also catch negative values which are -		// technically invalid.  		return 1;  	}  	else if (wc < 0x800) @@ -364,6 +363,30 @@ S32 wchar_utf8_length(const llwchar wc)  	}  } +std::string wchar_utf8_preview(const llwchar wc) +{ +    std::ostringstream oss; +    oss << std::hex << std::uppercase << (U32)wc; + +    U8 out_bytes[8]; +    U32 size = (U32)wchar_to_utf8chars(wc, (char*)out_bytes); + +    if (size > 1) +    { +        oss << " ["; +        for (U32 i = 0; i < size; ++i) +        { +            if (i) +            { +                oss << ", "; +            } +            oss << (int)out_bytes[i]; +        } +        oss << "]"; +    } + +    return oss.str(); +}  S32 wstring_utf8_length(const LLWString& wstr)  { @@ -600,6 +623,7 @@ std::string mbcsstring_makeASCII(const std::string& wstr)  	}  	return out_str;  } +  std::string utf8str_removeCRLF(const std::string& utf8str)  {  	if (0 == utf8str.length()) @@ -621,6 +645,119 @@ std::string utf8str_removeCRLF(const std::string& utf8str)  	return out;  } +llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length) +{ +    switch (length) +    { +    case 2: +        return ((utf8str[offset] & 0x1F) << 6) + +                (utf8str[offset + 1] & 0x3F); +    case 3: +        return ((utf8str[offset] & 0x0F) << 12) + +                ((utf8str[offset + 1] & 0x3F) << 6) + +                (utf8str[offset + 2] & 0x3F); +    case 4: +        return ((utf8str[offset] & 0x07) << 18) + +                ((utf8str[offset + 1] & 0x3F) << 12) + +                ((utf8str[offset + 2] & 0x3F) << 6) + +                (utf8str[offset + 3] & 0x3F); +    case 5: +        return ((utf8str[offset] & 0x03) << 24) + +                ((utf8str[offset + 1] & 0x3F) << 18) + +                ((utf8str[offset + 2] & 0x3F) << 12) + +                ((utf8str[offset + 3] & 0x3F) << 6) + +                (utf8str[offset + 4] & 0x3F); +    case 6: +        return ((utf8str[offset] & 0x01) << 30) + +                ((utf8str[offset + 1] & 0x3F) << 24) + +                ((utf8str[offset + 2] & 0x3F) << 18) + +                ((utf8str[offset + 3] & 0x3F) << 12) + +                ((utf8str[offset + 4] & 0x3F) << 6) + +                (utf8str[offset + 5] & 0x3F); +    case 7: +        return ((utf8str[offset + 1] & 0x03) << 30) + +                ((utf8str[offset + 2] & 0x3F) << 24) + +                ((utf8str[offset + 3] & 0x3F) << 18) + +                ((utf8str[offset + 4] & 0x3F) << 12) + +                ((utf8str[offset + 5] & 0x3F) << 6) + +                (utf8str[offset + 6] & 0x3F); +    } +    return LL_UNKNOWN_CHAR; +} + +std::string utf8str_showBytesUTF8(const std::string& utf8str) +{ +    std::string result; + +    bool in_sequence = false; +    size_t sequence_size = 0; +    size_t byte_index = 0; +    size_t source_length = utf8str.size(); + +    auto open_sequence = [&]() +        { +            if (!result.empty() && result.back() != '\n') +                result += '\n'; // Use LF as a separator before new UTF-8 sequence +            result += '['; +            in_sequence = true; +        }; + +    auto close_sequence = [&]() +        { +            llwchar unicode = utf8str_to_wchar(utf8str, byte_index - sequence_size, sequence_size); +            if (unicode != LL_UNKNOWN_CHAR) +            { +                result += llformat("+%04X", unicode); +            } +            result += ']'; +            in_sequence = false; +            sequence_size = 0; +        }; + +    while (byte_index < source_length) +    { +        U8 byte = utf8str[byte_index]; +        if (byte >= 0x80) // Part of an UTF-8 sequence +        { +            if (!in_sequence) // Start new UTF-8 sequence +            { +                open_sequence(); +            } +            else if (byte >= 0xC0) // Start another UTF-8 sequence +            { +                close_sequence(); +                open_sequence(); +            } +            else // Continue the same UTF-8 sequence +            { +                result += '.'; +            } +            result += llformat("%02X", byte); // The byte is represented in hexadecimal form +            ++sequence_size; +        } +        else // ASCII symbol is represented as a character +        { +            if (in_sequence) // End of UTF-8 sequence +            { +                close_sequence(); +                if (byte != '\n') +                { +                    result += '\n'; // Use LF as a separator between UTF-8 and ASCII +                } +            } +            result += byte; +        } +        ++byte_index; +    } + +    if (in_sequence) // End of UTF-8 sequence +    { +        close_sequence(); +    } + +    return result; +} +  #if LL_WINDOWS  unsigned int ll_wstring_default_code_page()  { @@ -833,6 +970,40 @@ std::string LLStringOps::sDayFormat;  std::string LLStringOps::sAM;  std::string LLStringOps::sPM; +// static +bool LLStringOps::isEmoji(llwchar wch) +{ +	int ublock = ublock_getCode(wch); +	switch (ublock) +	{ +		case UBLOCK_GENERAL_PUNCTUATION: +		case UBLOCK_LETTERLIKE_SYMBOLS: +		case UBLOCK_ARROWS: +		case UBLOCK_MISCELLANEOUS_TECHNICAL: +		case UBLOCK_ENCLOSED_ALPHANUMERICS: +		case UBLOCK_GEOMETRIC_SHAPES: +		case UBLOCK_MISCELLANEOUS_SYMBOLS: +		case UBLOCK_DINGBATS: +		case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: +		case UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS: +		case UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS: +		case UBLOCK_EMOTICONS: +		case UBLOCK_TRANSPORT_AND_MAP_SYMBOLS: +#if U_ICU_VERSION_MAJOR_NUM > 56 +		// Boost uses ICU so we can't update it independently +		case UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS: +#endif // U_ICU_VERSION_MAJOR_NUM > 56 +			return true; +		default: +#if U_ICU_VERSION_MAJOR_NUM > 56 +			return false; +#else +			// See https://en.wikipedia.org/wiki/Supplemental_Symbols_and_Pictographs +			return wch >= 0x1F900 && wch <= 0x1F9FF; +#endif // U_ICU_VERSION_MAJOR_NUM > 56 +	} +} +  S32	LLStringOps::collate(const llwchar* a, const llwchar* b)  {  diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 6893b8ebff..605d0ac4d7 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -190,6 +190,8 @@ public:  	static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; }  	static bool isAlnum(llwchar a) { return iswalnum(a) != 0; } +	static bool isEmoji(llwchar wch); +  	static S32	collate(const char* a, const char* b) { return strcoll(a, b); }  	static S32	collate(const llwchar* a, const llwchar* b); @@ -356,6 +358,8 @@ public:  	static void	replaceNonstandardASCII( string_type& string, T replacement );  	static void	replaceChar( string_type& string, T target, T replacement );  	static void replaceString( string_type& string, string_type target, string_type replacement ); +	static string_type capitalize(const string_type& str); +	static void capitalize(string_type& str);  	static bool	containsNonprintable(const string_type& string);  	static void	stripNonprintable(string_type& string); @@ -679,6 +683,8 @@ LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr);  // Length in bytes of this wide char in a UTF8 string  LL_COMMON_API S32 wchar_utf8_length(const llwchar wc);  +LL_COMMON_API std::string wchar_utf8_preview(const llwchar wc); +  LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str);  // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. @@ -738,6 +744,9 @@ LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str);  LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); +LL_COMMON_API llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length); + +LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str);  #if LL_WINDOWS  /* @name Windows string helpers @@ -1595,6 +1604,29 @@ void LLStringUtilBase<T>::replaceTabsWithSpaces( string_type& str, size_type spa  }  //static +template<class T> +std::basic_string<T> LLStringUtilBase<T>::capitalize(const string_type& str) +{ +	string_type result(str); +	capitalize(result); +	return result; +} + +//static +template<class T> +void LLStringUtilBase<T>::capitalize(string_type& str) +{ +	if (str.size()) +	{ +		auto last = str[0] = toupper(str[0]); +		for (U32 i = 1; i < str.size(); ++i) +		{ +			last = (last == ' ' || last == '-' || last == '_') ? str[i] = toupper(str[i]) : str[i]; +		} +	} +} + +//static  template<class T>   bool LLStringUtilBase<T>::containsNonprintable(const string_type& string)  { diff --git a/indra/llcommon/tests/llsingleton_test.cpp b/indra/llcommon/tests/llsingleton_test.cpp index 15ffe68e67..6f8aaaa0cb 100644 --- a/indra/llcommon/tests/llsingleton_test.cpp +++ b/indra/llcommon/tests/llsingleton_test.cpp @@ -47,8 +47,8 @@ public:                                             \          DEP_INIT  /* dependency in initSingleton */ \      } sDepFlag;                                     \                                                      \ -    void initSingleton();                           \ -    void cleanupSingleton();                        \ +    void initSingleton() override;                  \ +    void cleanupSingleton() override;               \  };                                                  \                                                      \  CLS::dep_flag CLS::sDepFlag = DEP_NONE @@ -300,7 +300,7 @@ namespace tut      {          LLSINGLETON_EMPTY_CTOR(CircularPInit);      public: -        virtual void initSingleton() +        virtual void initSingleton() override          {              // never mind indirection, just go straight for the circularity              CircularPInit *pt = getInstance();  | 
