diff options
Diffstat (limited to 'indra/llcommon/llstring.h')
-rw-r--r-- | indra/llcommon/llstring.h | 4071 |
1 files changed, 2039 insertions, 2032 deletions
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index a20a40e205..ac05dc3cd0 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -1,2032 +1,2039 @@ -/** - * @file llstring.h - * @brief String utility functions and std::string class. - * - * $LicenseInfo:firstyear=2001&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA - * $/LicenseInfo$ - */ - -#ifndef LL_LLSTRING_H -#define LL_LLSTRING_H - -#include <boost/call_traits.hpp> -#include <optional> -#include <string> -#include <string_view> -#include <cstdio> -#include <cwchar> // std::wcslen() -//#include <locale> -#include <iomanip> -#include <algorithm> -#include <vector> -#include <map> -#include "llformat.h" - -#if LL_LINUX -#include <wctype.h> -#include <wchar.h> -#endif - -#include <string.h> -#include <boost/scoped_ptr.hpp> - -const char LL_UNKNOWN_CHAR = '?'; -class LLSD; - -#if LL_DARWIN || LL_LINUX -// Template specialization of char_traits for U16s. Only necessary on Mac and Linux (exists on Windows already) -#include <cstring> - -namespace std -{ -template<> -struct char_traits<U16> -{ - typedef U16 char_type; - typedef int int_type; - typedef streampos pos_type; - typedef streamoff off_type; - typedef mbstate_t state_type; - - static void - assign(char_type& __c1, const char_type& __c2) - { __c1 = __c2; } - - static bool - eq(const char_type& __c1, const char_type& __c2) - { return __c1 == __c2; } - - static bool - lt(const char_type& __c1, const char_type& __c2) - { return __c1 < __c2; } - - static int - compare(const char_type* __s1, const char_type* __s2, size_t __n) - { return memcmp(__s1, __s2, __n * sizeof(char_type)); } - - static size_t - length(const char_type* __s) - { - const char_type *cur_char = __s; - while (*cur_char != 0) - { - ++cur_char; - } - return cur_char - __s; - } - - static const char_type* - find(const char_type* __s, size_t __n, const char_type& __a) - { return static_cast<const char_type*>(memchr(__s, __a, __n * sizeof(char_type))); } - - static char_type* - move(char_type* __s1, const char_type* __s2, size_t __n) - { return static_cast<char_type*>(memmove(__s1, __s2, __n * sizeof(char_type))); } - - static char_type* - copy(char_type* __s1, const char_type* __s2, size_t __n) - { return static_cast<char_type*>(memcpy(__s1, __s2, __n * sizeof(char_type))); } /* Flawfinder: ignore */ - - static char_type* - assign(char_type* __s, size_t __n, char_type __a) - { - // This isn't right. - //return static_cast<char_type*>(memset(__s, __a, __n * sizeof(char_type))); - - // I don't think there's a standard 'memset' for 16-bit values. - // Do this the old-fashioned way. - - size_t __i; - for(__i = 0; __i < __n; __i++) - { - __s[__i] = __a; - } - return __s; - } - - static char_type - to_char_type(const int_type& __c) - { return static_cast<char_type>(__c); } - - static int_type - to_int_type(const char_type& __c) - { return static_cast<int_type>(__c); } - - static bool - eq_int_type(const int_type& __c1, const int_type& __c2) - { return __c1 == __c2; } - - static int_type - eof() { return static_cast<int_type>(EOF); } - - static int_type - not_eof(const int_type& __c) - { return (__c == eof()) ? 0 : __c; } - }; -}; -#endif - -class LL_COMMON_API LLStringOps -{ -private: - static long sPacificTimeOffset; - static long sLocalTimeOffset; - static bool sPacificDaylightTime; - - static std::map<std::string, std::string> datetimeToCodes; - -public: - static std::vector<std::string> sWeekDayList; - static std::vector<std::string> sWeekDayShortList; - static std::vector<std::string> sMonthList; - static std::vector<std::string> sMonthShortList; - static std::string sDayFormat; - - static std::string sAM; - static std::string sPM; - - static char toUpper(char elem) { return toupper((unsigned char)elem); } - static llwchar toUpper(llwchar elem) { return towupper(elem); } - - static char toLower(char elem) { return tolower((unsigned char)elem); } - static llwchar toLower(llwchar elem) { return towlower(elem); } - - static bool isSpace(char elem) { return isspace((unsigned char)elem) != 0; } - static bool isSpace(llwchar elem) { return iswspace(elem) != 0; } - - static bool isUpper(char elem) { return isupper((unsigned char)elem) != 0; } - static bool isUpper(llwchar elem) { return iswupper(elem) != 0; } - - static bool isLower(char elem) { return islower((unsigned char)elem) != 0; } - static bool isLower(llwchar elem) { return iswlower(elem) != 0; } - - static bool isDigit(char a) { return isdigit((unsigned char)a) != 0; } - static bool isDigit(llwchar a) { return iswdigit(a) != 0; } - - static bool isPunct(char a) { return ispunct((unsigned char)a) != 0; } - static bool isPunct(llwchar a) { return iswpunct(a) != 0; } - - static bool isAlpha(char a) { return isalpha((unsigned char)a) != 0; } - static bool isAlpha(llwchar a) { return iswalpha(a) != 0; } - - static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; } - static bool isAlnum(llwchar a) { return iswalnum(a) != 0; } - - static bool isEmoji(llwchar wch); - - static S32 collate(const char* a, const char* b) { return strcoll(a, b); } - static S32 collate(const llwchar* a, const llwchar* b); - - static void setupDatetimeInfo(bool pacific_daylight_time); - - static void setupWeekDaysNames(const std::string& data); - static void setupWeekDaysShortNames(const std::string& data); - static void setupMonthNames(const std::string& data); - static void setupMonthShortNames(const std::string& data); - static void setupDayFormat(const std::string& data); - - - static long getPacificTimeOffset(void) { return sPacificTimeOffset;} - static long getLocalTimeOffset(void) { return sLocalTimeOffset;} - // Is the Pacific time zone (aka server time zone) - // currently in daylight savings time? - static bool getPacificDaylightTime(void) { return sPacificDaylightTime;} - - static std::string getDatetimeCode (std::string key); - - // Express a value like 1234567 as "1.23M" - static std::string getReadableNumber(F64 num); -}; - -/** - * @brief Return a string constructed from in without crashing if the - * pointer is NULL. - */ -LL_COMMON_API std::string ll_safe_string(const char* in); -LL_COMMON_API std::string ll_safe_string(const char* in, S32 maxlen); - - -// Allowing assignments from non-strings into format_map_t is apparently -// *really* error-prone, so subclass std::string with just basic c'tors. -class LLFormatMapString -{ -public: - LLFormatMapString() {}; - LLFormatMapString(const char* s) : mString(ll_safe_string(s)) {}; - LLFormatMapString(const std::string& s) : mString(s) {}; - operator std::string() const { return mString; } - bool operator<(const LLFormatMapString& rhs) const { return mString < rhs.mString; } - std::size_t length() const { return mString.length(); } - -private: - std::string mString; -}; - -template <class T> -class LLStringUtilBase -{ -private: - static std::string sLocale; - -public: - typedef std::basic_string<T> string_type; - typedef typename string_type::size_type size_type; - -public: - ///////////////////////////////////////////////////////////////////////////////////////// - // Static Utility functions that operate on std::strings - - static const string_type null; - - typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t; - /// considers any sequence of delims as a single field separator - LL_COMMON_API static void getTokens(const string_type& instr, - std::vector<string_type >& tokens, - const string_type& delims); - /// like simple scan overload, but returns scanned vector - static std::vector<string_type> getTokens(const string_type& instr, - const string_type& delims); - /// add support for keep_delims and quotes (either could be empty string) - static void getTokens(const string_type& instr, - std::vector<string_type>& tokens, - const string_type& drop_delims, - const string_type& keep_delims, - const string_type& quotes=string_type()); - /// like keep_delims-and-quotes overload, but returns scanned vector - static std::vector<string_type> getTokens(const string_type& instr, - const string_type& drop_delims, - const string_type& keep_delims, - const string_type& quotes=string_type()); - /// add support for escapes (could be empty string) - static void getTokens(const string_type& instr, - std::vector<string_type>& tokens, - const string_type& drop_delims, - const string_type& keep_delims, - const string_type& quotes, - const string_type& escapes); - /// like escapes overload, but returns scanned vector - static std::vector<string_type> getTokens(const string_type& instr, - const string_type& drop_delims, - const string_type& keep_delims, - const string_type& quotes, - const string_type& escapes); - - LL_COMMON_API static void formatNumber(string_type& numStr, string_type decimals); - LL_COMMON_API static bool formatDatetime(string_type& replacement, string_type token, string_type param, S32 secFromEpoch); - LL_COMMON_API static S32 format(string_type& s, const format_map_t& substitutions); - LL_COMMON_API static S32 format(string_type& s, const LLSD& substitutions); - LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const format_map_t& substitutions); - LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const LLSD& substitutions); - LL_COMMON_API static void setLocale (std::string inLocale); - LL_COMMON_API static std::string getLocale (void); - - static bool isValidIndex(const string_type& string, size_type i) - { - return !string.empty() && (0 <= i) && (i <= string.size()); - } - - static bool contains(const string_type& string, T c, size_type i=0) - { - return string.find(c, i) != string_type::npos; - } - - static void trimHead(string_type& string); - static void trimTail(string_type& string); - static void trim(string_type& string) { trimHead(string); trimTail(string); } - static void truncate(string_type& string, size_type count); - - static void toUpper(string_type& string); - static void toLower(string_type& string); - - // True if this is the head of s. - static bool isHead( const string_type& string, const T* s ); - - /** - * @brief Returns true if string starts with substr - * - * If etither string or substr are empty, this method returns false. - */ - static bool startsWith( - const string_type& string, - const string_type& substr); - - /** - * @brief Returns true if string ends in substr - * - * If etither string or substr are empty, this method returns false. - */ - static bool endsWith( - const string_type& string, - const string_type& substr); - - /** - * get environment string value with proper Unicode handling - * (key is always UTF-8) - * detect absence by return value == dflt - */ - static string_type getenv(const std::string& key, const string_type& dflt=""); - /** - * get optional environment string value with proper Unicode handling - * (key is always UTF-8) - * detect absence by (! return value) - */ - static std::optional<string_type> getoptenv(const std::string& key); - - static void addCRLF(string_type& string); - static void removeCRLF(string_type& string); - static void removeWindowsCR(string_type& string); - - static void replaceTabsWithSpaces( string_type& string, size_type spaces_per_tab ); - static void replaceNonstandardASCII( string_type& string, T replacement ); - static void replaceChar( string_type& string, T target, T replacement ); - static void replaceString( string_type& string, string_type target, string_type replacement ); - static string_type capitalize(const string_type& str); - static void capitalize(string_type& str); - - static bool containsNonprintable(const string_type& string); - static void stripNonprintable(string_type& string); - - /** - * Double-quote an argument string if needed, unless it's already - * double-quoted. Decide whether it's needed based on the presence of any - * character in @a triggers (default space or double-quote). If we quote - * it, escape any embedded double-quote with the @a escape string (default - * backslash). - * - * Passing triggers="" means always quote, unless it's already double-quoted. - */ - static string_type quote(const string_type& str, - const string_type& triggers=" \"", - const string_type& escape="\\"); - - /** - * @brief Unsafe way to make ascii characters. You should probably - * only call this when interacting with the host operating system. - * The 1 byte std::string does not work correctly. - * The 2 and 4 byte std::string probably work, so LLWStringUtil::_makeASCII - * should work. - */ - static void _makeASCII(string_type& string); - - // Conversion to other data types - static bool convertToBOOL(const string_type& string, bool& value); - static bool convertToU8(const string_type& string, U8& value); - static bool convertToS8(const string_type& string, S8& value); - static bool convertToS16(const string_type& string, S16& value); - static bool convertToU16(const string_type& string, U16& value); - static bool convertToU32(const string_type& string, U32& value); - static bool convertToS32(const string_type& string, S32& value); - static bool convertToF32(const string_type& string, F32& value); - static bool convertToF64(const string_type& string, F64& value); - - ///////////////////////////////////////////////////////////////////////////////////////// - // Utility functions for working with char*'s and strings - - // Like strcmp but also handles empty strings. Uses - // current locale. - static S32 compareStrings(const T* lhs, const T* rhs); - static S32 compareStrings(const string_type& lhs, const string_type& rhs); - - // case insensitive version of above. Uses current locale on - // Win32, and falls back to a non-locale aware comparison on - // Linux. - static S32 compareInsensitive(const T* lhs, const T* rhs); - static S32 compareInsensitive(const string_type& lhs, const string_type& rhs); - - // Case sensitive comparison with good handling of numbers. Does not use current locale. - // a.k.a. strdictcmp() - static S32 compareDict(const string_type& a, const string_type& b); - - // Case *in*sensitive comparison with good handling of numbers. Does not use current locale. - // a.k.a. strdictcmp() - static S32 compareDictInsensitive(const string_type& a, const string_type& b); - - // Puts compareDict() in a form appropriate for LL container classes to use for sorting. - static bool precedesDict( const string_type& a, const string_type& b ); - - // A replacement for strncpy. - // If the dst buffer is dst_size bytes long or more, ensures that dst is null terminated and holds - // up to dst_size-1 characters of src. - static void copy(T* dst, const T* src, size_type dst_size); - - // Copies src into dst at a given offset. - static void copyInto(string_type& dst, const string_type& src, size_type offset); - - static bool isPartOfWord(T c) { return (c == (T)'_') || LLStringOps::isAlnum(c); } - - -#ifdef _DEBUG - LL_COMMON_API static void testHarness(); -#endif - -private: - LL_COMMON_API static size_type getSubstitution(const string_type& instr, size_type& start, std::vector<string_type >& tokens); -}; - -template<class T> const std::basic_string<T> LLStringUtilBase<T>::null; -template<class T> std::string LLStringUtilBase<T>::sLocale; - -typedef LLStringUtilBase<char> LLStringUtil; -typedef LLStringUtilBase<llwchar> LLWStringUtil; -typedef std::basic_string<llwchar> LLWString; - -//@ Use this where we want to disallow input in the form of "foo" -// This is used to catch places where english text is embedded in the code -// instead of in a translatable XUI file. -class LLStringExplicit : public std::string -{ -public: - explicit LLStringExplicit(const char* s) : std::string(s) {} - LLStringExplicit(const std::string& s) : std::string(s) {} - LLStringExplicit(const std::string& s, size_type pos, size_type n = std::string::npos) : std::string(s, pos, n) {} -}; - -struct LLDictionaryLess -{ -public: - bool operator()(const std::string& a, const std::string& b) const - { - return (LLStringUtil::precedesDict(a, b)); - } -}; - - -/** - * Simple support functions - */ - -/** - * @brief chop off the trailing characters in a string. - * - * This function works on bytes rather than glyphs, so this will - * incorrectly truncate non-single byte strings. - * Use utf8str_truncate() for utf8 strings - * @return a copy of in string minus the trailing count bytes. - */ -inline std::string chop_tail_copy( - const std::string& in, - std::string::size_type count) -{ - return std::string(in, 0, in.length() - count); -} - -/** - * @brief This translates a nybble stored as a hex value from 0-f back - * to a nybble in the low order bits of the return byte. - */ -LL_COMMON_API bool is_char_hex(char hex); -LL_COMMON_API U8 hex_as_nybble(char hex); - -/** - * @brief read the contents of a file into a string. - * - * Since this function has no concept of character encoding, most - * anything you do with this method ill-advised. Please avoid. - * @param str [out] The string which will have. - * @param filename The full name of the file to read. - * @return Returns true on success. If false, str is unmodified. - */ -LL_COMMON_API bool _read_file_into_string(std::string& str, const std::string& filename); -LL_COMMON_API bool iswindividual(llwchar elem); - -/** - * Unicode support - */ - -/// generic conversion aliases -template<typename TO, typename FROM, typename Enable=void> -struct ll_convert_impl -{ - // Don't even provide a generic implementation. We specialize for every - // combination we do support. - TO operator()(const FROM& in) const; -}; - -// Use a function template to get the nice ll_convert<TO>(from_value) API. -template<typename TO, typename FROM> -TO ll_convert(const FROM& in) -{ - return ll_convert_impl<TO, FROM>()(in); -} - -// degenerate case -template<typename T> -struct ll_convert_impl<T, T> -{ - T operator()(const T& in) const { return in; } -}; - -// simple construction from char* -template<typename T> -struct ll_convert_impl<T, const typename T::value_type*> -{ - T operator()(const typename T::value_type* in) const { return { in }; } -}; - -// specialize ll_convert_impl<TO, FROM> to return EXPR -#define ll_convert_alias(TO, FROM, EXPR) \ -template<> \ -struct ll_convert_impl<TO, FROM> \ -{ \ - /* param_type optimally passes both char* and string */ \ - TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \ -} - -// If all we're doing is copying characters, pass this to ll_convert_alias as -// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl -// specialization above, it implies TO{ in.begin(), in.end() }. -#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } - -// Generic name for strlen() / wcslen() - the default implementation should -// (!) work with U16 and llwchar, but we don't intend to engage it. -template <typename CHARTYPE> -size_t ll_convert_length(const CHARTYPE* zstr) -{ - const CHARTYPE* zp; - // classic C string scan - for (zp = zstr; *zp; ++zp) - ; - return (zp - zstr); -} - -// specialize where we have a library function; may use intrinsic operations -template <> -inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); } -template <> -inline size_t ll_convert_length<char> (const char* zstr) { return std::strlen(zstr); } - -// ll_convert_forms() is short for a bunch of boilerplate. It defines -// longname(const char*, len), longname(const char*), longname(const string&) -// and longname(const string&, len) so calls written pre-ll_convert() will -// work. Most of these overloads will be unified once we turn on C++17 and can -// use std::string_view. -// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*) -// and ll_convert<OUTSTR>(const string&) will work. -#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \ -LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \ -inline auto longname(const INSTR& in, size_t len) \ -{ \ - return longname(in.c_str(), len); \ -} \ -inline auto longname(const INSTR::value_type* in) \ -{ \ - return longname(in, ll_convert_length(in)); \ -} \ -inline auto longname(const INSTR& in) \ -{ \ - return longname(in.c_str(), in.length()); \ -} \ -/* string param */ \ -aliasmacro(OUTSTR, INSTR, longname(in)); \ -/* char* param */ \ -aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) - -// Make the incoming string a utf8 string. Replaces any unknown glyph -// with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest -// of the data may not be recovered. -LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw); - -// -// We should never use UTF16 except when communicating with Win32! -// https://docs.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t -// nat 2018-12-14: I consider the whole llutf16string thing a mistake, because -// the Windows APIs we want to call are all defined in terms of wchar_t* -// (or worse, LPCTSTR). -// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types - -// While there is no point coding for an ASCII-only world (! defined(UNICODE)), -// use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going -// forward, we should code in terms of wchar_t and std::wstring so as to -// support either setting of /Zc:wchar_t. - -// The first link above states that char can be used to hold ASCII or any -// multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t -// (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base: -// * char and std::string always hold UTF-8 (of which ASCII is a subset). It -// is a BUG if they are used to pass strings in any other multi-byte -// encoding. -// * wchar_t and std::wstring should be our interface to Windows wide-string -// APIs, and therefore hold UTF-16LE. -// * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do -// not introduce new uses of U16 or llutf16string for string data. -// * llwchar and LLWString hold UTF-32 strings. -// * Do not introduce char16_t or std::u16string. -// * Do not introduce char32_t or std::u32string. -// -// This typedef may or may not be identical to std::wstring, depending on -// LL_WCHAR_T_NATIVE. -typedef std::basic_string<U16> llutf16string; - -// Considering wchar_t, llwchar and U16, there are three relevant cases: -#if LLWCHAR_IS_WCHAR_T // every which way but Windows -// llwchar is identical to wchar_t, LLWString is identical to std::wstring. -// U16 is distinct, llutf16string is distinct (though pretty useless). -// Given conversions to/from LLWString and to/from llutf16string, conversions -// involving std::wstring would collide. -#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing -// but we can define conversions involving llutf16string without collisions -#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) - -#elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t -// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types. -// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string -// can all be defined. -#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) -#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) - -#else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t- -// wchar_t is identical to U16, std::wstring is identical to llutf16string. -// Given conversions to/from LLWString and to/from std::wstring, conversions -// involving llutf16string would collide. -#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing -// but we can define conversions involving std::wstring without collisions -#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) -#endif - -ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring); -ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str); -ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str); -ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring); - -// Same function, better name. JC -inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } - -LL_COMMON_API std::ptrdiff_t wchar_to_utf8chars(llwchar inchar, char* outchars); - -ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str); -ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str); - -// an older alias for utf16str_to_utf8str(llutf16string) -inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} - -// Length of this UTF32 string in bytes when transformed to UTF8 -LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); - -// Length in bytes of this wide char in a UTF8 string -LL_COMMON_API S32 wchar_utf8_length(const llwchar wc); - -LL_COMMON_API std::string wchar_utf8_preview(const llwchar wc); - -LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str); - -// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. -LL_COMMON_API S32 utf16str_wstring_length(const llutf16string &utf16str, S32 len); - -// Length in utf16string (UTF-16) of wlen wchars beginning at woffset. -LL_COMMON_API S32 wstring_utf16_length(const LLWString & wstr, S32 woffset, S32 wlen); - -// Length in wstring (i.e., llwchar count) of a part of a wstring specified by utf16 length (i.e., utf16 units.) -LL_COMMON_API S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, S32 woffset, S32 utf16_length, bool *unaligned = nullptr); - -/** - * @brief Properly truncate a utf8 string to a maximum byte count. - * - * The returned string may be less than max_len if the truncation - * happens in the middle of a glyph. If max_len is longer than the - * string passed in, the return value == utf8str. - * @param utf8str A valid utf8 string to truncate. - * @param max_len The maximum number of bytes in the return value. - * @return Returns a valid utf8 string with byte count <= max_len. - */ -LL_COMMON_API std::string utf8str_truncate(const std::string& utf8str, const S32 max_len); - -LL_COMMON_API std::string utf8str_trim(const std::string& utf8str); - -LL_COMMON_API S32 utf8str_compare_insensitive( - const std::string& lhs, - const std::string& rhs); - -/** -* @brief Properly truncate a utf8 string to a maximum character count. -* -* If symbol_len is longer than the string passed in, the return -* value == utf8str. -* @param utf8str A valid utf8 string to truncate. -* @param symbol_len The maximum number of symbols in the return value. -* @return Returns a valid utf8 string with symbol count <= max_len. -*/ -LL_COMMON_API std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len); - -/** - * @brief Replace all occurences of target_char with replace_char - * - * @param utf8str A utf8 string to process. - * @param target_char The wchar to be replaced - * @param replace_char The wchar which is written on replace - */ -LL_COMMON_API std::string utf8str_substChar( - const std::string& utf8str, - const llwchar target_char, - const llwchar replace_char); - -LL_COMMON_API std::string utf8str_makeASCII(const std::string& utf8str); - -// Hack - used for evil notecards. -LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str); - -LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); - -LL_COMMON_API llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length); - -LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str); - -#if LL_WINDOWS -/* @name Windows string helpers - */ -//@{ - -/** - * @brief Convert a wide string to/from std::string - * Convert a Windows wide string to/from our LLWString - * - * This replaces the unsafe W2A macro from ATL. - */ -// Avoid requiring this header to #include the Windows header file declaring -// our actual default code_page by delegating this function to our .cpp file. -LL_COMMON_API unsigned int ll_wstring_default_code_page(); - -// This is like ll_convert_forms(), with the added complexity of a code page -// parameter that may or may not be passed. -#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \ -/* declare the only nontrivial implementation (in .cpp file) */ \ -LL_COMMON_API OUTSTR longname( \ - const INSTR::value_type* in, \ - size_t len, \ - unsigned int code_page=ll_wstring_default_code_page()); \ -/* if passed only a char pointer, scan for nul terminator */ \ -inline auto longname(const INSTR::value_type* in) \ -{ \ - return longname(in, ll_convert_length(in)); \ -} \ -/* if passed string and length, extract its char pointer */ \ -inline auto longname( \ - const INSTR& in, \ - size_t len, \ - unsigned int code_page=ll_wstring_default_code_page()) \ -{ \ - return longname(in.c_str(), len, code_page); \ -} \ -/* if passed only a string object, no scan, pass known length */ \ -inline auto longname(const INSTR& in) \ -{ \ - return longname(in.c_str(), in.length()); \ -} \ -aliasmacro(OUTSTR, INSTR, longname(in)); \ -aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) - -ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string); -ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide); - ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring); - ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide); - -/** - * Converts incoming string into utf8 string - * - */ -LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in); - -/// Get Windows message string for passed GetLastError() code -// VS 2013 doesn't let us forward-declare this template, which is what we -// started with, so the implementation could reference the specialization we -// haven't yet declared. Somewhat weirdly, just stating the generic -// implementation in terms of the specialization works, even in this order... - -// the general case is just a conversion from the sole implementation -// Microsoft says DWORD is a typedef for unsigned long -// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types -// so rather than drag windows.h into everybody's include space... -template<typename STRING> -STRING windows_message(unsigned long error) -{ - return ll_convert<STRING>(windows_message<std::wstring>(error)); -} - -/// There's only one real implementation -template<> -LL_COMMON_API std::wstring windows_message<std::wstring>(unsigned long error); - -/// Get Windows message string, implicitly calling GetLastError() -template<typename STRING> -STRING windows_message() { return windows_message<STRING>(GetLastError()); } - -//@} - -LL_COMMON_API std::optional<std::wstring> llstring_getoptenv(const std::string& key); - -#else // ! LL_WINDOWS - -LL_COMMON_API std::optional<std::string> llstring_getoptenv(const std::string& key); - -#endif // ! LL_WINDOWS - -/** - * Many of the 'strip' and 'replace' methods of LLStringUtilBase need - * specialization to work with the signed char type. - * Sadly, it is not possible (AFAIK) to specialize a single method of - * a template class. - * That stuff should go here. - */ -namespace LLStringFn -{ - /** - * @brief Replace all non-printable characters with replacement in - * string. - * NOTE - this will zap non-ascii - * - * @param [in,out] string the to modify. out value is the string - * with zero non-printable characters. - * @param The replacement character. use LL_UNKNOWN_CHAR if unsure. - */ - LL_COMMON_API void replace_nonprintable_in_ascii( - std::basic_string<char>& string, - char replacement); - - - /** - * @brief Replace all non-printable characters and pipe characters - * with replacement in a string. - * NOTE - this will zap non-ascii - * - * @param [in,out] the string to modify. out value is the string - * with zero non-printable characters and zero pipe characters. - * @param The replacement character. use LL_UNKNOWN_CHAR if unsure. - */ - LL_COMMON_API void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str, - char replacement); - - - /** - * @brief Remove all characters that are not allowed in XML 1.0. - * Returns a copy of the string with those characters removed. - * Works with US ASCII and UTF-8 encoded strings. JC - */ - LL_COMMON_API std::string strip_invalid_xml(const std::string& input); - - - /** - * @brief Replace all control characters (0 <= c < 0x20) with replacement in - * string. This is safe for utf-8 - * - * @param [in,out] string the to modify. out value is the string - * with zero non-printable characters. - * @param The replacement character. use LL_UNKNOWN_CHAR if unsure. - */ - LL_COMMON_API void replace_ascii_controlchars( - std::basic_string<char>& string, - char replacement); -} - -//////////////////////////////////////////////////////////// -// NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp. -// There is no LLWStringUtil::format implementation currently. -// Calling these for anything other than LLStringUtil will produce link errors. - -//////////////////////////////////////////////////////////// - -// static -template <class T> -std::vector<typename LLStringUtilBase<T>::string_type> -LLStringUtilBase<T>::getTokens(const string_type& instr, const string_type& delims) -{ - std::vector<string_type> tokens; - getTokens(instr, tokens, delims); - return tokens; -} - -// static -template <class T> -std::vector<typename LLStringUtilBase<T>::string_type> -LLStringUtilBase<T>::getTokens(const string_type& instr, - const string_type& drop_delims, - const string_type& keep_delims, - const string_type& quotes) -{ - std::vector<string_type> tokens; - getTokens(instr, tokens, drop_delims, keep_delims, quotes); - return tokens; -} - -// static -template <class T> -std::vector<typename LLStringUtilBase<T>::string_type> -LLStringUtilBase<T>::getTokens(const string_type& instr, - const string_type& drop_delims, - const string_type& keep_delims, - const string_type& quotes, - const string_type& escapes) -{ - std::vector<string_type> tokens; - getTokens(instr, tokens, drop_delims, keep_delims, quotes, escapes); - return tokens; -} - -namespace LLStringUtilBaseImpl -{ - -/** - * Input string scanner helper for getTokens(), or really any other - * character-parsing routine that may have to deal with escape characters. - * This implementation defines the concept (also an interface, should you - * choose to implement the concept by subclassing) and provides trivial - * implementations for a string @em without escape processing. - */ -template <class T> -struct InString -{ - typedef std::basic_string<T> string_type; - typedef typename string_type::const_iterator const_iterator; - - InString(const_iterator b, const_iterator e): - mIter(b), - mEnd(e) - {} - virtual ~InString() {} - - bool done() const { return mIter == mEnd; } - /// Is the current character (*mIter) escaped? This implementation can - /// answer trivially because it doesn't support escapes. - virtual bool escaped() const { return false; } - /// Obtain the current character and advance @c mIter. - virtual T next() { return *mIter++; } - /// Does the current character match specified character? - virtual bool is(T ch) const { return (! done()) && *mIter == ch; } - /// Is the current character any one of the specified characters? - virtual bool oneof(const string_type& delims) const - { - return (! done()) && LLStringUtilBase<T>::contains(delims, *mIter); - } - - /** - * Scan forward from @from until either @a delim or end. This is primarily - * useful for processing quoted substrings. - * - * If we do see @a delim, append everything from @from until (excluding) - * @a delim to @a into, advance @c mIter to skip @a delim, and return @c - * true. - * - * If we do not see @a delim, do not alter @a into or @c mIter and return - * @c false. Do not pass GO, do not collect $200. - * - * @note The @c false case described above implements normal getTokens() - * treatment of an unmatched open quote: treat the quote character as if - * escaped, that is, simply collect it as part of the current token. Other - * plausible behaviors directly affect the way getTokens() deals with an - * unmatched quote: e.g. throwing an exception to treat it as an error, or - * assuming a close quote beyond end of string (in which case return @c - * true). - */ - virtual bool collect_until(string_type& into, const_iterator from, T delim) - { - const_iterator found = std::find(from, mEnd, delim); - // If we didn't find delim, change nothing, just tell caller. - if (found == mEnd) - return false; - // Found delim! Append everything between from and found. - into.append(from, found); - // advance past delim in input - mIter = found + 1; - return true; - } - - const_iterator mIter, mEnd; -}; - -/// InString subclass that handles escape characters -template <class T> -class InEscString: public InString<T> -{ -public: - typedef InString<T> super; - typedef typename super::string_type string_type; - typedef typename super::const_iterator const_iterator; - using super::done; - using super::mIter; - using super::mEnd; - - InEscString(const_iterator b, const_iterator e, const string_type& escapes): - super(b, e), - mEscapes(escapes) - { - // Even though we've already initialized 'mIter' via our base-class - // constructor, set it again to check for initial escape char. - setiter(b); - } - - /// This implementation uses the answer cached by setiter(). - virtual bool escaped() const { return mIsEsc; } - virtual T next() - { - // If we're looking at the escape character of an escape sequence, - // skip that character. This is the one time we can modify 'mIter' - // without using setiter: for this one case we DO NOT CARE if the - // escaped character is itself an escape. - if (mIsEsc) - ++mIter; - // If we were looking at an escape character, this is the escaped - // character; otherwise it's just the next character. - T result(*mIter); - // Advance mIter, checking for escape sequence. - setiter(mIter + 1); - return result; - } - - virtual bool is(T ch) const - { - // Like base-class is(), except that an escaped character matches - // nothing. - return (! done()) && (! mIsEsc) && *mIter == ch; - } - - virtual bool oneof(const string_type& delims) const - { - // Like base-class oneof(), except that an escaped character matches - // nothing. - return (! done()) && (! mIsEsc) && LLStringUtilBase<T>::contains(delims, *mIter); - } - - virtual bool collect_until(string_type& into, const_iterator from, T delim) - { - // Deal with escapes in the characters we collect; that is, an escaped - // character must become just that character without the preceding - // escape. Collect characters in a separate string rather than - // directly appending to 'into' in case we do not find delim, in which - // case we're supposed to leave 'into' unmodified. - string_type collected; - // For scanning purposes, we're going to work directly with 'mIter'. - // Save its current value in case we fail to see delim. - const_iterator save_iter(mIter); - // Okay, set 'mIter', checking for escape. - setiter(from); - while (! done()) - { - // If we see an unescaped delim, stop and report success. - if ((! mIsEsc) && *mIter == delim) - { - // Append collected chars to 'into'. - into.append(collected); - // Don't forget to advance 'mIter' past delim. - setiter(mIter + 1); - return true; - } - // We're not at end, and either we're not looking at delim or it's - // escaped. Collect this character and keep going. - collected.push_back(next()); - } - // Here we hit 'mEnd' without ever seeing delim. Restore mIter and tell - // caller. - setiter(save_iter); - return false; - } - -private: - void setiter(const_iterator i) - { - mIter = i; - - // Every time we change 'mIter', set 'mIsEsc' to be able to repetitively - // answer escaped() without having to rescan 'mEscapes'. mIsEsc caches - // contains(mEscapes, *mIter). - - // We're looking at an escaped char if we're not already at end (that - // is, *mIter is even meaningful); if *mIter is in fact one of the - // specified escape characters; and if there's one more character - // following it. That is, if an escape character is the very last - // character of the input string, it loses its special meaning. - mIsEsc = (! done()) && - LLStringUtilBase<T>::contains(mEscapes, *mIter) && - (mIter+1) != mEnd; - } - - const string_type mEscapes; - bool mIsEsc; -}; - -/// getTokens() implementation based on InString concept -template <typename INSTRING, typename string_type> -void getTokens(INSTRING& instr, std::vector<string_type>& tokens, - const string_type& drop_delims, const string_type& keep_delims, - const string_type& quotes) -{ - // There are times when we want to match either drop_delims or - // keep_delims. Concatenate them up front to speed things up. - string_type all_delims(drop_delims + keep_delims); - // no tokens yet - tokens.clear(); - - // try for another token - while (! instr.done()) - { - // scan past any drop_delims - while (instr.oneof(drop_delims)) - { - // skip this drop_delim - instr.next(); - // but if that was the end of the string, done - if (instr.done()) - return; - } - // found the start of another token: make a slot for it - tokens.push_back(string_type()); - if (instr.oneof(keep_delims)) - { - // *iter is a keep_delim, a token of exactly 1 character. Append - // that character to the new token and proceed. - tokens.back().push_back(instr.next()); - continue; - } - // Here we have a non-delimiter token, which might consist of a mix of - // quoted and unquoted parts. Use bash rules for quoting: you can - // embed a quoted substring in the midst of an unquoted token (e.g. - // ~/"sub dir"/myfile.txt); you can ram two quoted substrings together - // to make a single token (e.g. 'He said, "'"Don't."'"'). We diverge - // from bash in that bash considers an unmatched quote an error. Our - // param signature doesn't allow for errors, so just pretend it's not - // a quote and embed it. - // At this level, keep scanning until we hit the next delimiter of - // either type (drop_delims or keep_delims). - while (! instr.oneof(all_delims)) - { - // If we're looking at an open quote, search forward for - // a close quote, collecting characters along the way. - if (instr.oneof(quotes) && - instr.collect_until(tokens.back(), instr.mIter+1, *instr.mIter)) - { - // collect_until is cleverly designed to do exactly what we - // need here. No further action needed if it returns true. - } - else - { - // Either *iter isn't a quote, or there's no matching close - // quote: in other words, just an ordinary char. Append it to - // current token. - tokens.back().push_back(instr.next()); - } - // having scanned that segment of this token, if we've reached the - // end of the string, we're done - if (instr.done()) - return; - } - } -} - -} // namespace LLStringUtilBaseImpl - -// static -template <class T> -void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens, - const string_type& drop_delims, const string_type& keep_delims, - const string_type& quotes) -{ - // Because this overload doesn't support escapes, use simple InString to - // manage input range. - LLStringUtilBaseImpl::InString<T> instring(string.begin(), string.end()); - LLStringUtilBaseImpl::getTokens(instring, tokens, drop_delims, keep_delims, quotes); -} - -// static -template <class T> -void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens, - const string_type& drop_delims, const string_type& keep_delims, - const string_type& quotes, const string_type& escapes) -{ - // This overload must deal with escapes. Delegate that to InEscString - // (unless there ARE no escapes). - std::unique_ptr< LLStringUtilBaseImpl::InString<T> > instrp; - if (escapes.empty()) - instrp.reset(new LLStringUtilBaseImpl::InString<T>(string.begin(), string.end())); - else - instrp.reset(new LLStringUtilBaseImpl::InEscString<T>(string.begin(), string.end(), escapes)); - LLStringUtilBaseImpl::getTokens(*instrp, tokens, drop_delims, keep_delims, quotes); -} - -// static -template<class T> -S32 LLStringUtilBase<T>::compareStrings(const T* lhs, const T* rhs) -{ - S32 result; - if( lhs == rhs ) - { - result = 0; - } - else - if ( !lhs || !lhs[0] ) - { - result = ((!rhs || !rhs[0]) ? 0 : 1); - } - else - if ( !rhs || !rhs[0]) - { - result = -1; - } - else - { - result = LLStringOps::collate(lhs, rhs); - } - return result; -} - -//static -template<class T> -S32 LLStringUtilBase<T>::compareStrings(const string_type& lhs, const string_type& rhs) -{ - return LLStringOps::collate(lhs.c_str(), rhs.c_str()); -} - -// static -template<class T> -S32 LLStringUtilBase<T>::compareInsensitive(const T* lhs, const T* rhs ) -{ - S32 result; - if( lhs == rhs ) - { - result = 0; - } - else - if ( !lhs || !lhs[0] ) - { - result = ((!rhs || !rhs[0]) ? 0 : 1); - } - else - if ( !rhs || !rhs[0] ) - { - result = -1; - } - else - { - string_type lhs_string(lhs); - string_type rhs_string(rhs); - LLStringUtilBase<T>::toUpper(lhs_string); - LLStringUtilBase<T>::toUpper(rhs_string); - result = LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str()); - } - return result; -} - -//static -template<class T> -S32 LLStringUtilBase<T>::compareInsensitive(const string_type& lhs, const string_type& rhs) -{ - string_type lhs_string(lhs); - string_type rhs_string(rhs); - LLStringUtilBase<T>::toUpper(lhs_string); - LLStringUtilBase<T>::toUpper(rhs_string); - return LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str()); -} - -// Case sensitive comparison with good handling of numbers. Does not use current locale. -// a.k.a. strdictcmp() - -//static -template<class T> -S32 LLStringUtilBase<T>::compareDict(const string_type& astr, const string_type& bstr) -{ - const T* a = astr.c_str(); - const T* b = bstr.c_str(); - T ca, cb; - S32 ai, bi, cnt = 0; - S32 bias = 0; - - ca = *(a++); - cb = *(b++); - while( ca && cb ){ - if( bias==0 ){ - if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); bias--; } - if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); bias++; } - }else{ - if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); } - if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); } - } - if( LLStringOps::isDigit(ca) ){ - if( cnt-->0 ){ - if( cb!=ca ) break; - }else{ - if( !LLStringOps::isDigit(cb) ) break; - for(ai=0; LLStringOps::isDigit(a[ai]); ai++); - for(bi=0; LLStringOps::isDigit(b[bi]); bi++); - if( ai<bi ){ ca=0; break; } - if( bi<ai ){ cb=0; break; } - if( ca!=cb ) break; - cnt = ai; - } - }else if( ca!=cb ){ break; - } - ca = *(a++); - cb = *(b++); - } - if( ca==cb ) ca += bias; - return ca-cb; -} - -// static -template<class T> -S32 LLStringUtilBase<T>::compareDictInsensitive(const string_type& astr, const string_type& bstr) -{ - const T* a = astr.c_str(); - const T* b = bstr.c_str(); - T ca, cb; - S32 ai, bi, cnt = 0; - - ca = *(a++); - cb = *(b++); - while( ca && cb ){ - if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); } - if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); } - if( LLStringOps::isDigit(ca) ){ - if( cnt-->0 ){ - if( cb!=ca ) break; - }else{ - if( !LLStringOps::isDigit(cb) ) break; - for(ai=0; LLStringOps::isDigit(a[ai]); ai++); - for(bi=0; LLStringOps::isDigit(b[bi]); bi++); - if( ai<bi ){ ca=0; break; } - if( bi<ai ){ cb=0; break; } - if( ca!=cb ) break; - cnt = ai; - } - }else if( ca!=cb ){ break; - } - ca = *(a++); - cb = *(b++); - } - return ca-cb; -} - -// Puts compareDict() in a form appropriate for LL container classes to use for sorting. -// static -template<class T> -bool LLStringUtilBase<T>::precedesDict( const string_type& a, const string_type& b ) -{ - if( a.size() && b.size() ) - { - return (LLStringUtilBase<T>::compareDict(a.c_str(), b.c_str()) < 0); - } - else - { - return (!b.empty()); - } -} - -//static -template<class T> -void LLStringUtilBase<T>::toUpper(string_type& string) -{ - if( !string.empty() ) - { - std::transform( - string.begin(), - string.end(), - string.begin(), - (T(*)(T)) &LLStringOps::toUpper); - } -} - -//static -template<class T> -void LLStringUtilBase<T>::toLower(string_type& string) -{ - if( !string.empty() ) - { - std::transform( - string.begin(), - string.end(), - string.begin(), - (T(*)(T)) &LLStringOps::toLower); - } -} - -//static -template<class T> -void LLStringUtilBase<T>::trimHead(string_type& string) -{ - if( !string.empty() ) - { - size_type i = 0; - while( i < string.length() && LLStringOps::isSpace( string[i] ) ) - { - i++; - } - string.erase(0, i); - } -} - -//static -template<class T> -void LLStringUtilBase<T>::trimTail(string_type& string) -{ - if( string.size() ) - { - size_type len = string.length(); - size_type i = len; - while( i > 0 && LLStringOps::isSpace( string[i-1] ) ) - { - i--; - } - - string.erase( i, len - i ); - } -} - - -// Replace line feeds with carriage return-line feed pairs. -//static -template<class T> -void LLStringUtilBase<T>::addCRLF(string_type& string) -{ - const T LF = 10; - const T CR = 13; - - // Count the number of line feeds - size_type count = 0; - size_type len = string.size(); - size_type i; - for( i = 0; i < len; i++ ) - { - if( string[i] == LF ) - { - count++; - } - } - - // Insert a carriage return before each line feed - if( count ) - { - size_type size = len + count; - T *t = new T[size]; - size_type j = 0; - for( i = 0; i < len; ++i ) - { - if( string[i] == LF ) - { - t[j] = CR; - ++j; - } - t[j] = string[i]; - ++j; - } - - string.assign(t, size); - delete[] t; - } -} - -// Remove all carriage returns -//static -template<class T> -void LLStringUtilBase<T>::removeCRLF(string_type& string) -{ - const T CR = 13; - - size_type cr_count = 0; - size_type len = string.size(); - size_type i; - for( i = 0; i < len - cr_count; i++ ) - { - if( string[i+cr_count] == CR ) - { - cr_count++; - } - - string[i] = string[i+cr_count]; - } - string.erase(i, cr_count); -} - -//static -template<class T> -void LLStringUtilBase<T>::removeWindowsCR(string_type& string) -{ - if (string.empty()) - { - return; - } - const T LF = 10; - const T CR = 13; - - size_type cr_count = 0; - size_type len = string.size(); - size_type i; - for( i = 0; i < len - cr_count - 1; i++ ) - { - if( string[i+cr_count] == CR && string[i+cr_count+1] == LF) - { - cr_count++; - } - - string[i] = string[i+cr_count]; - } - string.erase(i, cr_count); -} - -//static -template<class T> -void LLStringUtilBase<T>::replaceChar( string_type& string, T target, T replacement ) -{ - size_type found_pos = 0; - while( (found_pos = string.find(target, found_pos)) != string_type::npos ) - { - string[found_pos] = replacement; - found_pos++; // avoid infinite defeat if target == replacement - } -} - -//static -template<class T> -void LLStringUtilBase<T>::replaceString( string_type& string, string_type target, string_type replacement ) -{ - size_type found_pos = 0; - while( (found_pos = string.find(target, found_pos)) != string_type::npos ) - { - string.replace( found_pos, target.length(), replacement ); - found_pos += replacement.length(); // avoid infinite defeat if replacement contains target - } -} - -//static -template<class T> -void LLStringUtilBase<T>::replaceNonstandardASCII( string_type& string, T replacement ) -{ - const char LF = 10; - const S8 MIN = 32; -// const S8 MAX = 127; - - size_type len = string.size(); - for( size_type i = 0; i < len; i++ ) - { - // No need to test MAX < mText[i] because we treat mText[i] as a signed char, - // which has a max value of 127. - if( ( S8(string[i]) < MIN ) && (string[i] != LF) ) - { - string[i] = replacement; - } - } -} - -//static -template<class T> -void LLStringUtilBase<T>::replaceTabsWithSpaces( string_type& str, size_type spaces_per_tab ) -{ - const T TAB = '\t'; - const T SPACE = ' '; - - string_type out_str; - // Replace tabs with spaces - for (size_type i = 0; i < str.length(); i++) - { - if (str[i] == TAB) - { - for (size_type j = 0; j < spaces_per_tab; j++) - out_str += SPACE; - } - else - { - out_str += str[i]; - } - } - str = out_str; -} - -//static -template<class T> -std::basic_string<T> LLStringUtilBase<T>::capitalize(const string_type& str) -{ - string_type result(str); - capitalize(result); - return result; -} - -//static -template<class T> -void LLStringUtilBase<T>::capitalize(string_type& str) -{ - if (str.size()) - { - auto last = str[0] = toupper(str[0]); - for (U32 i = 1; i < str.size(); ++i) - { - last = (last == ' ' || last == '-' || last == '_') ? str[i] = toupper(str[i]) : str[i]; - } - } -} - -//static -template<class T> -bool LLStringUtilBase<T>::containsNonprintable(const string_type& string) -{ - const char MIN = 32; - bool rv = false; - for (size_type i = 0; i < string.size(); i++) - { - if(string[i] < MIN) - { - rv = true; - break; - } - } - return rv; -} - -// *TODO: reimplement in terms of algorithm -//static -template<class T> -void LLStringUtilBase<T>::stripNonprintable(string_type& string) -{ - const char MIN = 32; - size_type j = 0; - if (string.empty()) - { - return; - } - size_t src_size = string.size(); - char* c_string = new char[src_size + 1]; - if(c_string == NULL) - { - return; - } - copy(c_string, string.c_str(), src_size+1); - char* write_head = &c_string[0]; - for (size_type i = 0; i < src_size; i++) - { - char* read_head = &string[i]; - write_head = &c_string[j]; - if(!(*read_head < MIN)) - { - *write_head = *read_head; - ++j; - } - } - c_string[j]= '\0'; - string = c_string; - delete []c_string; -} - -// *TODO: reimplement in terms of algorithm -template<class T> -std::basic_string<T> LLStringUtilBase<T>::quote(const string_type& str, - const string_type& triggers, - const string_type& escape) -{ - size_type len(str.length()); - // If the string is already quoted, assume user knows what s/he's doing. - if (len >= 2 && str[0] == '"' && str[len-1] == '"') - { - return str; - } - - // Not already quoted: do we need to? triggers.empty() is a special case - // meaning "always quote." - if ((! triggers.empty()) && str.find_first_of(triggers) == string_type::npos) - { - // no trigger characters, don't bother quoting - return str; - } - - // For whatever reason, we must quote this string. - string_type result; - result.push_back('"'); - for (typename string_type::const_iterator ci(str.begin()), cend(str.end()); ci != cend; ++ci) - { - if (*ci == '"') - { - result.append(escape); - } - result.push_back(*ci); - } - result.push_back('"'); - return result; -} - -template<class T> -void LLStringUtilBase<T>::_makeASCII(string_type& string) -{ - // Replace non-ASCII chars with LL_UNKNOWN_CHAR - for (size_type i = 0; i < string.length(); i++) - { - if (string[i] > 0x7f) - { - string[i] = LL_UNKNOWN_CHAR; - } - } -} - -// static -template<class T> -void LLStringUtilBase<T>::copy( T* dst, const T* src, size_type dst_size ) -{ - if( dst_size > 0 ) - { - size_type min_len = 0; - if( src ) - { - min_len = llmin( dst_size - 1, strlen( src ) ); /* Flawfinder: ignore */ - memcpy(dst, src, min_len * sizeof(T)); /* Flawfinder: ignore */ - } - dst[min_len] = '\0'; - } -} - -// static -template<class T> -void LLStringUtilBase<T>::copyInto(string_type& dst, const string_type& src, size_type offset) -{ - if ( offset == dst.length() ) - { - // special case - append to end of string and avoid expensive - // (when strings are large) string manipulations - dst += src; - } - else - { - string_type tail = dst.substr(offset); - - dst = dst.substr(0, offset); - dst += src; - dst += tail; - }; -} - -// True if this is the head of s. -//static -template<class T> -bool LLStringUtilBase<T>::isHead( const string_type& string, const T* s ) -{ - if( string.empty() ) - { - // Early exit - return false; - } - else - { - return (strncmp( s, string.c_str(), string.size() ) == 0); - } -} - -// static -template<class T> -bool LLStringUtilBase<T>::startsWith( - const string_type& string, - const string_type& substr) -{ - if(string.empty() || (substr.empty())) return false; - if (substr.length() > string.length()) return false; - if (0 == string.compare(0, substr.length(), substr)) return true; - return false; -} - -// static -template<class T> -bool LLStringUtilBase<T>::endsWith( - const string_type& string, - const string_type& substr) -{ - if(string.empty() || (substr.empty())) return false; - size_t sub_len = substr.length(); - size_t str_len = string.length(); - if (sub_len > str_len) return false; - if (0 == string.compare(str_len - sub_len, sub_len, substr)) return true; - return false; -} - -// static -template<class T> -auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> std::optional<string_type> -{ - auto found(llstring_getoptenv(key)); - if (found) - { - // return populated std::optional - return { ll_convert<string_type>(*found) }; - } - else - { - // empty std::optional - return {}; - } -} - -// static -template<class T> -auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type -{ - auto found(getoptenv(key)); - if (found) - { - return *found; - } - else - { - return dflt; - } -} - -template<class T> -bool LLStringUtilBase<T>::convertToBOOL(const string_type& string, bool& value) -{ - if( string.empty() ) - { - return false; - } - - string_type temp( string ); - trim(temp); - if( - (temp == "1") || - (temp == "T") || - (temp == "t") || - (temp == "TRUE") || - (temp == "true") || - (temp == "True") ) - { - value = true; - return true; - } - else - if( - (temp == "0") || - (temp == "F") || - (temp == "f") || - (temp == "FALSE") || - (temp == "false") || - (temp == "False") ) - { - value = false; - return true; - } - - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToU8(const string_type& string, U8& value) -{ - S32 value32 = 0; - bool success = convertToS32(string, value32); - if( success && (U8_MIN <= value32) && (value32 <= U8_MAX) ) - { - value = (U8) value32; - return true; - } - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToS8(const string_type& string, S8& value) -{ - S32 value32 = 0; - bool success = convertToS32(string, value32); - if( success && (S8_MIN <= value32) && (value32 <= S8_MAX) ) - { - value = (S8) value32; - return true; - } - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToS16(const string_type& string, S16& value) -{ - S32 value32 = 0; - bool success = convertToS32(string, value32); - if( success && (S16_MIN <= value32) && (value32 <= S16_MAX) ) - { - value = (S16) value32; - return true; - } - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToU16(const string_type& string, U16& value) -{ - S32 value32 = 0; - bool success = convertToS32(string, value32); - if( success && (U16_MIN <= value32) && (value32 <= U16_MAX) ) - { - value = (U16) value32; - return true; - } - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToU32(const string_type& string, U32& value) -{ - if( string.empty() ) - { - return false; - } - - string_type temp( string ); - trim(temp); - U32 v; - std::basic_istringstream<T> i_stream((string_type)temp); - if(i_stream >> v) - { - value = v; - return true; - } - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToS32(const string_type& string, S32& value) -{ - if( string.empty() ) - { - return false; - } - - string_type temp( string ); - trim(temp); - S32 v; - std::basic_istringstream<T> i_stream((string_type)temp); - if(i_stream >> v) - { - //TODO: figure out overflow and underflow reporting here - //if((LONG_MAX == v) || (LONG_MIN == v)) - //{ - // // Underflow or overflow - // return false; - //} - - value = v; - return true; - } - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToF32(const string_type& string, F32& value) -{ - F64 value64 = 0.0; - bool success = convertToF64(string, value64); - if( success && (-F32_MAX <= value64) && (value64 <= F32_MAX) ) - { - value = (F32) value64; - return true; - } - return false; -} - -template<class T> -bool LLStringUtilBase<T>::convertToF64(const string_type& string, F64& value) -{ - if( string.empty() ) - { - return false; - } - - string_type temp( string ); - trim(temp); - F64 v; - std::basic_istringstream<T> i_stream((string_type)temp); - if(i_stream >> v) - { - //TODO: figure out overflow and underflow reporting here - //if( ((-HUGE_VAL == v) || (HUGE_VAL == v))) ) - //{ - // // Underflow or overflow - // return false; - //} - - value = v; - return true; - } - return false; -} - -template<class T> -void LLStringUtilBase<T>::truncate(string_type& string, size_type count) -{ - size_type cur_size = string.size(); - string.resize(count < cur_size ? count : cur_size); -} - -// The good thing about *declaration* macros, vs. usage macros, is that now -// we're done with them: we don't need them to bleed into the consuming source -// file. -#undef ll_convert_alias -#undef ll_convert_u16_alias -#undef ll_convert_wstr_alias -#undef LL_CONVERT_COPY_CHARS -#undef ll_convert_forms -#undef ll_convert_cp_forms - -#endif // LL_STRING_H +/**
+ * @file llstring.h
+ * @brief String utility functions and std::string class.
+ *
+ * $LicenseInfo:firstyear=2001&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_LLSTRING_H
+#define LL_LLSTRING_H
+
+#include <boost/call_traits.hpp>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <cstdio>
+#include <cwchar> // std::wcslen()
+//#include <locale>
+#include <iomanip>
+#include <algorithm>
+#include <vector>
+#include <map>
+#include "llformat.h"
+
+#if LL_LINUX
+#include <wctype.h>
+#include <wchar.h>
+#endif
+
+#include <string.h>
+#include <boost/scoped_ptr.hpp>
+
+const char LL_UNKNOWN_CHAR = '?';
+class LLSD;
+
+#if LL_DARWIN || LL_LINUX
+// Template specialization of char_traits for U16s. Only necessary on Mac and Linux (exists on Windows already)
+#include <cstring>
+
+namespace std
+{
+template<>
+struct char_traits<U16>
+{
+ typedef U16 char_type;
+ typedef int int_type;
+ typedef streampos pos_type;
+ typedef streamoff off_type;
+ typedef mbstate_t state_type;
+
+ static void
+ assign(char_type& __c1, const char_type& __c2)
+ { __c1 = __c2; }
+
+ static bool
+ eq(const char_type& __c1, const char_type& __c2)
+ { return __c1 == __c2; }
+
+ static bool
+ lt(const char_type& __c1, const char_type& __c2)
+ { return __c1 < __c2; }
+
+ static int
+ compare(const char_type* __s1, const char_type* __s2, size_t __n)
+ { return memcmp(__s1, __s2, __n * sizeof(char_type)); }
+
+ static size_t
+ length(const char_type* __s)
+ {
+ const char_type *cur_char = __s;
+ while (*cur_char != 0)
+ {
+ ++cur_char;
+ }
+ return cur_char - __s;
+ }
+
+ static const char_type*
+ find(const char_type* __s, size_t __n, const char_type& __a)
+ { return static_cast<const char_type*>(memchr(__s, __a, __n * sizeof(char_type))); }
+
+ static char_type*
+ move(char_type* __s1, const char_type* __s2, size_t __n)
+ { return static_cast<char_type*>(memmove(__s1, __s2, __n * sizeof(char_type))); }
+
+ static char_type*
+ copy(char_type* __s1, const char_type* __s2, size_t __n)
+ { return static_cast<char_type*>(memcpy(__s1, __s2, __n * sizeof(char_type))); } /* Flawfinder: ignore */
+
+ static char_type*
+ assign(char_type* __s, size_t __n, char_type __a)
+ {
+ // This isn't right.
+ //return static_cast<char_type*>(memset(__s, __a, __n * sizeof(char_type)));
+
+ // I don't think there's a standard 'memset' for 16-bit values.
+ // Do this the old-fashioned way.
+
+ size_t __i;
+ for(__i = 0; __i < __n; __i++)
+ {
+ __s[__i] = __a;
+ }
+ return __s;
+ }
+
+ static char_type
+ to_char_type(const int_type& __c)
+ { return static_cast<char_type>(__c); }
+
+ static int_type
+ to_int_type(const char_type& __c)
+ { return static_cast<int_type>(__c); }
+
+ static bool
+ eq_int_type(const int_type& __c1, const int_type& __c2)
+ { return __c1 == __c2; }
+
+ static int_type
+ eof() { return static_cast<int_type>(EOF); }
+
+ static int_type
+ not_eof(const int_type& __c)
+ { return (__c == eof()) ? 0 : __c; }
+ };
+};
+#endif
+
+class LL_COMMON_API LLStringOps
+{
+private:
+ static long sPacificTimeOffset;
+ static long sLocalTimeOffset;
+ static bool sPacificDaylightTime;
+
+ static std::map<std::string, std::string> datetimeToCodes;
+
+public:
+ static std::vector<std::string> sWeekDayList;
+ static std::vector<std::string> sWeekDayShortList;
+ static std::vector<std::string> sMonthList;
+ static std::vector<std::string> sMonthShortList;
+ static std::string sDayFormat;
+
+ static std::string sAM;
+ static std::string sPM;
+
+ static char toUpper(char elem) { return toupper((unsigned char)elem); }
+ static llwchar toUpper(llwchar elem) { return towupper(elem); }
+
+ static char toLower(char elem) { return tolower((unsigned char)elem); }
+ static llwchar toLower(llwchar elem) { return towlower(elem); }
+
+ static bool isSpace(char elem) { return isspace((unsigned char)elem) != 0; }
+ static bool isSpace(llwchar elem) { return iswspace(elem) != 0; }
+
+ static bool isUpper(char elem) { return isupper((unsigned char)elem) != 0; }
+ static bool isUpper(llwchar elem) { return iswupper(elem) != 0; }
+
+ static bool isLower(char elem) { return islower((unsigned char)elem) != 0; }
+ static bool isLower(llwchar elem) { return iswlower(elem) != 0; }
+
+ static bool isDigit(char a) { return isdigit((unsigned char)a) != 0; }
+ static bool isDigit(llwchar a) { return iswdigit(a) != 0; }
+
+ static bool isPunct(char a) { return ispunct((unsigned char)a) != 0; }
+ static bool isPunct(llwchar a) { return iswpunct(a) != 0; }
+
+ static bool isAlpha(char a) { return isalpha((unsigned char)a) != 0; }
+ static bool isAlpha(llwchar a) { return iswalpha(a) != 0; }
+
+ static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; }
+ static bool isAlnum(llwchar a) { return iswalnum(a) != 0; }
+
+ // Returns true when 'a' corresponds to a "genuine" emoji. HB
+ static bool isEmoji(llwchar a);
+
+ static S32 collate(const char* a, const char* b) { return strcoll(a, b); }
+ static S32 collate(const llwchar* a, const llwchar* b);
+
+ static void setupDatetimeInfo(bool pacific_daylight_time);
+
+ static void setupWeekDaysNames(const std::string& data);
+ static void setupWeekDaysShortNames(const std::string& data);
+ static void setupMonthNames(const std::string& data);
+ static void setupMonthShortNames(const std::string& data);
+ static void setupDayFormat(const std::string& data);
+
+
+ static long getPacificTimeOffset(void) { return sPacificTimeOffset;}
+ static long getLocalTimeOffset(void) { return sLocalTimeOffset;}
+ // Is the Pacific time zone (aka server time zone)
+ // currently in daylight savings time?
+ static bool getPacificDaylightTime(void) { return sPacificDaylightTime;}
+
+ static std::string getDatetimeCode (std::string key);
+
+ // Express a value like 1234567 as "1.23M"
+ static std::string getReadableNumber(F64 num);
+};
+
+/**
+ * @brief Return a string constructed from in without crashing if the
+ * pointer is NULL.
+ */
+LL_COMMON_API std::string ll_safe_string(const char* in);
+LL_COMMON_API std::string ll_safe_string(const char* in, S32 maxlen);
+
+
+// Allowing assignments from non-strings into format_map_t is apparently
+// *really* error-prone, so subclass std::string with just basic c'tors.
+class LLFormatMapString
+{
+public:
+ LLFormatMapString() {};
+ LLFormatMapString(const char* s) : mString(ll_safe_string(s)) {};
+ LLFormatMapString(const std::string& s) : mString(s) {};
+ operator std::string() const { return mString; }
+ bool operator<(const LLFormatMapString& rhs) const { return mString < rhs.mString; }
+ std::size_t length() const { return mString.length(); }
+
+private:
+ std::string mString;
+};
+
+template <class T>
+class LLStringUtilBase
+{
+private:
+ static std::string sLocale;
+
+public:
+ typedef std::basic_string<T> string_type;
+ typedef typename string_type::size_type size_type;
+
+public:
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // Static Utility functions that operate on std::strings
+
+ static const string_type null;
+
+ typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t;
+ /// considers any sequence of delims as a single field separator
+ LL_COMMON_API static void getTokens(const string_type& instr,
+ std::vector<string_type >& tokens,
+ const string_type& delims);
+ /// like simple scan overload, but returns scanned vector
+ static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& delims);
+ /// add support for keep_delims and quotes (either could be empty string)
+ static void getTokens(const string_type& instr,
+ std::vector<string_type>& tokens,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes=string_type());
+ /// like keep_delims-and-quotes overload, but returns scanned vector
+ static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes=string_type());
+ /// add support for escapes (could be empty string)
+ static void getTokens(const string_type& instr,
+ std::vector<string_type>& tokens,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes);
+ /// like escapes overload, but returns scanned vector
+ static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes);
+
+ LL_COMMON_API static void formatNumber(string_type& numStr, string_type decimals);
+ LL_COMMON_API static bool formatDatetime(string_type& replacement, string_type token, string_type param, S32 secFromEpoch);
+ LL_COMMON_API static S32 format(string_type& s, const format_map_t& substitutions);
+ LL_COMMON_API static S32 format(string_type& s, const LLSD& substitutions);
+ LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const format_map_t& substitutions);
+ LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const LLSD& substitutions);
+ LL_COMMON_API static void setLocale (std::string inLocale);
+ LL_COMMON_API static std::string getLocale (void);
+
+ static bool isValidIndex(const string_type& string, size_type i)
+ {
+ return !string.empty() && (0 <= i) && (i <= string.size());
+ }
+
+ static bool contains(const string_type& string, T c, size_type i=0)
+ {
+ return string.find(c, i) != string_type::npos;
+ }
+
+ static void trimHead(string_type& string);
+ static void trimTail(string_type& string);
+ static void trim(string_type& string) { trimHead(string); trimTail(string); }
+ static void truncate(string_type& string, size_type count);
+
+ static void toUpper(string_type& string);
+ static void toLower(string_type& string);
+
+ // True if this is the head of s.
+ static bool isHead( const string_type& string, const T* s );
+
+ /**
+ * @brief Returns true if string starts with substr
+ *
+ * If etither string or substr are empty, this method returns false.
+ */
+ static bool startsWith(
+ const string_type& string,
+ const string_type& substr);
+
+ /**
+ * @brief Returns true if string ends in substr
+ *
+ * If etither string or substr are empty, this method returns false.
+ */
+ static bool endsWith(
+ const string_type& string,
+ const string_type& substr);
+
+ /**
+ * get environment string value with proper Unicode handling
+ * (key is always UTF-8)
+ * detect absence by return value == dflt
+ */
+ static string_type getenv(const std::string& key, const string_type& dflt="");
+ /**
+ * get optional environment string value with proper Unicode handling
+ * (key is always UTF-8)
+ * detect absence by (! return value)
+ */
+ static std::optional<string_type> getoptenv(const std::string& key);
+
+ static void addCRLF(string_type& string);
+ static void removeCRLF(string_type& string);
+ static void removeWindowsCR(string_type& string);
+
+ static void replaceTabsWithSpaces( string_type& string, size_type spaces_per_tab );
+ static void replaceNonstandardASCII( string_type& string, T replacement );
+ static void replaceChar( string_type& string, T target, T replacement );
+ static void replaceString( string_type& string, string_type target, string_type replacement );
+ static string_type capitalize(const string_type& str);
+ static void capitalize(string_type& str);
+
+ static bool containsNonprintable(const string_type& string);
+ static void stripNonprintable(string_type& string);
+
+ /**
+ * Double-quote an argument string if needed, unless it's already
+ * double-quoted. Decide whether it's needed based on the presence of any
+ * character in @a triggers (default space or double-quote). If we quote
+ * it, escape any embedded double-quote with the @a escape string (default
+ * backslash).
+ *
+ * Passing triggers="" means always quote, unless it's already double-quoted.
+ */
+ static string_type quote(const string_type& str,
+ const string_type& triggers=" \"",
+ const string_type& escape="\\");
+
+ /**
+ * @brief Unsafe way to make ascii characters. You should probably
+ * only call this when interacting with the host operating system.
+ * The 1 byte std::string does not work correctly.
+ * The 2 and 4 byte std::string probably work, so LLWStringUtil::_makeASCII
+ * should work.
+ */
+ static void _makeASCII(string_type& string);
+
+ // Conversion to other data types
+ static bool convertToBOOL(const string_type& string, bool& value);
+ static bool convertToU8(const string_type& string, U8& value);
+ static bool convertToS8(const string_type& string, S8& value);
+ static bool convertToS16(const string_type& string, S16& value);
+ static bool convertToU16(const string_type& string, U16& value);
+ static bool convertToU32(const string_type& string, U32& value);
+ static bool convertToS32(const string_type& string, S32& value);
+ static bool convertToF32(const string_type& string, F32& value);
+ static bool convertToF64(const string_type& string, F64& value);
+
+ /////////////////////////////////////////////////////////////////////////////////////////
+ // Utility functions for working with char*'s and strings
+
+ // Like strcmp but also handles empty strings. Uses
+ // current locale.
+ static S32 compareStrings(const T* lhs, const T* rhs);
+ static S32 compareStrings(const string_type& lhs, const string_type& rhs);
+
+ // case insensitive version of above. Uses current locale on
+ // Win32, and falls back to a non-locale aware comparison on
+ // Linux.
+ static S32 compareInsensitive(const T* lhs, const T* rhs);
+ static S32 compareInsensitive(const string_type& lhs, const string_type& rhs);
+
+ // Case sensitive comparison with good handling of numbers. Does not use current locale.
+ // a.k.a. strdictcmp()
+ static S32 compareDict(const string_type& a, const string_type& b);
+
+ // Case *in*sensitive comparison with good handling of numbers. Does not use current locale.
+ // a.k.a. strdictcmp()
+ static S32 compareDictInsensitive(const string_type& a, const string_type& b);
+
+ // Puts compareDict() in a form appropriate for LL container classes to use for sorting.
+ static bool precedesDict( const string_type& a, const string_type& b );
+
+ // A replacement for strncpy.
+ // If the dst buffer is dst_size bytes long or more, ensures that dst is null terminated and holds
+ // up to dst_size-1 characters of src.
+ static void copy(T* dst, const T* src, size_type dst_size);
+
+ // Copies src into dst at a given offset.
+ static void copyInto(string_type& dst, const string_type& src, size_type offset);
+
+ static bool isPartOfWord(T c) { return (c == (T)'_') || LLStringOps::isAlnum(c); }
+
+
+#ifdef _DEBUG
+ LL_COMMON_API static void testHarness();
+#endif
+
+private:
+ LL_COMMON_API static size_type getSubstitution(const string_type& instr, size_type& start, std::vector<string_type >& tokens);
+};
+
+template<class T> const std::basic_string<T> LLStringUtilBase<T>::null;
+template<class T> std::string LLStringUtilBase<T>::sLocale;
+
+typedef LLStringUtilBase<char> LLStringUtil;
+typedef LLStringUtilBase<llwchar> LLWStringUtil;
+typedef std::basic_string<llwchar> LLWString;
+
+//@ Use this where we want to disallow input in the form of "foo"
+// This is used to catch places where english text is embedded in the code
+// instead of in a translatable XUI file.
+class LLStringExplicit : public std::string
+{
+public:
+ explicit LLStringExplicit(const char* s) : std::string(s) {}
+ LLStringExplicit(const std::string& s) : std::string(s) {}
+ LLStringExplicit(const std::string& s, size_type pos, size_type n = std::string::npos) : std::string(s, pos, n) {}
+};
+
+struct LLDictionaryLess
+{
+public:
+ bool operator()(const std::string& a, const std::string& b) const
+ {
+ return (LLStringUtil::precedesDict(a, b));
+ }
+};
+
+
+/**
+ * Simple support functions
+ */
+
+/**
+ * @brief chop off the trailing characters in a string.
+ *
+ * This function works on bytes rather than glyphs, so this will
+ * incorrectly truncate non-single byte strings.
+ * Use utf8str_truncate() for utf8 strings
+ * @return a copy of in string minus the trailing count bytes.
+ */
+inline std::string chop_tail_copy(
+ const std::string& in,
+ std::string::size_type count)
+{
+ return std::string(in, 0, in.length() - count);
+}
+
+/**
+ * @brief This translates a nybble stored as a hex value from 0-f back
+ * to a nybble in the low order bits of the return byte.
+ */
+LL_COMMON_API bool is_char_hex(char hex);
+LL_COMMON_API U8 hex_as_nybble(char hex);
+
+/**
+ * @brief read the contents of a file into a string.
+ *
+ * Since this function has no concept of character encoding, most
+ * anything you do with this method ill-advised. Please avoid.
+ * @param str [out] The string which will have.
+ * @param filename The full name of the file to read.
+ * @return Returns true on success. If false, str is unmodified.
+ */
+LL_COMMON_API bool _read_file_into_string(std::string& str, const std::string& filename);
+LL_COMMON_API bool iswindividual(llwchar elem);
+
+/**
+ * Unicode support
+ */
+
+/// generic conversion aliases
+template<typename TO, typename FROM, typename Enable=void>
+struct ll_convert_impl
+{
+ // Don't even provide a generic implementation. We specialize for every
+ // combination we do support.
+ TO operator()(const FROM& in) const;
+};
+
+// Use a function template to get the nice ll_convert<TO>(from_value) API.
+template<typename TO, typename FROM>
+TO ll_convert(const FROM& in)
+{
+ return ll_convert_impl<TO, FROM>()(in);
+}
+
+// degenerate case
+template<typename T>
+struct ll_convert_impl<T, T>
+{
+ T operator()(const T& in) const { return in; }
+};
+
+// simple construction from char*
+template<typename T>
+struct ll_convert_impl<T, const typename T::value_type*>
+{
+ T operator()(const typename T::value_type* in) const { return { in }; }
+};
+
+// specialize ll_convert_impl<TO, FROM> to return EXPR
+#define ll_convert_alias(TO, FROM, EXPR) \
+template<> \
+struct ll_convert_impl<TO, FROM> \
+{ \
+ /* param_type optimally passes both char* and string */ \
+ TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \
+}
+
+// If all we're doing is copying characters, pass this to ll_convert_alias as
+// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl
+// specialization above, it implies TO{ in.begin(), in.end() }.
+#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() }
+
+// Generic name for strlen() / wcslen() - the default implementation should
+// (!) work with U16 and llwchar, but we don't intend to engage it.
+template <typename CHARTYPE>
+size_t ll_convert_length(const CHARTYPE* zstr)
+{
+ const CHARTYPE* zp;
+ // classic C string scan
+ for (zp = zstr; *zp; ++zp)
+ ;
+ return (zp - zstr);
+}
+
+// specialize where we have a library function; may use intrinsic operations
+template <>
+inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); }
+template <>
+inline size_t ll_convert_length<char> (const char* zstr) { return std::strlen(zstr); }
+
+// ll_convert_forms() is short for a bunch of boilerplate. It defines
+// longname(const char*, len), longname(const char*), longname(const string&)
+// and longname(const string&, len) so calls written pre-ll_convert() will
+// work. Most of these overloads will be unified once we turn on C++17 and can
+// use std::string_view.
+// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*)
+// and ll_convert<OUTSTR>(const string&) will work.
+#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \
+LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \
+inline auto longname(const INSTR& in, size_t len) \
+{ \
+ return longname(in.c_str(), len); \
+} \
+inline auto longname(const INSTR::value_type* in) \
+{ \
+ return longname(in, ll_convert_length(in)); \
+} \
+inline auto longname(const INSTR& in) \
+{ \
+ return longname(in.c_str(), in.length()); \
+} \
+/* string param */ \
+aliasmacro(OUTSTR, INSTR, longname(in)); \
+/* char* param */ \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))
+
+// Make the incoming string a utf8 string. Replaces any unknown glyph
+// with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
+// of the data may not be recovered.
+LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
+
+//
+// We should never use UTF16 except when communicating with Win32!
+// https://docs.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t
+// nat 2018-12-14: I consider the whole llutf16string thing a mistake, because
+// the Windows APIs we want to call are all defined in terms of wchar_t*
+// (or worse, LPCTSTR).
+// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
+
+// While there is no point coding for an ASCII-only world (! defined(UNICODE)),
+// use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going
+// forward, we should code in terms of wchar_t and std::wstring so as to
+// support either setting of /Zc:wchar_t.
+
+// The first link above states that char can be used to hold ASCII or any
+// multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t
+// (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base:
+// * char and std::string always hold UTF-8 (of which ASCII is a subset). It
+// is a BUG if they are used to pass strings in any other multi-byte
+// encoding.
+// * wchar_t and std::wstring should be our interface to Windows wide-string
+// APIs, and therefore hold UTF-16LE.
+// * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do
+// not introduce new uses of U16 or llutf16string for string data.
+// * llwchar and LLWString hold UTF-32 strings.
+// * Do not introduce char16_t or std::u16string.
+// * Do not introduce char32_t or std::u32string.
+//
+// This typedef may or may not be identical to std::wstring, depending on
+// LL_WCHAR_T_NATIVE.
+typedef std::basic_string<U16> llutf16string;
+
+// Considering wchar_t, llwchar and U16, there are three relevant cases:
+#if LLWCHAR_IS_WCHAR_T // every which way but Windows
+// llwchar is identical to wchar_t, LLWString is identical to std::wstring.
+// U16 is distinct, llutf16string is distinct (though pretty useless).
+// Given conversions to/from LLWString and to/from llutf16string, conversions
+// involving std::wstring would collide.
+#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing
+// but we can define conversions involving llutf16string without collisions
+#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+
+#elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t
+// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types.
+// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string
+// can all be defined.
+#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+
+#else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t-
+// wchar_t is identical to U16, std::wstring is identical to llutf16string.
+// Given conversions to/from LLWString and to/from std::wstring, conversions
+// involving llutf16string would collide.
+#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
+// but we can define conversions involving std::wstring without collisions
+#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+#endif
+
+ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str);
+ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring);
+
+// Same function, better name. JC
+inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
+
+LL_COMMON_API std::ptrdiff_t wchar_to_utf8chars(llwchar inchar, char* outchars);
+
+ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str);
+ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str);
+
+// an older alias for utf16str_to_utf8str(llutf16string)
+inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
+
+// Length of this UTF32 string in bytes when transformed to UTF8
+LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr);
+
+// Length in bytes of this wide char in a UTF8 string
+LL_COMMON_API S32 wchar_utf8_length(const llwchar wc);
+
+LL_COMMON_API std::string wchar_utf8_preview(const llwchar wc);
+
+LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str);
+
+// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
+LL_COMMON_API S32 utf16str_wstring_length(const llutf16string &utf16str, S32 len);
+
+// Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
+LL_COMMON_API S32 wstring_utf16_length(const LLWString & wstr, S32 woffset, S32 wlen);
+
+// Length in wstring (i.e., llwchar count) of a part of a wstring specified by utf16 length (i.e., utf16 units.)
+LL_COMMON_API S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, S32 woffset, S32 utf16_length, bool *unaligned = nullptr);
+
+/**
+ * @brief Properly truncate a utf8 string to a maximum byte count.
+ *
+ * The returned string may be less than max_len if the truncation
+ * happens in the middle of a glyph. If max_len is longer than the
+ * string passed in, the return value == utf8str.
+ * @param utf8str A valid utf8 string to truncate.
+ * @param max_len The maximum number of bytes in the return value.
+ * @return Returns a valid utf8 string with byte count <= max_len.
+ */
+LL_COMMON_API std::string utf8str_truncate(const std::string& utf8str, const S32 max_len);
+
+LL_COMMON_API std::string utf8str_trim(const std::string& utf8str);
+
+LL_COMMON_API S32 utf8str_compare_insensitive(
+ const std::string& lhs,
+ const std::string& rhs);
+
+/**
+* @brief Properly truncate a utf8 string to a maximum character count.
+*
+* If symbol_len is longer than the string passed in, the return
+* value == utf8str.
+* @param utf8str A valid utf8 string to truncate.
+* @param symbol_len The maximum number of symbols in the return value.
+* @return Returns a valid utf8 string with symbol count <= max_len.
+*/
+LL_COMMON_API std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len);
+
+/**
+ * @brief Replace all occurences of target_char with replace_char
+ *
+ * @param utf8str A utf8 string to process.
+ * @param target_char The wchar to be replaced
+ * @param replace_char The wchar which is written on replace
+ */
+LL_COMMON_API std::string utf8str_substChar(
+ const std::string& utf8str,
+ const llwchar target_char,
+ const llwchar replace_char);
+
+LL_COMMON_API std::string utf8str_makeASCII(const std::string& utf8str);
+
+// Hack - used for evil notecards.
+LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str);
+
+LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
+
+LL_COMMON_API llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length);
+
+LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str);
+
+LL_COMMON_API bool wstring_has_emoji(const LLWString& wstr);
+
+LL_COMMON_API bool wstring_remove_emojis(LLWString& wstr);
+
+LL_COMMON_API bool utf8str_remove_emojis(std::string& utf8str);
+
+#if LL_WINDOWS
+/* @name Windows string helpers
+ */
+//@{
+
+/**
+ * @brief Convert a wide string to/from std::string
+ * Convert a Windows wide string to/from our LLWString
+ *
+ * This replaces the unsafe W2A macro from ATL.
+ */
+// Avoid requiring this header to #include the Windows header file declaring
+// our actual default code_page by delegating this function to our .cpp file.
+LL_COMMON_API unsigned int ll_wstring_default_code_page();
+
+// This is like ll_convert_forms(), with the added complexity of a code page
+// parameter that may or may not be passed.
+#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \
+/* declare the only nontrivial implementation (in .cpp file) */ \
+LL_COMMON_API OUTSTR longname( \
+ const INSTR::value_type* in, \
+ size_t len, \
+ unsigned int code_page=ll_wstring_default_code_page()); \
+/* if passed only a char pointer, scan for nul terminator */ \
+inline auto longname(const INSTR::value_type* in) \
+{ \
+ return longname(in, ll_convert_length(in)); \
+} \
+/* if passed string and length, extract its char pointer */ \
+inline auto longname( \
+ const INSTR& in, \
+ size_t len, \
+ unsigned int code_page=ll_wstring_default_code_page()) \
+{ \
+ return longname(in.c_str(), len, code_page); \
+} \
+/* if passed only a string object, no scan, pass known length */ \
+inline auto longname(const INSTR& in) \
+{ \
+ return longname(in.c_str(), in.length()); \
+} \
+aliasmacro(OUTSTR, INSTR, longname(in)); \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))
+
+ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string);
+ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide);
+ ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring);
+ ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide);
+
+/**
+ * Converts incoming string into utf8 string
+ *
+ */
+LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);
+
+/// Get Windows message string for passed GetLastError() code
+// VS 2013 doesn't let us forward-declare this template, which is what we
+// started with, so the implementation could reference the specialization we
+// haven't yet declared. Somewhat weirdly, just stating the generic
+// implementation in terms of the specialization works, even in this order...
+
+// the general case is just a conversion from the sole implementation
+// Microsoft says DWORD is a typedef for unsigned long
+// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
+// so rather than drag windows.h into everybody's include space...
+template<typename STRING>
+STRING windows_message(unsigned long error)
+{
+ return ll_convert<STRING>(windows_message<std::wstring>(error));
+}
+
+/// There's only one real implementation
+template<>
+LL_COMMON_API std::wstring windows_message<std::wstring>(unsigned long error);
+
+/// Get Windows message string, implicitly calling GetLastError()
+template<typename STRING>
+STRING windows_message() { return windows_message<STRING>(GetLastError()); }
+
+//@}
+
+LL_COMMON_API std::optional<std::wstring> llstring_getoptenv(const std::string& key);
+
+#else // ! LL_WINDOWS
+
+LL_COMMON_API std::optional<std::string> llstring_getoptenv(const std::string& key);
+
+#endif // ! LL_WINDOWS
+
+/**
+ * Many of the 'strip' and 'replace' methods of LLStringUtilBase need
+ * specialization to work with the signed char type.
+ * Sadly, it is not possible (AFAIK) to specialize a single method of
+ * a template class.
+ * That stuff should go here.
+ */
+namespace LLStringFn
+{
+ /**
+ * @brief Replace all non-printable characters with replacement in
+ * string.
+ * NOTE - this will zap non-ascii
+ *
+ * @param [in,out] string the to modify. out value is the string
+ * with zero non-printable characters.
+ * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
+ */
+ LL_COMMON_API void replace_nonprintable_in_ascii(
+ std::basic_string<char>& string,
+ char replacement);
+
+
+ /**
+ * @brief Replace all non-printable characters and pipe characters
+ * with replacement in a string.
+ * NOTE - this will zap non-ascii
+ *
+ * @param [in,out] the string to modify. out value is the string
+ * with zero non-printable characters and zero pipe characters.
+ * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
+ */
+ LL_COMMON_API void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
+ char replacement);
+
+
+ /**
+ * @brief Remove all characters that are not allowed in XML 1.0.
+ * Returns a copy of the string with those characters removed.
+ * Works with US ASCII and UTF-8 encoded strings. JC
+ */
+ LL_COMMON_API std::string strip_invalid_xml(const std::string& input);
+
+
+ /**
+ * @brief Replace all control characters (0 <= c < 0x20) with replacement in
+ * string. This is safe for utf-8
+ *
+ * @param [in,out] string the to modify. out value is the string
+ * with zero non-printable characters.
+ * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
+ */
+ LL_COMMON_API void replace_ascii_controlchars(
+ std::basic_string<char>& string,
+ char replacement);
+}
+
+////////////////////////////////////////////////////////////
+// NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.
+// There is no LLWStringUtil::format implementation currently.
+// Calling these for anything other than LLStringUtil will produce link errors.
+
+////////////////////////////////////////////////////////////
+
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr, const string_type& delims)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, delims);
+ return tokens;
+}
+
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, drop_delims, keep_delims, quotes);
+ return tokens;
+}
+
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, drop_delims, keep_delims, quotes, escapes);
+ return tokens;
+}
+
+namespace LLStringUtilBaseImpl
+{
+
+/**
+ * Input string scanner helper for getTokens(), or really any other
+ * character-parsing routine that may have to deal with escape characters.
+ * This implementation defines the concept (also an interface, should you
+ * choose to implement the concept by subclassing) and provides trivial
+ * implementations for a string @em without escape processing.
+ */
+template <class T>
+struct InString
+{
+ typedef std::basic_string<T> string_type;
+ typedef typename string_type::const_iterator const_iterator;
+
+ InString(const_iterator b, const_iterator e):
+ mIter(b),
+ mEnd(e)
+ {}
+ virtual ~InString() {}
+
+ bool done() const { return mIter == mEnd; }
+ /// Is the current character (*mIter) escaped? This implementation can
+ /// answer trivially because it doesn't support escapes.
+ virtual bool escaped() const { return false; }
+ /// Obtain the current character and advance @c mIter.
+ virtual T next() { return *mIter++; }
+ /// Does the current character match specified character?
+ virtual bool is(T ch) const { return (! done()) && *mIter == ch; }
+ /// Is the current character any one of the specified characters?
+ virtual bool oneof(const string_type& delims) const
+ {
+ return (! done()) && LLStringUtilBase<T>::contains(delims, *mIter);
+ }
+
+ /**
+ * Scan forward from @from until either @a delim or end. This is primarily
+ * useful for processing quoted substrings.
+ *
+ * If we do see @a delim, append everything from @from until (excluding)
+ * @a delim to @a into, advance @c mIter to skip @a delim, and return @c
+ * true.
+ *
+ * If we do not see @a delim, do not alter @a into or @c mIter and return
+ * @c false. Do not pass GO, do not collect $200.
+ *
+ * @note The @c false case described above implements normal getTokens()
+ * treatment of an unmatched open quote: treat the quote character as if
+ * escaped, that is, simply collect it as part of the current token. Other
+ * plausible behaviors directly affect the way getTokens() deals with an
+ * unmatched quote: e.g. throwing an exception to treat it as an error, or
+ * assuming a close quote beyond end of string (in which case return @c
+ * true).
+ */
+ virtual bool collect_until(string_type& into, const_iterator from, T delim)
+ {
+ const_iterator found = std::find(from, mEnd, delim);
+ // If we didn't find delim, change nothing, just tell caller.
+ if (found == mEnd)
+ return false;
+ // Found delim! Append everything between from and found.
+ into.append(from, found);
+ // advance past delim in input
+ mIter = found + 1;
+ return true;
+ }
+
+ const_iterator mIter, mEnd;
+};
+
+/// InString subclass that handles escape characters
+template <class T>
+class InEscString: public InString<T>
+{
+public:
+ typedef InString<T> super;
+ typedef typename super::string_type string_type;
+ typedef typename super::const_iterator const_iterator;
+ using super::done;
+ using super::mIter;
+ using super::mEnd;
+
+ InEscString(const_iterator b, const_iterator e, const string_type& escapes):
+ super(b, e),
+ mEscapes(escapes)
+ {
+ // Even though we've already initialized 'mIter' via our base-class
+ // constructor, set it again to check for initial escape char.
+ setiter(b);
+ }
+
+ /// This implementation uses the answer cached by setiter().
+ virtual bool escaped() const { return mIsEsc; }
+ virtual T next()
+ {
+ // If we're looking at the escape character of an escape sequence,
+ // skip that character. This is the one time we can modify 'mIter'
+ // without using setiter: for this one case we DO NOT CARE if the
+ // escaped character is itself an escape.
+ if (mIsEsc)
+ ++mIter;
+ // If we were looking at an escape character, this is the escaped
+ // character; otherwise it's just the next character.
+ T result(*mIter);
+ // Advance mIter, checking for escape sequence.
+ setiter(mIter + 1);
+ return result;
+ }
+
+ virtual bool is(T ch) const
+ {
+ // Like base-class is(), except that an escaped character matches
+ // nothing.
+ return (! done()) && (! mIsEsc) && *mIter == ch;
+ }
+
+ virtual bool oneof(const string_type& delims) const
+ {
+ // Like base-class oneof(), except that an escaped character matches
+ // nothing.
+ return (! done()) && (! mIsEsc) && LLStringUtilBase<T>::contains(delims, *mIter);
+ }
+
+ virtual bool collect_until(string_type& into, const_iterator from, T delim)
+ {
+ // Deal with escapes in the characters we collect; that is, an escaped
+ // character must become just that character without the preceding
+ // escape. Collect characters in a separate string rather than
+ // directly appending to 'into' in case we do not find delim, in which
+ // case we're supposed to leave 'into' unmodified.
+ string_type collected;
+ // For scanning purposes, we're going to work directly with 'mIter'.
+ // Save its current value in case we fail to see delim.
+ const_iterator save_iter(mIter);
+ // Okay, set 'mIter', checking for escape.
+ setiter(from);
+ while (! done())
+ {
+ // If we see an unescaped delim, stop and report success.
+ if ((! mIsEsc) && *mIter == delim)
+ {
+ // Append collected chars to 'into'.
+ into.append(collected);
+ // Don't forget to advance 'mIter' past delim.
+ setiter(mIter + 1);
+ return true;
+ }
+ // We're not at end, and either we're not looking at delim or it's
+ // escaped. Collect this character and keep going.
+ collected.push_back(next());
+ }
+ // Here we hit 'mEnd' without ever seeing delim. Restore mIter and tell
+ // caller.
+ setiter(save_iter);
+ return false;
+ }
+
+private:
+ void setiter(const_iterator i)
+ {
+ mIter = i;
+
+ // Every time we change 'mIter', set 'mIsEsc' to be able to repetitively
+ // answer escaped() without having to rescan 'mEscapes'. mIsEsc caches
+ // contains(mEscapes, *mIter).
+
+ // We're looking at an escaped char if we're not already at end (that
+ // is, *mIter is even meaningful); if *mIter is in fact one of the
+ // specified escape characters; and if there's one more character
+ // following it. That is, if an escape character is the very last
+ // character of the input string, it loses its special meaning.
+ mIsEsc = (! done()) &&
+ LLStringUtilBase<T>::contains(mEscapes, *mIter) &&
+ (mIter+1) != mEnd;
+ }
+
+ const string_type mEscapes;
+ bool mIsEsc;
+};
+
+/// getTokens() implementation based on InString concept
+template <typename INSTRING, typename string_type>
+void getTokens(INSTRING& instr, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes)
+{
+ // There are times when we want to match either drop_delims or
+ // keep_delims. Concatenate them up front to speed things up.
+ string_type all_delims(drop_delims + keep_delims);
+ // no tokens yet
+ tokens.clear();
+
+ // try for another token
+ while (! instr.done())
+ {
+ // scan past any drop_delims
+ while (instr.oneof(drop_delims))
+ {
+ // skip this drop_delim
+ instr.next();
+ // but if that was the end of the string, done
+ if (instr.done())
+ return;
+ }
+ // found the start of another token: make a slot for it
+ tokens.push_back(string_type());
+ if (instr.oneof(keep_delims))
+ {
+ // *iter is a keep_delim, a token of exactly 1 character. Append
+ // that character to the new token and proceed.
+ tokens.back().push_back(instr.next());
+ continue;
+ }
+ // Here we have a non-delimiter token, which might consist of a mix of
+ // quoted and unquoted parts. Use bash rules for quoting: you can
+ // embed a quoted substring in the midst of an unquoted token (e.g.
+ // ~/"sub dir"/myfile.txt); you can ram two quoted substrings together
+ // to make a single token (e.g. 'He said, "'"Don't."'"'). We diverge
+ // from bash in that bash considers an unmatched quote an error. Our
+ // param signature doesn't allow for errors, so just pretend it's not
+ // a quote and embed it.
+ // At this level, keep scanning until we hit the next delimiter of
+ // either type (drop_delims or keep_delims).
+ while (! instr.oneof(all_delims))
+ {
+ // If we're looking at an open quote, search forward for
+ // a close quote, collecting characters along the way.
+ if (instr.oneof(quotes) &&
+ instr.collect_until(tokens.back(), instr.mIter+1, *instr.mIter))
+ {
+ // collect_until is cleverly designed to do exactly what we
+ // need here. No further action needed if it returns true.
+ }
+ else
+ {
+ // Either *iter isn't a quote, or there's no matching close
+ // quote: in other words, just an ordinary char. Append it to
+ // current token.
+ tokens.back().push_back(instr.next());
+ }
+ // having scanned that segment of this token, if we've reached the
+ // end of the string, we're done
+ if (instr.done())
+ return;
+ }
+ }
+}
+
+} // namespace LLStringUtilBaseImpl
+
+// static
+template <class T>
+void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes)
+{
+ // Because this overload doesn't support escapes, use simple InString to
+ // manage input range.
+ LLStringUtilBaseImpl::InString<T> instring(string.begin(), string.end());
+ LLStringUtilBaseImpl::getTokens(instring, tokens, drop_delims, keep_delims, quotes);
+}
+
+// static
+template <class T>
+void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes, const string_type& escapes)
+{
+ // This overload must deal with escapes. Delegate that to InEscString
+ // (unless there ARE no escapes).
+ std::unique_ptr< LLStringUtilBaseImpl::InString<T> > instrp;
+ if (escapes.empty())
+ instrp.reset(new LLStringUtilBaseImpl::InString<T>(string.begin(), string.end()));
+ else
+ instrp.reset(new LLStringUtilBaseImpl::InEscString<T>(string.begin(), string.end(), escapes));
+ LLStringUtilBaseImpl::getTokens(*instrp, tokens, drop_delims, keep_delims, quotes);
+}
+
+// static
+template<class T>
+S32 LLStringUtilBase<T>::compareStrings(const T* lhs, const T* rhs)
+{
+ S32 result;
+ if( lhs == rhs )
+ {
+ result = 0;
+ }
+ else
+ if ( !lhs || !lhs[0] )
+ {
+ result = ((!rhs || !rhs[0]) ? 0 : 1);
+ }
+ else
+ if ( !rhs || !rhs[0])
+ {
+ result = -1;
+ }
+ else
+ {
+ result = LLStringOps::collate(lhs, rhs);
+ }
+ return result;
+}
+
+//static
+template<class T>
+S32 LLStringUtilBase<T>::compareStrings(const string_type& lhs, const string_type& rhs)
+{
+ return LLStringOps::collate(lhs.c_str(), rhs.c_str());
+}
+
+// static
+template<class T>
+S32 LLStringUtilBase<T>::compareInsensitive(const T* lhs, const T* rhs )
+{
+ S32 result;
+ if( lhs == rhs )
+ {
+ result = 0;
+ }
+ else
+ if ( !lhs || !lhs[0] )
+ {
+ result = ((!rhs || !rhs[0]) ? 0 : 1);
+ }
+ else
+ if ( !rhs || !rhs[0] )
+ {
+ result = -1;
+ }
+ else
+ {
+ string_type lhs_string(lhs);
+ string_type rhs_string(rhs);
+ LLStringUtilBase<T>::toUpper(lhs_string);
+ LLStringUtilBase<T>::toUpper(rhs_string);
+ result = LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
+ }
+ return result;
+}
+
+//static
+template<class T>
+S32 LLStringUtilBase<T>::compareInsensitive(const string_type& lhs, const string_type& rhs)
+{
+ string_type lhs_string(lhs);
+ string_type rhs_string(rhs);
+ LLStringUtilBase<T>::toUpper(lhs_string);
+ LLStringUtilBase<T>::toUpper(rhs_string);
+ return LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
+}
+
+// Case sensitive comparison with good handling of numbers. Does not use current locale.
+// a.k.a. strdictcmp()
+
+//static
+template<class T>
+S32 LLStringUtilBase<T>::compareDict(const string_type& astr, const string_type& bstr)
+{
+ const T* a = astr.c_str();
+ const T* b = bstr.c_str();
+ T ca, cb;
+ S32 ai, bi, cnt = 0;
+ S32 bias = 0;
+
+ ca = *(a++);
+ cb = *(b++);
+ while( ca && cb ){
+ if( bias==0 ){
+ if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); bias--; }
+ if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); bias++; }
+ }else{
+ if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); }
+ if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); }
+ }
+ if( LLStringOps::isDigit(ca) ){
+ if( cnt-->0 ){
+ if( cb!=ca ) break;
+ }else{
+ if( !LLStringOps::isDigit(cb) ) break;
+ for(ai=0; LLStringOps::isDigit(a[ai]); ai++);
+ for(bi=0; LLStringOps::isDigit(b[bi]); bi++);
+ if( ai<bi ){ ca=0; break; }
+ if( bi<ai ){ cb=0; break; }
+ if( ca!=cb ) break;
+ cnt = ai;
+ }
+ }else if( ca!=cb ){ break;
+ }
+ ca = *(a++);
+ cb = *(b++);
+ }
+ if( ca==cb ) ca += bias;
+ return ca-cb;
+}
+
+// static
+template<class T>
+S32 LLStringUtilBase<T>::compareDictInsensitive(const string_type& astr, const string_type& bstr)
+{
+ const T* a = astr.c_str();
+ const T* b = bstr.c_str();
+ T ca, cb;
+ S32 ai, bi, cnt = 0;
+
+ ca = *(a++);
+ cb = *(b++);
+ while( ca && cb ){
+ if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); }
+ if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); }
+ if( LLStringOps::isDigit(ca) ){
+ if( cnt-->0 ){
+ if( cb!=ca ) break;
+ }else{
+ if( !LLStringOps::isDigit(cb) ) break;
+ for(ai=0; LLStringOps::isDigit(a[ai]); ai++);
+ for(bi=0; LLStringOps::isDigit(b[bi]); bi++);
+ if( ai<bi ){ ca=0; break; }
+ if( bi<ai ){ cb=0; break; }
+ if( ca!=cb ) break;
+ cnt = ai;
+ }
+ }else if( ca!=cb ){ break;
+ }
+ ca = *(a++);
+ cb = *(b++);
+ }
+ return ca-cb;
+}
+
+// Puts compareDict() in a form appropriate for LL container classes to use for sorting.
+// static
+template<class T>
+bool LLStringUtilBase<T>::precedesDict( const string_type& a, const string_type& b )
+{
+ if( a.size() && b.size() )
+ {
+ return (LLStringUtilBase<T>::compareDict(a.c_str(), b.c_str()) < 0);
+ }
+ else
+ {
+ return (!b.empty());
+ }
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::toUpper(string_type& string)
+{
+ if( !string.empty() )
+ {
+ std::transform(
+ string.begin(),
+ string.end(),
+ string.begin(),
+ (T(*)(T)) &LLStringOps::toUpper);
+ }
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::toLower(string_type& string)
+{
+ if( !string.empty() )
+ {
+ std::transform(
+ string.begin(),
+ string.end(),
+ string.begin(),
+ (T(*)(T)) &LLStringOps::toLower);
+ }
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::trimHead(string_type& string)
+{
+ if( !string.empty() )
+ {
+ size_type i = 0;
+ while( i < string.length() && LLStringOps::isSpace( string[i] ) )
+ {
+ i++;
+ }
+ string.erase(0, i);
+ }
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::trimTail(string_type& string)
+{
+ if( string.size() )
+ {
+ size_type len = string.length();
+ size_type i = len;
+ while( i > 0 && LLStringOps::isSpace( string[i-1] ) )
+ {
+ i--;
+ }
+
+ string.erase( i, len - i );
+ }
+}
+
+
+// Replace line feeds with carriage return-line feed pairs.
+//static
+template<class T>
+void LLStringUtilBase<T>::addCRLF(string_type& string)
+{
+ const T LF = 10;
+ const T CR = 13;
+
+ // Count the number of line feeds
+ size_type count = 0;
+ size_type len = string.size();
+ size_type i;
+ for( i = 0; i < len; i++ )
+ {
+ if( string[i] == LF )
+ {
+ count++;
+ }
+ }
+
+ // Insert a carriage return before each line feed
+ if( count )
+ {
+ size_type size = len + count;
+ T *t = new T[size];
+ size_type j = 0;
+ for( i = 0; i < len; ++i )
+ {
+ if( string[i] == LF )
+ {
+ t[j] = CR;
+ ++j;
+ }
+ t[j] = string[i];
+ ++j;
+ }
+
+ string.assign(t, size);
+ delete[] t;
+ }
+}
+
+// Remove all carriage returns
+//static
+template<class T>
+void LLStringUtilBase<T>::removeCRLF(string_type& string)
+{
+ const T CR = 13;
+
+ size_type cr_count = 0;
+ size_type len = string.size();
+ size_type i;
+ for( i = 0; i < len - cr_count; i++ )
+ {
+ if( string[i+cr_count] == CR )
+ {
+ cr_count++;
+ }
+
+ string[i] = string[i+cr_count];
+ }
+ string.erase(i, cr_count);
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::removeWindowsCR(string_type& string)
+{
+ if (string.empty())
+ {
+ return;
+ }
+ const T LF = 10;
+ const T CR = 13;
+
+ size_type cr_count = 0;
+ size_type len = string.size();
+ size_type i;
+ for( i = 0; i < len - cr_count - 1; i++ )
+ {
+ if( string[i+cr_count] == CR && string[i+cr_count+1] == LF)
+ {
+ cr_count++;
+ }
+
+ string[i] = string[i+cr_count];
+ }
+ string.erase(i, cr_count);
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::replaceChar( string_type& string, T target, T replacement )
+{
+ size_type found_pos = 0;
+ while( (found_pos = string.find(target, found_pos)) != string_type::npos )
+ {
+ string[found_pos] = replacement;
+ found_pos++; // avoid infinite defeat if target == replacement
+ }
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::replaceString( string_type& string, string_type target, string_type replacement )
+{
+ size_type found_pos = 0;
+ while( (found_pos = string.find(target, found_pos)) != string_type::npos )
+ {
+ string.replace( found_pos, target.length(), replacement );
+ found_pos += replacement.length(); // avoid infinite defeat if replacement contains target
+ }
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::replaceNonstandardASCII( string_type& string, T replacement )
+{
+ const char LF = 10;
+ const S8 MIN = 32;
+// const S8 MAX = 127;
+
+ size_type len = string.size();
+ for( size_type i = 0; i < len; i++ )
+ {
+ // No need to test MAX < mText[i] because we treat mText[i] as a signed char,
+ // which has a max value of 127.
+ if( ( S8(string[i]) < MIN ) && (string[i] != LF) )
+ {
+ string[i] = replacement;
+ }
+ }
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::replaceTabsWithSpaces( string_type& str, size_type spaces_per_tab )
+{
+ const T TAB = '\t';
+ const T SPACE = ' ';
+
+ string_type out_str;
+ // Replace tabs with spaces
+ for (size_type i = 0; i < str.length(); i++)
+ {
+ if (str[i] == TAB)
+ {
+ for (size_type j = 0; j < spaces_per_tab; j++)
+ out_str += SPACE;
+ }
+ else
+ {
+ out_str += str[i];
+ }
+ }
+ str = out_str;
+}
+
+//static
+template<class T>
+std::basic_string<T> LLStringUtilBase<T>::capitalize(const string_type& str)
+{
+ string_type result(str);
+ capitalize(result);
+ return result;
+}
+
+//static
+template<class T>
+void LLStringUtilBase<T>::capitalize(string_type& str)
+{
+ if (str.size())
+ {
+ auto last = str[0] = toupper(str[0]);
+ for (U32 i = 1; i < str.size(); ++i)
+ {
+ last = (last == ' ' || last == '-' || last == '_') ? str[i] = toupper(str[i]) : str[i];
+ }
+ }
+}
+
+//static
+template<class T>
+bool LLStringUtilBase<T>::containsNonprintable(const string_type& string)
+{
+ const char MIN = 32;
+ bool rv = false;
+ for (size_type i = 0; i < string.size(); i++)
+ {
+ if(string[i] < MIN)
+ {
+ rv = true;
+ break;
+ }
+ }
+ return rv;
+}
+
+// *TODO: reimplement in terms of algorithm
+//static
+template<class T>
+void LLStringUtilBase<T>::stripNonprintable(string_type& string)
+{
+ const char MIN = 32;
+ size_type j = 0;
+ if (string.empty())
+ {
+ return;
+ }
+ size_t src_size = string.size();
+ char* c_string = new char[src_size + 1];
+ if(c_string == NULL)
+ {
+ return;
+ }
+ copy(c_string, string.c_str(), src_size+1);
+ char* write_head = &c_string[0];
+ for (size_type i = 0; i < src_size; i++)
+ {
+ char* read_head = &string[i];
+ write_head = &c_string[j];
+ if(!(*read_head < MIN))
+ {
+ *write_head = *read_head;
+ ++j;
+ }
+ }
+ c_string[j]= '\0';
+ string = c_string;
+ delete []c_string;
+}
+
+// *TODO: reimplement in terms of algorithm
+template<class T>
+std::basic_string<T> LLStringUtilBase<T>::quote(const string_type& str,
+ const string_type& triggers,
+ const string_type& escape)
+{
+ size_type len(str.length());
+ // If the string is already quoted, assume user knows what s/he's doing.
+ if (len >= 2 && str[0] == '"' && str[len-1] == '"')
+ {
+ return str;
+ }
+
+ // Not already quoted: do we need to? triggers.empty() is a special case
+ // meaning "always quote."
+ if ((! triggers.empty()) && str.find_first_of(triggers) == string_type::npos)
+ {
+ // no trigger characters, don't bother quoting
+ return str;
+ }
+
+ // For whatever reason, we must quote this string.
+ string_type result;
+ result.push_back('"');
+ for (typename string_type::const_iterator ci(str.begin()), cend(str.end()); ci != cend; ++ci)
+ {
+ if (*ci == '"')
+ {
+ result.append(escape);
+ }
+ result.push_back(*ci);
+ }
+ result.push_back('"');
+ return result;
+}
+
+template<class T>
+void LLStringUtilBase<T>::_makeASCII(string_type& string)
+{
+ // Replace non-ASCII chars with LL_UNKNOWN_CHAR
+ for (size_type i = 0; i < string.length(); i++)
+ {
+ if (string[i] > 0x7f)
+ {
+ string[i] = LL_UNKNOWN_CHAR;
+ }
+ }
+}
+
+// static
+template<class T>
+void LLStringUtilBase<T>::copy( T* dst, const T* src, size_type dst_size )
+{
+ if( dst_size > 0 )
+ {
+ size_type min_len = 0;
+ if( src )
+ {
+ min_len = llmin( dst_size - 1, strlen( src ) ); /* Flawfinder: ignore */
+ memcpy(dst, src, min_len * sizeof(T)); /* Flawfinder: ignore */
+ }
+ dst[min_len] = '\0';
+ }
+}
+
+// static
+template<class T>
+void LLStringUtilBase<T>::copyInto(string_type& dst, const string_type& src, size_type offset)
+{
+ if ( offset == dst.length() )
+ {
+ // special case - append to end of string and avoid expensive
+ // (when strings are large) string manipulations
+ dst += src;
+ }
+ else
+ {
+ string_type tail = dst.substr(offset);
+
+ dst = dst.substr(0, offset);
+ dst += src;
+ dst += tail;
+ };
+}
+
+// True if this is the head of s.
+//static
+template<class T>
+bool LLStringUtilBase<T>::isHead( const string_type& string, const T* s )
+{
+ if( string.empty() )
+ {
+ // Early exit
+ return false;
+ }
+ else
+ {
+ return (strncmp( s, string.c_str(), string.size() ) == 0);
+ }
+}
+
+// static
+template<class T>
+bool LLStringUtilBase<T>::startsWith(
+ const string_type& string,
+ const string_type& substr)
+{
+ if(string.empty() || (substr.empty())) return false;
+ if (substr.length() > string.length()) return false;
+ if (0 == string.compare(0, substr.length(), substr)) return true;
+ return false;
+}
+
+// static
+template<class T>
+bool LLStringUtilBase<T>::endsWith(
+ const string_type& string,
+ const string_type& substr)
+{
+ if(string.empty() || (substr.empty())) return false;
+ size_t sub_len = substr.length();
+ size_t str_len = string.length();
+ if (sub_len > str_len) return false;
+ if (0 == string.compare(str_len - sub_len, sub_len, substr)) return true;
+ return false;
+}
+
+// static
+template<class T>
+auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> std::optional<string_type>
+{
+ auto found(llstring_getoptenv(key));
+ if (found)
+ {
+ // return populated std::optional
+ return { ll_convert<string_type>(*found) };
+ }
+ else
+ {
+ // empty std::optional
+ return {};
+ }
+}
+
+// static
+template<class T>
+auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type
+{
+ auto found(getoptenv(key));
+ if (found)
+ {
+ return *found;
+ }
+ else
+ {
+ return dflt;
+ }
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToBOOL(const string_type& string, bool& value)
+{
+ if( string.empty() )
+ {
+ return false;
+ }
+
+ string_type temp( string );
+ trim(temp);
+ if(
+ (temp == "1") ||
+ (temp == "T") ||
+ (temp == "t") ||
+ (temp == "TRUE") ||
+ (temp == "true") ||
+ (temp == "True") )
+ {
+ value = true;
+ return true;
+ }
+ else
+ if(
+ (temp == "0") ||
+ (temp == "F") ||
+ (temp == "f") ||
+ (temp == "FALSE") ||
+ (temp == "false") ||
+ (temp == "False") )
+ {
+ value = false;
+ return true;
+ }
+
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToU8(const string_type& string, U8& value)
+{
+ S32 value32 = 0;
+ bool success = convertToS32(string, value32);
+ if( success && (U8_MIN <= value32) && (value32 <= U8_MAX) )
+ {
+ value = (U8) value32;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToS8(const string_type& string, S8& value)
+{
+ S32 value32 = 0;
+ bool success = convertToS32(string, value32);
+ if( success && (S8_MIN <= value32) && (value32 <= S8_MAX) )
+ {
+ value = (S8) value32;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToS16(const string_type& string, S16& value)
+{
+ S32 value32 = 0;
+ bool success = convertToS32(string, value32);
+ if( success && (S16_MIN <= value32) && (value32 <= S16_MAX) )
+ {
+ value = (S16) value32;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToU16(const string_type& string, U16& value)
+{
+ S32 value32 = 0;
+ bool success = convertToS32(string, value32);
+ if( success && (U16_MIN <= value32) && (value32 <= U16_MAX) )
+ {
+ value = (U16) value32;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToU32(const string_type& string, U32& value)
+{
+ if( string.empty() )
+ {
+ return false;
+ }
+
+ string_type temp( string );
+ trim(temp);
+ U32 v;
+ std::basic_istringstream<T> i_stream((string_type)temp);
+ if(i_stream >> v)
+ {
+ value = v;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToS32(const string_type& string, S32& value)
+{
+ if( string.empty() )
+ {
+ return false;
+ }
+
+ string_type temp( string );
+ trim(temp);
+ S32 v;
+ std::basic_istringstream<T> i_stream((string_type)temp);
+ if(i_stream >> v)
+ {
+ //TODO: figure out overflow and underflow reporting here
+ //if((LONG_MAX == v) || (LONG_MIN == v))
+ //{
+ // // Underflow or overflow
+ // return false;
+ //}
+
+ value = v;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToF32(const string_type& string, F32& value)
+{
+ F64 value64 = 0.0;
+ bool success = convertToF64(string, value64);
+ if( success && (-F32_MAX <= value64) && (value64 <= F32_MAX) )
+ {
+ value = (F32) value64;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+bool LLStringUtilBase<T>::convertToF64(const string_type& string, F64& value)
+{
+ if( string.empty() )
+ {
+ return false;
+ }
+
+ string_type temp( string );
+ trim(temp);
+ F64 v;
+ std::basic_istringstream<T> i_stream((string_type)temp);
+ if(i_stream >> v)
+ {
+ //TODO: figure out overflow and underflow reporting here
+ //if( ((-HUGE_VAL == v) || (HUGE_VAL == v))) )
+ //{
+ // // Underflow or overflow
+ // return false;
+ //}
+
+ value = v;
+ return true;
+ }
+ return false;
+}
+
+template<class T>
+void LLStringUtilBase<T>::truncate(string_type& string, size_type count)
+{
+ size_type cur_size = string.size();
+ string.resize(count < cur_size ? count : cur_size);
+}
+
+// The good thing about *declaration* macros, vs. usage macros, is that now
+// we're done with them: we don't need them to bleed into the consuming source
+// file.
+#undef ll_convert_alias
+#undef ll_convert_u16_alias
+#undef ll_convert_wstr_alias
+#undef LL_CONVERT_COPY_CHARS
+#undef ll_convert_forms
+#undef ll_convert_cp_forms
+
+#endif // LL_STRING_H
|