diff options
Diffstat (limited to 'indra/llcommon')
| -rw-r--r-- | indra/llcommon/llstring.cpp | 16 | ||||
| -rw-r--r-- | indra/llcommon/llstring.h | 355 | ||||
| -rw-r--r-- | indra/llcommon/tests/StringVec.h | 37 | ||||
| -rw-r--r-- | indra/llcommon/tests/listener.h | 21 | ||||
| -rw-r--r-- | indra/llcommon/tests/llstring_test.cpp | 115 | 
5 files changed, 517 insertions, 27 deletions
| diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index e7fe656808..fa0eb9f72c 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -912,22 +912,24 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);  template<>   void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)  { -	std::string currToken; -	std::string::size_type begIdx, endIdx; - -	begIdx = instr.find_first_not_of (delims); -	while (begIdx != std::string::npos) +	// Starting at offset 0, scan forward for the next non-delimiter. We're +	// done when the only characters left in 'instr' are delimiters. +	for (std::string::size_type begIdx, endIdx = 0; +		 (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; )  	{ +		// Found a non-delimiter. After that, find the next delimiter.  		endIdx = instr.find_first_of (delims, begIdx);  		if (endIdx == std::string::npos)  		{ +			// No more delimiters: this token extends to the end of the string.  			endIdx = instr.length();  		} -		currToken = instr.substr(begIdx, endIdx - begIdx); +		// extract the token between begIdx and endIdx; substr() needs length +		std::string currToken(instr.substr(begIdx, endIdx - begIdx));  		LLStringUtil::trim (currToken);  		tokens.push_back(currToken); -		begIdx = instr.find_first_not_of (delims, endIdx); +		// next scan past delimiters starts at endIdx  	}  } diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 7b24b5e279..e4ae54cec5 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -40,6 +40,7 @@  #endif  #include <string.h> +#include <boost/scoped_ptr.hpp>  #if LL_SOLARIS  // stricmp and strnicmp do not exist on Solaris: @@ -247,7 +248,38 @@ public:  	static const string_type null;  	typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t; -	LL_COMMON_API static void getTokens(const string_type& instr, std::vector<string_type >& tokens, const string_type& delims); +	/// considers any sequence of delims as a single field separator +	LL_COMMON_API static void getTokens(const string_type& instr, +										std::vector<string_type >& tokens, +										const string_type& delims); +	/// like simple scan overload, but returns scanned vector +	LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr, +															const string_type& delims); +	/// add support for keep_delims and quotes (either could be empty string) +	LL_COMMON_API static void getTokens(const string_type& instr, +										std::vector<string_type>& tokens, +										const string_type& drop_delims, +										const string_type& keep_delims, +										const string_type& quotes=string_type()); +	/// like keep_delims-and-quotes overload, but returns scanned vector +	LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr, +															const string_type& drop_delims, +															const string_type& keep_delims, +															const string_type& quotes=string_type()); +	/// add support for escapes (could be empty string) +	LL_COMMON_API static void getTokens(const string_type& instr, +										std::vector<string_type>& tokens, +										const string_type& drop_delims, +										const string_type& keep_delims, +										const string_type& quotes, +										const string_type& escapes); +	/// like escapes overload, but returns scanned vector +	LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr, +															const string_type& drop_delims, +															const string_type& keep_delims, +															const string_type& quotes, +															const string_type& escapes); +  	LL_COMMON_API static void formatNumber(string_type& numStr, string_type decimals);  	LL_COMMON_API static bool formatDatetime(string_type& replacement, string_type token, string_type param, S32 secFromEpoch);  	LL_COMMON_API static S32 format(string_type& s, const format_map_t& substitutions); @@ -262,6 +294,11 @@ public:  		return !string.empty() && (0 <= i) && (i <= string.size());  	} +	static bool contains(const string_type& string, T c, size_type i=0) +	{ +		return string.find(c, i) != string_type::npos; +	} +  	static void	trimHead(string_type& string);  	static void	trimTail(string_type& string);  	static void	trim(string_type& string)	{ trimHead(string); trimTail(string); } @@ -650,10 +687,324 @@ namespace LLStringFn  ////////////////////////////////////////////////////////////  // NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.  // There is no LLWStringUtil::format implementation currently. -// Calling thse for anything other than LLStringUtil will produce link errors. +// Calling these for anything other than LLStringUtil will produce link errors.  //////////////////////////////////////////////////////////// +// static +template <class T> +std::vector<typename LLStringUtilBase<T>::string_type> +LLStringUtilBase<T>::getTokens(const string_type& instr, const string_type& delims) +{ +	std::vector<string_type> tokens; +	getTokens(instr, tokens, delims); +	return tokens; +} + +// static +template <class T> +std::vector<typename LLStringUtilBase<T>::string_type> +LLStringUtilBase<T>::getTokens(const string_type& instr, +							   const string_type& drop_delims, +							   const string_type& keep_delims, +							   const string_type& quotes) +{ +	std::vector<string_type> tokens; +	getTokens(instr, tokens, drop_delims, keep_delims, quotes); +	return tokens; +} + +// static +template <class T> +std::vector<typename LLStringUtilBase<T>::string_type> +LLStringUtilBase<T>::getTokens(const string_type& instr, +							   const string_type& drop_delims, +							   const string_type& keep_delims, +							   const string_type& quotes, +							   const string_type& escapes) +{ +	std::vector<string_type> tokens; +	getTokens(instr, tokens, drop_delims, keep_delims, quotes, escapes); +	return tokens; +} + +namespace LLStringUtilBaseImpl +{ + +/** + * Input string scanner helper for getTokens(), or really any other + * character-parsing routine that may have to deal with escape characters. + * This implementation defines the concept (also an interface, should you + * choose to implement the concept by subclassing) and provides trivial + * implementations for a string @em without escape processing. + */ +template <class T> +struct InString +{ +	typedef std::basic_string<T> string_type; +	typedef typename string_type::const_iterator const_iterator; + +	InString(const_iterator b, const_iterator e): +		iter(b), +		end(e) +	{} + +	bool done() const { return iter == end; } +	/// Is the current character (*iter) escaped? This implementation can +	/// answer trivially because it doesn't support escapes. +	virtual bool escaped() const { return false; } +	/// Obtain the current character and advance @c iter. +	virtual T next() { return *iter++; } +	/// Does the current character match specified character? +	virtual bool is(T ch) const { return (! done()) && *iter == ch; } +	/// Is the current character any one of the specified characters? +	virtual bool oneof(const string_type& delims) const +	{ +		return (! done()) && LLStringUtilBase<T>::contains(delims, *iter); +	} + +	/** +	 * Scan forward from @from until either @a delim or end. This is primarily +	 * useful for processing quoted substrings. +	 * +	 * If we do see @a delim, append everything from @from until (excluding) +	 * @a delim to @a into, advance @c iter to skip @a delim, and return @c +	 * true. +	 * +	 * If we do not see @a delim, do not alter @a into or @c iter and return +	 * @c false. Do not pass GO, do not collect $200. +	 * +	 * @note The @c false case described above implements normal getTokens() +	 * treatment of an unmatched open quote: treat the quote character as if +	 * escaped, that is, simply collect it as part of the current token. Other +	 * plausible behaviors directly affect the way getTokens() deals with an +	 * unmatched quote: e.g. throwing an exception to treat it as an error, or +	 * assuming a close quote beyond end of string (in which case return @c +	 * true). +	 */ +	virtual bool collect_until(string_type& into, const_iterator from, T delim) +	{ +		const_iterator found = std::find(from, end, delim); +		// If we didn't find delim, change nothing, just tell caller. +		if (found == end) +			return false; +		// Found delim! Append everything between from and found. +		into.append(from, found); +		// advance past delim in input +		iter = found + 1; +		return true; +	} + +	const_iterator iter, end; +}; + +/// InString subclass that handles escape characters +template <class T> +class InEscString: public InString<T> +{ +public: +	typedef InString<T> super; +	typedef typename super::string_type string_type; +	typedef typename super::const_iterator const_iterator; +	using super::done; +	using super::iter; +	using super::end; + +	InEscString(const_iterator b, const_iterator e, const string_type& escapes_): +		super(b, e), +		escapes(escapes_) +	{ +		// Even though we've already initialized 'iter' via our base-class +		// constructor, set it again to check for initial escape char. +		setiter(b); +	} + +	/// This implementation uses the answer cached by setiter(). +	virtual bool escaped() const { return isesc; } +	virtual T next() +	{ +		// If we're looking at the escape character of an escape sequence, +		// skip that character. This is the one time we can modify 'iter' +		// without using setiter: for this one case we DO NOT CARE if the +		// escaped character is itself an escape. +		if (isesc) +			++iter; +		// If we were looking at an escape character, this is the escaped +		// character; otherwise it's just the next character. +		T result(*iter); +		// Advance iter, checking for escape sequence. +		setiter(iter + 1); +		return result; +	} + +	virtual bool is(T ch) const +	{ +		// Like base-class is(), except that an escaped character matches +		// nothing. +		return (! done()) && (! isesc) && *iter == ch; +	} + +	virtual bool oneof(const string_type& delims) const +	{ +		// Like base-class oneof(), except that an escaped character matches +		// nothing. +		return (! done()) && (! isesc) && LLStringUtilBase<T>::contains(delims, *iter); +	} + +	virtual bool collect_until(string_type& into, const_iterator from, T delim) +	{ +		// Deal with escapes in the characters we collect; that is, an escaped +		// character must become just that character without the preceding +		// escape. Collect characters in a separate string rather than +		// directly appending to 'into' in case we do not find delim, in which +		// case we're supposed to leave 'into' unmodified. +		string_type collected; +		// For scanning purposes, we're going to work directly with 'iter'. +		// Save its current value in case we fail to see delim. +		const_iterator save_iter(iter); +		// Okay, set 'iter', checking for escape. +		setiter(from); +		while (! done()) +		{ +			// If we see an unescaped delim, stop and report success. +			if ((! isesc) && *iter == delim) +			{ +				// Append collected chars to 'into'. +				into.append(collected); +				// Don't forget to advance 'iter' past delim. +				setiter(iter + 1); +				return true; +			} +			// We're not at end, and either we're not looking at delim or it's +			// escaped. Collect this character and keep going. +			collected.push_back(next()); +		} +		// Here we hit 'end' without ever seeing delim. Restore iter and tell +		// caller. +		setiter(save_iter); +		return false; +	} + +private: +	void setiter(const_iterator i) +	{ +		iter = i; + +		// Every time we change 'iter', set 'isesc' to be able to repetitively +		// answer escaped() without having to rescan 'escapes'. isesc caches +		// contains(escapes, *iter). + +		// We're looking at an escaped char if we're not already at end (that +		// is, *iter is even meaningful); if *iter is in fact one of the +		// specified escape characters; and if there's one more character +		// following it. That is, if an escape character is the very last +		// character of the input string, it loses its special meaning. +		isesc = (! done()) && +				LLStringUtilBase<T>::contains(escapes, *iter) && +				(iter+1) != end; +	} + +	const string_type escapes; +	bool isesc; +}; + +/// getTokens() implementation based on InString concept +template <typename INSTRING, typename string_type> +void getTokens(INSTRING& instr, std::vector<string_type>& tokens, +			   const string_type& drop_delims, const string_type& keep_delims, +			   const string_type& quotes) +{ +	// There are times when we want to match either drop_delims or +	// keep_delims. Concatenate them up front to speed things up. +	string_type all_delims(drop_delims + keep_delims); +	// no tokens yet +	tokens.clear(); + +	// try for another token +	while (! instr.done()) +	{ +		// scan past any drop_delims +		while (instr.oneof(drop_delims)) +		{ +			// skip this drop_delim +			instr.next(); +			// but if that was the end of the string, done +			if (instr.done()) +				return; +		} +		// found the start of another token: make a slot for it +		tokens.push_back(string_type()); +		if (instr.oneof(keep_delims)) +		{ +			// *iter is a keep_delim, a token of exactly 1 character. Append +			// that character to the new token and proceed. +			tokens.back().push_back(instr.next()); +			continue; +		} +		// Here we have a non-delimiter token, which might consist of a mix of +		// quoted and unquoted parts. Use bash rules for quoting: you can +		// embed a quoted substring in the midst of an unquoted token (e.g. +		// ~/"sub dir"/myfile.txt); you can ram two quoted substrings together +		// to make a single token (e.g. 'He said, "'"Don't."'"'). We diverge +		// from bash in that bash considers an unmatched quote an error. Our +		// param signature doesn't allow for errors, so just pretend it's not +		// a quote and embed it. +		// At this level, keep scanning until we hit the next delimiter of +		// either type (drop_delims or keep_delims). +		while (! instr.oneof(all_delims)) +		{ +			// If we're looking at an open quote, search forward for +			// a close quote, collecting characters along the way. +			if (instr.oneof(quotes) && +				instr.collect_until(tokens.back(), instr.iter+1, *instr.iter)) +			{ +				// collect_until is cleverly designed to do exactly what we +				// need here. No further action needed if it returns true. +			} +			else +			{ +				// Either *iter isn't a quote, or there's no matching close +				// quote: in other words, just an ordinary char. Append it to +				// current token. +				tokens.back().push_back(instr.next()); +			} +			// having scanned that segment of this token, if we've reached the +			// end of the string, we're done +			if (instr.done()) +				return; +		} +	} +} + +} // namespace LLStringUtilBaseImpl + +// static +template <class T> +void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens, +									const string_type& drop_delims, const string_type& keep_delims, +									const string_type& quotes) +{ +	// Because this overload doesn't support escapes, use simple InString to +	// manage input range. +	LLStringUtilBaseImpl::InString<T> instring(string.begin(), string.end()); +	LLStringUtilBaseImpl::getTokens(instring, tokens, drop_delims, keep_delims, quotes); +} + +// static +template <class T> +void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens, +									const string_type& drop_delims, const string_type& keep_delims, +									const string_type& quotes, const string_type& escapes) +{ +	// This overload must deal with escapes. Delegate that to InEscString +	// (unless there ARE no escapes). +	boost::scoped_ptr< LLStringUtilBaseImpl::InString<T> > instrp; +	if (escapes.empty()) +		instrp.reset(new LLStringUtilBaseImpl::InString<T>(string.begin(), string.end())); +	else +		instrp.reset(new LLStringUtilBaseImpl::InEscString<T>(string.begin(), string.end(), escapes)); +	LLStringUtilBaseImpl::getTokens(*instrp, tokens, drop_delims, keep_delims, quotes); +}  // static  template<class T>  diff --git a/indra/llcommon/tests/StringVec.h b/indra/llcommon/tests/StringVec.h new file mode 100644 index 0000000000..a380b00a05 --- /dev/null +++ b/indra/llcommon/tests/StringVec.h @@ -0,0 +1,37 @@ +/** + * @file   StringVec.h + * @author Nat Goodspeed + * @date   2012-02-24 + * @brief  Extend TUT ensure_equals() to handle std::vector<std::string> + *  + * $LicenseInfo:firstyear=2012&license=viewerlgpl$ + * Copyright (c) 2012, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_STRINGVEC_H) +#define LL_STRINGVEC_H + +#include <vector> +#include <string> +#include <iostream> + +typedef std::vector<std::string> StringVec; + +std::ostream& operator<<(std::ostream& out, const StringVec& strings) +{ +    out << '('; +    StringVec::const_iterator begin(strings.begin()), end(strings.end()); +    if (begin != end) +    { +        out << '"' << *begin << '"'; +        while (++begin != end) +        { +            out << ", \"" << *begin << '"'; +        } +    } +    out << ')'; +    return out; +} + +#endif /* ! defined(LL_STRINGVEC_H) */ diff --git a/indra/llcommon/tests/listener.h b/indra/llcommon/tests/listener.h index dcdb2412be..9c5c18a150 100644 --- a/indra/llcommon/tests/listener.h +++ b/indra/llcommon/tests/listener.h @@ -30,6 +30,8 @@  #define LL_LISTENER_H  #include "llsd.h" +#include "llevents.h" +#include "tests/StringVec.h"  #include <iostream>  /***************************************************************************** @@ -133,24 +135,7 @@ struct Collect          return false;      }      void clear() { result.clear(); } -    typedef std::vector<std::string> StringList; -    StringList result; +    StringVec result;  }; -std::ostream& operator<<(std::ostream& out, const Collect::StringList& strings) -{ -    out << '('; -    Collect::StringList::const_iterator begin(strings.begin()), end(strings.end()); -    if (begin != end) -    { -        out << '"' << *begin << '"'; -        while (++begin != end) -        { -            out << ", \"" << *begin << '"'; -        } -    } -    out << ')'; -    return out; -} -  #endif /* ! defined(LL_LISTENER_H) */ diff --git a/indra/llcommon/tests/llstring_test.cpp b/indra/llcommon/tests/llstring_test.cpp index 6a1cbf652a..821deeac21 100644 --- a/indra/llcommon/tests/llstring_test.cpp +++ b/indra/llcommon/tests/llstring_test.cpp @@ -29,7 +29,11 @@  #include "linden_common.h"  #include "../test/lltut.h" +#include <boost/assign/list_of.hpp>  #include "../llstring.h" +#include "StringVec.h" + +using boost::assign::list_of;  namespace tut  { @@ -750,4 +754,115 @@ namespace tut  		ensure("empty substr.", !LLStringUtil::endsWith(empty, value));  		ensure("empty everything.", !LLStringUtil::endsWith(empty, empty));  	} + +	template<> template<> +	void string_index_object_t::test<41>() +	{ +		set_test_name("getTokens(\"delims\")"); +		ensure_equals("empty string", LLStringUtil::getTokens("", " "), StringVec()); +		ensure_equals("only delims", +					  LLStringUtil::getTokens("   \r\n   ", " \r\n"), StringVec()); +		ensure_equals("sequence of delims", +					  LLStringUtil::getTokens(",,, one ,,,", ","), list_of("one")); +		// nat considers this a dubious implementation side effect, but I'd +		// hate to change it now... +		ensure_equals("noncontiguous tokens", +					  LLStringUtil::getTokens(", ,, , one ,,,", ","), list_of("")("")("one")); +		ensure_equals("space-padded tokens", +					  LLStringUtil::getTokens(",    one  ,  two  ,", ","), list_of("one")("two")); +		ensure_equals("no delims", LLStringUtil::getTokens("one", ","), list_of("one")); +	} + +	// Shorthand for verifying that getTokens() behaves the same when you +	// don't pass a string of escape characters, when you pass an empty string +	// (different overloads), and when you pass a string of characters that +	// aren't actually present. +	void ensure_getTokens(const std::string& desc, +						  const std::string& string, +						  const std::string& drop_delims, +						  const std::string& keep_delims, +						  const std::string& quotes, +						  const std::vector<std::string>& expect) +	{ +		ensure_equals(desc + " - no esc", +					  LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes), +					  expect); +		ensure_equals(desc + " - empty esc", +					  LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes, ""), +					  expect); +		ensure_equals(desc + " - unused esc", +					  LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes, "!"), +					  expect); +	} + +	void ensure_getTokens(const std::string& desc, +						  const std::string& string, +						  const std::string& drop_delims, +						  const std::string& keep_delims, +						  const std::vector<std::string>& expect) +	{ +		ensure_getTokens(desc, string, drop_delims, keep_delims, "", expect); +	} + +	template<> template<> +	void string_index_object_t::test<42>() +	{ +		set_test_name("getTokens(\"delims\", etc.)"); +		// Signatures to test in this method: +		// getTokens(string, drop_delims, keep_delims [, quotes [, escapes]]) +		// If you omit keep_delims, you get the older function (test above). + +		// cases like the getTokens(string, delims) tests above +		ensure_getTokens("empty string", "", " ", "", StringVec()); +		ensure_getTokens("only delims", +						 "   \r\n   ", " \r\n", "", StringVec()); +		ensure_getTokens("sequence of delims", +						 ",,, one ,,,", ", ", "", list_of("one")); +		// Note contrast with the case in the previous method +		ensure_getTokens("noncontiguous tokens", +						 ", ,, , one ,,,", ", ", "", list_of("one")); +		ensure_getTokens("space-padded tokens", +						 ",    one  ,  two  ,", ", ", "", +                         list_of("one")("two")); +		ensure_getTokens("no delims", "one", ",", "", list_of("one")); + +		// drop_delims vs. keep_delims +		ensure_getTokens("arithmetic", +						 " ab+def  / xx*  yy ", " ", "+-*/", +						 list_of("ab")("+")("def")("/")("xx")("*")("yy")); + +		// quotes +		ensure_getTokens("no quotes", +						 "She said, \"Don't go.\"", " ", ",", "", +						 list_of("She")("said")(",")("\"Don't")("go.\"")); +		ensure_getTokens("quotes", +						 "She said, \"Don't go.\"", " ", ",", "\"", +						 list_of("She")("said")(",")("Don't go.")); +		ensure_getTokens("quotes and delims", +						 "run c:/'Documents and Settings'/someone", " ", "", "'", +						 list_of("run")("c:/Documents and Settings/someone")); +		ensure_getTokens("unmatched quote", +						 "baby don't leave", " ", "", "'", +						 list_of("baby")("don't")("leave")); +		ensure_getTokens("adjacent quoted", +						 "abc'def \"ghi'\"jkl' mno\"pqr", " ", "", "\"'", +						 list_of("abcdef \"ghijkl' mnopqr")); + +		// escapes +		// Don't use backslash as an escape for these tests -- you'll go nuts +		// between the C++ string scanner and getTokens() escapes. Test with +		// something else! +		ensure_equals("escaped delims", +					  LLStringUtil::getTokens("^ a - dog^-gone^ phrase", " ", "-", "", "^"), +					  list_of(" a")("-")("dog-gone phrase")); +		ensure_equals("escaped quotes", +					  LLStringUtil::getTokens("say: 'this isn^'t w^orking'.", " ", "", "'", "^"), +					  list_of("say:")("this isn't working.")); +		ensure_equals("escaped escape", +					  LLStringUtil::getTokens("want x^^2", " ", "", "", "^"), +					  list_of("want")("x^2")); +		ensure_equals("escape at end", +					  LLStringUtil::getTokens("it's^ up there^", " ", "", "'", "^"), +					  list_of("it's up")("there^")); +    }  } | 
