From 025329b6a2ecb8ddee3022d6a73344f862f0d326 Mon Sep 17 00:00:00 2001
From: Nat Goodspeed
Date: Fri, 24 Feb 2012 15:06:44 -0500
Subject: Add LLStringUtil::getTokens() overload handling quoted substrings.

We didn't have any tokenizer suitable for scanning something like a bash
command line. We do have a couple hacks, e.g. LLExternalEditor::tokenize()
and LLCommandLineParser::parseCommandLineString(). Both try to work around
boost::tokenizer limitations; but existing boost::tokenizer support just
doesn't address this case. Neither of the above is available as a general
scanner anyway, and parseCommandLineString() fails outright when passed "".

New getTokens() also distinguishes between "drop delimiters" (e.g. space,
return, newline) to be discarded from the token stream, versus "keep
delimiters" (e.g. "+-*/") to be returned as tokens in their own right.

There's an overload that honors escapes and a more efficient one that
doesn't; each has a convenience overload that returns the scanned string
vector rather than requiring a separate declaration.

Tweak and comment older getTokens() implementation.

Add unit tests for both old and new getTokens() implementations.

Break out StringVec and std::ostream << StringVec from
indra/llcommon/tests/listener.h to StringVec.h: that's coming in handy for
a number of different TUT test sources.
---
 indra/llcommon/llstring.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'indra/llcommon/llstring.cpp')

diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index e7fe656808..fa0eb9f72c 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -912,22 +912,24 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
 template<>
 void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string>& tokens, const std::string& delims)
 {
-	std::string currToken;
-	std::string::size_type begIdx, endIdx;
-
-	begIdx = instr.find_first_not_of (delims);
-	while (begIdx != std::string::npos)
+	// Starting at offset 0, scan forward for the next non-delimiter. We're
+	// done when the only characters left in 'instr' are delimiters.
+	for (std::string::size_type begIdx, endIdx = 0;
+		 (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; )
 	{
+		// Found a non-delimiter. After that, find the next delimiter.
 		endIdx = instr.find_first_of (delims, begIdx);
 		if (endIdx == std::string::npos)
 		{
+			// No more delimiters: this token extends to the end of the string.
 			endIdx = instr.length();
 		}
 
-		currToken = instr.substr(begIdx, endIdx - begIdx);
+		// extract the token between begIdx and endIdx; substr() needs length
+		std::string currToken(instr.substr(begIdx, endIdx - begIdx));
 		LLStringUtil::trim (currToken);
 		tokens.push_back(currToken);
-		begIdx = instr.find_first_not_of (delims, endIdx);
+		// next scan past delimiters starts at endIdx
 	}
 }
 
-- 
cgit v1.2.3