From 025329b6a2ecb8ddee3022d6a73344f862f0d326 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 24 Feb 2012 15:06:44 -0500 Subject: Add LLStringUtil::getTokens() overload handling quoted substrings. We didn't have any tokenizer suitable for scanning something like a bash command line. We do have a couple hacks, e.g. LLExternalEditor::tokenize() and LLCommandLineParser::parseCommandLineString(). Both try to work around boost::tokenizer limitations; but existing boost::tokenizer support just doesn't address this case. Neither of the above is available as a general scanner anyway, and parseCommandLineString() fails outright when passed "". New getTokens() also distinguishes between "drop delimiters" (e.g. space, return, newline) to be discarded from the token stream, versus "keep delimiters" (e.g. "+-*/") to be returned as tokens in their own right. There's an overload that honors escapes and a more efficient one that doesn't; each has a convenience overload that returns the scanned string vector rather than requiring a separate declaration. Tweak and comment older getTokens() implementation. Add unit tests for both old and new getTokens() implementations. Break out StringVec and std::ostream << StringVec from indra/llcommon/tests/listener.h to StringVec.h: that's coming in handy for a number of different TUT test sources. --- indra/llcommon/tests/llstring_test.cpp | 115 +++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) (limited to 'indra/llcommon/tests/llstring_test.cpp') diff --git a/indra/llcommon/tests/llstring_test.cpp b/indra/llcommon/tests/llstring_test.cpp index 6a1cbf652a..821deeac21 100644 --- a/indra/llcommon/tests/llstring_test.cpp +++ b/indra/llcommon/tests/llstring_test.cpp @@ -29,7 +29,11 @@ #include "linden_common.h" #include "../test/lltut.h" +#include #include "../llstring.h" +#include "StringVec.h" + +using boost::assign::list_of; namespace tut { @@ -750,4 +754,115 @@ namespace tut ensure("empty substr.", !LLStringUtil::endsWith(empty, value)); ensure("empty everything.", !LLStringUtil::endsWith(empty, empty)); } + + template<> template<> + void string_index_object_t::test<41>() + { + set_test_name("getTokens(\"delims\")"); + ensure_equals("empty string", LLStringUtil::getTokens("", " "), StringVec()); + ensure_equals("only delims", + LLStringUtil::getTokens(" \r\n ", " \r\n"), StringVec()); + ensure_equals("sequence of delims", + LLStringUtil::getTokens(",,, one ,,,", ","), list_of("one")); + // nat considers this a dubious implementation side effect, but I'd + // hate to change it now... + ensure_equals("noncontiguous tokens", + LLStringUtil::getTokens(", ,, , one ,,,", ","), list_of("")("")("one")); + ensure_equals("space-padded tokens", + LLStringUtil::getTokens(", one , two ,", ","), list_of("one")("two")); + ensure_equals("no delims", LLStringUtil::getTokens("one", ","), list_of("one")); + } + + // Shorthand for verifying that getTokens() behaves the same when you + // don't pass a string of escape characters, when you pass an empty string + // (different overloads), and when you pass a string of characters that + // aren't actually present. + void ensure_getTokens(const std::string& desc, + const std::string& string, + const std::string& drop_delims, + const std::string& keep_delims, + const std::string& quotes, + const std::vector& expect) + { + ensure_equals(desc + " - no esc", + LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes), + expect); + ensure_equals(desc + " - empty esc", + LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes, ""), + expect); + ensure_equals(desc + " - unused esc", + LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes, "!"), + expect); + } + + void ensure_getTokens(const std::string& desc, + const std::string& string, + const std::string& drop_delims, + const std::string& keep_delims, + const std::vector& expect) + { + ensure_getTokens(desc, string, drop_delims, keep_delims, "", expect); + } + + template<> template<> + void string_index_object_t::test<42>() + { + set_test_name("getTokens(\"delims\", etc.)"); + // Signatures to test in this method: + // getTokens(string, drop_delims, keep_delims [, quotes [, escapes]]) + // If you omit keep_delims, you get the older function (test above). + + // cases like the getTokens(string, delims) tests above + ensure_getTokens("empty string", "", " ", "", StringVec()); + ensure_getTokens("only delims", + " \r\n ", " \r\n", "", StringVec()); + ensure_getTokens("sequence of delims", + ",,, one ,,,", ", ", "", list_of("one")); + // Note contrast with the case in the previous method + ensure_getTokens("noncontiguous tokens", + ", ,, , one ,,,", ", ", "", list_of("one")); + ensure_getTokens("space-padded tokens", + ", one , two ,", ", ", "", + list_of("one")("two")); + ensure_getTokens("no delims", "one", ",", "", list_of("one")); + + // drop_delims vs. keep_delims + ensure_getTokens("arithmetic", + " ab+def / xx* yy ", " ", "+-*/", + list_of("ab")("+")("def")("/")("xx")("*")("yy")); + + // quotes + ensure_getTokens("no quotes", + "She said, \"Don't go.\"", " ", ",", "", + list_of("She")("said")(",")("\"Don't")("go.\"")); + ensure_getTokens("quotes", + "She said, \"Don't go.\"", " ", ",", "\"", + list_of("She")("said")(",")("Don't go.")); + ensure_getTokens("quotes and delims", + "run c:/'Documents and Settings'/someone", " ", "", "'", + list_of("run")("c:/Documents and Settings/someone")); + ensure_getTokens("unmatched quote", + "baby don't leave", " ", "", "'", + list_of("baby")("don't")("leave")); + ensure_getTokens("adjacent quoted", + "abc'def \"ghi'\"jkl' mno\"pqr", " ", "", "\"'", + list_of("abcdef \"ghijkl' mnopqr")); + + // escapes + // Don't use backslash as an escape for these tests -- you'll go nuts + // between the C++ string scanner and getTokens() escapes. Test with + // something else! + ensure_equals("escaped delims", + LLStringUtil::getTokens("^ a - dog^-gone^ phrase", " ", "-", "", "^"), + list_of(" a")("-")("dog-gone phrase")); + ensure_equals("escaped quotes", + LLStringUtil::getTokens("say: 'this isn^'t w^orking'.", " ", "", "'", "^"), + list_of("say:")("this isn't working.")); + ensure_equals("escaped escape", + LLStringUtil::getTokens("want x^^2", " ", "", "", "^"), + list_of("want")("x^2")); + ensure_equals("escape at end", + LLStringUtil::getTokens("it's^ up there^", " ", "", "'", "^"), + list_of("it's up")("there^")); + } } -- cgit v1.2.3 From bf7c215692435ceb85d8991a9337933688dc0cf0 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Sun, 26 Feb 2012 07:17:08 -0500 Subject: Add LLStringUtil::getTokens() test for quoted empty string. This is an important differentiator between getTokens() and the present LLCommandLineParser::parseCommandLineString() logic: you cannot currently --set SomeVar to an empty string value because parseCommandLineString() discards empty strings. --- indra/llcommon/tests/llstring_test.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'indra/llcommon/tests/llstring_test.cpp') diff --git a/indra/llcommon/tests/llstring_test.cpp b/indra/llcommon/tests/llstring_test.cpp index 821deeac21..93d3968dbf 100644 --- a/indra/llcommon/tests/llstring_test.cpp +++ b/indra/llcommon/tests/llstring_test.cpp @@ -847,6 +847,9 @@ namespace tut ensure_getTokens("adjacent quoted", "abc'def \"ghi'\"jkl' mno\"pqr", " ", "", "\"'", list_of("abcdef \"ghijkl' mnopqr")); + ensure_getTokens("quoted empty string", + "--set SomeVar ''", " ", "", "'", + list_of("--set")("SomeVar")("")); // escapes // Don't use backslash as an escape for these tests -- you'll go nuts -- cgit v1.2.3