diff options
author | Nat Goodspeed <nat@lindenlab.com> | 2022-11-22 16:21:44 -0500 |
---|---|---|
committer | Nat Goodspeed <nat@lindenlab.com> | 2022-11-22 16:21:44 -0500 |
commit | abe62c23d74d5319691a49881719b03cc9b5b090 (patch) | |
tree | d63f782540580ab5ad6f28d144c4909a420dec11 /indra/llcommon | |
parent | 15b5dedb2dc139c85461e7c867164b65cc6fc628 (diff) |
SL-18330: Make LLSDSerialize::deserialize() default to notation.
LLSDSerialize::serialize() emits a header string, e.g. "<? llsd/notation ?>"
for notation format. Until now, LLSDSerialize::deserialize() has required that
header to properly decode the input stream.
But none of LLSDBinaryFormatter, LLSDXMLFormatter or LLSDNotationFormatter
emit that header themselves. Nor do any of the Python llsd.format_binary(),
format_xml() or format_notation() functions. Until now, you could not use
LLSD::deserialize() to parse an arbitrary-format LLSD stream serialized by
anything but LLSDSerialize::serialize().
Change LLSDSerialize::deserialize() so that if no header is recognized,
instead of failing, it attempts to parse as notation. Add tests to exercise
this case.
The tricky part about this processing is that deserialize() necessarily reads
some number of bytes from the input stream first, to try to recognize the
header. If it fails to do so, it must prepend the bytes it has already read to
the rest of the input stream since they're probably the beginning of the
serialized data.
To support this use case, introduce cat_streambuf, a std::streambuf subclass
that (virtually) concatenates other std::streambuf instances. When read by a
std::istream, the sequence of underlying std::streambufs appears to the
consumer as a single continuous stream.
Diffstat (limited to 'indra/llcommon')
-rw-r--r-- | indra/llcommon/llsdserialize.cpp | 87 | ||||
-rw-r--r-- | indra/llcommon/llstreamtools.cpp | 26 | ||||
-rw-r--r-- | indra/llcommon/llstreamtools.h | 27 | ||||
-rw-r--r-- | indra/llcommon/tests/llsdserialize_test.cpp | 192 |
4 files changed, 233 insertions, 99 deletions
diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 8b4a0ee6d8..9ff4676b95 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -45,6 +45,7 @@ #endif #include "lldate.h" +#include "llmemorystream.h" #include "llsd.h" #include "llstring.h" #include "lluri.h" @@ -61,6 +62,23 @@ const std::string LLSD_NOTATION_HEADER("llsd/notation"); #define windowBits 15 #define ENABLE_ZLIB_GZIP 32 +// If we published this in llsdserialize.h, we could use it in the +// implementation of LLSDOStreamer's operator<<(). +template <class Formatter> +void format_using(const LLSD& data, std::ostream& ostr, + LLSDFormatter::EFormatterOptions options=LLSDFormatter::OPTIONS_PRETTY_BINARY) +{ + LLPointer<Formatter> f{ new Formatter }; + f->format(data, ostr, options); +} + +template <class Parser> +S32 parse_using(std::istream& istr, LLSD& data, size_t max_bytes, S32 max_depth=-1) +{ + LLPointer<Parser> p{ new Parser }; + return p->parse(istr, data, max_bytes, max_depth); +} + /** * LLSDSerialize */ @@ -83,10 +101,10 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize f = new LLSDXMLFormatter; break; - case LLSD_NOTATION: - str << "<? " << LLSD_NOTATION_HEADER << " ?>\n"; - f = new LLSDNotationFormatter; - break; + case LLSD_NOTATION: + str << "<? " << LLSD_NOTATION_HEADER << " ?>\n"; + f = new LLSDNotationFormatter; + break; default: LL_WARNS() << "serialize request for unknown ELLSD_Serialize" << LL_ENDL; @@ -101,10 +119,7 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize // static bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { - LLPointer<LLSDParser> p = NULL; char hdr_buf[MAX_HDR_LEN + 1] = ""; /* Flawfinder: ignore */ - int i; - int inbuf = 0; bool legacy_no_header = false; bool fail_if_not_legacy = false; std::string header; @@ -112,7 +127,10 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) /* * Get the first line before anything. */ - str.get(hdr_buf, MAX_HDR_LEN, '\n'); + // Remember str's original input position: if there's no header, we'll + // want to back up and retry. + str.get(hdr_buf, sizeof(hdr_buf), '\n'); + auto inbuf = str.gcount(); if (str.fail()) { str.clear(); @@ -122,16 +140,18 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) if (!strncasecmp(LEGACY_NON_HEADER, hdr_buf, strlen(LEGACY_NON_HEADER))) /* Flawfinder: ignore */ { legacy_no_header = true; - inbuf = (int)str.gcount(); } else { if (fail_if_not_legacy) - goto fail; + { + LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; + return false; + } /* * Remove the newline chars */ - for (i = 0; i < MAX_HDR_LEN; i++) + for (size_t i = 0; i < sizeof(hdr_buf); i++) { if (hdr_buf[i] == 0 || hdr_buf[i] == '\r' || hdr_buf[i] == '\n') @@ -149,50 +169,47 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { end = header.find_first_of(" ?", start); } - if ((start == std::string::npos) || (end == std::string::npos)) - goto fail; - - header = header.substr(start, end - start); - ws(str); + if (! (start == std::string::npos) || (end == std::string::npos)) + { + header = header.substr(start, end - start); + ws(str); + } } /* * Create the parser as appropriate */ if (legacy_no_header) { // Create a LLSD XML parser, and parse the first chunk read above - LLSDXMLParser* x = new LLSDXMLParser(); - x->parsePart(hdr_buf, inbuf); // Parse the first part that was already read - x->parseLines(str, sd); // Parse the rest of it - delete x; - return true; + LLSDXMLParser x; + x.parsePart(hdr_buf, inbuf); // Parse the first part that was already read + auto parsed = x.parseLines(str, sd); // Parse the rest of it + // Formally we should probably check (parsed != PARSE_FAILURE && + // parsed > 0), but since PARSE_FAILURE is -1, this suffices. + return (parsed > 0); } if (header == LLSD_BINARY_HEADER) { - p = new LLSDBinaryParser; + return (parse_using<LLSDBinaryParser>(str, sd, max_bytes) > 0); } else if (header == LLSD_XML_HEADER) { - p = new LLSDXMLParser; + return (parse_using<LLSDXMLParser>(str, sd, max_bytes) > 0); } else if (header == LLSD_NOTATION_HEADER) { - p = new LLSDNotationParser; + return (parse_using<LLSDNotationParser>(str, sd, max_bytes) > 0); } else { - LL_WARNS() << "deserialize request for unknown ELLSD_Serialize" << LL_ENDL; + LL_DEBUGS() << "deserialize request with no header, assuming notation" << LL_ENDL; + // Since we've already read 'inbuf' bytes into 'hdr_buf', prepend that + // data to whatever remains in 'str'. + LLMemoryStreamBuf already(reinterpret_cast<const U8*>(hdr_buf), inbuf); + cat_streambuf prebuff(&already, str.rdbuf()); + std::istream prepend(&prebuff); + return (parse_using<LLSDNotationParser>(prepend, sd, max_bytes) > 0); } - - if (p.notNull()) - { - p->parse(str, sd, max_bytes); - return true; - } - -fail: - LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; - return false; } /** diff --git a/indra/llcommon/llstreamtools.cpp b/indra/llcommon/llstreamtools.cpp index d7a6f47932..1674f6edc2 100644 --- a/indra/llcommon/llstreamtools.cpp +++ b/indra/llcommon/llstreamtools.cpp @@ -523,3 +523,29 @@ std::istream& operator>>(std::istream& str, const char *tocheck) } return str; } + +int cat_streambuf::underflow() +{ + if (gptr() == egptr()) + { + // here because our buffer is empty + std::streamsize size; + // Until we've run out of mInputs, try reading the first of them + // into mBuffer. If that fetches some characters, break the loop. + while (! mInputs.empty() + && ! (size = mInputs.front()->sgetn(mBuffer.data(), mBuffer.size()))) + { + // We tried to read mInputs.front() but got zero characters. + // Discard the first streambuf and try the next one. + mInputs.pop_front(); + } + // Either we ran out of mInputs or we succeeded in reading some + // characters, that is, size != 0. Tell base class what we have. + setg(mBuffer.data(), mBuffer.data(), mBuffer.data() + size); + } + // If we fell out of the above loop with mBuffer still empty, return + // eof(), otherwise return the next character. + return (gptr() == egptr()) + ? std::char_traits<char>::eof() + : std::char_traits<char>::to_int_type(*gptr()); +} diff --git a/indra/llcommon/llstreamtools.h b/indra/llcommon/llstreamtools.h index 1b04bf91d7..997f738840 100644 --- a/indra/llcommon/llstreamtools.h +++ b/indra/llcommon/llstreamtools.h @@ -27,8 +27,10 @@ #ifndef LL_STREAM_TOOLS_H #define LL_STREAM_TOOLS_H +#include <deque> #include <iostream> #include <string> +#include <vector> // unless specifed otherwise these all return input_stream.good() @@ -113,6 +115,27 @@ LL_COMMON_API std::streamsize fullread( LL_COMMON_API std::istream& operator>>(std::istream& str, const char *tocheck); -#endif - +/** + * cat_streambuf is a std::streambuf subclass that accepts a variadic number + * of std::streambuf* (e.g. some_istream.rdbuf()) and virtually concatenates + * their contents. + */ +// derived from https://stackoverflow.com/a/49441066/5533635 +class cat_streambuf: public std::streambuf +{ +private: + std::deque<std::streambuf*> mInputs; + std::vector<char> mBuffer; + +public: + // only valid for std::streambuf* arguments + template <typename... Inputs> + cat_streambuf(Inputs... inputs): + mInputs{inputs...}, + mBuffer(1024) + {} + + int underflow(); +}; +#endif diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index c246f5ee56..0334699c7f 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -60,6 +60,7 @@ using namespace boost::phoenix; #include "../test/lltut.h" #include "../test/namedtempfile.h" #include "stringize.h" +#include <functional> std::vector<U8> string_to_vector(const std::string& str) { @@ -112,7 +113,7 @@ namespace tut mSD = LLUUID::null; expected = "<llsd><uuid /></llsd>\n"; xml_test("null uuid", expected); - + mSD = LLUUID("c96f9b1e-f589-4100-9774-d98643ce0bed"); expected = "<llsd><uuid>c96f9b1e-f589-4100-9774-d98643ce0bed</uuid></llsd>\n"; xml_test("uuid", expected); @@ -136,7 +137,7 @@ namespace tut expected = "<llsd><binary encoding=\"base64\">aGVsbG8=</binary></llsd>\n"; xml_test("binary", expected); } - + template<> template<> void sd_xml_object::test<2>() { @@ -225,7 +226,7 @@ namespace tut expected = "<llsd><map><key>baz</key><undef /><key>foo</key><string>bar</string></map></llsd>\n"; xml_test("2 element map", expected); } - + template<> template<> void sd_xml_object::test<6>() { @@ -241,7 +242,7 @@ namespace tut expected = "<llsd><binary encoding=\"base64\">Nnw2fGFzZGZoYXBweWJveHw2MGU0NGVjNS0zMDVjLTQzYzItOWExOS1iNGI4OWIxYWUyYTZ8NjBlNDRlYzUtMzA1Yy00M2MyLTlhMTktYjRiODliMWFlMmE2fDYwZTQ0ZWM1LTMwNWMtNDNjMi05YTE5LWI0Yjg5YjFhZTJhNnwwMDAwMDAwMC0wMDAwLTAwMDAtMDAwMC0wMDAwMDAwMDAwMDB8N2ZmZmZmZmZ8N2ZmZmZmZmZ8MHwwfDgyMDAwfDQ1MGZlMzk0LTI5MDQtYzlhZC0yMTRjLWEwN2ViN2ZlZWMyOXwoTm8gRGVzY3JpcHRpb24pfDB8MTB8MA==</binary></llsd>\n"; xml_test("binary", expected); } - + class TestLLSDSerializeData { public: @@ -250,9 +251,34 @@ namespace tut void doRoundTripTests(const std::string&); void checkRoundTrip(const std::string&, const LLSD& v); - - LLPointer<LLSDFormatter> mFormatter; - LLPointer<LLSDParser> mParser; + + void setFormatterParser(LLPointer<LLSDFormatter> formatter, LLPointer<LLSDParser> parser) + { + mFormatter = [formatter](const LLSD& data, std::ostream& str) + { + formatter->format(data, str); + }; + // this lambda must be mutable since otherwise the bound 'parser' + // is assumed to point to a const LLSDParser + mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) mutable + { + // reset() call is needed since test code re-uses parser object + parser->reset(); + return (parser->parse(istr, data, max_bytes) > 0); + }; + } + + void setParser(bool (*parser)(LLSD&, std::istream&, size_t)) + { + // why does LLSDSerialize::deserialize() reverse the parse() params?? + mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) + { + return (parser(data, istr, max_bytes) > 0); + }; + } + + std::function<void(const LLSD& data, std::ostream& str)> mFormatter; + std::function<bool(std::istream& istr, LLSD& data, size_t max_bytes)> mParser; }; TestLLSDSerializeData::TestLLSDSerializeData() @@ -265,12 +291,11 @@ namespace tut void TestLLSDSerializeData::checkRoundTrip(const std::string& msg, const LLSD& v) { - std::stringstream stream; - mFormatter->format(v, stream); + std::stringstream stream; + mFormatter(v, stream); //LL_INFOS() << "checkRoundTrip: length " << stream.str().length() << LL_ENDL; LLSD w; - mParser->reset(); // reset() call is needed since test code re-uses mParser - mParser->parse(stream, w, stream.str().size()); + mParser(stream, w, stream.str().size()); try { @@ -299,52 +324,52 @@ namespace tut fillmap(root[key], width, depth - 1); } } - + void TestLLSDSerializeData::doRoundTripTests(const std::string& msg) { LLSD v; checkRoundTrip(msg + " undefined", v); - + v = true; checkRoundTrip(msg + " true bool", v); - + v = false; checkRoundTrip(msg + " false bool", v); - + v = 1; checkRoundTrip(msg + " positive int", v); - + v = 0; checkRoundTrip(msg + " zero int", v); - + v = -1; checkRoundTrip(msg + " negative int", v); - + v = 1234.5f; checkRoundTrip(msg + " positive float", v); - + v = 0.0f; checkRoundTrip(msg + " zero float", v); - + v = -1234.5f; checkRoundTrip(msg + " negative float", v); - + // FIXME: need a NaN test - + v = LLUUID::null; checkRoundTrip(msg + " null uuid", v); - + LLUUID newUUID; newUUID.generate(); v = newUUID; checkRoundTrip(msg + " new uuid", v); - + v = ""; checkRoundTrip(msg + " empty string", v); - + v = "some string"; checkRoundTrip(msg + " non-empty string", v); - + v = "Second Life is a 3-D virtual world entirely built and owned by its residents. " "Since opening to the public in 2003, it has grown explosively and today is " @@ -372,7 +397,7 @@ namespace tut for (U32 block = 0x000000; block <= 0x10ffff; block += block_size) { std::ostringstream out; - + for (U32 c = block; c < block + block_size; ++c) { if (c <= 0x000001f @@ -386,7 +411,7 @@ namespace tut if (0x00fdd0 <= c && c <= 0x00fdef) { continue; } if ((c & 0x00fffe) == 0x00fffe) { continue; } // see Unicode standard, section 15.8 - + if (c <= 0x00007f) { out << (char)(c & 0x7f); @@ -410,55 +435,55 @@ namespace tut out << (char)(0x80 | ((c >> 0) & 0x3f)); } } - + v = out.str(); std::ostringstream blockmsg; blockmsg << msg << " unicode string block 0x" << std::hex << block; checkRoundTrip(blockmsg.str(), v); } - + LLDate epoch; v = epoch; checkRoundTrip(msg + " epoch date", v); - + LLDate aDay("2002-12-07T05:07:15.00Z"); v = aDay; checkRoundTrip(msg + " date", v); - + LLURI path("http://slurl.com/secondlife/Ambleside/57/104/26/"); v = path; checkRoundTrip(msg + " url", v); - + const char source[] = "it must be a blue moon again"; std::vector<U8> data; // note, includes terminating '\0' copy(&source[0], &source[sizeof(source)], back_inserter(data)); - + v = data; checkRoundTrip(msg + " binary", v); - + v = LLSD::emptyMap(); checkRoundTrip(msg + " empty map", v); - + v = LLSD::emptyMap(); v["name"] = "luke"; //v.insert("name", "luke"); v["age"] = 3; //v.insert("age", 3); checkRoundTrip(msg + " map", v); - + v.clear(); v["a"]["1"] = true; v["b"]["0"] = false; checkRoundTrip(msg + " nested maps", v); - + v = LLSD::emptyArray(); checkRoundTrip(msg + " empty array", v); - + v = LLSD::emptyArray(); v.append("ali"); v.append(28); checkRoundTrip(msg + " array", v); - + v.clear(); v[0][0] = true; v[1][0] = false; @@ -468,7 +493,7 @@ namespace tut fillmap(v, 10, 3); // 10^6 maps checkRoundTrip(msg + " many nested maps", v); } - + typedef tut::test_group<TestLLSDSerializeData> TestLLSDSerializeGroup; typedef TestLLSDSerializeGroup::object TestLLSDSerializeObject; TestLLSDSerializeGroup gTestLLSDSerializeGroup("llsd serialization"); @@ -476,35 +501,78 @@ namespace tut template<> template<> void TestLLSDSerializeObject::test<1>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY), + new LLSDNotationParser()); doRoundTripTests("pretty binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<2>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); doRoundTripTests("raw binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<3>() { - mFormatter = new LLSDXMLFormatter(); - mParser = new LLSDXMLParser(); + setFormatterParser(new LLSDXMLFormatter(), new LLSDXMLParser()); doRoundTripTests("xml serialization"); } template<> template<> void TestLLSDSerializeObject::test<4>() { - mFormatter = new LLSDBinaryFormatter(); - mParser = new LLSDBinaryParser(); + setFormatterParser(new LLSDBinaryFormatter(), new LLSDBinaryParser()); doRoundTripTests("binary serialization"); } + template<> template<> + void TestLLSDSerializeObject::test<5>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_BINARY); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_BINARY)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<6>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_XML); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_XML)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<7>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_NOTATION); + }; + setParser(LLSDSerialize::deserialize); + // In this test, serialize(LLSD_NOTATION) emits a header recognized by + // deserialize(). + doRoundTripTests("serialize(LLSD_NOTATION)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<8>() + { + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDNotationFormatter does not + // emit an llsd/notation header. + doRoundTripTests("LLSDNotationFormatter -> deserialize"); + }; /** * @class TestLLSDParsing @@ -555,7 +623,7 @@ namespace tut public: TestLLSDXMLParsing() {} }; - + typedef tut::test_group<TestLLSDXMLParsing> TestLLSDXMLParsingGroup; typedef TestLLSDXMLParsingGroup::object TestLLSDXMLParsingObject; TestLLSDXMLParsingGroup gTestLLSDXMLParsingGroup("llsd XML parsing"); @@ -586,8 +654,8 @@ namespace tut LLSD(), LLSDParser::PARSE_FAILURE); } - - + + template<> template<> void TestLLSDXMLParsingObject::test<2>() { @@ -596,7 +664,7 @@ namespace tut v["amy"] = 23; v["bob"] = LLSD(); v["cam"] = 1.23; - + ensureParse( "unknown data type", "<llsd><map>" @@ -607,16 +675,16 @@ namespace tut v, v.size() + 1); } - + template<> template<> void TestLLSDXMLParsingObject::test<3>() { // test handling of nested bad data - + LLSD v; v["amy"] = 23; v["cam"] = 1.23; - + ensureParse( "map with html", "<llsd><map>" @@ -626,7 +694,7 @@ namespace tut "</map></llsd>", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["cam"] = 1.23; @@ -639,7 +707,7 @@ namespace tut "</map></llsd>", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["bob"] = LLSD::emptyMap(); @@ -661,7 +729,7 @@ namespace tut v[0] = 23; v[1] = LLSD(); v[2] = 1.23; - + ensureParse( "array value of html", "<llsd><array>" @@ -671,7 +739,7 @@ namespace tut "</array></llsd>", v, v.size() + 1); - + v.clear(); v[0] = 23; v[1] = LLSD::emptyMap(); @@ -1225,7 +1293,7 @@ namespace tut vec[0] = 'a'; vec[1] = 'b'; vec[2] = 'c'; vec[3] = '3'; vec[4] = '2'; vec[5] = '1'; LLSD value = vec; - + vec.resize(11); vec[0] = 'b'; // for binary vec[5] = 'a'; vec[6] = 'b'; vec[7] = 'c'; @@ -1909,6 +1977,6 @@ namespace tut "This string\n" "has several\n" "lines."); - + } } |