From abe62c23d74d5319691a49881719b03cc9b5b090 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 22 Nov 2022 16:21:44 -0500 Subject: SL-18330: Make LLSDSerialize::deserialize() default to notation. LLSDSerialize::serialize() emits a header string, e.g. "" for notation format. Until now, LLSDSerialize::deserialize() has required that header to properly decode the input stream. But none of LLSDBinaryFormatter, LLSDXMLFormatter or LLSDNotationFormatter emit that header themselves. Nor do any of the Python llsd.format_binary(), format_xml() or format_notation() functions. Until now, you could not use LLSD::deserialize() to parse an arbitrary-format LLSD stream serialized by anything but LLSDSerialize::serialize(). Change LLSDSerialize::deserialize() so that if no header is recognized, instead of failing, it attempts to parse as notation. Add tests to exercise this case. The tricky part about this processing is that deserialize() necessarily reads some number of bytes from the input stream first, to try to recognize the header. If it fails to do so, it must prepend the bytes it has already read to the rest of the input stream since they're probably the beginning of the serialized data. To support this use case, introduce cat_streambuf, a std::streambuf subclass that (virtually) concatenates other std::streambuf instances. When read by a std::istream, the sequence of underlying std::streambufs appears to the consumer as a single continuous stream. --- indra/llcommon/llsdserialize.cpp | 87 ++++++++----- indra/llcommon/llstreamtools.cpp | 26 ++++ indra/llcommon/llstreamtools.h | 27 +++- indra/llcommon/tests/llsdserialize_test.cpp | 192 +++++++++++++++++++--------- 4 files changed, 233 insertions(+), 99 deletions(-) diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 8b4a0ee6d8..9ff4676b95 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -45,6 +45,7 @@ #endif #include "lldate.h" +#include "llmemorystream.h" #include "llsd.h" #include "llstring.h" #include "lluri.h" @@ -61,6 +62,23 @@ const std::string LLSD_NOTATION_HEADER("llsd/notation"); #define windowBits 15 #define ENABLE_ZLIB_GZIP 32 +// If we published this in llsdserialize.h, we could use it in the +// implementation of LLSDOStreamer's operator<<(). +template +void format_using(const LLSD& data, std::ostream& ostr, + LLSDFormatter::EFormatterOptions options=LLSDFormatter::OPTIONS_PRETTY_BINARY) +{ + LLPointer f{ new Formatter }; + f->format(data, ostr, options); +} + +template +S32 parse_using(std::istream& istr, LLSD& data, size_t max_bytes, S32 max_depth=-1) +{ + LLPointer p{ new Parser }; + return p->parse(istr, data, max_bytes, max_depth); +} + /** * LLSDSerialize */ @@ -83,10 +101,10 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize f = new LLSDXMLFormatter; break; - case LLSD_NOTATION: - str << "\n"; - f = new LLSDNotationFormatter; - break; + case LLSD_NOTATION: + str << "\n"; + f = new LLSDNotationFormatter; + break; default: LL_WARNS() << "serialize request for unknown ELLSD_Serialize" << LL_ENDL; @@ -101,10 +119,7 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize // static bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { - LLPointer p = NULL; char hdr_buf[MAX_HDR_LEN + 1] = ""; /* Flawfinder: ignore */ - int i; - int inbuf = 0; bool legacy_no_header = false; bool fail_if_not_legacy = false; std::string header; @@ -112,7 +127,10 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) /* * Get the first line before anything. */ - str.get(hdr_buf, MAX_HDR_LEN, '\n'); + // Remember str's original input position: if there's no header, we'll + // want to back up and retry. + str.get(hdr_buf, sizeof(hdr_buf), '\n'); + auto inbuf = str.gcount(); if (str.fail()) { str.clear(); @@ -122,16 +140,18 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) if (!strncasecmp(LEGACY_NON_HEADER, hdr_buf, strlen(LEGACY_NON_HEADER))) /* Flawfinder: ignore */ { legacy_no_header = true; - inbuf = (int)str.gcount(); } else { if (fail_if_not_legacy) - goto fail; + { + LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; + return false; + } /* * Remove the newline chars */ - for (i = 0; i < MAX_HDR_LEN; i++) + for (size_t i = 0; i < sizeof(hdr_buf); i++) { if (hdr_buf[i] == 0 || hdr_buf[i] == '\r' || hdr_buf[i] == '\n') @@ -149,50 +169,47 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { end = header.find_first_of(" ?", start); } - if ((start == std::string::npos) || (end == std::string::npos)) - goto fail; - - header = header.substr(start, end - start); - ws(str); + if (! (start == std::string::npos) || (end == std::string::npos)) + { + header = header.substr(start, end - start); + ws(str); + } } /* * Create the parser as appropriate */ if (legacy_no_header) { // Create a LLSD XML parser, and parse the first chunk read above - LLSDXMLParser* x = new LLSDXMLParser(); - x->parsePart(hdr_buf, inbuf); // Parse the first part that was already read - x->parseLines(str, sd); // Parse the rest of it - delete x; - return true; + LLSDXMLParser x; + x.parsePart(hdr_buf, inbuf); // Parse the first part that was already read + auto parsed = x.parseLines(str, sd); // Parse the rest of it + // Formally we should probably check (parsed != PARSE_FAILURE && + // parsed > 0), but since PARSE_FAILURE is -1, this suffices. + return (parsed > 0); } if (header == LLSD_BINARY_HEADER) { - p = new LLSDBinaryParser; + return (parse_using(str, sd, max_bytes) > 0); } else if (header == LLSD_XML_HEADER) { - p = new LLSDXMLParser; + return (parse_using(str, sd, max_bytes) > 0); } else if (header == LLSD_NOTATION_HEADER) { - p = new LLSDNotationParser; + return (parse_using(str, sd, max_bytes) > 0); } else { - LL_WARNS() << "deserialize request for unknown ELLSD_Serialize" << LL_ENDL; + LL_DEBUGS() << "deserialize request with no header, assuming notation" << LL_ENDL; + // Since we've already read 'inbuf' bytes into 'hdr_buf', prepend that + // data to whatever remains in 'str'. + LLMemoryStreamBuf already(reinterpret_cast(hdr_buf), inbuf); + cat_streambuf prebuff(&already, str.rdbuf()); + std::istream prepend(&prebuff); + return (parse_using(prepend, sd, max_bytes) > 0); } - - if (p.notNull()) - { - p->parse(str, sd, max_bytes); - return true; - } - -fail: - LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; - return false; } /** diff --git a/indra/llcommon/llstreamtools.cpp b/indra/llcommon/llstreamtools.cpp index d7a6f47932..1674f6edc2 100644 --- a/indra/llcommon/llstreamtools.cpp +++ b/indra/llcommon/llstreamtools.cpp @@ -523,3 +523,29 @@ std::istream& operator>>(std::istream& str, const char *tocheck) } return str; } + +int cat_streambuf::underflow() +{ + if (gptr() == egptr()) + { + // here because our buffer is empty + std::streamsize size; + // Until we've run out of mInputs, try reading the first of them + // into mBuffer. If that fetches some characters, break the loop. + while (! mInputs.empty() + && ! (size = mInputs.front()->sgetn(mBuffer.data(), mBuffer.size()))) + { + // We tried to read mInputs.front() but got zero characters. + // Discard the first streambuf and try the next one. + mInputs.pop_front(); + } + // Either we ran out of mInputs or we succeeded in reading some + // characters, that is, size != 0. Tell base class what we have. + setg(mBuffer.data(), mBuffer.data(), mBuffer.data() + size); + } + // If we fell out of the above loop with mBuffer still empty, return + // eof(), otherwise return the next character. + return (gptr() == egptr()) + ? std::char_traits::eof() + : std::char_traits::to_int_type(*gptr()); +} diff --git a/indra/llcommon/llstreamtools.h b/indra/llcommon/llstreamtools.h index 1b04bf91d7..997f738840 100644 --- a/indra/llcommon/llstreamtools.h +++ b/indra/llcommon/llstreamtools.h @@ -27,8 +27,10 @@ #ifndef LL_STREAM_TOOLS_H #define LL_STREAM_TOOLS_H +#include #include #include +#include // unless specifed otherwise these all return input_stream.good() @@ -113,6 +115,27 @@ LL_COMMON_API std::streamsize fullread( LL_COMMON_API std::istream& operator>>(std::istream& str, const char *tocheck); -#endif - +/** + * cat_streambuf is a std::streambuf subclass that accepts a variadic number + * of std::streambuf* (e.g. some_istream.rdbuf()) and virtually concatenates + * their contents. + */ +// derived from https://stackoverflow.com/a/49441066/5533635 +class cat_streambuf: public std::streambuf +{ +private: + std::deque mInputs; + std::vector mBuffer; + +public: + // only valid for std::streambuf* arguments + template + cat_streambuf(Inputs... inputs): + mInputs{inputs...}, + mBuffer(1024) + {} + + int underflow(); +}; +#endif diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index c246f5ee56..0334699c7f 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -60,6 +60,7 @@ using namespace boost::phoenix; #include "../test/lltut.h" #include "../test/namedtempfile.h" #include "stringize.h" +#include std::vector string_to_vector(const std::string& str) { @@ -112,7 +113,7 @@ namespace tut mSD = LLUUID::null; expected = "\n"; xml_test("null uuid", expected); - + mSD = LLUUID("c96f9b1e-f589-4100-9774-d98643ce0bed"); expected = "c96f9b1e-f589-4100-9774-d98643ce0bed\n"; xml_test("uuid", expected); @@ -136,7 +137,7 @@ namespace tut expected = "aGVsbG8=\n"; xml_test("binary", expected); } - + template<> template<> void sd_xml_object::test<2>() { @@ -225,7 +226,7 @@ namespace tut expected = "bazfoobar\n"; xml_test("2 element map", expected); } - + template<> template<> void sd_xml_object::test<6>() { @@ -241,7 +242,7 @@ namespace tut expected = "Nnw2fGFzZGZoYXBweWJveHw2MGU0NGVjNS0zMDVjLTQzYzItOWExOS1iNGI4OWIxYWUyYTZ8NjBlNDRlYzUtMzA1Yy00M2MyLTlhMTktYjRiODliMWFlMmE2fDYwZTQ0ZWM1LTMwNWMtNDNjMi05YTE5LWI0Yjg5YjFhZTJhNnwwMDAwMDAwMC0wMDAwLTAwMDAtMDAwMC0wMDAwMDAwMDAwMDB8N2ZmZmZmZmZ8N2ZmZmZmZmZ8MHwwfDgyMDAwfDQ1MGZlMzk0LTI5MDQtYzlhZC0yMTRjLWEwN2ViN2ZlZWMyOXwoTm8gRGVzY3JpcHRpb24pfDB8MTB8MA==\n"; xml_test("binary", expected); } - + class TestLLSDSerializeData { public: @@ -250,9 +251,34 @@ namespace tut void doRoundTripTests(const std::string&); void checkRoundTrip(const std::string&, const LLSD& v); - - LLPointer mFormatter; - LLPointer mParser; + + void setFormatterParser(LLPointer formatter, LLPointer parser) + { + mFormatter = [formatter](const LLSD& data, std::ostream& str) + { + formatter->format(data, str); + }; + // this lambda must be mutable since otherwise the bound 'parser' + // is assumed to point to a const LLSDParser + mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) mutable + { + // reset() call is needed since test code re-uses parser object + parser->reset(); + return (parser->parse(istr, data, max_bytes) > 0); + }; + } + + void setParser(bool (*parser)(LLSD&, std::istream&, size_t)) + { + // why does LLSDSerialize::deserialize() reverse the parse() params?? + mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) + { + return (parser(data, istr, max_bytes) > 0); + }; + } + + std::function mFormatter; + std::function mParser; }; TestLLSDSerializeData::TestLLSDSerializeData() @@ -265,12 +291,11 @@ namespace tut void TestLLSDSerializeData::checkRoundTrip(const std::string& msg, const LLSD& v) { - std::stringstream stream; - mFormatter->format(v, stream); + std::stringstream stream; + mFormatter(v, stream); //LL_INFOS() << "checkRoundTrip: length " << stream.str().length() << LL_ENDL; LLSD w; - mParser->reset(); // reset() call is needed since test code re-uses mParser - mParser->parse(stream, w, stream.str().size()); + mParser(stream, w, stream.str().size()); try { @@ -299,52 +324,52 @@ namespace tut fillmap(root[key], width, depth - 1); } } - + void TestLLSDSerializeData::doRoundTripTests(const std::string& msg) { LLSD v; checkRoundTrip(msg + " undefined", v); - + v = true; checkRoundTrip(msg + " true bool", v); - + v = false; checkRoundTrip(msg + " false bool", v); - + v = 1; checkRoundTrip(msg + " positive int", v); - + v = 0; checkRoundTrip(msg + " zero int", v); - + v = -1; checkRoundTrip(msg + " negative int", v); - + v = 1234.5f; checkRoundTrip(msg + " positive float", v); - + v = 0.0f; checkRoundTrip(msg + " zero float", v); - + v = -1234.5f; checkRoundTrip(msg + " negative float", v); - + // FIXME: need a NaN test - + v = LLUUID::null; checkRoundTrip(msg + " null uuid", v); - + LLUUID newUUID; newUUID.generate(); v = newUUID; checkRoundTrip(msg + " new uuid", v); - + v = ""; checkRoundTrip(msg + " empty string", v); - + v = "some string"; checkRoundTrip(msg + " non-empty string", v); - + v = "Second Life is a 3-D virtual world entirely built and owned by its residents. " "Since opening to the public in 2003, it has grown explosively and today is " @@ -372,7 +397,7 @@ namespace tut for (U32 block = 0x000000; block <= 0x10ffff; block += block_size) { std::ostringstream out; - + for (U32 c = block; c < block + block_size; ++c) { if (c <= 0x000001f @@ -386,7 +411,7 @@ namespace tut if (0x00fdd0 <= c && c <= 0x00fdef) { continue; } if ((c & 0x00fffe) == 0x00fffe) { continue; } // see Unicode standard, section 15.8 - + if (c <= 0x00007f) { out << (char)(c & 0x7f); @@ -410,55 +435,55 @@ namespace tut out << (char)(0x80 | ((c >> 0) & 0x3f)); } } - + v = out.str(); std::ostringstream blockmsg; blockmsg << msg << " unicode string block 0x" << std::hex << block; checkRoundTrip(blockmsg.str(), v); } - + LLDate epoch; v = epoch; checkRoundTrip(msg + " epoch date", v); - + LLDate aDay("2002-12-07T05:07:15.00Z"); v = aDay; checkRoundTrip(msg + " date", v); - + LLURI path("http://slurl.com/secondlife/Ambleside/57/104/26/"); v = path; checkRoundTrip(msg + " url", v); - + const char source[] = "it must be a blue moon again"; std::vector data; // note, includes terminating '\0' copy(&source[0], &source[sizeof(source)], back_inserter(data)); - + v = data; checkRoundTrip(msg + " binary", v); - + v = LLSD::emptyMap(); checkRoundTrip(msg + " empty map", v); - + v = LLSD::emptyMap(); v["name"] = "luke"; //v.insert("name", "luke"); v["age"] = 3; //v.insert("age", 3); checkRoundTrip(msg + " map", v); - + v.clear(); v["a"]["1"] = true; v["b"]["0"] = false; checkRoundTrip(msg + " nested maps", v); - + v = LLSD::emptyArray(); checkRoundTrip(msg + " empty array", v); - + v = LLSD::emptyArray(); v.append("ali"); v.append(28); checkRoundTrip(msg + " array", v); - + v.clear(); v[0][0] = true; v[1][0] = false; @@ -468,7 +493,7 @@ namespace tut fillmap(v, 10, 3); // 10^6 maps checkRoundTrip(msg + " many nested maps", v); } - + typedef tut::test_group TestLLSDSerializeGroup; typedef TestLLSDSerializeGroup::object TestLLSDSerializeObject; TestLLSDSerializeGroup gTestLLSDSerializeGroup("llsd serialization"); @@ -476,35 +501,78 @@ namespace tut template<> template<> void TestLLSDSerializeObject::test<1>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY), + new LLSDNotationParser()); doRoundTripTests("pretty binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<2>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); doRoundTripTests("raw binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<3>() { - mFormatter = new LLSDXMLFormatter(); - mParser = new LLSDXMLParser(); + setFormatterParser(new LLSDXMLFormatter(), new LLSDXMLParser()); doRoundTripTests("xml serialization"); } template<> template<> void TestLLSDSerializeObject::test<4>() { - mFormatter = new LLSDBinaryFormatter(); - mParser = new LLSDBinaryParser(); + setFormatterParser(new LLSDBinaryFormatter(), new LLSDBinaryParser()); doRoundTripTests("binary serialization"); } + template<> template<> + void TestLLSDSerializeObject::test<5>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_BINARY); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_BINARY)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<6>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_XML); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_XML)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<7>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_NOTATION); + }; + setParser(LLSDSerialize::deserialize); + // In this test, serialize(LLSD_NOTATION) emits a header recognized by + // deserialize(). + doRoundTripTests("serialize(LLSD_NOTATION)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<8>() + { + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDNotationFormatter does not + // emit an llsd/notation header. + doRoundTripTests("LLSDNotationFormatter -> deserialize"); + }; /** * @class TestLLSDParsing @@ -555,7 +623,7 @@ namespace tut public: TestLLSDXMLParsing() {} }; - + typedef tut::test_group TestLLSDXMLParsingGroup; typedef TestLLSDXMLParsingGroup::object TestLLSDXMLParsingObject; TestLLSDXMLParsingGroup gTestLLSDXMLParsingGroup("llsd XML parsing"); @@ -586,8 +654,8 @@ namespace tut LLSD(), LLSDParser::PARSE_FAILURE); } - - + + template<> template<> void TestLLSDXMLParsingObject::test<2>() { @@ -596,7 +664,7 @@ namespace tut v["amy"] = 23; v["bob"] = LLSD(); v["cam"] = 1.23; - + ensureParse( "unknown data type", "" @@ -607,16 +675,16 @@ namespace tut v, v.size() + 1); } - + template<> template<> void TestLLSDXMLParsingObject::test<3>() { // test handling of nested bad data - + LLSD v; v["amy"] = 23; v["cam"] = 1.23; - + ensureParse( "map with html", "" @@ -626,7 +694,7 @@ namespace tut "", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["cam"] = 1.23; @@ -639,7 +707,7 @@ namespace tut "", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["bob"] = LLSD::emptyMap(); @@ -661,7 +729,7 @@ namespace tut v[0] = 23; v[1] = LLSD(); v[2] = 1.23; - + ensureParse( "array value of html", "" @@ -671,7 +739,7 @@ namespace tut "", v, v.size() + 1); - + v.clear(); v[0] = 23; v[1] = LLSD::emptyMap(); @@ -1225,7 +1293,7 @@ namespace tut vec[0] = 'a'; vec[1] = 'b'; vec[2] = 'c'; vec[3] = '3'; vec[4] = '2'; vec[5] = '1'; LLSD value = vec; - + vec.resize(11); vec[0] = 'b'; // for binary vec[5] = 'a'; vec[6] = 'b'; vec[7] = 'c'; @@ -1909,6 +1977,6 @@ namespace tut "This string\n" "has several\n" "lines."); - + } } -- cgit v1.2.3