From abe62c23d74d5319691a49881719b03cc9b5b090 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 22 Nov 2022 16:21:44 -0500 Subject: SL-18330: Make LLSDSerialize::deserialize() default to notation. LLSDSerialize::serialize() emits a header string, e.g. "" for notation format. Until now, LLSDSerialize::deserialize() has required that header to properly decode the input stream. But none of LLSDBinaryFormatter, LLSDXMLFormatter or LLSDNotationFormatter emit that header themselves. Nor do any of the Python llsd.format_binary(), format_xml() or format_notation() functions. Until now, you could not use LLSD::deserialize() to parse an arbitrary-format LLSD stream serialized by anything but LLSDSerialize::serialize(). Change LLSDSerialize::deserialize() so that if no header is recognized, instead of failing, it attempts to parse as notation. Add tests to exercise this case. The tricky part about this processing is that deserialize() necessarily reads some number of bytes from the input stream first, to try to recognize the header. If it fails to do so, it must prepend the bytes it has already read to the rest of the input stream since they're probably the beginning of the serialized data. To support this use case, introduce cat_streambuf, a std::streambuf subclass that (virtually) concatenates other std::streambuf instances. When read by a std::istream, the sequence of underlying std::streambufs appears to the consumer as a single continuous stream. --- indra/llcommon/llsdserialize.cpp | 87 ++++++++----- indra/llcommon/llstreamtools.cpp | 26 ++++ indra/llcommon/llstreamtools.h | 27 +++- indra/llcommon/tests/llsdserialize_test.cpp | 192 +++++++++++++++++++--------- 4 files changed, 233 insertions(+), 99 deletions(-) diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 8b4a0ee6d8..9ff4676b95 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -45,6 +45,7 @@ #endif #include "lldate.h" +#include "llmemorystream.h" #include "llsd.h" #include "llstring.h" #include "lluri.h" @@ -61,6 +62,23 @@ const std::string LLSD_NOTATION_HEADER("llsd/notation"); #define windowBits 15 #define ENABLE_ZLIB_GZIP 32 +// If we published this in llsdserialize.h, we could use it in the +// implementation of LLSDOStreamer's operator<<(). +template +void format_using(const LLSD& data, std::ostream& ostr, + LLSDFormatter::EFormatterOptions options=LLSDFormatter::OPTIONS_PRETTY_BINARY) +{ + LLPointer f{ new Formatter }; + f->format(data, ostr, options); +} + +template +S32 parse_using(std::istream& istr, LLSD& data, size_t max_bytes, S32 max_depth=-1) +{ + LLPointer p{ new Parser }; + return p->parse(istr, data, max_bytes, max_depth); +} + /** * LLSDSerialize */ @@ -83,10 +101,10 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize f = new LLSDXMLFormatter; break; - case LLSD_NOTATION: - str << "\n"; - f = new LLSDNotationFormatter; - break; + case LLSD_NOTATION: + str << "\n"; + f = new LLSDNotationFormatter; + break; default: LL_WARNS() << "serialize request for unknown ELLSD_Serialize" << LL_ENDL; @@ -101,10 +119,7 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize // static bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { - LLPointer p = NULL; char hdr_buf[MAX_HDR_LEN + 1] = ""; /* Flawfinder: ignore */ - int i; - int inbuf = 0; bool legacy_no_header = false; bool fail_if_not_legacy = false; std::string header; @@ -112,7 +127,10 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) /* * Get the first line before anything. */ - str.get(hdr_buf, MAX_HDR_LEN, '\n'); + // Remember str's original input position: if there's no header, we'll + // want to back up and retry. + str.get(hdr_buf, sizeof(hdr_buf), '\n'); + auto inbuf = str.gcount(); if (str.fail()) { str.clear(); @@ -122,16 +140,18 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) if (!strncasecmp(LEGACY_NON_HEADER, hdr_buf, strlen(LEGACY_NON_HEADER))) /* Flawfinder: ignore */ { legacy_no_header = true; - inbuf = (int)str.gcount(); } else { if (fail_if_not_legacy) - goto fail; + { + LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; + return false; + } /* * Remove the newline chars */ - for (i = 0; i < MAX_HDR_LEN; i++) + for (size_t i = 0; i < sizeof(hdr_buf); i++) { if (hdr_buf[i] == 0 || hdr_buf[i] == '\r' || hdr_buf[i] == '\n') @@ -149,50 +169,47 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { end = header.find_first_of(" ?", start); } - if ((start == std::string::npos) || (end == std::string::npos)) - goto fail; - - header = header.substr(start, end - start); - ws(str); + if (! (start == std::string::npos) || (end == std::string::npos)) + { + header = header.substr(start, end - start); + ws(str); + } } /* * Create the parser as appropriate */ if (legacy_no_header) { // Create a LLSD XML parser, and parse the first chunk read above - LLSDXMLParser* x = new LLSDXMLParser(); - x->parsePart(hdr_buf, inbuf); // Parse the first part that was already read - x->parseLines(str, sd); // Parse the rest of it - delete x; - return true; + LLSDXMLParser x; + x.parsePart(hdr_buf, inbuf); // Parse the first part that was already read + auto parsed = x.parseLines(str, sd); // Parse the rest of it + // Formally we should probably check (parsed != PARSE_FAILURE && + // parsed > 0), but since PARSE_FAILURE is -1, this suffices. + return (parsed > 0); } if (header == LLSD_BINARY_HEADER) { - p = new LLSDBinaryParser; + return (parse_using(str, sd, max_bytes) > 0); } else if (header == LLSD_XML_HEADER) { - p = new LLSDXMLParser; + return (parse_using(str, sd, max_bytes) > 0); } else if (header == LLSD_NOTATION_HEADER) { - p = new LLSDNotationParser; + return (parse_using(str, sd, max_bytes) > 0); } else { - LL_WARNS() << "deserialize request for unknown ELLSD_Serialize" << LL_ENDL; + LL_DEBUGS() << "deserialize request with no header, assuming notation" << LL_ENDL; + // Since we've already read 'inbuf' bytes into 'hdr_buf', prepend that + // data to whatever remains in 'str'. + LLMemoryStreamBuf already(reinterpret_cast(hdr_buf), inbuf); + cat_streambuf prebuff(&already, str.rdbuf()); + std::istream prepend(&prebuff); + return (parse_using(prepend, sd, max_bytes) > 0); } - - if (p.notNull()) - { - p->parse(str, sd, max_bytes); - return true; - } - -fail: - LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; - return false; } /** diff --git a/indra/llcommon/llstreamtools.cpp b/indra/llcommon/llstreamtools.cpp index d7a6f47932..1674f6edc2 100644 --- a/indra/llcommon/llstreamtools.cpp +++ b/indra/llcommon/llstreamtools.cpp @@ -523,3 +523,29 @@ std::istream& operator>>(std::istream& str, const char *tocheck) } return str; } + +int cat_streambuf::underflow() +{ + if (gptr() == egptr()) + { + // here because our buffer is empty + std::streamsize size; + // Until we've run out of mInputs, try reading the first of them + // into mBuffer. If that fetches some characters, break the loop. + while (! mInputs.empty() + && ! (size = mInputs.front()->sgetn(mBuffer.data(), mBuffer.size()))) + { + // We tried to read mInputs.front() but got zero characters. + // Discard the first streambuf and try the next one. + mInputs.pop_front(); + } + // Either we ran out of mInputs or we succeeded in reading some + // characters, that is, size != 0. Tell base class what we have. + setg(mBuffer.data(), mBuffer.data(), mBuffer.data() + size); + } + // If we fell out of the above loop with mBuffer still empty, return + // eof(), otherwise return the next character. + return (gptr() == egptr()) + ? std::char_traits::eof() + : std::char_traits::to_int_type(*gptr()); +} diff --git a/indra/llcommon/llstreamtools.h b/indra/llcommon/llstreamtools.h index 1b04bf91d7..997f738840 100644 --- a/indra/llcommon/llstreamtools.h +++ b/indra/llcommon/llstreamtools.h @@ -27,8 +27,10 @@ #ifndef LL_STREAM_TOOLS_H #define LL_STREAM_TOOLS_H +#include #include #include +#include // unless specifed otherwise these all return input_stream.good() @@ -113,6 +115,27 @@ LL_COMMON_API std::streamsize fullread( LL_COMMON_API std::istream& operator>>(std::istream& str, const char *tocheck); -#endif - +/** + * cat_streambuf is a std::streambuf subclass that accepts a variadic number + * of std::streambuf* (e.g. some_istream.rdbuf()) and virtually concatenates + * their contents. + */ +// derived from https://stackoverflow.com/a/49441066/5533635 +class cat_streambuf: public std::streambuf +{ +private: + std::deque mInputs; + std::vector mBuffer; + +public: + // only valid for std::streambuf* arguments + template + cat_streambuf(Inputs... inputs): + mInputs{inputs...}, + mBuffer(1024) + {} + + int underflow(); +}; +#endif diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index c246f5ee56..0334699c7f 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -60,6 +60,7 @@ using namespace boost::phoenix; #include "../test/lltut.h" #include "../test/namedtempfile.h" #include "stringize.h" +#include std::vector string_to_vector(const std::string& str) { @@ -112,7 +113,7 @@ namespace tut mSD = LLUUID::null; expected = "\n"; xml_test("null uuid", expected); - + mSD = LLUUID("c96f9b1e-f589-4100-9774-d98643ce0bed"); expected = "c96f9b1e-f589-4100-9774-d98643ce0bed\n"; xml_test("uuid", expected); @@ -136,7 +137,7 @@ namespace tut expected = "aGVsbG8=\n"; xml_test("binary", expected); } - + template<> template<> void sd_xml_object::test<2>() { @@ -225,7 +226,7 @@ namespace tut expected = "bazfoobar\n"; xml_test("2 element map", expected); } - + template<> template<> void sd_xml_object::test<6>() { @@ -241,7 +242,7 @@ namespace tut expected = "Nnw2fGFzZGZoYXBweWJveHw2MGU0NGVjNS0zMDVjLTQzYzItOWExOS1iNGI4OWIxYWUyYTZ8NjBlNDRlYzUtMzA1Yy00M2MyLTlhMTktYjRiODliMWFlMmE2fDYwZTQ0ZWM1LTMwNWMtNDNjMi05YTE5LWI0Yjg5YjFhZTJhNnwwMDAwMDAwMC0wMDAwLTAwMDAtMDAwMC0wMDAwMDAwMDAwMDB8N2ZmZmZmZmZ8N2ZmZmZmZmZ8MHwwfDgyMDAwfDQ1MGZlMzk0LTI5MDQtYzlhZC0yMTRjLWEwN2ViN2ZlZWMyOXwoTm8gRGVzY3JpcHRpb24pfDB8MTB8MA==\n"; xml_test("binary", expected); } - + class TestLLSDSerializeData { public: @@ -250,9 +251,34 @@ namespace tut void doRoundTripTests(const std::string&); void checkRoundTrip(const std::string&, const LLSD& v); - - LLPointer mFormatter; - LLPointer mParser; + + void setFormatterParser(LLPointer formatter, LLPointer parser) + { + mFormatter = [formatter](const LLSD& data, std::ostream& str) + { + formatter->format(data, str); + }; + // this lambda must be mutable since otherwise the bound 'parser' + // is assumed to point to a const LLSDParser + mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) mutable + { + // reset() call is needed since test code re-uses parser object + parser->reset(); + return (parser->parse(istr, data, max_bytes) > 0); + }; + } + + void setParser(bool (*parser)(LLSD&, std::istream&, size_t)) + { + // why does LLSDSerialize::deserialize() reverse the parse() params?? + mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) + { + return (parser(data, istr, max_bytes) > 0); + }; + } + + std::function mFormatter; + std::function mParser; }; TestLLSDSerializeData::TestLLSDSerializeData() @@ -265,12 +291,11 @@ namespace tut void TestLLSDSerializeData::checkRoundTrip(const std::string& msg, const LLSD& v) { - std::stringstream stream; - mFormatter->format(v, stream); + std::stringstream stream; + mFormatter(v, stream); //LL_INFOS() << "checkRoundTrip: length " << stream.str().length() << LL_ENDL; LLSD w; - mParser->reset(); // reset() call is needed since test code re-uses mParser - mParser->parse(stream, w, stream.str().size()); + mParser(stream, w, stream.str().size()); try { @@ -299,52 +324,52 @@ namespace tut fillmap(root[key], width, depth - 1); } } - + void TestLLSDSerializeData::doRoundTripTests(const std::string& msg) { LLSD v; checkRoundTrip(msg + " undefined", v); - + v = true; checkRoundTrip(msg + " true bool", v); - + v = false; checkRoundTrip(msg + " false bool", v); - + v = 1; checkRoundTrip(msg + " positive int", v); - + v = 0; checkRoundTrip(msg + " zero int", v); - + v = -1; checkRoundTrip(msg + " negative int", v); - + v = 1234.5f; checkRoundTrip(msg + " positive float", v); - + v = 0.0f; checkRoundTrip(msg + " zero float", v); - + v = -1234.5f; checkRoundTrip(msg + " negative float", v); - + // FIXME: need a NaN test - + v = LLUUID::null; checkRoundTrip(msg + " null uuid", v); - + LLUUID newUUID; newUUID.generate(); v = newUUID; checkRoundTrip(msg + " new uuid", v); - + v = ""; checkRoundTrip(msg + " empty string", v); - + v = "some string"; checkRoundTrip(msg + " non-empty string", v); - + v = "Second Life is a 3-D virtual world entirely built and owned by its residents. " "Since opening to the public in 2003, it has grown explosively and today is " @@ -372,7 +397,7 @@ namespace tut for (U32 block = 0x000000; block <= 0x10ffff; block += block_size) { std::ostringstream out; - + for (U32 c = block; c < block + block_size; ++c) { if (c <= 0x000001f @@ -386,7 +411,7 @@ namespace tut if (0x00fdd0 <= c && c <= 0x00fdef) { continue; } if ((c & 0x00fffe) == 0x00fffe) { continue; } // see Unicode standard, section 15.8 - + if (c <= 0x00007f) { out << (char)(c & 0x7f); @@ -410,55 +435,55 @@ namespace tut out << (char)(0x80 | ((c >> 0) & 0x3f)); } } - + v = out.str(); std::ostringstream blockmsg; blockmsg << msg << " unicode string block 0x" << std::hex << block; checkRoundTrip(blockmsg.str(), v); } - + LLDate epoch; v = epoch; checkRoundTrip(msg + " epoch date", v); - + LLDate aDay("2002-12-07T05:07:15.00Z"); v = aDay; checkRoundTrip(msg + " date", v); - + LLURI path("http://slurl.com/secondlife/Ambleside/57/104/26/"); v = path; checkRoundTrip(msg + " url", v); - + const char source[] = "it must be a blue moon again"; std::vector data; // note, includes terminating '\0' copy(&source[0], &source[sizeof(source)], back_inserter(data)); - + v = data; checkRoundTrip(msg + " binary", v); - + v = LLSD::emptyMap(); checkRoundTrip(msg + " empty map", v); - + v = LLSD::emptyMap(); v["name"] = "luke"; //v.insert("name", "luke"); v["age"] = 3; //v.insert("age", 3); checkRoundTrip(msg + " map", v); - + v.clear(); v["a"]["1"] = true; v["b"]["0"] = false; checkRoundTrip(msg + " nested maps", v); - + v = LLSD::emptyArray(); checkRoundTrip(msg + " empty array", v); - + v = LLSD::emptyArray(); v.append("ali"); v.append(28); checkRoundTrip(msg + " array", v); - + v.clear(); v[0][0] = true; v[1][0] = false; @@ -468,7 +493,7 @@ namespace tut fillmap(v, 10, 3); // 10^6 maps checkRoundTrip(msg + " many nested maps", v); } - + typedef tut::test_group TestLLSDSerializeGroup; typedef TestLLSDSerializeGroup::object TestLLSDSerializeObject; TestLLSDSerializeGroup gTestLLSDSerializeGroup("llsd serialization"); @@ -476,35 +501,78 @@ namespace tut template<> template<> void TestLLSDSerializeObject::test<1>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY), + new LLSDNotationParser()); doRoundTripTests("pretty binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<2>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); doRoundTripTests("raw binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<3>() { - mFormatter = new LLSDXMLFormatter(); - mParser = new LLSDXMLParser(); + setFormatterParser(new LLSDXMLFormatter(), new LLSDXMLParser()); doRoundTripTests("xml serialization"); } template<> template<> void TestLLSDSerializeObject::test<4>() { - mFormatter = new LLSDBinaryFormatter(); - mParser = new LLSDBinaryParser(); + setFormatterParser(new LLSDBinaryFormatter(), new LLSDBinaryParser()); doRoundTripTests("binary serialization"); } + template<> template<> + void TestLLSDSerializeObject::test<5>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_BINARY); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_BINARY)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<6>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_XML); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_XML)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<7>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_NOTATION); + }; + setParser(LLSDSerialize::deserialize); + // In this test, serialize(LLSD_NOTATION) emits a header recognized by + // deserialize(). + doRoundTripTests("serialize(LLSD_NOTATION)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<8>() + { + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDNotationFormatter does not + // emit an llsd/notation header. + doRoundTripTests("LLSDNotationFormatter -> deserialize"); + }; /** * @class TestLLSDParsing @@ -555,7 +623,7 @@ namespace tut public: TestLLSDXMLParsing() {} }; - + typedef tut::test_group TestLLSDXMLParsingGroup; typedef TestLLSDXMLParsingGroup::object TestLLSDXMLParsingObject; TestLLSDXMLParsingGroup gTestLLSDXMLParsingGroup("llsd XML parsing"); @@ -586,8 +654,8 @@ namespace tut LLSD(), LLSDParser::PARSE_FAILURE); } - - + + template<> template<> void TestLLSDXMLParsingObject::test<2>() { @@ -596,7 +664,7 @@ namespace tut v["amy"] = 23; v["bob"] = LLSD(); v["cam"] = 1.23; - + ensureParse( "unknown data type", "" @@ -607,16 +675,16 @@ namespace tut v, v.size() + 1); } - + template<> template<> void TestLLSDXMLParsingObject::test<3>() { // test handling of nested bad data - + LLSD v; v["amy"] = 23; v["cam"] = 1.23; - + ensureParse( "map with html", "" @@ -626,7 +694,7 @@ namespace tut "", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["cam"] = 1.23; @@ -639,7 +707,7 @@ namespace tut "", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["bob"] = LLSD::emptyMap(); @@ -661,7 +729,7 @@ namespace tut v[0] = 23; v[1] = LLSD(); v[2] = 1.23; - + ensureParse( "array value of html", "" @@ -671,7 +739,7 @@ namespace tut "", v, v.size() + 1); - + v.clear(); v[0] = 23; v[1] = LLSD::emptyMap(); @@ -1225,7 +1293,7 @@ namespace tut vec[0] = 'a'; vec[1] = 'b'; vec[2] = 'c'; vec[3] = '3'; vec[4] = '2'; vec[5] = '1'; LLSD value = vec; - + vec.resize(11); vec[0] = 'b'; // for binary vec[5] = 'a'; vec[6] = 'b'; vec[7] = 'c'; @@ -1909,6 +1977,6 @@ namespace tut "This string\n" "has several\n" "lines."); - + } } -- cgit v1.2.3 From 42e0787446d35bf967ff088148f8ad5f902e929b Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Wed, 23 Nov 2022 17:15:15 -0500 Subject: SL-18330: LLSDSerialize::deserialize() w/o hdr uses XML or notation Absent a header from LLSDSerialize::serialize(), make deserialize() distinguish between XML or notation by recognizing an initial '<'. --- indra/llcommon/llsdserialize.cpp | 87 ++++++++++++++--------------- indra/llcommon/tests/llsdserialize_test.cpp | 28 ++++++++++ 2 files changed, 70 insertions(+), 45 deletions(-) diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 9ff4676b95..dc8f8f5737 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -120,9 +120,7 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { char hdr_buf[MAX_HDR_LEN + 1] = ""; /* Flawfinder: ignore */ - bool legacy_no_header = false; bool fail_if_not_legacy = false; - std::string header; /* * Get the first line before anything. @@ -131,6 +129,7 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) // want to back up and retry. str.get(hdr_buf, sizeof(hdr_buf), '\n'); auto inbuf = str.gcount(); + std::string header{ hdr_buf, inbuf }; if (str.fail()) { str.clear(); @@ -138,38 +137,36 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) } if (!strncasecmp(LEGACY_NON_HEADER, hdr_buf, strlen(LEGACY_NON_HEADER))) /* Flawfinder: ignore */ + { // Create a LLSD XML parser, and parse the first chunk read above. + LLSDXMLParser x; + x.parsePart(hdr_buf, inbuf); // Parse the first part that was already read + auto parsed = x.parse(str, sd, max_bytes - inbuf); // Parse the rest of it + // Formally we should probably check (parsed != PARSE_FAILURE && + // parsed > 0), but since PARSE_FAILURE is -1, this suffices. + return (parsed > 0); + } + + if (fail_if_not_legacy) { - legacy_no_header = true; + LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; + return false; } - else + + /* + * Remove the newline chars + */ + auto lastchar = header.find_last_not_of("\r\n"); + if (lastchar != std::string::npos) { - if (fail_if_not_legacy) - { - LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; - return false; - } - /* - * Remove the newline chars - */ - for (size_t i = 0; i < sizeof(hdr_buf); i++) - { - if (hdr_buf[i] == 0 || hdr_buf[i] == '\r' || - hdr_buf[i] == '\n') - { - hdr_buf[i] = 0; - break; - } - } - header = hdr_buf; + header.erase(lastchar+1); + } - std::string::size_type start = std::string::npos; - std::string::size_type end = std::string::npos; - start = header.find_first_not_of(" header syntax + auto start = header.find_first_not_of(" 0), but since PARSE_FAILURE is -1, this suffices. - return (parsed > 0); - } - if (header == LLSD_BINARY_HEADER) { return (parse_using(str, sd, max_bytes) > 0); @@ -200,15 +187,27 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) { return (parse_using(str, sd, max_bytes) > 0); } - else + else // no header we recognize { - LL_DEBUGS() << "deserialize request with no header, assuming notation" << LL_ENDL; + LLPointer p; + if (inbuf && hdr_buf[0] == '<') + { + // looks like XML + LL_DEBUGS() << "deserialize request with no header, assuming XML" << LL_ENDL; + p = new LLSDXMLParser; + } + else + { + // assume notation + LL_DEBUGS() << "deserialize request with no header, assuming notation" << LL_ENDL; + p = new LLSDNotationParser; + } // Since we've already read 'inbuf' bytes into 'hdr_buf', prepend that // data to whatever remains in 'str'. LLMemoryStreamBuf already(reinterpret_cast(hdr_buf), inbuf); cat_streambuf prebuff(&already, str.rdbuf()); std::istream prepend(&prebuff); - return (parse_using(prepend, sd, max_bytes) > 0); + return (p->parse(prepend, sd, max_bytes) > 0); } } @@ -2411,5 +2410,3 @@ U8* unzip_llsdNavMesh( bool& valid, unsigned int& outsize, std::istream& is, S32 return result; } - - diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index 0334699c7f..bbbc32214b 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -574,6 +574,34 @@ namespace tut doRoundTripTests("LLSDNotationFormatter -> deserialize"); }; + template<> template<> + void TestLLSDSerializeObject::test<9>() + { + setFormatterParser(new LLSDXMLFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDXMLParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDXMLFormatter does not + // emit an LLSD/XML header. + doRoundTripTests("LLSDXMLFormatter -> deserialize"); + }; + +/*==========================================================================*| + // We do not expect this test to succeed. Without a header, neither + // notation LLSD nor binary LLSD reliably start with a distinct character, + // the way XML LLSD starts with '<'. By convention, we default to notation + // rather than binary. + template<> template<> + void TestLLSDSerializeObject::test<10>() + { + setFormatterParser(new LLSDBinaryFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDBinaryParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDBinaryFormatter does not + // emit an LLSD/Binary header. + doRoundTripTests("LLSDBinaryFormatter -> deserialize"); + }; +|*==========================================================================*/ + /** * @class TestLLSDParsing * @brief Base class for of a parse tester. -- cgit v1.2.3 From b180e4de23cb54ec385e2d999fc5fdd4ea804ba4 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 29 Nov 2022 14:03:02 -0500 Subject: SL-18330: WIP: Send LLLeap to child as binary LLSD; generic parser. Since parsing binary LLSD is faster than parsing notation LLSD, send data from the viewer to the LEAP plugin child process's stdin in binary instead of notation. Similarly, instead of parsing the child process's stdout using specifically a notation parser, use the generic LLSDSerialize::deserialize() LLSD parser. Add more LLSDSerialize Python compatibility tests. --- indra/llcommon/llleap.cpp | 28 +-- indra/llcommon/tests/llsdserialize_test.cpp | 286 +++++++++++++++++++--------- 2 files changed, 211 insertions(+), 103 deletions(-) diff --git a/indra/llcommon/llleap.cpp b/indra/llcommon/llleap.cpp index 2704f8b6de..f71eaf92c4 100644 --- a/indra/llcommon/llleap.cpp +++ b/indra/llcommon/llleap.cpp @@ -204,30 +204,35 @@ public: LLSD packet(LLSDMap("pump", pump)("data", data)); std::ostringstream buffer; - buffer << LLSDNotationStreamer(packet); + // SL-18330: for large data blocks, it's much faster to parse binary + // LLSD than notation LLSD. Use serialize(LLSD_BINARY) rather than + // directly calling LLSDBinaryFormatter because, unlike the latter, + // serialize() prepends the relevant header, needed by a general- + // purpose LLSD parser to distinguish binary from notation. + LLSDSerialize::serialize(packet, buffer, LLSDSerialize::LLSD_BINARY, + LLSDFormatter::OPTIONS_NONE); /*==========================================================================*| // DEBUGGING ONLY: don't copy str() if we can avoid it. std::string strdata(buffer.str()); if (std::size_t(buffer.tellp()) != strdata.length()) { - LL_ERRS("LLLeap") << "tellp() -> " << buffer.tellp() << " != " + LL_ERRS("LLLeap") << "tellp() -> " << static_cast(buffer.tellp()) << " != " << "str().length() -> " << strdata.length() << LL_ENDL; } // DEBUGGING ONLY: reading back is terribly inefficient. std::istringstream readback(strdata); LLSD echo; - LLPointer parser(new LLSDNotationParser()); - S32 parse_status(parser->parse(readback, echo, strdata.length())); - if (parse_status == LLSDParser::PARSE_FAILURE) + bool parse_status(LLSDSerialize::deserialize(echo, readback, strdata.length())); + if (! parse_status) { - LL_ERRS("LLLeap") << "LLSDNotationParser() cannot parse output of " - << "LLSDNotationStreamer()" << LL_ENDL; + LL_ERRS("LLLeap") << "LLSDSerialize::deserialize() cannot parse output of " + << "LLSDSerialize::serialize(LLSD_BINARY)" << LL_ENDL; } if (! llsd_equals(echo, packet)) { - LL_ERRS("LLLeap") << "LLSDNotationParser() produced different LLSD " - << "than passed to LLSDNotationStreamer()" << LL_ENDL; + LL_ERRS("LLLeap") << "LLSDSerialize::deserialize() returned different LLSD " + << "than passed to LLSDSerialize::serialize()" << LL_ENDL; } |*==========================================================================*/ @@ -312,9 +317,8 @@ public: LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got " << childout.size() << ", parsing LLSD" << LL_ENDL; LLSD data; - LLPointer parser(new LLSDNotationParser()); - S32 parse_status(parser->parse(childout.get_istream(), data, mExpect)); - if (parse_status == LLSDParser::PARSE_FAILURE) + bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect)); + if (! parse_status) { bad_protocol("unparseable LLSD data"); } diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index bbbc32214b..601f2c580d 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -46,7 +46,6 @@ typedef U32 uint32_t; #include "boost/range.hpp" #include "boost/foreach.hpp" -#include "boost/function.hpp" #include "boost/bind.hpp" #include "boost/phoenix/bind/bind_function.hpp" #include "boost/phoenix/core/argument.hpp" @@ -62,6 +61,9 @@ using namespace boost::phoenix; #include "stringize.h" #include +typedef std::function FormatterFunction; +typedef std::function ParserFunction; + std::vector string_to_vector(const std::string& str) { return std::vector(str.begin(), str.end()); @@ -277,8 +279,8 @@ namespace tut }; } - std::function mFormatter; - std::function mParser; + FormatterFunction mFormatter; + ParserFunction mParser; }; TestLLSDSerializeData::TestLLSDSerializeData() @@ -1790,85 +1792,83 @@ namespace tut ensureBinaryAndXML("map", test); } - struct TestPythonCompatible + // helper for TestPythonCompatible + static std::string import_llsd("import os.path\n" + "import sys\n" + "try:\n" + // new freestanding llsd package + " import llsd\n" + "except ImportError:\n" + // older llbase.llsd module + " from llbase import llsd\n"); + + // helper for TestPythonCompatible + template + void python(const std::string& desc, const CONTENT& script, int expect=0) { - TestPythonCompatible(): - // Note the peculiar insertion of __FILE__ into this string. Since - // this script is being written into a platform-dependent temp - // directory, we can't locate indra/lib/python relative to - // Python's __file__. Use __FILE__ instead, navigating relative - // to this C++ source file. Use Python raw-string syntax so - // Windows pathname backslashes won't mislead Python's string - // scanner. - import_llsd("import os.path\n" - "import sys\n" - "from llbase import llsd\n") - {} - ~TestPythonCompatible() {} + auto PYTHON(LLStringUtil::getenv("PYTHON")); + ensure("Set $PYTHON to the Python interpreter", !PYTHON.empty()); - std::string import_llsd; + NamedTempFile scriptfile("py", script); - template - void python(const std::string& desc, const CONTENT& script, int expect=0) +#if LL_WINDOWS + std::string q("\""); + std::string qPYTHON(q + PYTHON + q); + std::string qscript(q + scriptfile.getName() + q); + int rc = _spawnl(_P_WAIT, PYTHON.c_str(), qPYTHON.c_str(), qscript.c_str(), NULL); + if (rc == -1) { - auto PYTHON(LLStringUtil::getenv("PYTHON")); - ensure("Set $PYTHON to the Python interpreter", !PYTHON.empty()); - - NamedTempFile scriptfile("py", script); + char buffer[256]; + strerror_s(buffer, errno); // C++ can infer the buffer size! :-O + ensure(STRINGIZE("Couldn't run Python " << desc << "script: " << buffer), false); + } + else + { + ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), rc, expect); + } -#if LL_WINDOWS - std::string q("\""); - std::string qPYTHON(q + PYTHON + q); - std::string qscript(q + scriptfile.getName() + q); - int rc = _spawnl(_P_WAIT, PYTHON.c_str(), qPYTHON.c_str(), qscript.c_str(), NULL); - if (rc == -1) - { - char buffer[256]; - strerror_s(buffer, errno); // C++ can infer the buffer size! :-O - ensure(STRINGIZE("Couldn't run Python " << desc << "script: " << buffer), false); - } - else +#else // LL_DARWIN, LL_LINUX + LLProcess::Params params; + params.executable = PYTHON; + params.args.add(scriptfile.getName()); + LLProcessPtr py(LLProcess::create(params)); + ensure(STRINGIZE("Couldn't launch " << desc << " script"), bool(py)); + // Implementing timeout would mean messing with alarm() and + // catching SIGALRM... later maybe... + int status(0); + if (waitpid(py->getProcessID(), &status, 0) == -1) + { + int waitpid_errno(errno); + ensure_equals(STRINGIZE("Couldn't retrieve rc from " << desc << " script: " + "waitpid() errno " << waitpid_errno), + waitpid_errno, ECHILD); + } + else + { + if (WIFEXITED(status)) { - ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), rc, expect); + int rc(WEXITSTATUS(status)); + ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), + rc, expect); } - -#else // LL_DARWIN, LL_LINUX - LLProcess::Params params; - params.executable = PYTHON; - params.args.add(scriptfile.getName()); - LLProcessPtr py(LLProcess::create(params)); - ensure(STRINGIZE("Couldn't launch " << desc << " script"), bool(py)); - // Implementing timeout would mean messing with alarm() and - // catching SIGALRM... later maybe... - int status(0); - if (waitpid(py->getProcessID(), &status, 0) == -1) + else if (WIFSIGNALED(status)) { - int waitpid_errno(errno); - ensure_equals(STRINGIZE("Couldn't retrieve rc from " << desc << " script: " - "waitpid() errno " << waitpid_errno), - waitpid_errno, ECHILD); + ensure(STRINGIZE(desc << " script terminated by signal " << WTERMSIG(status)), + false); } else { - if (WIFEXITED(status)) - { - int rc(WEXITSTATUS(status)); - ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), - rc, expect); - } - else if (WIFSIGNALED(status)) - { - ensure(STRINGIZE(desc << " script terminated by signal " << WTERMSIG(status)), - false); - } - else - { - ensure(STRINGIZE(desc << " script produced impossible status " << status), - false); - } + ensure(STRINGIZE(desc << " script produced impossible status " << status), + false); } -#endif } +#endif + } + + struct TestPythonCompatible + { + TestPythonCompatible() {} + ~TestPythonCompatible() {} }; typedef tut::test_group TestPythonCompatibleGroup; @@ -1894,12 +1894,13 @@ namespace tut "print('Running on', sys.platform)\n"); } - // helper for test<3> - static void writeLLSDArray(std::ostream& out, const LLSD& array) + // helper for test<3> - test<7> + static void writeLLSDArray(const FormatterFunction& serialize, + std::ostream& out, const LLSD& array) { BOOST_FOREACH(LLSD item, llsd::inArray(array)) { - LLSDSerialize::toNotation(item, out); + serialize(item, out); // It's important to separate with newlines because Python's llsd // module doesn't support parsing from a file stream, only from a // string, so we have to know how much of the file to read into a @@ -1908,11 +1909,10 @@ namespace tut } } - template<> template<> - void TestPythonCompatibleObject::test<3>() + // helper for test<3> - test<7> + static void toPythonUsing(const std::string& desc, + const FormatterFunction& serialize) { - set_test_name("verify sequence to Python"); - LLSD cdata(LLSDArray(17)(3.14) ("This string\n" "has several\n" @@ -1941,9 +1941,11 @@ namespace tut // takes a callable. To this callable it passes the // std::ostream with which it's writing the // NamedTempFile. - boost::bind(writeLLSDArray, _1, cdata)); + [serialize, cdata] + (std::ostream& out) + { writeLLSDArray(serialize, out, cdata); }); - python("read C++ notation", + python("read C++ " + desc, placeholders::arg1 << import_llsd << "def parse_each(iterable):\n" @@ -1954,17 +1956,70 @@ namespace tut "verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n"); } + template<> template<> + void TestPythonCompatibleObject::test<3>() + { + set_test_name("to Python using LLSDSerialize::serialize(LLSD_XML)"); + toPythonUsing("LLSD_XML", + [](const LLSD& sd, std::ostream& out) + { LLSDSerialize::serialize(sd, out, LLSDSerialize::LLSD_XML); }); + } + template<> template<> void TestPythonCompatibleObject::test<4>() { - set_test_name("verify sequence from Python"); + set_test_name("to Python using LLSDSerialize::serialize(LLSD_NOTATION)"); + toPythonUsing("LLSD_NOTATION", + [](const LLSD& sd, std::ostream& out) + { LLSDSerialize::serialize(sd, out, LLSDSerialize::LLSD_NOTATION); }); + } + + template<> template<> + void TestPythonCompatibleObject::test<5>() + { + set_test_name("to Python using LLSDSerialize::serialize(LLSD_BINARY)"); + toPythonUsing("LLSD_BINARY", + [](const LLSD& sd, std::ostream& out) + { LLSDSerialize::serialize(sd, out, LLSDSerialize::LLSD_BINARY); }); + } + + template<> template<> + void TestPythonCompatibleObject::test<6>() + { + set_test_name("to Python using LLSDSerialize::toXML()"); + toPythonUsing("toXML()", LLSDSerialize::toXML); + } + + template<> template<> + void TestPythonCompatibleObject::test<7>() + { + set_test_name("to Python using LLSDSerialize::toNotation()"); + toPythonUsing("toNotation()", LLSDSerialize::toNotation); + } + +/*==========================================================================*| + template<> template<> + void TestPythonCompatibleObject::test<8>() + { + set_test_name("to Python using LLSDSerialize::toBinary()"); + // We don't expect this to work because, without a header, + // llsd.parse() will assume notation rather than binary. + toPythonUsing("toBinary()", LLSDSerialize::toBinary); + } +|*==========================================================================*/ + // helper for test<8> - test<12> + void fromPythonUsing(const std::string& pyformatter, + const ParserFunction& parse= + [](std::istream& istr, LLSD& data, size_t max_bytes) + { return LLSDSerialize::deserialize(data, istr, max_bytes); }) + { // Create an empty data file. This is just a placeholder for our // script to write into. Create it to establish a unique name that // we know. NamedTempFile file("llsd", ""); - python("write Python notation", + python("Python " + pyformatter, placeholders::arg1 << import_llsd << "DATA = [\n" @@ -1977,9 +2032,9 @@ namespace tut "]\n" // Don't forget raw-string syntax for Windows pathnames. // N.B. Using 'print' implicitly adds newlines. - "with open(r'" << file.getName() << "', 'w') as f:\n" + "with open(r'" << file.getName() << "', 'wb') as f:\n" " for item in DATA:\n" - " print(llsd.format_notation(item).decode(), file=f)\n"); + " print(llsd." << pyformatter << "(item), file=f)\n"); std::ifstream inf(file.getName().c_str()); LLSD item; @@ -1989,22 +2044,71 @@ namespace tut // want to ensure that notation-separated-by-newlines works in both // directions -- since in practice, a given file might be read by // either language. - ensure_equals("Failed to read LLSD::Integer from Python", - LLSDSerialize::fromNotation(item, inf, LLSDSerialize::SIZE_UNLIMITED), - 1); + ensure("Failed to read LLSD::Integer from Python", + parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); ensure_equals(item.asInteger(), 17); - ensure_equals("Failed to read LLSD::Real from Python", - LLSDSerialize::fromNotation(item, inf, LLSDSerialize::SIZE_UNLIMITED), - 1); + ensure("Failed to read LLSD::Real from Python", + parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); ensure_approximately_equals("Bad LLSD::Real value from Python", item.asReal(), 3.14, 7); // 7 bits ~= 0.01 - ensure_equals("Failed to read LLSD::String from Python", - LLSDSerialize::fromNotation(item, inf, LLSDSerialize::SIZE_UNLIMITED), - 1); + ensure("Failed to read LLSD::String from Python", + parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); ensure_equals(item.asString(), "This string\n" "has several\n" "lines."); + } + template<> template<> + void TestPythonCompatibleObject::test<8>() + { + set_test_name("from Python XML using LLSDSerialize::deserialize()"); + fromPythonUsing("format_xml"); } + + template<> template<> + void TestPythonCompatibleObject::test<9>() + { + set_test_name("from Python notation using LLSDSerialize::deserialize()"); + fromPythonUsing("format_notation"); + } + + template<> template<> + void TestPythonCompatibleObject::test<10>() + { + set_test_name("from Python binary using LLSDSerialize::deserialize()"); + fromPythonUsing("format_binary"); + } + + template<> template<> + void TestPythonCompatibleObject::test<11>() + { + set_test_name("from Python XML using fromXML()"); + // fromXML()'s optional 3rd param isn't max_bytes, it's emit_errors + fromPythonUsing("format_xml", + [](std::istream& istr, LLSD& data, size_t) + { return LLSDSerialize::fromXML(data, istr) > 0; }); + } + + template<> template<> + void TestPythonCompatibleObject::test<12>() + { + set_test_name("from Python notation using fromNotation()"); + fromPythonUsing("format_notation", + [](std::istream& istr, LLSD& data, size_t max_bytes) + { return LLSDSerialize::fromNotation(data, istr, max_bytes) > 0; }); + } + +/*==========================================================================*| + template<> template<> + void TestPythonCompatibleObject::test<13>() + { + set_test_name("from Python binary using fromBinary()"); + // We don't expect this to work because format_binary() emits a + // header, but fromBinary() won't recognize a header. + fromPythonUsing("format_binary", + [](std::istream& istr, LLSD& data, size_t max_bytes) + { return LLSDSerialize::fromBinary(data, istr, max_bytes) > 0; }); + } +|*==========================================================================*/ } -- cgit v1.2.3 From 2f557cd7faceec36acace1eee4ee38904ff06130 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 2 Dec 2022 15:17:56 -0500 Subject: SL-18330: Fix new C++ <-> Python LLSD compatibility tests. When sending multiple LEAP packets in the same file (for testing convenience), use a length prefix instead of delimiting with '\n'. Now that we allow a serialization format that includes an LLSD format header (e.g. ""), '\n' is part of the packet content. But in fact, testing binary LLSD means we can't pick any delimiter guaranteed not to appear in the packet content. Using a length prefix also lets us pass a specific max_bytes to the subject C++ LLSD parser. Make llleap_test.cpp use new freestanding Python llsd package when available. Update Python-side LEAP protocol code to work directly with encoded bytes stream, avoiding bytes<->str encoding and decoding, which breaks binary LLSD. Make LLSDSerialize::deserialize() recognize LLSD format header case- insensitively. Python emits and checks for "llsd/binary", while LLSDSerialize emits and checks for "LLSD/Binary". Once any of the headers is recognized, pass corrected max_bytes to the specific parser. Make deserialize() more careful about the no-header case: preserve '\n' in content. Introduce debugging code (disabled) because it's a little tricky to recreate. Revert LLLeap child process stdout parser from LLSDSerialize::deserialize() to the specific LLSDNotationParser(), as at present: the generic parser fails one of LLLeap's integration tests for reasons that remain mysterious. --- indra/llcommon/llleap.cpp | 9 +++ indra/llcommon/llsdserialize.cpp | 49 +++++++++--- indra/llcommon/tests/llleap_test.cpp | 32 ++++---- indra/llcommon/tests/llsdserialize_test.cpp | 118 +++++++++++++++++++--------- 4 files changed, 147 insertions(+), 61 deletions(-) diff --git a/indra/llcommon/llleap.cpp b/indra/llcommon/llleap.cpp index f71eaf92c4..e8ccc4300d 100644 --- a/indra/llcommon/llleap.cpp +++ b/indra/llcommon/llleap.cpp @@ -317,8 +317,17 @@ public: LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got " << childout.size() << ", parsing LLSD" << LL_ENDL; LLSD data; +#if 1 + // specifically require notation LLSD from child + LLPointer parser(new LLSDNotationParser()); + S32 parse_status(parser->parse(childout.get_istream(), data, mExpect)); + if (parse_status == LLSDParser::PARSE_FAILURE) +#else + // SL-18330: accept any valid LLSD serialization format from child + // Unfortunately this runs into trouble we have not yet debugged. bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect)); if (! parse_status) +#endif { bad_protocol("unparseable LLSD data"); } diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index dc8f8f5737..97bf51eeaa 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -123,12 +123,26 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) bool fail_if_not_legacy = false; /* - * Get the first line before anything. + * Get the first line before anything. Don't read more than max_bytes: + * this get() overload reads no more than (count-1) bytes into the + * specified buffer. In the usual case when max_bytes exceeds + * sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2. */ - // Remember str's original input position: if there's no header, we'll - // want to back up and retry. - str.get(hdr_buf, sizeof(hdr_buf), '\n'); + str.get(hdr_buf, std::min(max_bytes+1, sizeof(hdr_buf)-1), '\n'); auto inbuf = str.gcount(); + // https://en.cppreference.com/w/cpp/io/basic_istream/get + // When the get() above sees the specified delimiter '\n', it stops there + // without pulling it from the stream. If it turns out that the stream + // does NOT contain a header, and the content includes meaningful '\n', + // it's important to pull that into hdr_buf too. + if (inbuf < max_bytes && str.get(hdr_buf[inbuf])) + { + // got the delimiting '\n' + ++inbuf; + // None of the following requires that hdr_buf contain a final '\0' + // byte. We could store one if needed, since even the incremented + // inbuf won't exceed sizeof(hdr_buf)-1, but there's no need. + } std::string header{ hdr_buf, inbuf }; if (str.fail()) { @@ -175,17 +189,17 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) /* * Create the parser as appropriate */ - if (header == LLSD_BINARY_HEADER) + if (0 == LLStringUtil::compareInsensitive(header, LLSD_BINARY_HEADER)) { - return (parse_using(str, sd, max_bytes) > 0); + return (parse_using(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_XML_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_XML_HEADER)) { - return (parse_using(str, sd, max_bytes) > 0); + return (parse_using(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_NOTATION_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_NOTATION_HEADER)) { - return (parse_using(str, sd, max_bytes) > 0); + return (parse_using(str, sd, max_bytes-inbuf) > 0); } else // no header we recognize { @@ -207,7 +221,22 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) LLMemoryStreamBuf already(reinterpret_cast(hdr_buf), inbuf); cat_streambuf prebuff(&already, str.rdbuf()); std::istream prepend(&prebuff); +#if 1 return (p->parse(prepend, sd, max_bytes) > 0); +#else + // debugging the reconstituted 'prepend' stream + // allocate a buffer that we hope is big enough for the whole thing + std::vector wholemsg((max_bytes == size_t(SIZE_UNLIMITED))? 1024 : max_bytes); + prepend.read(wholemsg.data(), std::min(max_bytes, wholemsg.size())); + LLMemoryStream replay(reinterpret_cast(wholemsg.data()), prepend.gcount()); + auto success{ p->parse(replay, sd, prepend.gcount()) > 0 }; + { + LL_DEBUGS() << (success? "parsed: $$" : "failed: '") + << std::string(wholemsg.data(), llmin(prepend.gcount(), 100)) << "$$" + << LL_ENDL; + } + return success; +#endif } } diff --git a/indra/llcommon/tests/llleap_test.cpp b/indra/llcommon/tests/llleap_test.cpp index 9754353ab0..6c799b7993 100644 --- a/indra/llcommon/tests/llleap_test.cpp +++ b/indra/llcommon/tests/llleap_test.cpp @@ -110,12 +110,12 @@ namespace tut "import os\n" "import sys\n" "\n" - // Don't forget that this Python script is written to some - // temp directory somewhere! Its __file__ is useless in - // finding indra/lib/python. Use our __FILE__, with - // raw-string syntax to deal with Windows pathnames. - "mydir = os.path.dirname(r'" << __FILE__ << "')\n" - "from llbase import llsd\n" + "try:\n" + // new freestanding llsd package + " import llsd\n" + "except ImportError:\n" + // older llbase.llsd module + " from llbase import llsd\n" "\n" "class ProtocolError(Exception):\n" " def __init__(self, msg, data):\n" @@ -126,26 +126,26 @@ namespace tut " pass\n" "\n" "def get():\n" - " hdr = ''\n" - " while ':' not in hdr and len(hdr) < 20:\n" - " hdr += sys.stdin.read(1)\n" + " hdr = []\n" + " while b':' not in hdr and len(hdr) < 20:\n" + " hdr.append(sys.stdin.buffer.read(1))\n" " if not hdr:\n" " sys.exit(0)\n" - " if not hdr.endswith(':'):\n" + " if not hdr[-1] == b':':\n" " raise ProtocolError('Expected len:data, got %r' % hdr, hdr)\n" " try:\n" - " length = int(hdr[:-1])\n" + " length = int(b''.join(hdr[:-1]))\n" " except ValueError:\n" " raise ProtocolError('Non-numeric len %r' % hdr[:-1], hdr[:-1])\n" " parts = []\n" " received = 0\n" " while received < length:\n" - " parts.append(sys.stdin.read(length - received))\n" + " parts.append(sys.stdin.buffer.read(length - received))\n" " received += len(parts[-1])\n" - " data = ''.join(parts)\n" + " data = b''.join(parts)\n" " assert len(data) == length\n" " try:\n" - " return llsd.parse(data.encode())\n" + " return llsd.parse(data)\n" // Seems the old indra.base.llsd module didn't properly // convert IndexError (from running off end of string) to // LLSDParseError. @@ -185,11 +185,11 @@ namespace tut " return _reply\n" "\n" "def put(req):\n" - " sys.stdout.write(':'.join((str(len(req)), req)))\n" + " sys.stdout.buffer.write(b'%d:%b' % (len(req), req))\n" " sys.stdout.flush()\n" "\n" "def send(pump, data):\n" - " put(llsd.format_notation(dict(pump=pump, data=data)).decode())\n" + " put(llsd.format_notation(dict(pump=pump, data=data)))\n" "\n" "def request(pump, data):\n" " # we expect 'data' is a dict\n" diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index 601f2c580d..39b93cf335 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -51,10 +51,11 @@ typedef U32 uint32_t; #include "boost/phoenix/core/argument.hpp" using namespace boost::phoenix; -#include "../llsd.h" -#include "../llsdserialize.h" +#include "llsd.h" +#include "llsdserialize.h" #include "llsdutil.h" -#include "../llformat.h" +#include "llformat.h" +#include "llmemorystream.h" #include "../test/lltut.h" #include "../test/namedtempfile.h" @@ -1898,14 +1899,22 @@ namespace tut static void writeLLSDArray(const FormatterFunction& serialize, std::ostream& out, const LLSD& array) { - BOOST_FOREACH(LLSD item, llsd::inArray(array)) + for (const LLSD& item : llsd::inArray(array)) { - serialize(item, out); - // It's important to separate with newlines because Python's llsd - // module doesn't support parsing from a file stream, only from a - // string, so we have to know how much of the file to read into a - // string. - out << '\n'; + // It's important to delimit the entries in this file somehow + // because, although Python's llsd.parse() can accept a file + // stream, the XML parser expects EOF after a single outer element + // -- it doesn't just stop. So we must extract a sequence of bytes + // strings from the file. But since one of the serialization + // formats we want to test is binary, we can't pick any single + // byte value as a delimiter! Use a binary integer length prefix + // instead. + std::ostringstream buffer; + serialize(item, buffer); + auto buffstr{ buffer.str() }; + int bufflen{ static_cast(buffstr.length()) }; + out.write(reinterpret_cast(&bufflen), sizeof(bufflen)); + out.write(buffstr.c_str(), buffstr.length()); } } @@ -1932,7 +1941,7 @@ namespace tut " except StopIteration:\n" " pass\n" " else:\n" - " assert False, 'Too many data items'\n"; + " raise AssertionError('Too many data items')\n"; // Create an llsdXXXXXX file containing 'data' serialized to // notation. @@ -1948,10 +1957,23 @@ namespace tut python("read C++ " + desc, placeholders::arg1 << import_llsd << - "def parse_each(iterable):\n" - " for item in iterable:\n" - " yield llsd.parse(item)\n" << - pydata << + "from functools import partial\n" + "import struct\n" + "lenformat = struct.Struct('i')\n" + "def parse_each(inf):\n" + " for rawlen in iter(partial(inf.read, lenformat.size), b''):\n" + " len = lenformat.unpack(rawlen)[0]\n" + // Since llsd.parse() has no max_bytes argument, instead of + // passing the input stream directly to parse(), read the item + // into a distinct bytes object and parse that. + " data = inf.read(len)\n" + " try:\n" + " yield llsd.parse(data)\n" + " except llsd.LLSDParseError as err:\n" + " print(f'*** {err}')\n" + " print(f'Bad content:\\n{data!r}')\n" + " raise\n" + << pydata << // Don't forget raw-string syntax for Windows pathnames. "verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n"); } @@ -2008,6 +2030,26 @@ namespace tut } |*==========================================================================*/ + // helper for test<8> - test<12> + bool itemFromStream(std::istream& istr, LLSD& item, const ParserFunction& parse) + { + // reset the output value for debugging clarity + item.clear(); + // We use an int length prefix as a foolproof delimiter even for + // binary serialized streams. + int length{ 0 }; + istr.read(reinterpret_cast(&length), sizeof(length)); +// return parse(istr, item, length); + // Sadly, as of 2022-12-01 it seems we can't really trust our LLSD + // parsers to honor max_bytes: this test works better when we read + // each item into its own distinct LLMemoryStream, instead of passing + // the original istr with a max_bytes constraint. + std::vector buffer(length); + istr.read(reinterpret_cast(buffer.data()), length); + LLMemoryStream stream(buffer.data(), length); + return parse(stream, item, length); + } + // helper for test<8> - test<12> void fromPythonUsing(const std::string& pyformatter, const ParserFunction& parse= @@ -2022,6 +2064,8 @@ namespace tut python("Python " + pyformatter, placeholders::arg1 << import_llsd << + "import struct\n" + "lenformat = struct.Struct('i')\n" "DATA = [\n" " 17,\n" " 3.14,\n" @@ -2034,29 +2078,33 @@ namespace tut // N.B. Using 'print' implicitly adds newlines. "with open(r'" << file.getName() << "', 'wb') as f:\n" " for item in DATA:\n" - " print(llsd." << pyformatter << "(item), file=f)\n"); + " serialized = llsd." << pyformatter << "(item)\n" + " f.write(lenformat.pack(len(serialized)))\n" + " f.write(serialized)\n"); std::ifstream inf(file.getName().c_str()); LLSD item; - // Notice that we're not doing anything special to parse out the - // newlines: LLSDSerialize::fromNotation ignores them. While it would - // seem they're not strictly necessary, going in this direction, we - // want to ensure that notation-separated-by-newlines works in both - // directions -- since in practice, a given file might be read by - // either language. - ensure("Failed to read LLSD::Integer from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_equals(item.asInteger(), 17); - ensure("Failed to read LLSD::Real from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_approximately_equals("Bad LLSD::Real value from Python", - item.asReal(), 3.14, 7); // 7 bits ~= 0.01 - ensure("Failed to read LLSD::String from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_equals(item.asString(), - "This string\n" - "has several\n" - "lines."); + try + { + ensure("Failed to read LLSD::Integer from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asInteger(), 17); + ensure("Failed to read LLSD::Real from Python", + itemFromStream(inf, item, parse)); + ensure_approximately_equals("Bad LLSD::Real value from Python", + item.asReal(), 3.14, 7); // 7 bits ~= 0.01 + ensure("Failed to read LLSD::String from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asString(), + "This string\n" + "has several\n" + "lines."); + } + catch (const tut::failure& err) + { + std::cout << "for " << err.what() << ", item = " << item << std::endl; + throw; + } } template<> template<> -- cgit v1.2.3 From 761d8337fb70cea9cfb326f06a8b1b35438ed96b Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 2 Dec 2022 17:14:31 -0500 Subject: SL-18330: Test Python llsd.parse() both from bytes and from stream. --- indra/llcommon/tests/llsdserialize_test.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index 39b93cf335..6e5d8a26e1 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -1958,6 +1958,7 @@ namespace tut placeholders::arg1 << import_llsd << "from functools import partial\n" + "import io\n" "import struct\n" "lenformat = struct.Struct('i')\n" "def parse_each(inf):\n" @@ -1968,11 +1969,16 @@ namespace tut // into a distinct bytes object and parse that. " data = inf.read(len)\n" " try:\n" - " yield llsd.parse(data)\n" + " frombytes = llsd.parse(data)\n" " except llsd.LLSDParseError as err:\n" " print(f'*** {err}')\n" " print(f'Bad content:\\n{data!r}')\n" " raise\n" + // Also try parsing from a distinct stream. + " stream = io.BytesIO(data)\n" + " fromstream = llsd.parse(stream)\n" + " assert frombytes == fromstream\n" + " yield frombytes\n" << pydata << // Don't forget raw-string syntax for Windows pathnames. "verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n"); -- cgit v1.2.3 From 590e158cf36b6e6fca07f54837db084fe8a163c6 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 6 Dec 2022 16:30:35 -0500 Subject: SL-18330: Adapt LLSDSerialize and tests to llssize max_bytes params. --- indra/llcommon/llsdserialize.cpp | 2 +- indra/llcommon/tests/llsdserialize_test.cpp | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 97bf51eeaa..c1b24d8138 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -128,7 +128,7 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) * specified buffer. In the usual case when max_bytes exceeds * sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2. */ - str.get(hdr_buf, std::min(max_bytes+1, sizeof(hdr_buf)-1), '\n'); + str.get(hdr_buf, llmin(max_bytes+1, sizeof(hdr_buf)-1), '\n'); auto inbuf = str.gcount(); // https://en.cppreference.com/w/cpp/io/basic_istream/get // When the get() above sees the specified delimiter '\n', it stops there diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index 6e5d8a26e1..29e3007aff 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -63,7 +63,7 @@ using namespace boost::phoenix; #include typedef std::function FormatterFunction; -typedef std::function ParserFunction; +typedef std::function ParserFunction; std::vector string_to_vector(const std::string& str) { @@ -263,7 +263,7 @@ namespace tut }; // this lambda must be mutable since otherwise the bound 'parser' // is assumed to point to a const LLSDParser - mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) mutable + mParser = [parser](std::istream& istr, LLSD& data, llssize max_bytes) mutable { // reset() call is needed since test code re-uses parser object parser->reset(); @@ -271,10 +271,10 @@ namespace tut }; } - void setParser(bool (*parser)(LLSD&, std::istream&, size_t)) + void setParser(bool (*parser)(LLSD&, std::istream&, llssize)) { // why does LLSDSerialize::deserialize() reverse the parse() params?? - mParser = [parser](std::istream& istr, LLSD& data, size_t max_bytes) + mParser = [parser](std::istream& istr, LLSD& data, llssize max_bytes) { return (parser(data, istr, max_bytes) > 0); }; @@ -2059,7 +2059,7 @@ namespace tut // helper for test<8> - test<12> void fromPythonUsing(const std::string& pyformatter, const ParserFunction& parse= - [](std::istream& istr, LLSD& data, size_t max_bytes) + [](std::istream& istr, LLSD& data, llssize max_bytes) { return LLSDSerialize::deserialize(data, istr, max_bytes); }) { // Create an empty data file. This is just a placeholder for our @@ -2140,7 +2140,7 @@ namespace tut set_test_name("from Python XML using fromXML()"); // fromXML()'s optional 3rd param isn't max_bytes, it's emit_errors fromPythonUsing("format_xml", - [](std::istream& istr, LLSD& data, size_t) + [](std::istream& istr, LLSD& data, llssize) { return LLSDSerialize::fromXML(data, istr) > 0; }); } @@ -2149,7 +2149,7 @@ namespace tut { set_test_name("from Python notation using fromNotation()"); fromPythonUsing("format_notation", - [](std::istream& istr, LLSD& data, size_t max_bytes) + [](std::istream& istr, LLSD& data, llssize max_bytes) { return LLSDSerialize::fromNotation(data, istr, max_bytes) > 0; }); } @@ -2161,7 +2161,7 @@ namespace tut // We don't expect this to work because format_binary() emits a // header, but fromBinary() won't recognize a header. fromPythonUsing("format_binary", - [](std::istream& istr, LLSD& data, size_t max_bytes) + [](std::istream& istr, LLSD& data, llssize max_bytes) { return LLSDSerialize::fromBinary(data, istr, max_bytes) > 0; }); } |*==========================================================================*/ -- cgit v1.2.3 From 292bb3991b589d39d61cf721b82fe7bdae460785 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Wed, 15 Feb 2023 17:29:48 -0500 Subject: SL-18330: Fix per PR review comments. --- indra/llcommon/llsdserialize.cpp | 6 +++++- indra/llcommon/llstreamtools.cpp | 2 +- indra/llcommon/llstreamtools.h | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 0a5e3652c7..a14a6b5b1b 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -169,9 +169,13 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, llssize max_bytes) /* * Remove the newline chars */ - auto lastchar = header.find_last_not_of("\r\n"); + std::string::size_type lastchar = header.find_last_not_of("\r\n"); if (lastchar != std::string::npos) { + // It's important that find_last_not_of() returns size_type, which is + // why lastchar explicitly declares the type above. erase(size_type) + // erases from that offset to the end of the string, whereas + // erase(iterator) erases only a single character. header.erase(lastchar+1); } diff --git a/indra/llcommon/llstreamtools.cpp b/indra/llcommon/llstreamtools.cpp index 979e96b848..bc32b6fd9e 100644 --- a/indra/llcommon/llstreamtools.cpp +++ b/indra/llcommon/llstreamtools.cpp @@ -519,7 +519,7 @@ int cat_streambuf::underflow() if (gptr() == egptr()) { // here because our buffer is empty - std::streamsize size; + std::streamsize size = 0; // Until we've run out of mInputs, try reading the first of them // into mBuffer. If that fetches some characters, break the loop. while (! mInputs.empty() diff --git a/indra/llcommon/llstreamtools.h b/indra/llcommon/llstreamtools.h index 997f738840..bb7bc20327 100644 --- a/indra/llcommon/llstreamtools.h +++ b/indra/llcommon/llstreamtools.h @@ -135,7 +135,7 @@ public: mBuffer(1024) {} - int underflow(); + int underflow() override; }; #endif -- cgit v1.2.3