diff options
Diffstat (limited to 'indra')
| -rw-r--r-- | indra/llcommon/llleap.cpp | 9 | ||||
| -rw-r--r-- | indra/llcommon/llsdserialize.cpp | 49 | ||||
| -rw-r--r-- | indra/llcommon/tests/llleap_test.cpp | 32 | ||||
| -rw-r--r-- | indra/llcommon/tests/llsdserialize_test.cpp | 118 | 
4 files changed, 147 insertions, 61 deletions
| diff --git a/indra/llcommon/llleap.cpp b/indra/llcommon/llleap.cpp index f71eaf92c4..e8ccc4300d 100644 --- a/indra/llcommon/llleap.cpp +++ b/indra/llcommon/llleap.cpp @@ -317,8 +317,17 @@ public:              LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got "                                  << childout.size() << ", parsing LLSD" << LL_ENDL;              LLSD data; +#if 1 +            // specifically require notation LLSD from child +            LLPointer<LLSDParser> parser(new LLSDNotationParser()); +            S32 parse_status(parser->parse(childout.get_istream(), data, mExpect)); +            if (parse_status == LLSDParser::PARSE_FAILURE) +#else +            // SL-18330: accept any valid LLSD serialization format from child +            // Unfortunately this runs into trouble we have not yet debugged.              bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect));              if (! parse_status) +#endif              {                  bad_protocol("unparseable LLSD data");              } diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index dc8f8f5737..97bf51eeaa 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -123,12 +123,26 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)  	bool fail_if_not_legacy = false;  	/* -	 * Get the first line before anything. +	 * Get the first line before anything. Don't read more than max_bytes: +	 * this get() overload reads no more than (count-1) bytes into the +	 * specified buffer. In the usual case when max_bytes exceeds +	 * sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2.  	 */ -	// Remember str's original input position: if there's no header, we'll -	// want to back up and retry. -	str.get(hdr_buf, sizeof(hdr_buf), '\n'); +	str.get(hdr_buf, std::min(max_bytes+1, sizeof(hdr_buf)-1), '\n');  	auto inbuf = str.gcount(); +	// https://en.cppreference.com/w/cpp/io/basic_istream/get +	// When the get() above sees the specified delimiter '\n', it stops there +	// without pulling it from the stream. If it turns out that the stream +	// does NOT contain a header, and the content includes meaningful '\n', +	// it's important to pull that into hdr_buf too. +	if (inbuf < max_bytes && str.get(hdr_buf[inbuf])) +	{ +		// got the delimiting '\n' +		++inbuf; +		// None of the following requires that hdr_buf contain a final '\0' +		// byte. We could store one if needed, since even the incremented +		// inbuf won't exceed sizeof(hdr_buf)-1, but there's no need. +	}  	std::string header{ hdr_buf, inbuf };  	if (str.fail())  	{ @@ -175,17 +189,17 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)  	/*  	 * Create the parser as appropriate  	 */ -	if (header == LLSD_BINARY_HEADER) +	if (0 == LLStringUtil::compareInsensitive(header, LLSD_BINARY_HEADER))  	{ -		return (parse_using<LLSDBinaryParser>(str, sd, max_bytes) > 0); +		return (parse_using<LLSDBinaryParser>(str, sd, max_bytes-inbuf) > 0);  	} -	else if (header == LLSD_XML_HEADER) +	else if (0 == LLStringUtil::compareInsensitive(header, LLSD_XML_HEADER))  	{ -		return (parse_using<LLSDXMLParser>(str, sd, max_bytes) > 0); +		return (parse_using<LLSDXMLParser>(str, sd, max_bytes-inbuf) > 0);  	} -	else if (header == LLSD_NOTATION_HEADER) +	else if (0 == LLStringUtil::compareInsensitive(header, LLSD_NOTATION_HEADER))  	{ -		return (parse_using<LLSDNotationParser>(str, sd, max_bytes) > 0); +		return (parse_using<LLSDNotationParser>(str, sd, max_bytes-inbuf) > 0);  	}  	else // no header we recognize  	{ @@ -207,7 +221,22 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)  		LLMemoryStreamBuf already(reinterpret_cast<const U8*>(hdr_buf), inbuf);  		cat_streambuf prebuff(&already, str.rdbuf());  		std::istream  prepend(&prebuff); +#if 1  		return (p->parse(prepend, sd, max_bytes) > 0); +#else +		// debugging the reconstituted 'prepend' stream +		// allocate a buffer that we hope is big enough for the whole thing +		std::vector<char> wholemsg((max_bytes == size_t(SIZE_UNLIMITED))? 1024 : max_bytes); +		prepend.read(wholemsg.data(), std::min(max_bytes, wholemsg.size())); +		LLMemoryStream replay(reinterpret_cast<const U8*>(wholemsg.data()), prepend.gcount()); +		auto success{ p->parse(replay, sd, prepend.gcount()) > 0 }; +		{ +			LL_DEBUGS() << (success? "parsed: $$" : "failed: '") +						<< std::string(wholemsg.data(), llmin(prepend.gcount(), 100)) << "$$" +						<< LL_ENDL; +		} +		return success; +#endif  	}  } diff --git a/indra/llcommon/tests/llleap_test.cpp b/indra/llcommon/tests/llleap_test.cpp index 9754353ab0..6c799b7993 100644 --- a/indra/llcommon/tests/llleap_test.cpp +++ b/indra/llcommon/tests/llleap_test.cpp @@ -110,12 +110,12 @@ namespace tut                     "import os\n"                     "import sys\n"                     "\n" -                   // Don't forget that this Python script is written to some -                   // temp directory somewhere! Its __file__ is useless in -                   // finding indra/lib/python. Use our __FILE__, with -                   // raw-string syntax to deal with Windows pathnames. -                   "mydir = os.path.dirname(r'" << __FILE__ << "')\n" -                   "from llbase import llsd\n" +                   "try:\n" +                   // new freestanding llsd package +                   "    import llsd\n" +                   "except ImportError:\n" +                   // older llbase.llsd module +                   "    from llbase import llsd\n"                     "\n"                     "class ProtocolError(Exception):\n"                     "    def __init__(self, msg, data):\n" @@ -126,26 +126,26 @@ namespace tut                     "    pass\n"                     "\n"                     "def get():\n" -                   "    hdr = ''\n" -                   "    while ':' not in hdr and len(hdr) < 20:\n" -                   "        hdr += sys.stdin.read(1)\n" +                   "    hdr = []\n" +                   "    while b':' not in hdr and len(hdr) < 20:\n" +                   "        hdr.append(sys.stdin.buffer.read(1))\n"                     "        if not hdr:\n"                     "            sys.exit(0)\n" -                   "    if not hdr.endswith(':'):\n" +                   "    if not hdr[-1] == b':':\n"                     "        raise ProtocolError('Expected len:data, got %r' % hdr, hdr)\n"                     "    try:\n" -                   "        length = int(hdr[:-1])\n" +                   "        length = int(b''.join(hdr[:-1]))\n"                     "    except ValueError:\n"                     "        raise ProtocolError('Non-numeric len %r' % hdr[:-1], hdr[:-1])\n"                     "    parts = []\n"                     "    received = 0\n"                     "    while received < length:\n" -                   "        parts.append(sys.stdin.read(length - received))\n" +                   "        parts.append(sys.stdin.buffer.read(length - received))\n"                     "        received += len(parts[-1])\n" -                   "    data = ''.join(parts)\n" +                   "    data = b''.join(parts)\n"                     "    assert len(data) == length\n"                     "    try:\n" -                   "        return llsd.parse(data.encode())\n" +                   "        return llsd.parse(data)\n"                     //   Seems the old indra.base.llsd module didn't properly                     //   convert IndexError (from running off end of string) to                     //   LLSDParseError. @@ -185,11 +185,11 @@ namespace tut                     "    return _reply\n"                     "\n"                     "def put(req):\n" -                   "    sys.stdout.write(':'.join((str(len(req)), req)))\n" +                   "    sys.stdout.buffer.write(b'%d:%b' % (len(req), req))\n"                     "    sys.stdout.flush()\n"                     "\n"                     "def send(pump, data):\n" -                   "    put(llsd.format_notation(dict(pump=pump, data=data)).decode())\n" +                   "    put(llsd.format_notation(dict(pump=pump, data=data)))\n"                     "\n"                     "def request(pump, data):\n"                     "    # we expect 'data' is a dict\n" diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index 601f2c580d..39b93cf335 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -51,10 +51,11 @@ typedef U32 uint32_t;  #include "boost/phoenix/core/argument.hpp"  using namespace boost::phoenix; -#include "../llsd.h" -#include "../llsdserialize.h" +#include "llsd.h" +#include "llsdserialize.h"  #include "llsdutil.h" -#include "../llformat.h" +#include "llformat.h" +#include "llmemorystream.h"  #include "../test/lltut.h"  #include "../test/namedtempfile.h" @@ -1898,14 +1899,22 @@ namespace tut      static void writeLLSDArray(const FormatterFunction& serialize,                                 std::ostream& out, const LLSD& array)      { -        BOOST_FOREACH(LLSD item, llsd::inArray(array)) +        for (const LLSD& item : llsd::inArray(array))          { -            serialize(item, out); -            // It's important to separate with newlines because Python's llsd -            // module doesn't support parsing from a file stream, only from a -            // string, so we have to know how much of the file to read into a -            // string. -            out << '\n'; +            // It's important to delimit the entries in this file somehow +            // because, although Python's llsd.parse() can accept a file +            // stream, the XML parser expects EOF after a single outer element +            // -- it doesn't just stop. So we must extract a sequence of bytes +            // strings from the file. But since one of the serialization +            // formats we want to test is binary, we can't pick any single +            // byte value as a delimiter! Use a binary integer length prefix +            // instead. +            std::ostringstream buffer; +            serialize(item, buffer); +            auto buffstr{ buffer.str() }; +            int bufflen{ static_cast<int>(buffstr.length()) }; +            out.write(reinterpret_cast<const char*>(&bufflen), sizeof(bufflen)); +            out.write(buffstr.c_str(), buffstr.length());          }      } @@ -1932,7 +1941,7 @@ namespace tut              "    except StopIteration:\n"              "        pass\n"              "    else:\n" -            "        assert False, 'Too many data items'\n"; +            "        raise AssertionError('Too many data items')\n";          // Create an llsdXXXXXX file containing 'data' serialized to          // notation. @@ -1948,10 +1957,23 @@ namespace tut          python("read C++ " + desc,                 placeholders::arg1 <<                 import_llsd << -               "def parse_each(iterable):\n" -               "    for item in iterable:\n" -               "        yield llsd.parse(item)\n" << -               pydata << +               "from functools import partial\n" +               "import struct\n" +               "lenformat = struct.Struct('i')\n" +               "def parse_each(inf):\n" +               "    for rawlen in iter(partial(inf.read, lenformat.size), b''):\n" +               "        len = lenformat.unpack(rawlen)[0]\n" +               // Since llsd.parse() has no max_bytes argument, instead of +               // passing the input stream directly to parse(), read the item +               // into a distinct bytes object and parse that. +               "        data = inf.read(len)\n" +               "        try:\n" +               "            yield llsd.parse(data)\n" +               "        except llsd.LLSDParseError as err:\n" +               "            print(f'*** {err}')\n" +               "            print(f'Bad content:\\n{data!r}')\n" +               "            raise\n" +               << pydata <<                 // Don't forget raw-string syntax for Windows pathnames.                 "verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n");      } @@ -2009,6 +2031,26 @@ namespace tut  |*==========================================================================*/      // helper for test<8> - test<12> +    bool itemFromStream(std::istream& istr, LLSD& item, const ParserFunction& parse) +    { +        // reset the output value for debugging clarity +        item.clear(); +        // We use an int length prefix as a foolproof delimiter even for +        // binary serialized streams. +        int length{ 0 }; +        istr.read(reinterpret_cast<char*>(&length), sizeof(length)); +//      return parse(istr, item, length); +        // Sadly, as of 2022-12-01 it seems we can't really trust our LLSD +        // parsers to honor max_bytes: this test works better when we read +        // each item into its own distinct LLMemoryStream, instead of passing +        // the original istr with a max_bytes constraint. +        std::vector<U8> buffer(length); +        istr.read(reinterpret_cast<char*>(buffer.data()), length); +        LLMemoryStream stream(buffer.data(), length); +        return parse(stream, item, length); +    } + +    // helper for test<8> - test<12>      void fromPythonUsing(const std::string& pyformatter,                           const ParserFunction& parse=                           [](std::istream& istr, LLSD& data, size_t max_bytes) @@ -2022,6 +2064,8 @@ namespace tut          python("Python " + pyformatter,                 placeholders::arg1 <<                 import_llsd << +               "import struct\n" +               "lenformat = struct.Struct('i')\n"                 "DATA = [\n"                 "    17,\n"                 "    3.14,\n" @@ -2034,29 +2078,33 @@ namespace tut                 // N.B. Using 'print' implicitly adds newlines.                 "with open(r'" << file.getName() << "', 'wb') as f:\n"                 "    for item in DATA:\n" -               "        print(llsd." << pyformatter << "(item), file=f)\n"); +               "        serialized = llsd." << pyformatter << "(item)\n" +               "        f.write(lenformat.pack(len(serialized)))\n" +               "        f.write(serialized)\n");          std::ifstream inf(file.getName().c_str());          LLSD item; -        // Notice that we're not doing anything special to parse out the -        // newlines: LLSDSerialize::fromNotation ignores them. While it would -        // seem they're not strictly necessary, going in this direction, we -        // want to ensure that notation-separated-by-newlines works in both -        // directions -- since in practice, a given file might be read by -        // either language. -        ensure("Failed to read LLSD::Integer from Python", -               parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); -        ensure_equals(item.asInteger(), 17); -        ensure("Failed to read LLSD::Real from Python", -               parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); -        ensure_approximately_equals("Bad LLSD::Real value from Python", -                                    item.asReal(), 3.14, 7); // 7 bits ~= 0.01 -        ensure("Failed to read LLSD::String from Python", -               parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); -        ensure_equals(item.asString(),  -                      "This string\n" -                      "has several\n" -                      "lines."); +        try +        { +            ensure("Failed to read LLSD::Integer from Python", +                   itemFromStream(inf, item, parse)); +            ensure_equals(item.asInteger(), 17); +            ensure("Failed to read LLSD::Real from Python", +                   itemFromStream(inf, item, parse)); +            ensure_approximately_equals("Bad LLSD::Real value from Python", +                                        item.asReal(), 3.14, 7); // 7 bits ~= 0.01 +            ensure("Failed to read LLSD::String from Python", +                   itemFromStream(inf, item, parse)); +            ensure_equals(item.asString(),  +                          "This string\n" +                          "has several\n" +                          "lines."); +        } +        catch (const tut::failure& err) +        { +            std::cout << "for " << err.what() << ", item = " << item << std::endl; +            throw; +        }      }      template<> template<> | 
