From d24c59bb139bbbd23f339952de0b25bfd30e9658 Mon Sep 17 00:00:00 2001
From: Nat Goodspeed <nat@lindenlab.com>
Date: Wed, 25 Mar 2015 13:28:15 -0400
Subject: Add logic to attempt to look up large Windows negative return codes.
 A large negative return code doesn't do a human reader any good, even for
 lookup purposes, because Microsoft's lookup tables list the hex
 representation of that integer. So at least format the return code as hex.
 Going further, we've captured the content of the web page
 https://msdn.microsoft.com/en-us/library/cc704588.aspx as windows-rcs.html.
 If we can parse that file, and if we understand the structure of its table
 entries, and if the hex form of the actual return code is in fact listed
 there, we can display the symbol name and description as well as the hex
 return code. All those maybes are to support refreshing the file from the URL
 above (or wherever it might get moved) from time to time. Later versions of
 that file might change in unexpected ways. If we can't look up the hex rc, oh
 well, just display that to the user instead of crumping.

---
 indra/cmake/run_build_test.py | 161 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 153 insertions(+), 8 deletions(-)

(limited to 'indra/cmake/run_build_test.py')

diff --git a/indra/cmake/run_build_test.py b/indra/cmake/run_build_test.py
index 582185e5ab..a79d09a9ea 100755
--- a/indra/cmake/run_build_test.py
+++ b/indra/cmake/run_build_test.py
@@ -47,6 +47,8 @@ $/LicenseInfo$
 import os
 import sys
 import errno
+import HTMLParser
+import re
 import signal
 import subprocess
 
@@ -148,15 +150,158 @@ def translate_rc(rc):
     if rc >= 0:
         return "terminated with rc %s" % rc
 
-    # Negative rc means the child was terminated by signal -rc.
-    rc = -rc
-    for attr in dir(signal):
-        if attr.startswith('SIG') and getattr(signal, attr) == rc:
-            strc = attr
-            break
+    if sys.platform.startswith("win"):
+        # From http://stackoverflow.com/questions/20629027/process-finished-with-exit-code-1073741571
+        # [-1073741571] is the signed integer representation of Microsoft's
+        # "stack overflow/stack exhaustion" error code 0xC00000FD.
+        # Anytime you see strange, large negative exit codes in windows, convert
+        # them to hex and then look them up in the ntstatus error codes
+        # http://msdn.microsoft.com/en-us/library/cc704588.aspx
+
+        # Python bends over backwards to give you all the integer precision
+        # you need, avoiding truncation. But only with 32-bit signed ints is
+        # -1073741571 equivalent to 0xC00000FD! Explicitly truncate before
+        # converting.
+        hexrc = "0x%X" % (rc & 0xFFFFFFFF)
+        # At this point, we're only trying to format the rc to make it easier
+        # for a human being to understand. Any exception here -- file doesn't
+        # exist, HTML parsing error, unrecognized table structure, unknown key
+        # -- should NOT kill the script! It should only cause us to shrug and
+        # present our caller with the best information available.
+        try:
+            table = get_windows_table()
+            symbol, desc = table[hexrc]
+        except Exception, err:
+            print >>sys.stderr, "(%s -- carrying on)" % err
+            return "terminated with rc %s (%s)" % (rc, hexrc)
+        else:
+            return "terminated with rc %s: %s: %s" % (hexrc, symbol, desc)
+
     else:
-        strc = str(rc)
-    return "terminated by signal %s" % strc
+        # On Posix, negative rc means the child was terminated by signal -rc.
+        rc = -rc
+        for attr in dir(signal):
+            if attr.startswith('SIG') and getattr(signal, attr) == rc:
+                strc = attr
+                break
+        else:
+            strc = str(rc)
+        return "terminated by signal %s" % strc
+
+class TableParser(HTMLParser.HTMLParser):
+    """
+    This HTMLParser subclass is designed to parse the table we know exists
+    in windows-rcs.html, hopefully without building in too much knowledge of
+    the specific way that table is currently formatted.
+    """
+    # regular expression matching any string containing only whitespace
+    whitespace = re.compile(r'\s*$')
+
+    def __init__(self):
+        # Because Python 2.x's HTMLParser is an old-style class, we must use
+        # old-style syntax to forward the __init__() call -- not super().
+        HTMLParser.HTMLParser.__init__(self)
+        # this will collect all the data, eventually
+        self.table = []
+        # Stack whose top (last item) indicates where to append current
+        # element data. When empty, don't collect data at all.
+        self.dest = []
+
+    def handle_starttag(self, tag, attrs):
+        if tag == "table":
+            # This is the outermost tag we recognize. Collect nested elements
+            # within self.table.
+            self.dest.append(self.table)
+        elif tag in ("tr", "td"):
+            # Nested elements whose contents we want to capture as sublists.
+            # To the list currently designated by the top of the dest stack,
+            # append a new empty sublist.
+            self.dest[-1].append([])
+            # Now push THAT new, empty list as the new top of the dest stack.
+            self.dest.append(self.dest[-1][-1])
+        elif tag == "p":
+            # We could handle <p> ... </p> just like <tr> or <td>, but that
+            # introduces an unnecessary extra level of nesting. Just skip.
+            pass
+        else:
+            # For any tag we don't recognize (notably <th>), push a new, empty
+            # list to the top of the dest stack. This new list is NOT
+            # referenced by anything in self.table; thus, when we pop it, any
+            # data we've collected inside that list will be discarded.
+            self.dest.append([])
+
+    def handle_endtag(self, tag):
+        # Because we avoid pushing self.dest for <p> in handle_starttag(), we
+        # must refrain from popping it for </p> here.
+        if tag != "p":
+            # For everything else, including unrecognized tags, pop the dest
+            # stack, reverting to outer collection.
+            self.dest.pop()
+
+    def handle_startendtag(self, tag, attrs):
+        # The table of interest contains <td> entries of the form:
+        # <p>0x00000000<br />STATUS_SUCCESS</p>
+        # The <br/> is very useful -- we definitely want two different data
+        # items for "0x00000000" and "STATUS_SUCCESS" -- but we don't need or
+        # want it to push, then discard, an empty list as it would if we let
+        # the default HTMLParser.handle_startendtag() call handle_starttag()
+        # followed by handle_endtag(). Just ignore <br/> or any other
+        # singleton tag.
+        pass
+
+    def handle_data(self, data):
+        # Outside the <table> of interest, self.dest is empty. Do not bother
+        # collecting data when self.dest is empty.
+        # HTMLParser calls handle_data() with every chunk of whitespace
+        # between tags. That would be lovely if our eventual goal was to
+        # reconstitute the original input stream with its existing formatting,
+        # but for us, whitespace only clutters the table. Ignore it.
+        if self.dest and not self.whitespace.match(data):
+            # Here we're within our <table> and we have non-whitespace data.
+            # Append it to the list designated by the top of the dest stack.
+            self.dest[-1].append(data)
+
+# cache for get_windows_table()
+_windows_table = None
+
+def get_windows_table():
+    global _windows_table
+    # If we already loaded _windows_table, no need to load it all over again.
+    if _windows_table:
+        return _windows_table
+
+    # windows-rcs.html was fetched on 2015-03-24 with the following command:
+    # curl -o windows-rcs.html \
+    #         https://msdn.microsoft.com/en-us/library/cc704588.aspx
+    parser = TableParser()
+    with open(os.path.join(os.path.dirname(__file__), "windows-rcs.html")) as hf:
+        # We tried feeding the file data to TableParser in chunks, to avoid
+        # buffering the entire file as a single string. Unfortunately its
+        # handle_data() cannot tell the difference between distinct calls
+        # separated by HTML tags, and distinct calls necessitated by a chunk
+        # boundary. Sigh! Read in the whole file. At the time this was
+        # written, it was only 500KB anyway.
+        parser.feed(hf.read())
+    parser.close()
+    table = parser.table
+
+    # With our parser, any <tr><th>...</th></tr> row leaves a table entry
+    # consisting only of an empty list. Remove any such.
+    while table and not table[0]:
+        table.pop(0)
+
+    # We expect rows of the form:
+    # [['0x00000000', 'STATUS_SUCCESS'],
+    #  ['The operation completed successfully.']]
+    # The latter list will have multiple entries if Microsoft embedded <br/>
+    # or <p> ... </p> in the text, in which case joining with '\n' is
+    # appropriate.
+    # Turn that into a dict whose key is the hex string, and whose value is
+    # the pair (symbol, desc).
+    _windows_table = dict((key, (symbol, '\n'.join(desc)))
+                          for (key, symbol), desc in table)
+
+    return _windows_table
 
 if __name__ == "__main__":
     from optparse import OptionParser
-- 
cgit v1.2.3