diff options
Diffstat (limited to 'indra/lib')
| -rw-r--r-- | indra/lib/python/indra/ipc/webdav.py | 577 | 
1 files changed, 577 insertions, 0 deletions
| diff --git a/indra/lib/python/indra/ipc/webdav.py b/indra/lib/python/indra/ipc/webdav.py new file mode 100644 index 0000000000..d50b3f2b26 --- /dev/null +++ b/indra/lib/python/indra/ipc/webdav.py @@ -0,0 +1,577 @@ +""" +@file webdav.py +@brief Classes to make manipulation of a webdav store easier. + +Copyright (c) 2006-2007, Linden Research, Inc. +$License$ +""" + +import sys, os, httplib, urlparse +import socket, time +import xml.dom.minidom +import syslog +# import signal + +__revision__ = '0' + +dav_debug = False + + +# def urlsafe_b64decode (enc): +#     return base64.decodestring (enc.replace ('_', '/').replace ('-', '+')) + +# def urlsafe_b64encode (str): +#     return base64.encodestring (str).replace ('+', '-').replace ('/', '_') + + +class DAVError (Exception): +    """ Base class for exceptions in this module. """ +    def __init__ (self, status=0, message='', body='', details=''): +        self.status = status +        self.message = message +        self.body = body +        self.details = details +        Exception.__init__ (self, '%d:%s:%s%s' % (self.status, self.message, +                                                   self.body, self.details)) + +    def print_to_stderr (self): +        """ print_to_stderr docstring """ +        print >> sys.stderr, str (self.status) + ' ' + self.message +        print >> sys.stderr, str (self.details) + + +class Timeout (Exception): +    """ Timeout docstring """ +    def __init__ (self, arg=''): +        Exception.__init__ (self, arg) + + +def alarm_handler (signum, frame): +    """ alarm_handler docstring """ +    raise Timeout ('caught alarm') + + +class WebDAV: +    """ WebDAV docstring """ +    def __init__ (self, url, proxy=None, retries_before_fail=6): +        self.init_url = url +        self.init_proxy = proxy +        self.retries_before_fail = retries_before_fail +        url_parsed = urlparse.urlsplit (url) + +        self.top_path = url_parsed[ 2 ] +        # make sure top_path has a trailing / +        if self.top_path == None or self.top_path == '': +            self.top_path = '/' +        elif len (self.top_path) > 1 and self.top_path[-1:] != '/': +            self.top_path += '/' + +        if dav_debug: +            syslog.syslog ('new WebDAV %s : %s' % (str (url), str (proxy))) + +        if proxy: +            proxy_parsed = urlparse.urlsplit (proxy) +            self.host_header = url_parsed[ 1 ] +            host_and_port = proxy_parsed[ 1 ].split (':') +            self.host = host_and_port[ 0 ] +            if len (host_and_port) > 1: +                self.port = int(host_and_port[ 1 ]) +            else: +                self.port = 80 +        else: # no proxy +            host_and_port = url_parsed[ 1 ].split (':') +            self.host_header = None +            self.host = host_and_port[ 0 ] +            if len (host_and_port) > 1: +                self.port = int(host_and_port[ 1 ]) +            else: +                self.port = 80 + +        self.connection = False +        self.connect () + + +    def log (self, msg, depth=0): +        """ log docstring """ +        if dav_debug and depth == 0: +            host = str (self.init_url) +            if host == 'http://int.tuco.lindenlab.com:80/asset/': +                host = 'tuco' +            if host == 'http://harriet.lindenlab.com/asset-keep/': +                host = 'harriet/asset-keep' +            if host == 'http://harriet.lindenlab.com/asset-flag/': +                host = 'harriet/asset-flag' +            if host == 'http://harriet.lindenlab.com/asset/': +                host = 'harriet/asset' +            if host == 'http://ozzy.lindenlab.com/asset/': +                host = 'ozzy/asset' +            if host == 'http://station11.lindenlab.com:12041/:': +                host = 'station11:12041' +            proxy = str (self.init_proxy) +            if proxy == 'None': +                proxy = '' +            if proxy == 'http://int.tuco.lindenlab.com:3128/': +                proxy = 'tuco' +            syslog.syslog ('WebDAV (%s:%s) %s' % (host, proxy, str (msg))) + + +    def connect (self): +        """ connect docstring """ +        self.log ('connect') +        self.connection = httplib.HTTPConnection (self.host, self.port) + +    def __err (self, response, details): +        """ __err docstring """ +        raise DAVError (response.status, response.reason, response.read (), +                        str (self.init_url) + ':' + \ +                        str (self.init_proxy) + ':' + str (details)) + +    def request (self, method, path, body=None, headers=None, +                 read_all=True, body_hook = None, recurse=0, allow_cache=True): +        """ request docstring """ +        # self.log ('request %s %s' % (method, path)) +        if headers == None: +            headers = {} +        if not allow_cache: +            headers['Pragma'] = 'no-cache' +            headers['cache-control'] = 'no-cache' +        try: +            if method.lower () != 'purge': +                if path.startswith ('/'): +                    path = path[1:] +                if self.host_header: # use proxy +                    headers[ 'host' ] = self.host_header +                    fullpath = 'http://%s%s%s' % (self.host_header, +                                                  self.top_path, path) +                else: # no proxy +                    fullpath = self.top_path + path +            else: +                fullpath = path + +            self.connection.request (method, fullpath, body, headers) +            if body_hook: +                body_hook () + +            # signal.signal (signal.SIGALRM, alarm_handler) +            # try: +            #     signal.alarm (120) +            #     signal.alarm (0) +            # except Timeout, e: +            #     if recurse < 6: +            #         return self.retry_request (method, path, body, headers, +            #                                    read_all, body_hook, recurse) +            #     else: +            #         raise DAVError (0, 'timeout', self.host, +            #                         (method, path, body, headers, recurse)) + +            response = self.connection.getresponse () + +            if read_all: +                while len (response.read (1024)) > 0: +                    pass +            if (response.status == 500 or \ +                response.status == 503 or \ +                response.status == 403) and \ +                recurse < self.retries_before_fail: +                return self.retry_request (method, path, body, headers, +                                           read_all, body_hook, recurse) +            return response +        except (httplib.ResponseNotReady, +                httplib.BadStatusLine, +                socket.error): +            # if the server hangs up on us (keepalive off, broken pipe), +            # we need to reconnect and try again. +            if recurse < self.retries_before_fail: +                return self.retry_request (method, path, body, headers, +                                           read_all, body_hook, recurse) +            raise DAVError (0, 'reconnect failed', self.host, +                            (method, path, body, headers, recurse)) + + +    def retry_request (self, method, path, body, headers, +                       read_all, body_hook, recurse): +        """ retry_request docstring """ +        time.sleep (10.0 * recurse) +        self.connect () +        return self.request (method, path, body, headers, +                             read_all, body_hook, recurse+1) + + + +    def propfind (self, path, body=None, depth=1): +        """ propfind docstring """ +        # self.log ('propfind %s' % path) +        headers = {'Content-Type':'text/xml; charset="utf-8"', +                   'Depth':str(depth)} +        response = self.request ('PROPFIND', path, body, headers, False) +        if response.status == 207: +            return response # Multi-Status +        self.__err (response, ('PROPFIND', path, body, headers, 0)) + + +    def purge (self, path): +        """ issue a squid purge command """ +        headers = {'Accept':'*/*'} +        response = self.request ('PURGE', path, None, headers) +        if response.status == 200 or response.status == 404: +            # 200 if it was purge, 404 if it wasn't there. +            return response +        self.__err (response, ('PURGE', path, None, headers)) + + +    def get_file_size (self, path): +        """ +        Use propfind to ask a webdav server what the size of +        a file is.  If used on a directory (collection) return 0 +        """ +        self.log ('get_file_size %s' % path) +        # "getcontentlength" property +        # 8.1.1 Example - Retrieving Named Properties +        # http://docs.python.org/lib/module-xml.dom.html +        nsurl = 'http://apache.org/dav/props/' +        doc = xml.dom.minidom.Document () +        propfind_element = doc.createElementNS (nsurl, "D:propfind") +        propfind_element.setAttributeNS (nsurl, 'xmlns:D', 'DAV:') +        doc.appendChild (propfind_element) +        prop_element = doc.createElementNS (nsurl, "D:prop") +        propfind_element.appendChild (prop_element) +        con_len_element = doc.createElementNS (nsurl, "D:getcontentlength") +        prop_element.appendChild (con_len_element) + +        response = self.propfind (path, doc.toxml ()) +        doc.unlink () + +        resp_doc = xml.dom.minidom.parseString (response.read ()) +        cln = resp_doc.getElementsByTagNameNS ('DAV:','getcontentlength')[ 0 ] +        try: +            content_length = int (cln.childNodes[ 0 ].nodeValue) +        except IndexError: +            return 0 +        resp_doc.unlink () +        return content_length + + +    def file_exists (self, path): +        """ +        do an http head on the given file.  return True if it succeeds +        """ +        self.log ('file_exists %s' % path) +        expect_gzip = path.endswith ('.gz') +        response = self.request ('HEAD', path) +        got_gzip = response.getheader ('Content-Encoding', '').strip () +        if got_gzip.lower () == 'x-gzip' and expect_gzip == False: +            # the asset server fakes us out if we ask for the non-gzipped +            # version of an asset, but the server has the gzipped version. +            return False +        return response.status == 200 + + +    def mkdir (self, path): +        """ mkdir docstring """ +        self.log ('mkdir %s' % path) +        headers = {} +        response = self.request ('MKCOL', path, None, headers) +        if response.status == 201: +            return # success +        if response.status == 405: +            return # directory already existed? +        self.__err (response, ('MKCOL', path, None, headers, 0)) + + +    def delete (self, path): +        """ delete docstring """ +        self.log ('delete %s' % path) +        headers = {'Depth':'infinity'} # collections require infinity +        response = self.request ('DELETE', path, None, headers) +        if response.status == 204: +            return # no content +        if response.status == 404: +            return # hmm +        self.__err (response, ('DELETE', path, None, headers, 0)) + + +    def list_directory (self, path, dir_filter=None, allow_cache=True, +                        minimum_cache_time=False): +        """ +        Request an http directory listing and parse the filenames out of lines +        like: '<LI><A HREF="X"> X</A>'. If a filter function is provided, +        only return filenames that the filter returns True for. + +        This is sort of grody, but it seems faster than other ways of getting +        this information from an isilon. +        """ +        self.log ('list_directory %s' % path) + +        def try_match (lline, before, after): +            """ try_match docstring """ +            try: +                blen = len (before) +                asset_start_index = lline.index (before) +                asset_end_index = lline.index (after, asset_start_index + blen) +                asset = line[ asset_start_index + blen : asset_end_index ] + +                if not dir_filter or dir_filter (asset): +                    return [ asset ] +                return [] +            except ValueError: +                return [] + +        if len (path) > 0 and path[-1:] != '/': +            path += '/' + +        response = self.request ('GET', path, None, {}, False, +                                 allow_cache=allow_cache) + +        if allow_cache and minimum_cache_time: # XXX +            print response.getheader ('Date') +            # s = "2005-12-06T12:13:14" +            # from datetime import datetime +            # from time import strptime +            # datetime(*strptime(s, "%Y-%m-%dT%H:%M:%S")[0:6]) +            # datetime.datetime(2005, 12, 6, 12, 13, 14) + +        if response.status != 200: +            self.__err (response, ('GET', path, None, {}, 0)) +        assets = [] +        for line in response.read ().split ('\n'): +            lline = line.lower () +            if lline.find ("parent directory") == -1: +                # isilon file +                assets += try_match (lline, '<li><a href="', '"> ') +                # apache dir +                assets += try_match (lline, 'alt="[dir]"> <a href="', '/">') +                # apache file +                assets += try_match (lline, 'alt="[   ]"> <a href="', '">') +        return assets + + +    def __tmp_filename (self, path_and_file): +        """ __tmp_filename docstring """ +        head, tail = os.path.split (path_and_file) +        if head != '': +            return head + '/.' + tail + '.' + str (os.getpid ()) +        else: +            return head + '.' + tail + '.' + str (os.getpid ()) + + +    def __put__ (self, filesize, body_hook, remotefile): +        """ __put__ docstring """ +        headers = {'Content-Length' : str (filesize)} +        remotefile_tmp = self.__tmp_filename (remotefile) +        response = self.request ('PUT', remotefile_tmp, None, +                                 headers, True, body_hook) +        if not response.status in (201, 204): # created, no content +            self.__err (response, ('PUT', remotefile, None, headers, 0)) +        if filesize != self.get_file_size (remotefile_tmp): +            try: +                self.delete (remotefile_tmp) +            except: +                pass +            raise DAVError (0, 'tmp upload error', remotefile_tmp) +        # move the file to its final location +        try: +            self.rename (remotefile_tmp, remotefile) +        except DAVError, exc: +            if exc.status == 403: # try to clean up the tmp file +                try: +                    self.delete (remotefile_tmp) +                except: +                    pass +            raise +        if filesize != self.get_file_size (remotefile): +            raise DAVError (0, 'file upload error', str (remotefile_tmp)) + + +    def put_string (self, strng, remotefile): +        """ put_string docstring """ +        self.log ('put_string %d -> %s' % (len (strng), remotefile)) +        filesize = len (strng) +        def body_hook (): +            """ body_hook docstring """ +            self.connection.send (strng) +        self.__put__ (filesize, body_hook, remotefile) + + +    def put_file (self, localfile, remotefile): +        """ +        Send a local file to a remote webdav store.  First, upload to +        a temporary filename.  Next make sure the file is the size we +        expected.  Next, move the file to its final location.  Next, +        check the file size at the final location. +        """ +        self.log ('put_file %s -> %s' % (localfile, remotefile)) +        filesize = os.path.getsize (localfile) +        def body_hook (): +            """ body_hook docstring """ +            handle = open (localfile) +            while True: +                data = handle.read (1300) +                if len (data) == 0: +                    break +                self.connection.send (data) +            handle.close () +        self.__put__ (filesize, body_hook, remotefile) + + +    def create_empty_file (self, remotefile): +        """ create an empty file """ +        self.log ('touch_file %s' % (remotefile)) +        headers = {'Content-Length' : '0'} +        response = self.request ('PUT', remotefile, None, headers) +        if not response.status in (201, 204): # created, no content +            self.__err (response, ('PUT', remotefile, None, headers, 0)) +        if self.get_file_size (remotefile) != 0: +            raise DAVError (0, 'file upload error', str (remotefile)) + + +    def __get_file_setup (self, remotefile, check_size=True): +        """ __get_file_setup docstring """ +        if check_size: +            remotesize = self.get_file_size (remotefile) +        response = self.request ('GET', remotefile, None, {}, False) +        if response.status != 200: +            self.__err (response, ('GET', remotefile, None, {}, 0)) +        try: +            content_length = int (response.getheader ("Content-Length")) +        except TypeError: +            content_length = None +        if check_size: +            if content_length != remotesize: +                raise DAVError (0, 'file DL size error', remotefile) +        return (response, content_length) + + +    def __get_file_read (self, writehandle, response, content_length): +        """ __get_file_read docstring """ +        if content_length != None: +            so_far_length = 0 +            while so_far_length < content_length: +                data = response.read (content_length - so_far_length) +                if len (data) == 0: +                    raise DAVError (0, 'short file download') +                so_far_length += len (data) +                writehandle.write (data) +            while len (response.read ()) > 0: +                pass +        else: +            while True: +                data = response.read () +                if (len (data) < 1): +                    break +                writehandle.write (data) + + +    def get_file (self, remotefile, localfile, check_size=True): +        """ +        Get a remote file from a webdav server.  Download to a local +        tmp file, then move into place.  Sanity check file sizes as +        we go. +        """ +        self.log ('get_file %s -> %s' % (remotefile, localfile)) +        (response, content_length) = \ +                   self.__get_file_setup (remotefile, check_size) +        localfile_tmp = self.__tmp_filename (localfile) +        handle = open (localfile_tmp, 'w') +        self.__get_file_read (handle, response, content_length) +        handle.close () +        if check_size: +            if content_length != os.path.getsize (localfile_tmp): +                raise DAVError (0, 'file DL size error', +                                remotefile+','+localfile) +        os.rename (localfile_tmp, localfile) + + +    def get_file_as_string (self, remotefile, check_size=True): +        """ +        download a file from a webdav server and return it as a string. +        """ +        self.log ('get_file_as_string %s' % remotefile) +        (response, content_length) = \ +                   self.__get_file_setup (remotefile, check_size) +        # (tmp_handle, tmp_filename) = tempfile.mkstemp () +        tmp_handle = os.tmpfile () +        self.__get_file_read (tmp_handle, response, content_length) +        tmp_handle.seek (0) +        ret = tmp_handle.read () +        tmp_handle.close () +        # os.unlink (tmp_filename) +        return ret + + +    def get_post_as_string (self, remotefile, body): +        """ +        Do an http POST, send body, get response and return it. +        """ +        self.log ('get_post_as_string %s' % remotefile) +        # headers = {'Content-Type':'application/x-www-form-urlencoded'} +        headers = {'Content-Type':'text/xml; charset="utf-8"'} +        # b64body = urlsafe_b64encode (asset_url) +        response = self.request ('POST', remotefile, body, headers, False) +        if response.status != 200: +            self.__err (response, ('POST', remotefile, body, headers, 0)) +        try: +            content_length = int (response.getheader ('Content-Length')) +        except TypeError: +            content_length = None +        tmp_handle = os.tmpfile () +        self.__get_file_read (tmp_handle, response, content_length) +        tmp_handle.seek (0) +        ret = tmp_handle.read () +        tmp_handle.close () +        return ret + + +    def __destination_command (self, verb, remotesrc, dstdav, remotedst): +        """ +        self and dstdav should point to the same http server. +        """ +        if len (remotedst) > 0 and remotedst[ 0 ] == '/': +            remotedst = remotedst[1:] +        headers = {'Destination': 'http://%s:%d%s%s' % (dstdav.host, +                                                        dstdav.port, +                                                        dstdav.top_path, +                                                        remotedst)} +        response = self.request (verb, remotesrc, None, headers) +        if response.status == 201: +            return # created +        if response.status == 204: +            return # no content +        self.__err (response, (verb, remotesrc, None, headers, 0)) + + +    def rename (self, remotesrc, remotedst): +        """ rename a file on a webdav server """ +        self.log ('rename %s -> %s' % (remotesrc, remotedst)) +        self.__destination_command ('MOVE', remotesrc, self, remotedst) +    def xrename (self, remotesrc, dstdav, remotedst): +        """ rename a file on a webdav server """ +        self.log ('xrename %s -> %s' % (remotesrc, remotedst)) +        self.__destination_command ('MOVE', remotesrc, dstdav, remotedst) + + +    def copy (self, remotesrc, remotedst): +        """ copy a file on a webdav server """ +        self.log ('copy %s -> %s' % (remotesrc, remotedst)) +        self.__destination_command ('COPY', remotesrc, self, remotedst) +    def xcopy (self, remotesrc, dstdav, remotedst): +        """ copy a file on a webdav server """ +        self.log ('xcopy %s -> %s' % (remotesrc, remotedst)) +        self.__destination_command ('COPY', remotesrc, dstdav, remotedst) + + +def put_string (data, url): +    """ +    upload string s to a url +    """ +    url_parsed = urlparse.urlsplit (url) +    dav = WebDAV ('%s://%s/' % (url_parsed[ 0 ], url_parsed[ 1 ])) +    dav.put_string (data, url_parsed[ 2 ]) + + +def get_string (url, check_size=True): +    """ +    return the contents of a url as a string +    """ +    url_parsed = urlparse.urlsplit (url) +    dav = WebDAV ('%s://%s/' % (url_parsed[ 0 ], url_parsed[ 1 ])) +    return dav.get_file_as_string (url_parsed[ 2 ], check_size) | 
