diff options
author | Christian Goetze <cg@lindenlab.com> | 2007-12-01 01:29:28 +0000 |
---|---|---|
committer | Christian Goetze <cg@lindenlab.com> | 2007-12-01 01:29:28 +0000 |
commit | 7b7dd4e6f61aa731ef7f9f255014f5b76d5b9fec (patch) | |
tree | 08bf4cb9354f975940b83cd695a2aa8bb8d74870 /indra/lib/python | |
parent | 5c53c187fff573ef73a643949b3bded1084b0983 (diff) |
svn merge -r74808:74832 svn+ssh://svn/svn/linden/branches/qa-dpo-9a
Diffstat (limited to 'indra/lib/python')
-rw-r--r-- | indra/lib/python/indra/base/lluuid.py | 4 | ||||
-rw-r--r-- | indra/lib/python/indra/ipc/mysql_pool.py | 2 | ||||
-rw-r--r-- | indra/lib/python/indra/util/named_query.py | 387 |
3 files changed, 346 insertions, 47 deletions
diff --git a/indra/lib/python/indra/base/lluuid.py b/indra/lib/python/indra/base/lluuid.py index eb0c557b04..609c8cc261 100644 --- a/indra/lib/python/indra/base/lluuid.py +++ b/indra/lib/python/indra/base/lluuid.py @@ -212,6 +212,10 @@ class UUID(object): _int2binstr(v3,4) + \ _int2binstr(v4,4) + +# module-level null constant +NULL = UUID() + def printTranslatedMemory(four_hex_uints): """ We expect to get the string as four hex units. eg: diff --git a/indra/lib/python/indra/ipc/mysql_pool.py b/indra/lib/python/indra/ipc/mysql_pool.py index 4a265a1f2e..6288a4c732 100644 --- a/indra/lib/python/indra/ipc/mysql_pool.py +++ b/indra/lib/python/indra/ipc/mysql_pool.py @@ -97,7 +97,7 @@ class ConnectionPool(Pool): except (AttributeError, DeadProcess), e: conn = self.create() # TODO figure out if we're still connected to the database - if conn: + if conn is not None: Pool.put(self, conn) else: self.current_size -= 1 diff --git a/indra/lib/python/indra/util/named_query.py b/indra/lib/python/indra/util/named_query.py index 019eb6306b..c3bdd046fd 100644 --- a/indra/lib/python/indra/util/named_query.py +++ b/indra/lib/python/indra/util/named_query.py @@ -28,20 +28,33 @@ THE SOFTWARE. $/LicenseInfo$ """ +import errno import MySQLdb +import MySQLdb.cursors import os import os.path +import re import time +#import sys # *TODO: remove. only used in testing. +#import pprint # *TODO: remove. only used in testing. + +try: + set = set +except NameError: + from sets import Set as set + from indra.base import llsd from indra.base import config -from indra.ipc import russ _g_named_manager = None -# this function is entirely intended for testing purposes, -# because it's tricky to control the config from inside a test def _init_g_named_manager(sql_dir = None): + """Initializes a global NamedManager object to point at a + specified named queries hierarchy. + + This function is intended entirely for testing purposes, + because it's tricky to control the config from inside a test.""" if sql_dir is None: sql_dir = config.get('named-query-base-dir') global _g_named_manager @@ -49,14 +62,14 @@ def _init_g_named_manager(sql_dir = None): os.path.abspath(os.path.realpath(sql_dir))) def get(name): - "@brief get the named query object to be used to perform queries" + "Get the named query object to be used to perform queries" if _g_named_manager is None: _init_g_named_manager() return _g_named_manager.get(name) -def sql(name, params): +def sql(connection, name, params): # use module-global NamedQuery object to perform default substitution - return get(name).sql(params) + return get(name).sql(connection, params) def run(connection, name, params, expect_rows = None): """\ @@ -72,66 +85,243 @@ Note that this function will fetch ALL rows. return get(name).run(connection, params, expect_rows) class ExpectationFailed(Exception): + """ Exception that is raised when an expectation for an sql query + is not met.""" def __init__(self, message): + Exception.__init__(self, message) self.message = message class NamedQuery(object): def __init__(self, name, filename): - self._stat_interval = 5000 # 5 seconds + """ Construct a NamedQuery object. The name argument is an + arbitrary name as a handle for the query, and the filename is + a path to a file containing an llsd named query document.""" + self._stat_interval_seconds = 5 # 5 seconds self._name = name self._location = filename + self._alternative = dict() + self._last_mod_time = 0 + self._last_check_time = 0 + self.deleted = False self.load_contents() def name(self): + """ The name of the query. """ return self._name def get_modtime(self): - return os.path.getmtime(self._location) + """ Returns the mtime (last modified time) of the named query + file, if such exists.""" + if self._location: + return os.path.getmtime(self._location) + return 0 def load_contents(self): - self._contents = llsd.parse(open(self._location).read()) + """ Loads and parses the named query file into self. Does + nothing if self.location is nonexistant.""" + if self._location: + self._reference_contents(llsd.parse(open(self._location).read())) + # Check for alternative implementations + try: + for name, alt in self._contents['alternative'].items(): + nq = NamedQuery(name, None) + nq._reference_contents(alt) + self._alternative[name] = nq + except KeyError, e: + pass + self._last_mod_time = self.get_modtime() + self._last_check_time = time.time() + + def _reference_contents(self, contents): + "Helper method which builds internal structure from parsed contents" + self._contents = contents self._ttl = int(self._contents.get('ttl', 0)) self._return_as_map = bool(self._contents.get('return_as_map', False)) self._legacy_dbname = self._contents.get('legacy_dbname', None) - self._legacy_query = self._contents.get('legacy_query', None) - self._options = self._contents.get('options', {}) - self._base_query = self._contents['base_query'] - self._last_mod_time = self.get_modtime() - self._last_check_time = time.time() + # reset these before doing the sql conversion because we will + # read them there. reset these while loading so we pick up + # changes. + self._around = set() + self._append = set() + self._integer = set() + self._options = self._contents.get('dynamic_where', {}) + for key in self._options: + if isinstance(self._options[key], basestring): + self._options[key] = self._convert_sql(self._options[key]) + elif isinstance(self._options[key], list): + lines = [] + for line in self._options[key]: + lines.append(self._convert_sql(line)) + self._options[key] = lines + else: + moreopt = {} + for kk in self._options[key]: + moreopt[kk] = self._convert_sql(self._options[key][kk]) + self._options[key] = moreopt + self._base_query = self._convert_sql(self._contents['base_query']) + self._query_suffix = self._convert_sql( + self._contents.get('query_suffix', '')) + + def _convert_sql(self, sql): + """convert the parsed sql into a useful internal structure. + + This function has to turn the named query format into a pyformat + style. It also has to look for %:name% and :name% and + ready them for use in LIKE statements""" + if sql: + #print >>sys.stderr, "sql:",sql + expr = re.compile("(%?):([a-zA-Z][a-zA-Z0-9_-]*)%") + sql = expr.sub(self._prepare_like, sql) + expr = re.compile("#:([a-zA-Z][a-zA-Z0-9_-]*)") + sql = expr.sub(self._prepare_integer, sql) + expr = re.compile(":([a-zA-Z][a-zA-Z0-9_-]*)") + sql = expr.sub("%(\\1)s", sql) + return sql + + def _prepare_like(self, match): + """This function changes LIKE statement replace behavior + + It works by turning %:name% to %(_name_around)s and :name% to + %(_name_append)s. Since a leading '_' is not a valid keyname + input (enforced via unit tests), it will never clash with + existing keys. Then, when building the statement, the query + runner will generate corrected strings.""" + if match.group(1) == '%': + # there is a leading % so this is treated as prefix/suffix + self._around.add(match.group(2)) + return "%(" + self._build_around_key(match.group(2)) + ")s" + else: + # there is no leading %, so this is suffix only + self._append.add(match.group(2)) + return "%(" + self._build_append_key(match.group(2)) + ")s" + + def _build_around_key(self, key): + return "_" + key + "_around" + + def _build_append_key(self, key): + return "_" + key + "_append" + + def _prepare_integer(self, match): + """This function adjusts the sql for #:name replacements + + It works by turning #:name to %(_name_as_integer)s. Since a + leading '_' is not a valid keyname input (enforced via unit + tests), it will never clash with existing keys. Then, when + building the statement, the query runner will generate + corrected strings.""" + self._integer.add(match.group(1)) + return "%(" + self._build_integer_key(match.group(1)) + ")s" + + def _build_integer_key(self, key): + return "_" + key + "_as_integer" + + def _strip_wildcards_to_list(self, value): + """Take string, and strip out the LIKE special characters. + + Technically, this is database dependant, but postgresql and + mysql use the same wildcards, and I am not aware of a general + way to handle this. I think you need a sql statement of the + form: + + LIKE_STRING( [ANY,ONE,str]... ) + + which would treat ANY as their any string, and ONE as their + single glyph, and str as something that needs database + specific encoding to not allow any % or _ to affect the query. + + As it stands, I believe it's impossible to write a named query + style interface which uses like to search the entire space of + text available. Imagine the query: + + % of brain used by average linden + + In order to search for %, it must be escaped, so once you have + escaped the string to not do wildcard searches, and be escaped + for the database, and then prepended the wildcard you come + back with one of: + + 1) %\% of brain used by average linden + 2) %%% of brain used by average linden + + Then, when passed to the database to be escaped to be database + safe, you get back: + + 1) %\\% of brain used by average linden + : which means search for any character sequence, followed by a + backslash, followed by any sequence, followed by ' of + brain...' + 2) %%% of brain used by average linden + : which (I believe) means search for a % followed by any + character sequence followed by 'of brain...' + + Neither of which is what we want! + + So, we need a vendor (or extention) for LIKE_STRING. Anyone + want to write it?""" + utf8_value = unicode(value, "utf-8") + esc_list = [] + remove_chars = set(u"%_") + for glyph in utf8_value: + if glyph in remove_chars: + continue + esc_list.append(glyph.encode("utf-8")) + return esc_list + + def delete(self): + """ Makes this query unusable by deleting all the members and + setting the deleted member. This is desired when the on-disk + query has been deleted but the in-memory copy remains.""" + # blow away all members except _name, _location, and deleted + name, location = self._name, self._location + for key in self.__dict__.keys(): + del self.__dict__[key] + self.deleted = True + self._name, self._location = name, location def ttl(self): + """ Estimated time to live of this query. Used for web + services to set the Expires header.""" return self._ttl def legacy_dbname(self): return self._legacy_dbname - def legacy_query(self): - return self._legacy_query - def return_as_map(self): + """ Returns true if this query is configured to return its + results as a single map (as opposed to a list of maps, the + normal behavior).""" + return self._return_as_map - def run(self, connection, params, expect_rows = None, use_dictcursor = True): - """\ -@brief given a connection, run a named query with the params + def for_schema(self, db_name): + "Look trough the alternates and return the correct query" + try: + return self._alternative[db_name] + except KeyError, e: + pass + return self -Note that this function will fetch ALL rows. We do this because it -opens and closes the cursor to generate the values, and this isn't a generator so the -cursor has no life beyond the method call. -@param cursor The connection to use (this generates its own cursor for the query) -@param name The name of the query to run -@param params The parameters passed into the query -@param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack. -@param use_dictcursor Set to false to use a normal cursor and manually convert the rows to dicts. -@return Returns the result set as a list of dicts, or, if the named query has return_as_map set to true, returns a single dict. + def run(self, connection, params, expect_rows = None, use_dictcursor = True): + """given a connection, run a named query with the params + + Note that this function will fetch ALL rows. We do this because it + opens and closes the cursor to generate the values, and this + isn't a generator so the cursor has no life beyond the method call. + + @param cursor The connection to use (this generates its own cursor for the query) + @param name The name of the query to run + @param params The parameters passed into the query + @param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack. + @param use_dictcursor Set to false to use a normal cursor and manually convert the rows to dicts. + @return Returns the result set as a list of dicts, or, if the named query has return_as_map set to true, returns a single dict. """ if use_dictcursor: cursor = connection.cursor(MySQLdb.cursors.DictCursor) else: cursor = connection.cursor() - statement = self.sql(params) + statement = self.sql(connection, params) #print "SQL:", statement rows = cursor.execute(statement) @@ -169,47 +359,152 @@ cursor has no life beyond the method call. return result_set[0] return result_set - def sql(self, params): + def sql(self, connection, params): + """ Generates an SQL statement from the named query document + and a dictionary of parameters. + + """ self.refresh() # build the query from the options available and the params base_query = [] base_query.append(self._base_query) + #print >>sys.stderr, "base_query:",base_query for opt, extra_where in self._options.items(): - if opt in params and (params[opt] == 0 or params[opt]): - if type(extra_where) in (dict, list, tuple): + if type(extra_where) in (dict, list, tuple): + if opt in params: base_query.append(extra_where[params[opt]]) - else: + else: + if opt in params and params[opt]: base_query.append(extra_where) - + if self._query_suffix: + base_query.append(self._query_suffix) + #print >>sys.stderr, "base_query:",base_query full_query = '\n'.join(base_query) - - # do substitution - sql = russ.format(full_query, params) + + # Go through the query and rewrite all of the ones with the + # @:name syntax. + rewrite = _RewriteQueryForArray(params) + expr = re.compile("@%\(([a-zA-Z][a-zA-Z0-9_-]*)\)s") + full_query = expr.sub(rewrite.operate, full_query) + params.update(rewrite.new_params) + + # build out the params for like. We only have to do this + # parameters which were detected to have ued the where syntax + # during load. + # + # * treat the incoming string as utf-8 + # * strip wildcards + # * append or prepend % as appropriate + new_params = {} + for key in params: + if key in self._around: + new_value = ['%'] + new_value.extend(self._strip_wildcards_to_list(params[key])) + new_value.append('%') + new_params[self._build_around_key(key)] = ''.join(new_value) + if key in self._append: + new_value = self._strip_wildcards_to_list(params[key]) + new_value.append('%') + new_params[self._build_append_key(key)] = ''.join(new_value) + if key in self._integer: + new_params[self._build_integer_key(key)] = int(params[key]) + params.update(new_params) + + # do substitution using the mysql (non-standard) 'literal' + # function to do the escaping. + sql = full_query % connection.literal(params) return sql def refresh(self): - # only stat the file every so often + """ Refresh self from the file on the filesystem. + + This is optimized to be callable as frequently as you wish, + without adding too much load. It does so by only stat-ing the + file every N seconds, where N defaults to 5 and is + configurable through the member _stat_interval_seconds. If the stat + reveals that the file has changed, refresh will re-parse the + contents of the file and use them to update the named query + instance. If the stat reveals that the file has been deleted, + refresh will call self.delete to make the in-memory + representation unusable.""" now = time.time() - if(now - self._last_check_time > self._stat_interval): + if(now - self._last_check_time > self._stat_interval_seconds): self._last_check_time = now - modtime = self.get_modtime() - if(modtime > self._last_mod_time): - self.load_contents() + try: + modtime = self.get_modtime() + if(modtime > self._last_mod_time): + self.load_contents() + except OSError, e: + if e.errno == errno.ENOENT: # file not found + self.delete() # clean up self + raise # pass the exception along to the caller so they know that this query disappeared class NamedQueryManager(object): + """ Manages the lifespan of NamedQuery objects, drawing from a + directory hierarchy of named query documents. + + In practice this amounts to a memory cache of NamedQuery objects.""" + def __init__(self, named_queries_dir): + """ Initializes a manager to look for named queries in a + directory.""" self._dir = os.path.abspath(os.path.realpath(named_queries_dir)) self._cached_queries = {} - def sql(self, name, params): + def sql(self, connection, name, params): nq = self.get(name) - return nq.sql(params) + return nq.sql(connection, params) def get(self, name): - # new up/refresh a NamedQuery based on the name + """ Returns a NamedQuery instance based on the name, either + from memory cache, or by parsing from disk. + + The name is simply a relative path to the directory associated + with the manager object. Before returning the instance, the + NamedQuery object is cached in memory, so that subsequent + accesses don't have to read from disk or do any parsing. This + means that NamedQuery objects returned by this method are + shared across all users of the manager object. + NamedQuery.refresh is used to bring the NamedQuery objects in + sync with the actual files on disk.""" nq = self._cached_queries.get(name) if nq is None: nq = NamedQuery(name, os.path.join(self._dir, name)) self._cached_queries[name] = nq + else: + try: + nq.refresh() + except OSError, e: + if e.errno == errno.ENOENT: # file not found + del self._cached_queries[name] + raise # pass exception along to caller so they know that the query disappeared + return nq + +class _RewriteQueryForArray(object): + "Helper class for rewriting queries with the @:name syntax" + def __init__(self, params): + self.params = params + self.new_params = dict() + + def operate(self, match): + "Given a match, return the string that should be in use" + key = match.group(1) + value = self.params[key] + if type(value) in (list,tuple): + rv = [] + for idx in range(len(value)): + new_key = "_" + key + "_" + str(idx) + self.new_params[new_key] = value[idx] + rv.append("%(" + new_key + ")s") + return ','.join(rv) + else: + # not something that can be expanded, so just drop the + # leading @ in the front of the match. This will mean that + # the single value we have, be it a string, int, whatever + # (other than dict) will correctly show up, eg: + # + # where foo in (@:foobar) -- foobar is a string, so we get + # where foo in (:foobar) + return match.group(0)[1:] |