summaryrefslogtreecommitdiff
path: root/indra/lib/python
diff options
context:
space:
mode:
authorChristian Goetze <cg@lindenlab.com>2007-12-01 01:29:28 +0000
committerChristian Goetze <cg@lindenlab.com>2007-12-01 01:29:28 +0000
commit7b7dd4e6f61aa731ef7f9f255014f5b76d5b9fec (patch)
tree08bf4cb9354f975940b83cd695a2aa8bb8d74870 /indra/lib/python
parent5c53c187fff573ef73a643949b3bded1084b0983 (diff)
svn merge -r74808:74832 svn+ssh://svn/svn/linden/branches/qa-dpo-9a
Diffstat (limited to 'indra/lib/python')
-rw-r--r--indra/lib/python/indra/base/lluuid.py4
-rw-r--r--indra/lib/python/indra/ipc/mysql_pool.py2
-rw-r--r--indra/lib/python/indra/util/named_query.py387
3 files changed, 346 insertions, 47 deletions
diff --git a/indra/lib/python/indra/base/lluuid.py b/indra/lib/python/indra/base/lluuid.py
index eb0c557b04..609c8cc261 100644
--- a/indra/lib/python/indra/base/lluuid.py
+++ b/indra/lib/python/indra/base/lluuid.py
@@ -212,6 +212,10 @@ class UUID(object):
_int2binstr(v3,4) + \
_int2binstr(v4,4)
+
+# module-level null constant
+NULL = UUID()
+
def printTranslatedMemory(four_hex_uints):
"""
We expect to get the string as four hex units. eg:
diff --git a/indra/lib/python/indra/ipc/mysql_pool.py b/indra/lib/python/indra/ipc/mysql_pool.py
index 4a265a1f2e..6288a4c732 100644
--- a/indra/lib/python/indra/ipc/mysql_pool.py
+++ b/indra/lib/python/indra/ipc/mysql_pool.py
@@ -97,7 +97,7 @@ class ConnectionPool(Pool):
except (AttributeError, DeadProcess), e:
conn = self.create()
# TODO figure out if we're still connected to the database
- if conn:
+ if conn is not None:
Pool.put(self, conn)
else:
self.current_size -= 1
diff --git a/indra/lib/python/indra/util/named_query.py b/indra/lib/python/indra/util/named_query.py
index 019eb6306b..c3bdd046fd 100644
--- a/indra/lib/python/indra/util/named_query.py
+++ b/indra/lib/python/indra/util/named_query.py
@@ -28,20 +28,33 @@ THE SOFTWARE.
$/LicenseInfo$
"""
+import errno
import MySQLdb
+import MySQLdb.cursors
import os
import os.path
+import re
import time
+#import sys # *TODO: remove. only used in testing.
+#import pprint # *TODO: remove. only used in testing.
+
+try:
+ set = set
+except NameError:
+ from sets import Set as set
+
from indra.base import llsd
from indra.base import config
-from indra.ipc import russ
_g_named_manager = None
-# this function is entirely intended for testing purposes,
-# because it's tricky to control the config from inside a test
def _init_g_named_manager(sql_dir = None):
+ """Initializes a global NamedManager object to point at a
+ specified named queries hierarchy.
+
+ This function is intended entirely for testing purposes,
+ because it's tricky to control the config from inside a test."""
if sql_dir is None:
sql_dir = config.get('named-query-base-dir')
global _g_named_manager
@@ -49,14 +62,14 @@ def _init_g_named_manager(sql_dir = None):
os.path.abspath(os.path.realpath(sql_dir)))
def get(name):
- "@brief get the named query object to be used to perform queries"
+ "Get the named query object to be used to perform queries"
if _g_named_manager is None:
_init_g_named_manager()
return _g_named_manager.get(name)
-def sql(name, params):
+def sql(connection, name, params):
# use module-global NamedQuery object to perform default substitution
- return get(name).sql(params)
+ return get(name).sql(connection, params)
def run(connection, name, params, expect_rows = None):
"""\
@@ -72,66 +85,243 @@ Note that this function will fetch ALL rows.
return get(name).run(connection, params, expect_rows)
class ExpectationFailed(Exception):
+ """ Exception that is raised when an expectation for an sql query
+ is not met."""
def __init__(self, message):
+ Exception.__init__(self, message)
self.message = message
class NamedQuery(object):
def __init__(self, name, filename):
- self._stat_interval = 5000 # 5 seconds
+ """ Construct a NamedQuery object. The name argument is an
+ arbitrary name as a handle for the query, and the filename is
+ a path to a file containing an llsd named query document."""
+ self._stat_interval_seconds = 5 # 5 seconds
self._name = name
self._location = filename
+ self._alternative = dict()
+ self._last_mod_time = 0
+ self._last_check_time = 0
+ self.deleted = False
self.load_contents()
def name(self):
+ """ The name of the query. """
return self._name
def get_modtime(self):
- return os.path.getmtime(self._location)
+ """ Returns the mtime (last modified time) of the named query
+ file, if such exists."""
+ if self._location:
+ return os.path.getmtime(self._location)
+ return 0
def load_contents(self):
- self._contents = llsd.parse(open(self._location).read())
+ """ Loads and parses the named query file into self. Does
+ nothing if self.location is nonexistent."""
+ if self._location:
+ self._reference_contents(llsd.parse(open(self._location).read()))
+ # Check for alternative implementations
+ try:
+ for name, alt in self._contents['alternative'].items():
+ nq = NamedQuery(name, None)
+ nq._reference_contents(alt)
+ self._alternative[name] = nq
+ except KeyError, e:
+ pass
+ self._last_mod_time = self.get_modtime()
+ self._last_check_time = time.time()
+
+ def _reference_contents(self, contents):
+ "Helper method which builds internal structure from parsed contents"
+ self._contents = contents
self._ttl = int(self._contents.get('ttl', 0))
self._return_as_map = bool(self._contents.get('return_as_map', False))
self._legacy_dbname = self._contents.get('legacy_dbname', None)
- self._legacy_query = self._contents.get('legacy_query', None)
- self._options = self._contents.get('options', {})
- self._base_query = self._contents['base_query']
- self._last_mod_time = self.get_modtime()
- self._last_check_time = time.time()
+ # reset these before doing the sql conversion because we will
+ # read them there. reset these while loading so we pick up
+ # changes.
+ self._around = set()
+ self._append = set()
+ self._integer = set()
+ self._options = self._contents.get('dynamic_where', {})
+ for key in self._options:
+ if isinstance(self._options[key], basestring):
+ self._options[key] = self._convert_sql(self._options[key])
+ elif isinstance(self._options[key], list):
+ lines = []
+ for line in self._options[key]:
+ lines.append(self._convert_sql(line))
+ self._options[key] = lines
+ else:
+ moreopt = {}
+ for kk in self._options[key]:
+ moreopt[kk] = self._convert_sql(self._options[key][kk])
+ self._options[key] = moreopt
+ self._base_query = self._convert_sql(self._contents['base_query'])
+ self._query_suffix = self._convert_sql(
+ self._contents.get('query_suffix', ''))
+
+ def _convert_sql(self, sql):
+ """convert the parsed sql into a useful internal structure.
+
+ This function has to turn the named query format into a pyformat
+ style. It also has to look for %:name% and :name% and
+ ready them for use in LIKE statements"""
+ if sql:
+ #print >>sys.stderr, "sql:",sql
+ expr = re.compile("(%?):([a-zA-Z][a-zA-Z0-9_-]*)%")
+ sql = expr.sub(self._prepare_like, sql)
+ expr = re.compile("#:([a-zA-Z][a-zA-Z0-9_-]*)")
+ sql = expr.sub(self._prepare_integer, sql)
+ expr = re.compile(":([a-zA-Z][a-zA-Z0-9_-]*)")
+ sql = expr.sub("%(\\1)s", sql)
+ return sql
+
+ def _prepare_like(self, match):
+ """This function changes LIKE statement replace behavior
+
+ It works by turning %:name% to %(_name_around)s and :name% to
+ %(_name_append)s. Since a leading '_' is not a valid keyname
+ input (enforced via unit tests), it will never clash with
+ existing keys. Then, when building the statement, the query
+ runner will generate corrected strings."""
+ if match.group(1) == '%':
+ # there is a leading % so this is treated as prefix/suffix
+ self._around.add(match.group(2))
+ return "%(" + self._build_around_key(match.group(2)) + ")s"
+ else:
+ # there is no leading %, so this is suffix only
+ self._append.add(match.group(2))
+ return "%(" + self._build_append_key(match.group(2)) + ")s"
+
+ def _build_around_key(self, key):
+ return "_" + key + "_around"
+
+ def _build_append_key(self, key):
+ return "_" + key + "_append"
+
+ def _prepare_integer(self, match):
+ """This function adjusts the sql for #:name replacements
+
+ It works by turning #:name to %(_name_as_integer)s. Since a
+ leading '_' is not a valid keyname input (enforced via unit
+ tests), it will never clash with existing keys. Then, when
+ building the statement, the query runner will generate
+ corrected strings."""
+ self._integer.add(match.group(1))
+ return "%(" + self._build_integer_key(match.group(1)) + ")s"
+
+ def _build_integer_key(self, key):
+ return "_" + key + "_as_integer"
+
+ def _strip_wildcards_to_list(self, value):
+ """Take string, and strip out the LIKE special characters.
+
+ Technically, this is database dependent, but postgresql and
+ mysql use the same wildcards, and I am not aware of a general
+ way to handle this. I think you need a sql statement of the
+ form:
+
+ LIKE_STRING( [ANY,ONE,str]... )
+
+ which would treat ANY as their any string, and ONE as their
+ single glyph, and str as something that needs database
+ specific encoding to not allow any % or _ to affect the query.
+
+ As it stands, I believe it's impossible to write a named query
+ style interface which uses like to search the entire space of
+ text available. Imagine the query:
+
+ % of brain used by average linden
+
+ In order to search for %, it must be escaped, so once you have
+ escaped the string to not do wildcard searches, and be escaped
+ for the database, and then prepended the wildcard you come
+ back with one of:
+
+ 1) %\% of brain used by average linden
+ 2) %%% of brain used by average linden
+
+ Then, when passed to the database to be escaped to be database
+ safe, you get back:
+
+ 1) %\\% of brain used by average linden
+ : which means search for any character sequence, followed by a
+ backslash, followed by any sequence, followed by ' of
+ brain...'
+ 2) %%% of brain used by average linden
+ : which (I believe) means search for a % followed by any
+ character sequence followed by 'of brain...'
+
+ Neither of which is what we want!
+
+ So, we need a vendor (or extension) for LIKE_STRING. Anyone
+ want to write it?"""
+ utf8_value = unicode(value, "utf-8")
+ esc_list = []
+ remove_chars = set(u"%_")
+ for glyph in utf8_value:
+ if glyph in remove_chars:
+ continue
+ esc_list.append(glyph.encode("utf-8"))
+ return esc_list
+
+ def delete(self):
+ """ Makes this query unusable by deleting all the members and
+ setting the deleted member. This is desired when the on-disk
+ query has been deleted but the in-memory copy remains."""
+ # blow away all members except _name, _location, and deleted
+ name, location = self._name, self._location
+ for key in self.__dict__.keys():
+ del self.__dict__[key]
+ self.deleted = True
+ self._name, self._location = name, location
def ttl(self):
+ """ Estimated time to live of this query. Used for web
+ services to set the Expires header."""
return self._ttl
def legacy_dbname(self):
return self._legacy_dbname
- def legacy_query(self):
- return self._legacy_query
-
def return_as_map(self):
+ """ Returns true if this query is configured to return its
+ results as a single map (as opposed to a list of maps, the
+ normal behavior)."""
+
return self._return_as_map
- def run(self, connection, params, expect_rows = None, use_dictcursor = True):
- """\
-@brief given a connection, run a named query with the params
+ def for_schema(self, db_name):
+ "Look through the alternatives and return the correct query"
+ try:
+ return self._alternative[db_name]
+ except KeyError, e:
+ pass
+ return self
-Note that this function will fetch ALL rows. We do this because it
-opens and closes the cursor to generate the values, and this isn't a generator so the
-cursor has no life beyond the method call.
-@param cursor The connection to use (this generates its own cursor for the query)
-@param name The name of the query to run
-@param params The parameters passed into the query
-@param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack.
-@param use_dictcursor Set to false to use a normal cursor and manually convert the rows to dicts.
-@return Returns the result set as a list of dicts, or, if the named query has return_as_map set to true, returns a single dict.
+ def run(self, connection, params, expect_rows = None, use_dictcursor = True):
+ """given a connection, run a named query with the params
+
+ Note that this function will fetch ALL rows. We do this because it
+ opens and closes the cursor to generate the values, and this
+ isn't a generator so the cursor has no life beyond the method call.
+
+ @param cursor The connection to use (this generates its own cursor for the query)
+ @param name The name of the query to run
+ @param params The parameters passed into the query
+ @param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack.
+ @param use_dictcursor Set to false to use a normal cursor and manually convert the rows to dicts.
+ @return Returns the result set as a list of dicts, or, if the named query has return_as_map set to true, returns a single dict.
"""
if use_dictcursor:
cursor = connection.cursor(MySQLdb.cursors.DictCursor)
else:
cursor = connection.cursor()
- statement = self.sql(params)
+ statement = self.sql(connection, params)
#print "SQL:", statement
rows = cursor.execute(statement)
@@ -169,47 +359,152 @@ cursor has no life beyond the method call.
return result_set[0]
return result_set
- def sql(self, params):
+ def sql(self, connection, params):
+ """ Generates an SQL statement from the named query document
+ and a dictionary of parameters.
+
+ """
self.refresh()
# build the query from the options available and the params
base_query = []
base_query.append(self._base_query)
+ #print >>sys.stderr, "base_query:",base_query
for opt, extra_where in self._options.items():
- if opt in params and (params[opt] == 0 or params[opt]):
- if type(extra_where) in (dict, list, tuple):
+ if type(extra_where) in (dict, list, tuple):
+ if opt in params:
base_query.append(extra_where[params[opt]])
- else:
+ else:
+ if opt in params and params[opt]:
base_query.append(extra_where)
-
+ if self._query_suffix:
+ base_query.append(self._query_suffix)
+ #print >>sys.stderr, "base_query:",base_query
full_query = '\n'.join(base_query)
-
- # do substitution
- sql = russ.format(full_query, params)
+
+ # Go through the query and rewrite all of the ones with the
+ # @:name syntax.
+ rewrite = _RewriteQueryForArray(params)
+ expr = re.compile("@%\(([a-zA-Z][a-zA-Z0-9_-]*)\)s")
+ full_query = expr.sub(rewrite.operate, full_query)
+ params.update(rewrite.new_params)
+
+ # build out the params for like. We only have to do this
+ # parameters which were detected to have used the where syntax
+ # during load.
+ #
+ # * treat the incoming string as utf-8
+ # * strip wildcards
+ # * append or prepend % as appropriate
+ new_params = {}
+ for key in params:
+ if key in self._around:
+ new_value = ['%']
+ new_value.extend(self._strip_wildcards_to_list(params[key]))
+ new_value.append('%')
+ new_params[self._build_around_key(key)] = ''.join(new_value)
+ if key in self._append:
+ new_value = self._strip_wildcards_to_list(params[key])
+ new_value.append('%')
+ new_params[self._build_append_key(key)] = ''.join(new_value)
+ if key in self._integer:
+ new_params[self._build_integer_key(key)] = int(params[key])
+ params.update(new_params)
+
+ # do substitution using the mysql (non-standard) 'literal'
+ # function to do the escaping.
+ sql = full_query % connection.literal(params)
return sql
def refresh(self):
- # only stat the file every so often
+ """ Refresh self from the file on the filesystem.
+
+ This is optimized to be callable as frequently as you wish,
+ without adding too much load. It does so by only stat-ing the
+ file every N seconds, where N defaults to 5 and is
+ configurable through the member _stat_interval_seconds. If the stat
+ reveals that the file has changed, refresh will re-parse the
+ contents of the file and use them to update the named query
+ instance. If the stat reveals that the file has been deleted,
+ refresh will call self.delete to make the in-memory
+ representation unusable."""
now = time.time()
- if(now - self._last_check_time > self._stat_interval):
+ if(now - self._last_check_time > self._stat_interval_seconds):
self._last_check_time = now
- modtime = self.get_modtime()
- if(modtime > self._last_mod_time):
- self.load_contents()
+ try:
+ modtime = self.get_modtime()
+ if(modtime > self._last_mod_time):
+ self.load_contents()
+ except OSError, e:
+ if e.errno == errno.ENOENT: # file not found
+ self.delete() # clean up self
+ raise # pass the exception along to the caller so they know that this query disappeared
class NamedQueryManager(object):
+ """ Manages the lifespan of NamedQuery objects, drawing from a
+ directory hierarchy of named query documents.
+
+ In practice this amounts to a memory cache of NamedQuery objects."""
+
def __init__(self, named_queries_dir):
+ """ Initializes a manager to look for named queries in a
+ directory."""
self._dir = os.path.abspath(os.path.realpath(named_queries_dir))
self._cached_queries = {}
- def sql(self, name, params):
+ def sql(self, connection, name, params):
nq = self.get(name)
- return nq.sql(params)
+ return nq.sql(connection, params)
def get(self, name):
- # new up/refresh a NamedQuery based on the name
+ """ Returns a NamedQuery instance based on the name, either
+ from memory cache, or by parsing from disk.
+
+ The name is simply a relative path to the directory associated
+ with the manager object. Before returning the instance, the
+ NamedQuery object is cached in memory, so that subsequent
+ accesses don't have to read from disk or do any parsing. This
+ means that NamedQuery objects returned by this method are
+ shared across all users of the manager object.
+ NamedQuery.refresh is used to bring the NamedQuery objects in
+ sync with the actual files on disk."""
nq = self._cached_queries.get(name)
if nq is None:
nq = NamedQuery(name, os.path.join(self._dir, name))
self._cached_queries[name] = nq
+ else:
+ try:
+ nq.refresh()
+ except OSError, e:
+ if e.errno == errno.ENOENT: # file not found
+ del self._cached_queries[name]
+ raise # pass exception along to caller so they know that the query disappeared
+
return nq
+
+class _RewriteQueryForArray(object):
+ "Helper class for rewriting queries with the @:name syntax"
+ def __init__(self, params):
+ self.params = params
+ self.new_params = dict()
+
+ def operate(self, match):
+ "Given a match, return the string that should be in use"
+ key = match.group(1)
+ value = self.params[key]
+ if type(value) in (list,tuple):
+ rv = []
+ for idx in range(len(value)):
+ new_key = "_" + key + "_" + str(idx)
+ self.new_params[new_key] = value[idx]
+ rv.append("%(" + new_key + ")s")
+ return ','.join(rv)
+ else:
+ # not something that can be expanded, so just drop the
+ # leading @ in the front of the match. This will mean that
+ # the single value we have, be it a string, int, whatever
+ # (other than dict) will correctly show up, eg:
+ #
+ # where foo in (@:foobar) -- foobar is a string, so we get
+ # where foo in (:foobar)
+ return match.group(0)[1:]