"""\
|
|
@file named_query.py
|
|
@author Ryan Williams, Phoenix
|
|
@date 2007-07-31
|
|
@brief An API for running named queries.
|
|
|
|
$LicenseInfo:firstyear=2007&license=mit$
|
|
|
|
Copyright (c) 2007-2009, Linden Research, Inc.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
$/LicenseInfo$
|
|
"""
|
|
|
|
import errno
import MySQLdb
import MySQLdb.cursors
import os
import os.path
import re
import time

from indra.base import llsd
from indra.base import config

DEBUG = False

NQ_FILE_SUFFIX = config.get('named-query-file-suffix', '.nq')
NQ_FILE_SUFFIX_LEN = len(NQ_FILE_SUFFIX)

_g_named_manager = None

def _init_g_named_manager(sql_dir = None):
    """Initializes the global NamedQueryManager object to point at a
    specified named queries hierarchy.

    This function is intended entirely for testing purposes,
    because it's tricky to control the config from inside a test."""
    if sql_dir is None:
        sql_dir = config.get('named-query-base-dir')

    # extra fallback directory in case config doesn't return what we want
    if sql_dir is None:
        sql_dir = os.path.abspath(
            os.path.join(
                os.path.realpath(os.path.dirname(__file__)),
                "..", "..", "..", "..", "web", "dataservice", "sql"))

    global _g_named_manager
    _g_named_manager = NamedQueryManager(
        os.path.abspath(os.path.realpath(sql_dir)))

def get(name):
    "Get the named query object to be used to perform queries"
    if _g_named_manager is None:
        _init_g_named_manager()
    return _g_named_manager.get(name)

def sql(connection, name, params):
    # use module-global NamedQuery object to perform default substitution
    return get(name).sql(connection, params)

def run(connection, name, params, expect_rows = None):
    """\
    @brief given a connection, run a named query with the params

    Note that this function will fetch ALL rows.
    @param connection The connection to use
    @param name The name of the query to run
    @param params The parameters passed into the query
    @param expect_rows The number of rows expected. Set to 1 if
        return_as_map is true. Raises ExpectationFailed if the number
        of returned rows doesn't exactly match. Kind of a hack.
    @return Returns the result set as a list of dicts.
    """
    return get(name).run(connection, params, expect_rows)

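# A minimal usage sketch (illustrative only -- the query name, file
# contents and parameters below are hypothetical, not part of this module):
#
#   -- sql/agent/info.nq, an llsd map as parsed by NamedQuery --
#   <llsd><map>
#     <key>base_query</key>
#     <string>SELECT * FROM agent WHERE agent_id = :agent_id</string>
#   </map></llsd>
#
#   connection = MySQLdb.connect(host, user, passwd, db)
#   rows = run(connection, 'agent/info', {'agent_id': 42}, expect_rows = 1)
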
class ExpectationFailed(Exception):
    """ Exception that is raised when an expectation for an sql query
    is not met."""
    def __init__(self, message):
        Exception.__init__(self, message)
        self.message = message

class NamedQuery(object):
    def __init__(self, name, filename):
        """ Construct a NamedQuery object. The name argument is an
        arbitrary name as a handle for the query, and the filename is
        a path to a file or a file-like object containing an llsd named
        query document."""
        self._stat_interval_seconds = 5  # 5 seconds
        self._name = name
        if (filename is not None and isinstance(filename, (str, unicode))
                and NQ_FILE_SUFFIX != filename[-NQ_FILE_SUFFIX_LEN:]):
            filename = filename + NQ_FILE_SUFFIX
        self._location = filename
        self._alternative = dict()
        self._last_mod_time = 0
        self._last_check_time = 0
        self.deleted = False
        self.load_contents()

    def name(self):
        """ The name of the query. """
        return self._name

    def get_modtime(self):
        """ Returns the mtime (last modified time) of the named query
        filename. For file-like objects, expect a modtime of 0"""
        if self._location and isinstance(self._location, (str, unicode)):
            return os.path.getmtime(self._location)
        return 0

    def load_contents(self):
        """ Loads and parses the named query file into self. Does
        nothing if self._location is nonexistent."""
        if self._location:
            if isinstance(self._location, (str, unicode)):
                contents = llsd.parse(open(self._location).read())
            else:
                # we probably have a file-like object. Godspeed!
                contents = llsd.parse(self._location.read())
            self._reference_contents(contents)
            # Check for alternative implementations
            try:
                for name, alt in self._contents['alternative'].items():
                    nq = NamedQuery(name, None)
                    nq._reference_contents(alt)
                    self._alternative[name] = nq
            except KeyError:
                pass
        self._last_mod_time = self.get_modtime()
        self._last_check_time = time.time()

    def _reference_contents(self, contents):
        "Helper method which builds internal structure from parsed contents"
        self._contents = contents
        self._ttl = int(self._contents.get('ttl', 0))
        self._return_as_map = bool(self._contents.get('return_as_map', False))
        self._legacy_dbname = self._contents.get('legacy_dbname', None)

        # reset these before doing the sql conversion because we will
        # read them there. reset these while loading so we pick up
        # changes.
        self._around = set()
        self._append = set()
        self._integer = set()

        self._options = self._contents.get('dynamic_where', {})
        for key in self._options:
            if isinstance(self._options[key], basestring):
                self._options[key] = self._convert_sql(self._options[key])
            elif isinstance(self._options[key], list):
                lines = []
                for line in self._options[key]:
                    lines.append(self._convert_sql(line))
                self._options[key] = lines
            else:
                moreopt = {}
                for kk in self._options[key]:
                    moreopt[kk] = self._convert_sql(self._options[key][kk])
                self._options[key] = moreopt
        self._base_query = self._convert_sql(self._contents['base_query'])
        self._query_suffix = self._convert_sql(
            self._contents.get('query_suffix', ''))

    def _convert_sql(self, sql):
        """convert the parsed sql into a useful internal structure.

        This function has to turn the named query format into a pyformat
        style. It also has to look for %:name% and :name% and
        ready them for use in LIKE statements"""
        if sql:
            # This first sub is to properly escape any % signs that
            # are meant to be literally passed through to mysql in the
            # query. It leaves any %'s that are used for
            # like-expressions.
            expr = re.compile("(?<=[^a-zA-Z0-9_-])%(?=[^:])")
            sql = expr.sub('%%', sql)

            # This should tackle the rest of the %'s in the query, by
            # converting them to LIKE clauses.
            expr = re.compile("(%?):([a-zA-Z][a-zA-Z0-9_-]*)%")
            sql = expr.sub(self._prepare_like, sql)

            expr = re.compile("#:([a-zA-Z][a-zA-Z0-9_-]*)")
            sql = expr.sub(self._prepare_integer, sql)

            expr = re.compile(":([a-zA-Z][a-zA-Z0-9_-]*)")
            sql = expr.sub("%(\\1)s", sql)
        return sql

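    # Illustrative rewrites performed by _convert_sql, derived from the
    # substitutions above (the column names are hypothetical):
    #
    #   WHERE name = :name        ->  WHERE name = %(name)s
    #   WHERE name LIKE :name%    ->  WHERE name LIKE %(_name_append)s
    #   WHERE name LIKE %:name%   ->  WHERE name LIKE %(_name_around)s
    #   WHERE count = #:count     ->  WHERE count = %(_count_as_integer)s
    #   WHERE note LIKE '% done'  ->  WHERE note LIKE '%% done'
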
    def _prepare_like(self, match):
        """This function changes LIKE statement replace behavior

        It works by turning %:name% to %(_name_around)s and :name% to
        %(_name_append)s. Since a leading '_' is not a valid keyname
        input (enforced via unit tests), it will never clash with
        existing keys. Then, when building the statement, the query
        runner will generate corrected strings."""
        if match.group(1) == '%':
            # there is a leading % so this is treated as prefix/suffix
            self._around.add(match.group(2))
            return "%(" + self._build_around_key(match.group(2)) + ")s"
        else:
            # there is no leading %, so this is suffix only
            self._append.add(match.group(2))
            return "%(" + self._build_append_key(match.group(2)) + ")s"

    def _build_around_key(self, key):
        return "_" + key + "_around"

    def _build_append_key(self, key):
        return "_" + key + "_append"

    def _prepare_integer(self, match):
        """This function adjusts the sql for #:name replacements

        It works by turning #:name to %(_name_as_integer)s. Since a
        leading '_' is not a valid keyname input (enforced via unit
        tests), it will never clash with existing keys. Then, when
        building the statement, the query runner will generate
        corrected strings."""
        self._integer.add(match.group(1))
        return "%(" + self._build_integer_key(match.group(1)) + ")s"

    def _build_integer_key(self, key):
        return "_" + key + "_as_integer"

    def _strip_wildcards_to_list(self, value):
        """Take string, and strip out the LIKE special characters.

        Technically, this is database dependent, but postgresql and
        mysql use the same wildcards, and I am not aware of a general
        way to handle this. I think you need a sql statement of the
        form:

        LIKE_STRING( [ANY,ONE,str]... )

        which would treat ANY as their any string, and ONE as their
        single glyph, and str as something that needs database
        specific encoding to not allow any % or _ to affect the query.

        As it stands, I believe it's impossible to write a named query
        style interface which uses LIKE to search the entire space of
        text available. Imagine the query:

        % of brain used by average linden

        In order to search for %, it must be escaped, so once you have
        escaped the string to not do wildcard searches, and be escaped
        for the database, and then prepended the wildcard you come
        back with one of:

        1) %\% of brain used by average linden
        2) %%% of brain used by average linden

        Then, when passed to the database to be escaped to be database
        safe, you get back:

        1) %\\% of brain used by average linden
        : which means search for any character sequence, followed by a
          backslash, followed by any sequence, followed by ' of
          brain...'
        2) %%% of brain used by average linden
        : which (I believe) means search for a % followed by any
          character sequence followed by 'of brain...'

        Neither of which is what we want!

        So, we need a vendor (or extension) for LIKE_STRING. Anyone
        want to write it?"""
        utf8_value = unicode(value, "utf-8")
        esc_list = []
        remove_chars = set(u"%_")
        for glyph in utf8_value:
            if glyph in remove_chars:
                continue
            esc_list.append(glyph.encode("utf-8"))
        return esc_list

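    # For example (an illustrative value, not from the original source):
    # ''.join(self._strip_wildcards_to_list("50%_done")) yields "50done".
    # The LIKE wildcards '%' and '_' are dropped outright rather than
    # escaped, for the reasons the docstring above laments.
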
    def delete(self):
        """ Makes this query unusable by deleting all the members and
        setting the deleted member. This is desired when the on-disk
        query has been deleted but the in-memory copy remains."""
        # blow away all members except _name, _location, and deleted
        name, location = self._name, self._location
        for key in self.__dict__.keys():
            del self.__dict__[key]
        self.deleted = True
        self._name, self._location = name, location

    def ttl(self):
        """ Estimated time to live of this query. Used for web
        services to set the Expires header."""
        return self._ttl

    def legacy_dbname(self):
        return self._legacy_dbname

    def return_as_map(self):
        """ Returns true if this query is configured to return its
        results as a single map (as opposed to a list of maps, the
        normal behavior)."""
        return self._return_as_map

    def for_schema(self, db_name):
        "Look through the alternates and return the correct query"
        try:
            return self._alternative[db_name]
        except KeyError:
            pass
        return self

    def run(self, connection, params, expect_rows = None, use_dictcursor = True):
        """Given a connection, run this named query with the params.

        Note that this function will fetch ALL rows. We do this because it
        opens and closes the cursor to generate the values, and this
        isn't a generator so the cursor has no life beyond the method call.

        @param connection The connection to use (this generates its own
            cursor for the query)
        @param params The parameters passed into the query
        @param expect_rows The number of rows expected. Set to 1 if
            return_as_map is true. Raises ExpectationFailed if the number
            of returned rows doesn't exactly match. Kind of a hack.
        @param use_dictcursor Set to false to use a normal cursor and
            manually convert the rows to dicts.
        @return Returns the result set as a list of dicts, or, if the
            named query has return_as_map set to true, returns a single
            dict.
        """
        if use_dictcursor:
            cursor = connection.cursor(MySQLdb.cursors.DictCursor)
        else:
            cursor = connection.cursor()

        statement = self.sql(connection, params)
        if DEBUG:
            print "SQL:", statement
        rows = cursor.execute(statement)

        # *NOTE: the expect_rows argument is a very cheesy way to get some
        # validation on the result set. If you want to add more expectation
        # logic, do something more object-oriented and flexible. Or use an ORM.
        if self._return_as_map:
            expect_rows = 1
        if expect_rows is not None and rows != expect_rows:
            cursor.close()
            raise ExpectationFailed("Statement expected %s rows, got %s. Sql: %s" % (
                expect_rows, rows, statement))

        # a dictcursor already yields dicts; otherwise convert the rows manually
        if use_dictcursor:
            result_set = cursor.fetchall()
        else:
            if cursor.description is None:
                # an insert or something
                x = cursor.fetchall()
                cursor.close()
                return x

            names = [x[0] for x in cursor.description]

            result_set = []
            for row in cursor.fetchall():
                converted_row = {}
                for idx, col_name in enumerate(names):
                    converted_row[col_name] = row[idx]
                result_set.append(converted_row)

        cursor.close()
        if self._return_as_map:
            return result_set[0]
        return result_set

    def sql(self, connection, params):
        """ Generates an SQL statement from the named query document
        and a dictionary of parameters."""
        self.refresh()

        # build the query from the options available and the params
        base_query = []
        base_query.append(self._base_query)
        for opt, extra_where in self._options.items():
            if type(extra_where) in (dict, list, tuple):
                if opt in params:
                    base_query.append(extra_where[params[opt]])
            else:
                if opt in params and params[opt]:
                    base_query.append(extra_where)
        if self._query_suffix:
            base_query.append(self._query_suffix)
        full_query = '\n'.join(base_query)

        # Go through the query and rewrite all of the ones with the
        # @:name syntax.
        rewrite = _RewriteQueryForArray(params)
        expr = re.compile("@%\(([a-zA-Z][a-zA-Z0-9_-]*)\)s")
        full_query = expr.sub(rewrite.operate, full_query)
        params.update(rewrite.new_params)

        # build out the params for LIKE. We only have to do this for
        # parameters which were detected to have used the LIKE syntax
        # during load.
        #
        # * treat the incoming string as utf-8
        # * strip wildcards
        # * append or prepend % as appropriate
        new_params = {}
        for key in params:
            if key in self._around:
                new_value = ['%']
                new_value.extend(self._strip_wildcards_to_list(params[key]))
                new_value.append('%')
                new_params[self._build_around_key(key)] = ''.join(new_value)
            if key in self._append:
                new_value = self._strip_wildcards_to_list(params[key])
                new_value.append('%')
                new_params[self._build_append_key(key)] = ''.join(new_value)
            if key in self._integer:
                new_params[self._build_integer_key(key)] = int(params[key])
        params.update(new_params)

        # do substitution using the mysql (non-standard) 'literal'
        # function to do the escaping.
        sql = full_query % connection.literal(params)
        return sql

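    # A sketch of how dynamic_where interacts with params (the query
    # document below is hypothetical, not from the original source):
    #
    #   base_query:     SELECT id FROM task WHERE owner = :owner
    #   dynamic_where:  { 'status': 'AND status = :status' }
    #
    # sql(connection, {'owner': 'x'}) renders only the base query, while
    # sql(connection, {'owner': 'x', 'status': 'open'}) also appends the
    # extra clause before query_suffix; connection.literal() then escapes
    # every %(...)s placeholder.
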
    def refresh(self):
        """ Refresh self from the file on the filesystem.

        This is optimized to be callable as frequently as you wish,
        without adding too much load. It does so by only stat-ing the
        file every N seconds, where N defaults to 5 and is
        configurable through the member _stat_interval_seconds. If the stat
        reveals that the file has changed, refresh will re-parse the
        contents of the file and use them to update the named query
        instance. If the stat reveals that the file has been deleted,
        refresh will call self.delete to make the in-memory
        representation unusable."""
        now = time.time()
        if now - self._last_check_time > self._stat_interval_seconds:
            self._last_check_time = now
            try:
                modtime = self.get_modtime()
                if modtime > self._last_mod_time:
                    self.load_contents()
            except OSError, e:
                if e.errno == errno.ENOENT:  # file not found
                    self.delete()  # clean up self
                raise  # pass the exception along to the caller so they know that this query disappeared

class NamedQueryManager(object):
    """ Manages the lifespan of NamedQuery objects, drawing from a
    directory hierarchy of named query documents.

    In practice this amounts to a memory cache of NamedQuery objects."""

    def __init__(self, named_queries_dir):
        """ Initializes a manager to look for named queries in a
        directory."""
        self._dir = os.path.abspath(os.path.realpath(named_queries_dir))
        self._cached_queries = {}

    def sql(self, connection, name, params):
        nq = self.get(name)
        return nq.sql(connection, params)

    def get(self, name):
        """ Returns a NamedQuery instance based on the name, either
        from memory cache, or by parsing from disk.

        The name is simply a relative path to the directory associated
        with the manager object. Before returning the instance, the
        NamedQuery object is cached in memory, so that subsequent
        accesses don't have to read from disk or do any parsing. This
        means that NamedQuery objects returned by this method are
        shared across all users of the manager object.

        NamedQuery.refresh is used to bring the NamedQuery objects in
        sync with the actual files on disk."""
        nq = self._cached_queries.get(name)
        if nq is None:
            nq = NamedQuery(name, os.path.join(self._dir, name))
            self._cached_queries[name] = nq
        else:
            try:
                nq.refresh()
            except OSError, e:
                if e.errno == errno.ENOENT:  # file not found
                    del self._cached_queries[name]
                raise  # pass exception along to caller so they know that the query disappeared

        return nq

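# Typical standalone use of the manager (a sketch; the directory and
# query name below are hypothetical):
#
#   manager = NamedQueryManager('/var/ll/web/dataservice/sql')
#   nq = manager.get('agent/last_login')  # parses agent/last_login.nq
#   result = nq.run(connection, {'agent_id': 42})
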
class _RewriteQueryForArray(object):
    "Helper class for rewriting queries with the @:name syntax"
    def __init__(self, params):
        self.params = params
        self.new_params = dict()

    def operate(self, match):
        "Given a match, return the string that should be in use"
        key = match.group(1)
        value = self.params[key]
        if type(value) in (list, tuple):
            rv = []
            for idx in range(len(value)):
                # if the value at idx is array-like, we are
                # probably dealing with a VALUES clause
                new_key = "_%s_%s" % (key, str(idx))
                val_item = value[idx]
                if type(val_item) in (list, tuple, dict):
                    if type(val_item) is dict:
                        # this is because in Python, the order of
                        # key, value retrieval from the dict is not
                        # guaranteed to match what the input intended
                        # and for VALUES, order is important.
                        # TODO: Implement ordered dict in LLSD parser?
                        raise ExpectationFailed(
                            'Only lists/tuples allowed, received dict')
                    values_keys = []
                    for value_idx, item in enumerate(val_item):
                        # we want a key of the format:
                        # key_#replacement_#value_row_#value_col
                        # ugh... so if we are replacing 10 rows in user_note,
                        # the first values clause would read (for @:user_notes):
                        # ( :_user_notes_0_1_1, :_user_notes_0_1_2, :_user_notes_0_1_3 )
                        # the input LLSD for VALUES will look like:
                        # <llsd>...
                        #    <map>
                        #     <key>user_notes</key>
                        #       <array>
                        #         <array> <!-- row 1 for VALUES -->
                        #           <string>...</string>
                        #           <string>...</string>
                        #           <string>...</string>
                        #         </array>
                        #         ...
                        #       </array>
                        #    </map>
                        # ... </llsd>
                        values_key = "%s_%s" % (new_key, value_idx)
                        self.new_params[values_key] = item
                        values_keys.append("%%(%s)s" % values_key)
                    # now collapse all these new placeholders enclosed in ()
                    # from [':_key_0_1_1', ':_key_0_1_2', ':_key_0_1_3', ...]
                    # rv will have [ '(:_key_0_1_1, :_key_0_1_2, :_key_0_1_3)', ]
                    # which is flattened a few lines below with join(rv)
                    rv.append('(%s)' % ','.join(values_keys))
                else:
                    self.new_params[new_key] = val_item
                    rv.append("%%(%s)s" % new_key)
            return ','.join(rv)
        else:
            # not something that can be expanded, so just drop the
            # leading @ in the front of the match. This will mean that
            # the single value we have, be it a string, int, whatever
            # (other than dict) will correctly show up, eg:
            #
            # where foo in (@:foobar) -- foobar is a string, so we get
            # where foo in (:foobar)
            return match.group(0)[1:]
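
# Illustrative expansion (the key and values are hypothetical): with
# params = {'ids': [1, 2, 3]}, the fragment "WHERE id IN (@:ids)" is first
# converted by _convert_sql to "WHERE id IN (@%(ids)s)", and operate()
# then rewrites it to "WHERE id IN (%(_ids_0)s,%(_ids_1)s,%(_ids_2)s)",
# with new_params supplying _ids_0 through _ids_2.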