From aa509ddc8574e0a5220c94a2b72f4d5a798d6e8a Mon Sep 17 00:00:00 2001 From: Javier Sancho Date: Wed, 12 Feb 2014 16:01:29 +0100 Subject: [PATCH] New database scheme, storing key name inside tables for improving searching * dbutils.py: Classes for encapsulating database concepts as tables, fields, constraints, etc, easing its translation to SQL syntax * connection.py: Encloses new dbutils classes * collection.py: Changes at inserting documents, storing key name too * cursor.py: Key name in searching, using new dbutils classes * MySQL.py: Implementation of SQL methods for query translations --- MySQL.py | 90 +++++++++++++++++++++++++++++++++++---------------- collection.py | 27 ++++++++++++---- connection.py | 12 +++++-- cursor.py | 76 ++++++++++++++++++++++++++----------------- dbutils.py | 53 ++++++++++++++++++++++++++++++ 5 files changed, 191 insertions(+), 67 deletions(-) create mode 100644 dbutils.py diff --git a/MySQL.py b/MySQL.py index f5f8888..e983a67 100644 --- a/MySQL.py +++ b/MySQL.py @@ -19,6 +19,7 @@ # ############################################################################## +import dbutils import connection import MySQLdb @@ -28,7 +29,61 @@ SQL_FIELD_TYPES = { 'float': 'DOUBLE', } + +class Query(dbutils.Query): + def sql(self): + res = "SELECT " + res += ",".join(["(%s)" % f.sql() for f in self.fields]) + + res += " FROM " + res += ",".join([t.sql() for t in self.tables]) + + if self.constraints: + res += " WHERE " + res += " AND ".join(["(%s)" % c.sql() for c in self.constraints]) + + return res + + +class Field(dbutils.Field): + def sql(self): + return "%s.`%s`" % (self.table.sql(), self.field_name) + + +class Table(dbutils.Table): + def sql(self): + return "`%s`.`%s`" % (self.db_name, self.table_name) + + +class Constraint(dbutils.Constraint): + def sql(self): + operator = self.operator.strip().lower() + if operator == "starts": + return "(%s) LIKE (%s)" % (self.args[0].sql(), self.args[1].sql()[:-1] + "%'") + elif operator == "in": + return "(%s) IN (%s)" % (self.args[0].sql(), ",".join(["(%s)" % a.sql() for a in self.args[1:]])) + elif operator == "=": + return "(%s) = (%s)" % (self.args[0].sql(), self.args[1].sql()) + else: + token = " %s " % operator.upper() + return token.join(["(%s)" % a.sql() for a in self.args]) + + +class Literal(dbutils.Literal): + def sql(self): + if type(self.value) in (int, float): + return "%s" % self.value + else: + return "'%s'" % str(self.value).replace("'", "''") + + class Connection(connection.Connection): + Query = Query + Field = Field + Table = Table + Constraint = Constraint + Literal = Literal + def __init__(self, *args, **kwargs): self._db_con = MySQLdb.connect(*args, **kwargs) self._db_con_autocommit = MySQLdb.connect(*args, **kwargs) @@ -68,6 +123,7 @@ class Connection(connection.Connection): return SQL_FIELD_TYPES.get(field_type, "UNKNOW") def _create_table(self, db_name, table_name, fields): + primary = [] sql = "CREATE TABLE `%s`.`%s` (" % (db_name, table_name) sql_fields = [] @@ -76,44 +132,22 @@ class Connection(connection.Connection): if f.get('size'): sql_field += "(%s)" % f['size'] if f.get('primary'): - sql_field += " PRIMARY KEY" + primary.append(f['name']) if 'null' in f and not f['null']: sql_field += " NOT NULL" sql_fields.append(sql_field) sql += ",".join(sql_fields) + if primary: + sql += ", PRIMARY KEY(%s)" % ",".join(primary) + sql += ")" return (self.execute(sql, db=self._db_con_autocommit) or False) and True - def _get_sql_field(self, db_name, field): - if type(field) is tuple: - return "`%s`.`%s`.`%s`" % (db_name, field[0], field[1]) - elif type(field) is dict: - return "(%s)" % self._get_sql_query(db_name, field) - else: - return "'%s'" % str(field).replace("'", "''") - - def _get_sql_query(self, db_name, query): - sql = "SELECT " - sql += ",".join([self._get_sql_field(db_name, x) for x in query['select']]) - - sql += " FROM " - sql += ",".join(query['from']) - - if query.get('where'): - sql += " WHERE " - where = [] - for cond in query['where']: - where.append("%s %s %s" % (self._get_sql_field(db_name, cond[0]), cond[1], self._get_sql_field(db_name, cond[2]))) - sql += " AND ".join(where) - - return sql - - def _get_cursor(self, db_name, query): + def _get_cursor(self, query): cur = self._db_con.cursor() - cur.execute("USE `%s`" % db_name) - cur.execute(self._get_sql_query(db_name, query)) + cur.execute(query.sql()) return cur def _next(self, cur): diff --git a/collection.py b/collection.py index 1dad488..3c93b8b 100644 --- a/collection.py +++ b/collection.py @@ -37,6 +37,7 @@ class Collection(object): def _create_field(self, field_name): fields = [ {'name': 'id', 'type': 'char', 'size': 512, 'primary': True}, + {'name': 'name', 'type': 'char', 'size': 64, 'primary': True}, {'name': 'value', 'type': 'text', 'null': False}, {'name': 'number', 'type': 'float'}, ] @@ -78,11 +79,23 @@ class Collection(object): for f in doc: if not f in fields: self._create_field(f) - values = { - 'id': doc_id, - 'value': msgpack.dumps(doc[f]), - } - if type(doc[f]) in (int, float): - values['number'] = doc[f] table_f = '%s$%s' % (self.table_name, f) - self.database.connection._insert(self.database.db_name, table_f, values) + self._insert_field(doc_id, table_f, f, doc[f]) + + def _insert_field(self, doc_id, field_table, field_name, field_value): + values = { + 'id': doc_id, + 'name': field_name, + 'value': msgpack.dumps(field_value), + } + if type(field_value) in (int, float): + values['number'] = field_value + + self.database.connection._insert(self.database.db_name, field_table, values) + + if type(field_value) in (list, tuple) and not '.' in field_name: + for i in xrange(len(field_value)): + self._insert_field(doc_id, field_table, "%s..%s" % (field_name, i), field_value[i]) + elif type(field_value) is dict: + for k, v in field_value.iteritems(): + self._insert_field(doc_id, field_table, "%s.%s" % (field_name, k), v) diff --git a/connection.py b/connection.py index 32cd8ae..add83e3 100644 --- a/connection.py +++ b/connection.py @@ -20,8 +20,15 @@ ############################################################################## from database import Database +import dbutils class Connection(object): + Query = dbutils.Query + Field = dbutils.Field + Table = dbutils.Table + Constraint = dbutils.Constraint + Literal = dbutils.Literal + def __init__(self, *args, **kwargs): self._db_con = None @@ -68,11 +75,10 @@ class Connection(object): # [{'name': 'id', 'type': 'char', 'size': 20, 'primary': True}] return None - def _get_cursor(self, db_name, query): - # {'select': [('t1$_id', 'id'), {'select': [('t1$c1', 'value')], 'from': ['t1$c1'], 'where': [(('t1$c1', 'id'), '=', ('t1$_id', 'id'))]}], 'from': ['t1$_id']} + def _get_cursor(self, query): return None - def _next(self, cursor): + def _next(self, db_name, cursor): return None def _insert(self, db_name, table_name, values): diff --git a/cursor.py b/cursor.py index e07f237..6246872 100644 --- a/cursor.py +++ b/cursor.py @@ -26,6 +26,12 @@ class Cursor(object): if spec and not type(spec) is dict: raise Exception("spec must be an instance of dict") + self.Query = collection.database.connection.Query + self.Field = collection.database.connection.Field + self.Table = collection.database.connection.Table + self.Constraint = collection.database.connection.Constraint + self.Literal = collection.database.connection.Literal + self.collection = collection self.spec = spec if self.collection.exists(): @@ -74,41 +80,53 @@ class Cursor(object): return res_fields def _get_cursor(self): - query = {} - table_id = '%s$_id' % self.collection.table_name + table_id = self.Table(self.collection.database.db_name, '%s$_id' % self.collection.table_name) - query['select'] = [(table_id, 'value')] + fields = [self.Field(table_id, 'value')] for f in filter(lambda x: x != '_id', self.fields): - table_f = '%s$%s' % (self.collection.table_name, f) - q = self._get_cursor_field(table_id, table_f) - query['select'].append(q) + fields.append(self._get_cursor_field(f)) - query['from'] = [table_id] + tables = [table_id] + constraints = [self.Constraint('=', self.Field(table_id, 'name'), self.Literal('_id'))] if self.spec: - query['where'] = [] for k, v in self.spec.iteritems(): - table_f = '%s$%s' % (self.collection.table_name, k) - if type(v) in (int, float): - field_name = 'number' - field_v = v - else: - field_name = 'value' - field_v = msgpack.dumps(v) - if k == '_id': - field_q = (table_id, field_name) - else: - field_q = self._get_cursor_field(table_id, table_f, field_name=field_name) - query['where'].append((field_q, '=', field_v)) - - return self.collection.database.connection._get_cursor(self.collection.database.db_name, query) - - def _get_cursor_field(self, table_id, table_field, field_name='value'): - return { - 'select': [(table_field, field_name)], - 'from': [table_field], - 'where': [((table_field, 'id'), '=', (table_id, 'id'))], - } + constraints.append(self._get_cursor_constraint(k, v)) + + query = self.Query(fields, tables, constraints) + return self.collection.database.connection._get_cursor(query) + + def _get_cursor_field(self, field_name): + table_id = self.Table(self.collection.database.db_name, '%s$_id' % self.collection.table_name) + table_field = self.Table(self.collection.database.db_name, '%s$%s' % (self.collection.table_name, field_name.split(".")[0])) + + fields = [self.Field(table_field, 'value')] + tables = [table_field] + constraints = [ + self.Constraint('=', self.Field(table_field, 'id'), self.Field(table_id, 'id')), + self.Constraint('=', self.Field(table_field, 'name'), self.Literal(field_name)), + ] + return self.Query(fields, tables, constraints) + + def _get_cursor_constraint(self, field_name, field_value): + table_id = self.Table(self.collection.database.db_name, '%s$_id' % self.collection.table_name) + table_field = self.Table(self.collection.database.db_name, '%s$%s' % (self.collection.table_name, field_name.split(".")[0])) + + if type(field_value) in (int, float): + field_type = 'number' + else: + field_type = 'value' + field_value = msgpack.dumps(field_value) + + fields = [self.Field(table_field, 'id')] + tables = [table_field] + constraints = [ + self.Constraint('or', self.Constraint('=', self.Field(table_field, 'name'), self.Literal(field_name)), + self.Constraint('starts', self.Field(table_field, 'name'), self.Literal('%s..' % field_name))), + self.Constraint('=', self.Field(table_field, field_type), self.Literal(field_value)), + ] + + return self.Constraint('in', self.Field(table_id, 'id'), self.Query(fields, tables, constraints)) def next(self): if self.cursor is None: diff --git a/dbutils.py b/dbutils.py new file mode 100644 index 0000000..2eb459e --- /dev/null +++ b/dbutils.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +############################################################################## +# +# mojo, a Python library for implementing document based databases +# Copyright (C) 2013-2014 by Javier Sancho Fernandez +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +############################################################################## + +class SQLGeneric(object): + def sql(self): + return "" + + +class Query(SQLGeneric): + def __init__(self, fields, tables, constraints): + self.fields = fields + self.tables = tables + self.constraints = constraints + + +class Field(SQLGeneric): + def __init__(self, table, field_name): + self.table = table + self.field_name = field_name + + +class Table(SQLGeneric): + def __init__(self, db_name, table_name): + self.db_name = db_name + self.table_name = table_name + + +class Constraint(SQLGeneric): + def __init__(self, operator, *args): + self.operator = operator + self.args = args + +class Literal(SQLGeneric): + def __init__(self, value): + self.value = value -- 2.39.5