openmedialibrary/oml/queryparser.py

275 lines
9 KiB
Python
Raw Normal View History

2014-05-04 17:26:43 +00:00
# -*- coding: utf-8 -*-
from datetime import datetime
import unicodedata
2019-10-13 10:16:56 +00:00
import sqlalchemy.orm.exc
from sqlalchemy.sql import operators
2016-01-07 04:08:00 +00:00
from sqlalchemy.orm import load_only
2024-06-08 11:45:21 +00:00
from sqlalchemy.sql.expression import text, column
2014-05-04 17:26:43 +00:00
import utils
import settings
2019-01-15 08:38:42 +00:00
from fulltext import find_fulltext
2014-05-04 17:26:43 +00:00
2014-05-18 23:24:04 +00:00
import logging
2015-11-29 14:56:38 +00:00
logger = logging.getLogger(__name__)
2014-05-18 23:24:04 +00:00
2024-06-08 11:45:21 +00:00
2014-05-04 17:26:43 +00:00
def get_operator(op, type='str'):
    '''
    Return the SQLAlchemy operator function for the query operator ``op``.

    ``type`` selects the lookup table: 'str' or 'int'. Any operator that is
    not listed for the chosen type (including the plain '=') falls back to
    ``contains_op`` for 'str' and to ``eq`` for 'int'.

    Raises KeyError if ``type`` is neither 'str' nor 'int'.
    '''
    # comparisons available for both value types
    comparisons = {
        '==': operators.eq,
        '>': operators.gt,
        '>=': operators.ge,
        '<': operators.lt,
        '<=': operators.le,
    }
    # string values additionally support prefix, suffix and membership tests
    string_ops = {
        **comparisons,
        '^': operators.startswith_op,
        '$': operators.endswith_op,
        '&': operators.in_op,
    }
    # per type: (known operators, fallback for everything else)
    by_type = {
        'str': (string_ops, operators.contains_op),
        'int': (comparisons, operators.eq),
    }
    known, fallback = by_type[type]
    return known.get(op, fallback)
class Parser(object):
    '''
    Turn find-query dictionaries into SQLAlchemy filters on ``model``.

    ``find(data)`` is the entry point; ``parse_conditions`` walks the
    (possibly nested) condition list and ``parse_condition`` translates a
    single condition into a filter clause on ``model.id``.
    '''

    def __init__(self, model, user_items, list_items, find, sort):
        '''
        model:      mapped item class; ``query``, ``id``, ``users``, ``lists``,
                    ``item_keys`` and ``filter_keys`` are read from it
        user_items: table with ``user_id`` / ``item_id`` columns
        list_items: table with ``list_id`` / ``item_id`` columns
        find:       class with ``query``, ``key`` and ``findvalue``, used for
                    string/text lookups
        sort:       class whose attributes are named after item keys, used for
                    boolean, date and numeric comparisons
        '''
        self._model = model
        self._find = find
        self._sort = sort
        self._user = model.users.mapper.class_
        self._user_items = user_items
        self._list_items = list_items
        self._list = model.lists.mapper.class_
        self.item_keys = model.item_keys
        self.filter_keys = model.filter_keys

    def parse_condition(self, condition):
        '''
        Translate one condition into a filter clause, or None to ignore it.

        condition: {
            value: "war"
        }
        or
        condition: {
            key: "year",
            value: [1970, 1980],
            operator: "="
        }
        ...

        A missing or empty key means '*' (any key), a missing operator means
        '='. An operator prefixed with '!' negates the condition.
        Returns None if the condition has no 'value', or if the value is the
        empty string for '=' (non-negated), '^' or '$'.
        '''
        #logger.debug('parse_condition %s', condition)
        if 'value' not in condition:
            return None
        k = condition.get('key', '*')
        if not k:
            k = '*'
        v = condition['value']
        op = condition.get('operator')
        if not op:
            op = '='
        # a leading '!' negates the condition, the rest is the real operator
        if op.startswith('!'):
            op = op[1:]
            exclude = True
        else:
            exclude = False

        # keys without an entry in item_keys are treated as strings
        key_type = (utils.get_by_id(self.item_keys, k) or {'type': 'string'}).get('type')
        if isinstance(key_type, list):
            key_type = key_type[0]
        if k == 'list':
            # make sure 'list' is handled by its own branch below and not by
            # the string/text branch
            key_type = ''

        if ((not exclude and op == '=') or op in ('$', '^')) and v == '':
            return None
        elif k == 'resolution':
            # value is [width, height]; both have to match
            width = self.parse_condition({'key': 'width', 'value': v[0], 'operator': op})
            height = self.parse_condition({'key': 'height', 'value': v[1], 'operator': op})
            q = width & height
            if exclude:
                q = ~q
            return q
        elif isinstance(v, list) and len(v) == 2 and op == '=':
            # two values with '=' are a range: lower bound inclusive,
            # upper bound exclusive
            lower = self.parse_condition({'key': k, 'value': v[0], 'operator': '>='})
            upper = self.parse_condition({'key': k, 'value': v[1], 'operator': '<'})
            q = lower & upper
            if exclude:
                q = ~q
            return q
        elif key_type == 'boolean':
            # only the (case-insensitive) string 'true' counts as True
            v = str(v).lower()
            v = v == 'true'
            vk = getattr(self._sort, k)
            q = operators.eq(vk, v)
            ids = self._model.query.join(self._sort).filter(q).options(load_only('id'))
            return self.in_ids(ids, exclude)
        elif k == 'id':
            # with '&' the value already is a collection of ids
            if op == '&':
                ids = v
            else:
                ids = [v]
            return self.in_ids(ids, exclude)
        elif k == 'fulltext':
            ids = find_fulltext(v)
            return self.in_ids(ids, exclude)
        elif k in ('notes', 'quotes'):
            from annotation.models import Annotation
            if isinstance(v, str):
                v = unicodedata.normalize('NFKD', v).lower()
            # the condition's operator is not used here, annotations are
            # always matched with the default string operator
            if k == 'notes':
                qs = Annotation.query.filter(get_operator('=')(Annotation.findnotes, v))
            elif k == 'quotes':
                qs = Annotation.query.filter(get_operator('=')(Annotation.findquotes, v))
            ids = set()
            for a in qs:
                ids.add(a.item_id)
            return self.in_ids(ids, exclude)
        elif key_type in ("string", "text"):
            if isinstance(v, str):
                v = unicodedata.normalize('NFKD', v).lower()
            q = get_operator(op)(self._find.findvalue, v)
            if k != '*':
                q &= (self._find.key == k)
            ids = self._find.query.filter(q).with_entities(column('item_id'))
            ids = [i[0] for i in ids]
            return self.in_ids(ids, exclude)
        elif k == 'list':
            # value is "nickname:name"; an empty nickname selects the local
            # user (settings.USER_ID), an empty name selects all items of
            # that user
            nickname, name = v.split(':', 1)
            if nickname:
                try:
                    u = self._user.query.filter_by(nickname=nickname, peered=True).one()
                except sqlalchemy.orm.exc.NoResultFound:
                    # unknown peer: nothing can match
                    return self.in_ids([], exclude)
            else:
                u = self._user.query.filter_by(id=settings.USER_ID).one()
            if not name:
                ids = self._user_items.select().where(
                    self._user_items.columns['user_id'].is_(u.id)
                ).with_only_columns(
                    [text('item_id')]
                )
            elif nickname:
                # lists of peers are read from the peer info
                peer = utils.get_peer(u.id)
                ids = peer.info['lists'].get(name, [])
            else:
                lst = self._list.query.filter_by(user_id=u.id, name=name).first()
                if not lst:
                    # unknown list: nothing can match
                    return self.in_ids([], exclude)
                if lst.type == 'static':
                    ids = self._list_items.select().where(
                        self._list_items.columns['list_id'].is_(lst.id)
                    ).with_only_columns(
                        [text('item_id')]
                    )
                else:
                    ids = lst.get_items().options(load_only('id'))
            return self.in_ids(ids, exclude)
        elif key_type == 'date':
            def parse_date(d):
                # missing month/day default to 1
                while len(d) < 3:
                    d.append(1)
                return datetime(*[int(i) for i in d])
            #using sort here since find only contains strings
            v = parse_date(v.split('-'))
            vk = getattr(self._sort, k)
            q = get_operator(op, 'int')(vk, v)
            # NOTE(review): the filter is on a sort column but the join is on
            # find (the boolean branch joins sort) - confirm this is intended
            ids = self._model.query.join(self._find).filter(q).options(load_only('id'))
            return self.in_ids(ids, exclude)
        else: #integer, float, time
            q = get_operator(op, 'int')(getattr(self._sort, k), v)
            # NOTE(review): same join on find with a filter on sort as in the
            # date branch - confirm this is intended
            ids = self._model.query.join(self._find).filter(q).options(load_only('id'))
            return self.in_ids(ids, exclude)

    def in_ids(self, ids, exclude):
        '''
        Return a clause restricting model.id to ``ids`` (or to everything
        but ``ids`` if ``exclude`` is set).

        ``ids`` can be a collection of ids or a selectable/query.
        '''
        if isinstance(ids, (list, set, tuple)) and not ids:
            # empty collection: compare against '' instead of building an
            # empty IN; presumably no item has an empty id, so this matches
            # nothing (and everything once negated)
            q = operators.eq(self._model.id, '')
            if exclude:
                q = ~q
        else:
            in_op = operators.notin_op if exclude else operators.in_op
            q = in_op(self._model.id, ids)
        return q

    def parse_conditions(self, conditions, operator):
        '''
        Translate a list of conditions into a list of filter clauses.

        conditions: [
            {
                value: "war"
            },
            {
                key: "year",
                value: [1970, 1980],
                operator: "!="
            },
            {
                key: "country",
                value: "f",
                operator: "^"
            },
            {
                conditions: [...],
                operator: "|"
            }
        ],
        operator: "&"

        The returned clauses all have to match. With operator '|' the list
        holds a single clause that ORs all conditions. Conditions that
        parse to None are dropped; [] is returned if nothing is left.
        '''
        clauses = []
        for condition in conditions:
            if 'conditions' in condition:
                nested = self.parse_conditions(condition['conditions'],
                                               condition.get('operator', '&'))
                if operator == '|' and len(nested) > 1:
                    # a nested AND group is one alternative of this OR, it
                    # has to be joined before its clauses get mixed with
                    # the other alternatives: a | (b & c), not a | b | c
                    group = nested[0]
                    for clause in nested[1:]:
                        group = group & clause
                    nested = [group]
                clauses.extend(nested)
            else:
                q = self.parse_condition(condition)
                if isinstance(q, list):
                    clauses.extend(q)
                else:
                    clauses.append(q)
        clauses = [clause for clause in clauses if clause is not None]
        if not clauses:
            return []
        if operator == '|':
            q = clauses[0]
            for clause in clauses[1:]:
                q = q | clause
            return [q]
        return clauses

    def find(self, data):
        '''
        Return model.query filtered by the conditions in data['query'].

        query: {
            conditions: [
                {
                    value: "war"
                },
                {
                    key: "year",
                    value: [1970, 1980],
                    operator: "!="
                },
                {
                    key: "country",
                    value: "f",
                    operator: "^"
                }
            ],
            operator: "&"
        }

        Without 'query' or without conditions the query is returned
        unfiltered; the operator defaults to '&'.
        '''
        qs = self._model.query
        query = data.get('query', {})
        conditions = self.parse_conditions(query.get('conditions', []),
                                           query.get('operator', '&'))
        # every clause has to match, so each one becomes its own filter
        for c in conditions:
            qs = qs.filter(c)
        return qs