openmedialibrary/oml/item/models.py
2016-01-19 10:51:27 +05:30

816 lines
27 KiB
Python

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import base64
import hashlib
import os
import re
import shutil
import stat
import unicodedata
import ox
from sqlalchemy.schema import CreateTable
import sqlalchemy as sa
from changelog import Changelog
from db import MutableDict
import json_pickler
from .icons import icons
from .person import get_sort_name, Person
from queryparser import Parser
from settings import config
from utils import remove_empty_folders, get_ratio
from websocket import trigger_event
import db
import media
#import metaremote as meta
import meta
import settings
import state
import utils
import logging
logger = logging.getLogger(__name__)
# Association table for the many-to-many relation between users and items
# (used as ``secondary`` by Item.users).
user_items = sa.Table(
    'useritem',
    db.metadata,
    sa.Column('user_id', sa.String(43), sa.ForeignKey('user.id')),
    sa.Column('item_id', sa.String(32), sa.ForeignKey('item.id')),
)
class Item(db.Model):
    """A library item.

    ``info`` and ``meta`` are JSON-pickled dicts. The keys listed in
    ``meta_keys`` are kept in ``meta`` (see load_metadata / update_meta),
    everything else lives in ``info``. Sort and Find rows are derived from
    the merged view returned by json().
    """
    __tablename__ = 'item'

    created = sa.Column(sa.DateTime())
    modified = sa.Column(sa.DateTime())

    id = sa.Column(sa.String(32), primary_key=True)

    info = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))
    meta = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))

    # why are these columns in the db and not in e.g. info?
    added = sa.Column(sa.DateTime())  # added to local library
    accessed = sa.Column(sa.DateTime())
    timesaccessed = sa.Column(sa.Integer())

    users = sa.orm.relationship('User', secondary=user_items,
                                backref=sa.orm.backref('items', lazy='dynamic'))

    # keys that are stored in ``meta`` rather than in ``info``
    meta_keys = (
        'author',
        'categories',
        'cover',
        'date',
        'description',
        'edition',
        'isbn',
        'language',
        'pages',
        'place',
        'publisher',
        'series',
        'tableofcontents',
        'title'
    )

    @staticmethod
    def _normalize_id(value):
        """Return the item id for ``value``.

        A list of strings is joined and hashed: base32(sha1(joined)), returned
        as ``str``. Any other value is returned unchanged.
        """
        if isinstance(value, list):
            # sha1() needs bytes and b32encode() returns bytes on Python 3
            digest = hashlib.sha1(''.join(value).encode('utf-8')).digest()
            value = base64.b32encode(digest).decode('ascii')
        return value

    @property
    def timestamp(self):
        """``modified`` converted with utils.datetime2ts."""
        return utils.datetime2ts(self.modified)

    def __repr__(self):
        return self.id

    def __init__(self, id):
        """Create an item; ``id`` may be an id string or a list to be hashed."""
        self.id = self._normalize_id(id)
        self.created = datetime.utcnow()
        self.modified = datetime.utcnow()
        self.info = {}
        self.meta = {}

    @classmethod
    def get(cls, id):
        """Return the item with this id (string or list to be hashed) or None."""
        return cls.query.filter_by(id=cls._normalize_id(id)).first()

    @classmethod
    def get_or_create(cls, id, info=None):
        """Return the item with this id, creating and committing it if missing.

        ``info`` is only applied to a newly created item.
        """
        id = cls._normalize_id(id)
        item = cls.query.filter_by(id=id).first()
        if not item:
            item = cls(id=id)
            if info:
                item.info = info
            state.db.session.add(item)
            state.db.session.commit()
        return item

    @classmethod
    def find(cls, data):
        """Run a query described by ``data`` through the query Parser."""
        return Parser(cls, user_items, Find, Sort).find(data)

    def add_user(self, user):
        """Attach ``user`` to this item and add the item to the user's library."""
        if user not in self.users:
            self.users.append(user)
        library = user.library
        if self not in library.items:
            library.items.append(self)
            state.db.session.add(library)

    def json(self, keys=None):
        """Return a dict view of the item.

        Combines the column values, transfer state (if a Transfer row exists),
        ``info`` (without keys that belong to ``meta``, except 'pages') and
        ``meta``. If ``keys`` is given only those keys are returned. Keys whose
        configured type is a list are always returned as lists.
        """
        j = {
            'id': self.id,
            'created': self.created,
            'modified': self.modified,
            'timesaccessed': self.timesaccessed,
            'accessed': self.accessed,
            'added': self.added,
        }
        t = Transfer.get(self.id)
        if t:
            j['transferadded'] = t.added
            j['transferprogress'] = t.progress

        # 'users' is intentionally not included: unused and slow

        if self.info:
            meta_keys = [k for k in self.meta_keys if k != 'pages']
            for key in self.info:
                if (not keys or key in keys) and key not in meta_keys:
                    j[key] = self.info[key]

        if self.meta:
            j.update(self.meta)

        # identifier keys are only reported if they are part of meta
        for key in self.id_keys:
            if key not in self.meta and key in j:
                del j[key]
        if keys:
            for k in list(j):
                if k not in keys:
                    del j[k]
        list_keys = [
            k['id'] for k in settings.config['itemKeys']
            if isinstance(k['type'], list)
        ]
        for key in list_keys:
            if key in j and not isinstance(j[key], list):
                j[key] = [j[key]]
        return j

    def get_path(self):
        """Return the full path of the first file of this item, or None."""
        f = self.files.first()
        return f.fullpath() if f else None

    @staticmethod
    def _sort_value(sort_type, value):
        """Convert ``value`` into the value stored in the Sort table."""
        if not value:
            # an empty list is stored as NULL, other falsy values are kept
            return None if isinstance(value, list) else value
        if sort_type == 'integer':
            if isinstance(value, str):
                digits = re.sub('[^0-9]', '', value)
                # a string without any digits has no integer sort value
                return int(digits) if digits else None
            return int(value)
        if sort_type == 'float':
            return float(value)
        if sort_type == 'date':
            return value
        if sort_type == 'person':
            if not isinstance(value, list):
                value = [value]
            names = list(map(get_sort_name, value))
            return ox.sort_string('\n'.join(names)).lower()
        if sort_type == 'title':
            if isinstance(value, dict):
                value = list(value.values())
            if isinstance(value, list):
                value = ''.join(value)
            value = ox.get_sort_title(value)
            return utils.sort_title(value).lower()
        if isinstance(value, list):
            value = '\n'.join(value)
        if value:
            value = ox.sort_string(str(value)).lower()
        return value

    def update_sort(self):
        """Recompute the Sort row of this item for all sortable item keys.

        The Sort row is added to the session if anything changed; committing
        is left to the caller.
        """
        update = False
        s = Sort.get_or_create(self.id)
        # json() queries the Transfer table, so build it once, not once per key
        data = self.json()
        for key in config['itemKeys']:
            if not key.get('sort'):
                continue
            sort_type = key.get('sortType', key['type'])
            value = self._sort_value(sort_type, data.get(key['id'], None))
            if getattr(s, key['id']) != value:
                setattr(s, key['id'], value)
                update = True
        if update:
            state.db.session.add(s)

    def update_find(self):
        """Synchronise the Find rows of this item with its current values.

        Adds missing rows, and deletes rows whose value or key is no longer
        present. Committing is left to the caller.
        """

        def add(k, v):
            f = Find.query.filter_by(item_id=self.id, key=k, value=v).first()
            if not f:
                f = Find(item_id=self.id, key=k)
            if f.value != v:
                f.findvalue = unicodedata.normalize('NFKD', v).lower()
                f.value = v
                state.db.session.add(f)

        keys = []
        # json() queries the Transfer table, so build it once, not once per key
        data = self.json()
        for key in config['itemKeys']:
            searchable = (
                key.get('find') or key.get('filter')
                or key.get('type') in [['string'], 'string']
            )
            if not searchable:
                continue
            value = data.get(key['id'], None)
            if key.get('filterMap') and value:
                value = re.compile(key.get('filterMap')).findall(value)
                if value:
                    value = value[0]
            if not value:
                continue
            keys.append(key['id'])
            if isinstance(value, dict):
                value = ' '.join(list(value.values()))
            if not isinstance(value, list):
                value = [value]
            value = [
                v.decode('utf-8') if isinstance(v, bytes) else v
                for v in value
            ]
            for v in value:
                add(key['id'], v)
            # drop rows for values this key no longer has
            stale = Find.query.filter_by(
                item_id=self.id, key=key['id']
            ).filter(Find.value.notin_(value))
            for f in stale:
                state.db.session.delete(f)
        # drop rows for keys that no longer have any value
        for f in Find.query.filter_by(item_id=self.id).filter(Find.key.notin_(keys)):
            state.db.session.delete(f)

    def update(self, modified=None):
        """Refresh derived state (mediastate, sort, find) and save the item.

        ``modified`` overrides the modification time; it defaults to now.
        """
        # these keys belong in info, move them over if they ended up in meta
        for key in ('mediastate', 'coverRatio', 'previewRatio'):
            if key in self.meta:
                if key not in self.info:
                    self.info[key] = self.meta[key]
                del self.meta[key]
        users = list(map(str, list(self.users)))
        # mediastate is one of: available, unavailable, transferring
        t = Transfer.get(self.id)
        if t and t.added and t.progress < 1:
            self.info['mediastate'] = 'transferring'
        elif settings.USER_ID in users:
            self.info['mediastate'] = 'available'
        else:
            self.info['mediastate'] = 'unavailable'
        self.modified = modified if modified else datetime.utcnow()
        self.update_sort()
        self.update_find()
        self.save()

    def save(self):
        """Add the item to the session and commit."""
        state.db.session.add(self)
        state.db.session.commit()

    def delete(self, commit=True):
        """Delete the item with its Sort, Transfer and Scrape rows and icons."""
        Sort.query.filter_by(item_id=self.id).delete()
        Transfer.query.filter_by(item_id=self.id).delete()
        Scrape.query.filter_by(item_id=self.id).delete()
        state.db.session.delete(self)
        icons.clear('cover:%s' % self.id)
        icons.clear('preview:%s' % self.id)
        if commit:
            state.db.session.commit()

    def update_meta(self, data, modified=None, reset_from=False):
        """Apply the ``meta_keys`` found in ``data`` to ``meta``.

        Keys in ``meta`` that are not in ``meta_keys`` are removed. With
        ``reset_from`` the '_from' entry of ``info`` is removed as well. If
        anything changed the item is updated and saved, icons are refreshed
        when the cover changed, and the changed values are recorded in the
        changelog if the current user has the item.
        """
        update = False
        record = {}
        for key in self.meta_keys:
            if key in data and self.meta.get(key) != data[key]:
                record[key] = data[key]
                self.meta[key] = data[key]
                update = True
        for key in list(self.meta):
            if key not in self.meta_keys:
                del self.meta[key]
                update = True
        if reset_from and '_from' in self.info:
            del self.info['_from']
            update = True
        if not update:
            return
        self.update(modified)
        self.save()
        if 'cover' in record:
            self.update_icons()
        user = state.user()
        if record and user in self.users:
            Changelog.record_ts(user, modified, 'edititem', self.id, record)

    def edit(self, data, modified=None, reset_from=False):
        """Edit metadata: clear pending scrapes, update meta and move files."""
        Scrape.query.filter_by(item_id=self.id).delete()
        self.update_meta(data, modified, reset_from=reset_from)
        for f in self.files.all():
            f.move()

    def extract_preview(self):
        """Return the cover extracted from the item's file, None without file."""
        path = self.get_path()
        if path:
            return getattr(media, self.info['extension']).cover(path)

    def update_cover(self):
        """Fetch the cover from ``meta['cover']`` and update icon and ratio.

        Without a cover the icon is removed and coverRatio falls back to
        previewRatio (or is removed if there is none).
        """
        key = 'cover:%s' % self.id
        cover = None
        if 'cover' in self.meta and self.meta['cover']:
            try:
                cover = ox.cache.read_url(self.meta['cover'])
            except Exception:
                # best effort: any failure to fetch means "no cover"
                logger.debug('unable to read cover url %s', self.meta['cover'])
                cover = None
        if cover:
            icons[key] = cover
            self.info['coverRatio'] = get_ratio(cover)
        else:
            del icons[key]
            if 'previewRatio' in self.info:
                self.info['coverRatio'] = self.info['previewRatio']
            elif 'coverRatio' in self.info:
                del self.info['coverRatio']
        icons.clear('cover:%s:' % self.id)

    def update_preview(self):
        """Extract the preview from the file and update icon and ratio.

        Without a preview the icon is removed and previewRatio is replaced by
        coverRatio (or removed if there is none).
        """
        key = 'preview:%s' % self.id
        preview = self.extract_preview()
        if preview:
            icons[key] = preview
            self.info['previewRatio'] = get_ratio(preview)
            if 'coverRatio' not in self.info:
                self.info['coverRatio'] = self.info['previewRatio']
        else:
            del icons[key]
            if 'previewRatio' in self.info:
                del self.info['previewRatio']
            if 'coverRatio' in self.info:
                self.info['previewRatio'] = self.info['coverRatio']
        icons.clear('preview:%s:' % self.id)

    def update_icons(self):
        """Refresh cover and preview."""
        self.update_cover()
        self.update_preview()

    def load_metadata(self):
        '''
            load metadata from user_metadata or get via isbn?
        '''
        # move meta keys from info to meta; 'pages' is kept in info as well
        for key in self.meta_keys:
            if key in self.info:
                if key not in self.meta:
                    self.meta[key] = self.info[key]
                if key != 'pages':
                    del self.info[key]

        #FIXME get from user_meta
        if state.online and 'isbn' in self.meta:
            data = meta.lookup('isbn', self.meta['isbn'])
            if data:
                self.meta.update(data)

    def queue_download(self):
        """Queue the item for download if the current user does not have it.

        NOTE(review): nesting was reconstructed from a flattened copy of the
        file; confirm that logging and add_user belong to the outer ``if``.
        """
        u = state.user()
        if u in self.users:
            return
        t = Transfer.get_or_create(self.id)
        if not t.added:
            t.added = datetime.utcnow()
            t.progress = 0
            t.save()
        logger.debug('queue %s for download', self.id)
        self.add_user(u)

    def save_file(self, content):
        """Store downloaded ``content`` as the file of this item.

        Returns True if the file was written, False if the content does not
        match the item id or a file already exists.

        NOTE(review): nesting was reconstructed from a flattened copy of the
        file (the 'existing file' branch is taken when a File row exists);
        confirm against the original.
        """
        u = state.user()
        f = File.get(self.id)
        content_id = media.get_id(data=content)
        if content_id != self.id:
            logger.debug('INVALID CONTENT %s vs %s', self.id, content_id)
            return False
        if f:
            logger.debug('TRIED TO SAVE EXISTING FILE!!!')
            t = Transfer.get_or_create(self.id)
            t.progress = 1
            t.save()
            self.update()
            return False

        path = 'Downloads/%s.%s' % (self.id, self.info['extension'])
        info = self.info.copy()
        for key in ('mediastate', 'coverRatio', 'previewRatio'):
            if key in info:
                del info[key]
        f = File.get_or_create(self.id, info, path=path)
        path = self.get_path()
        if os.path.exists(path):
            return False

        ox.makedirs(os.path.dirname(path))
        with open(path, 'wb') as fd:
            fd.write(content)
        f.info = media.metadata(path)
        f.save()
        for key in ('tableofcontents', ):
            if key not in self.meta and key in f.info:
                self.meta[key] = f.info[key]
        if u not in self.users:
            self.add_user(u)
        t = Transfer.get_or_create(self.id)
        t.progress = 1
        t.save()
        self.added = datetime.utcnow()
        Changelog.record(u, 'additem', self.id, f.info)
        Changelog.record(u, 'edititem', self.id, self.meta)
        self.update()
        f.move()
        self.update_icons()
        self.save()
        trigger_event('transfer', {
            'id': self.id, 'progress': 1
        })
        return True

    def remove_file(self):
        """Remove the item's files from disk and from the current user.

        The item itself is deleted if no user is left, otherwise it is marked
        as not added and updated. The removal is recorded in the changelog.

        NOTE(review): nesting was reconstructed from a flattened copy of the
        file; confirm that remove_empty_folders only runs for existing files.
        """
        for f in self.files.all():
            path = f.fullpath()
            if os.path.exists(path):
                os.unlink(path)
                remove_empty_folders(os.path.dirname(path))
            state.db.session.delete(f)
        user = state.user()
        if user in self.users:
            self.users.remove(user)
        for l in self.lists.filter_by(user_id=user.id):
            l.items.remove(self)
        if not self.users:
            self.delete()
        else:
            self.added = None
            self.update()
        Transfer.query.filter_by(item_id=self.id).delete()
        Changelog.record(user, 'removeitem', self.id)
class Sort(db.Model):
    """Sort values of an item, one row per item.

    The sort columns themselves are attached to this class at import time,
    based on config['itemKeys'].
    """
    __tablename__ = 'sort'

    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
    item = sa.orm.relationship('Item', backref=sa.orm.backref('sort', lazy='dynamic'))

    def __repr__(self):
        return '%s_sort' % self.item_id

    @classmethod
    def get(cls, item_id):
        """Return the Sort row for ``item_id`` or None."""
        query = cls.query.filter_by(item_id=item_id)
        return query.first()

    @classmethod
    def get_or_create(cls, item_id):
        """Return the Sort row for ``item_id``, creating and committing it if missing."""
        row = cls.get(item_id)
        if row:
            return row
        row = cls(item_id=item_id)
        session = state.db.session
        session.add(row)
        session.commit()
        return row
# Attach one indexed column to Sort for every sortable item key and remember
# the key ids in Item.sort_keys.
Item.sort_keys = []
for key in config['itemKeys']:
    if not key.get('sort'):
        continue
    sort_type = key.get('sortType', key['type'])
    if sort_type == 'integer':
        col = sa.Column(sa.BigInteger(), index=True)
    elif sort_type == 'float':
        col = sa.Column(sa.Float(), index=True)
    elif sort_type == 'date':
        col = sa.Column(sa.DateTime(), index=True)
    else:
        # everything else is sorted as a string
        col = sa.Column(sa.String(1000), index=True)
    setattr(Sort, '%s' % key['id'], col)
    Item.sort_keys.append(key['id'])

# identifier keys; Item.json() only reports them if they are part of meta
Item.id_keys = ['isbn', 'lccn', 'olid', 'oclc', 'asin']
Item.item_keys = config['itemKeys']
Item.filter_keys = [
    k['id']
    for k in config['itemKeys']
    if k.get('filter')
]
class Find(db.Model):
    """A searchable (key, value) pair of an item.

    ``value`` is the original value, ``findvalue`` the indexed form that is
    matched against.
    """
    __tablename__ = 'find'

    id = sa.Column(sa.Integer(), primary_key=True)
    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'))
    item = sa.orm.relationship('Item', backref=sa.orm.backref('find_', lazy='dynamic'))
    key = sa.Column(sa.String(200), index=True)
    value = sa.Column(sa.Text())
    findvalue = sa.Column(sa.Text(), index=True)

    def __repr__(self):
        return '%s=%s' % (self.key, self.findvalue)

    @classmethod
    def get(cls, item, key):
        """Return the first Find row for item id ``item`` and ``key`` or None."""
        query = cls.query.filter_by(item_id=item, key=key)
        return query.first()

    @classmethod
    def get_or_create(cls, item, key):
        """Return the Find row for item/key, creating and committing it if missing."""
        row = cls.get(item, key)
        if row:
            return row
        row = cls(item_id=item, key=key)
        session = state.db.session
        session.add(row)
        session.commit()
        return row
class File(db.Model):
    """A file on disk that belongs to an item.

    ``path`` is relative to the 'Books/' folder inside the library path from
    the preferences.
    """
    __tablename__ = 'file'

    created = sa.Column(sa.DateTime())
    modified = sa.Column(sa.DateTime())

    sha1 = sa.Column(sa.String(32), primary_key=True)
    path = sa.Column(sa.String(2048))

    info = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))

    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'))
    item = sa.orm.relationship('Item', backref=sa.orm.backref('files', lazy='dynamic'))

    @classmethod
    def get(cls, sha1):
        """Return the file with this sha1 or None."""
        return cls.query.filter_by(sha1=sha1).first()

    @classmethod
    def get_or_create(cls, sha1, info=None, path=None):
        """Return the file with this sha1, creating and committing it if missing.

        A new file is linked to the item with the same id, which is created
        with ``info`` if it does not exist either.
        """
        f = cls.get(sha1)
        if not f:
            f = cls(sha1=sha1)
            if info:
                f.info = info
            if path:
                f.path = path
            f.item_id = Item.get_or_create(id=sha1, info=info).id
            state.db.session.add(f)
            state.db.session.commit()
        return f

    def __repr__(self):
        return self.sha1

    def __init__(self, sha1):
        self.sha1 = sha1
        self.created = datetime.utcnow()
        self.modified = datetime.utcnow()

    @staticmethod
    def _library_prefix():
        """Return the 'Books/' folder inside the configured library path."""
        prefs = settings.preferences
        return os.path.join(os.path.expanduser(prefs['libraryPath']), 'Books/')

    def fullpath(self):
        """Return the absolute path of the file."""
        return os.path.join(self._library_prefix(), self.path)

    def make_readonly(self):
        """Remove all write permission bits from the file, if it exists."""
        current_path = self.fullpath()
        if os.path.exists(current_path):
            mode = os.stat(current_path)[stat.ST_MODE]
            readonly = mode & ~stat.S_IWUSR & ~stat.S_IWGRP & ~stat.S_IWOTH
            if mode != readonly:
                os.chmod(current_path, readonly)

    def move(self):
        """Move the file to '<initial>/<author>/<title> (<extra>).<ext>'.

        The target is derived from the item's metadata. If the target exists
        already, a growing prefix of the sha1 is inserted before the
        extension until the name is free (or is the file's current name).
        Does nothing if the file is missing or already in place.
        """
        def format_underscores(string):
            # leading/trailing dots and characters that are unsafe in paths
            return re.sub(r'^\.|\.$|:|/|\?|<|>', '_', string)

        def build_path(author, filename):
            # every candidate path gets the same clean-up
            first = unicodedata.normalize('NFD', author[0].upper())[0].upper()
            candidate = os.path.join(first, author, filename)
            candidate = candidate.replace('\x00', '')
            return ox.decode_html(candidate)

        prefix = self._library_prefix()
        j = self.item.json()

        current_path = self.fullpath()
        if not os.path.exists(current_path):
            logger.debug('file is missing. %s', current_path)
            return

        self.make_readonly()

        author = '; '.join([get_sort_name(a) for a in j.get('author', [])])
        if not author:
            author = 'Unknown Author'
        if ' (Ed.)' in author:
            author = author.replace(' (Ed.)', '') + ' (Ed.)'
        # an empty or missing title must not produce a name like '.pdf'
        title = j.get('title') or 'Untitled'
        extension = j['extension']

        if len(title) > 100:
            title = title[:100]

        title = format_underscores(title)
        author = format_underscores(author)
        publisher = j.get('publisher')
        extra = ', '.join(publisher) if publisher else ''
        date = j.get('date')
        if date and len(date) >= 4:
            extra += ' ' + date[:4]
        if extra:
            extra = format_underscores(extra)
            title = '%s (%s)' % (title, extra.strip())

        new_path = build_path(author, '%s.%s' % (title, extension))
        if self.path == new_path:
            return

        h = ''
        while os.path.exists(os.path.join(prefix, new_path)):
            if len(h) >= len(self.sha1):
                # the whole hash is used up, there is no further name to try
                logger.debug('no free path for %s, keeping %s', self.sha1, current_path)
                return
            h = self.sha1[:len(h)+1]
            new_path = build_path(author, '%s.%s.%s' % (title, h, extension))
            if current_path == os.path.join(prefix, new_path):
                break
        if self.path != new_path:
            path = os.path.join(prefix, new_path)
            ox.makedirs(os.path.dirname(path))
            shutil.move(current_path, path)
            self.path = new_path
            self.save()
            for folder in set(os.path.dirname(p) for p in [current_path, path]):
                remove_empty_folders(folder)

    def save(self):
        """Add the file to the session and commit."""
        state.db.session.add(self)
        state.db.session.commit()
class Scrape(db.Model):
    """Marks an item as queued for scraping; one row per item."""
    __tablename__ = 'scrape'

    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
    item = sa.orm.relationship('Item', backref=sa.orm.backref('scraping', lazy='dynamic'))

    added = sa.Column(sa.DateTime())

    def __repr__(self):
        return '='.join(str(part) for part in (self.item_id, self.added))

    @classmethod
    def get(cls, item_id):
        """Return the Scrape row for ``item_id`` or None."""
        query = cls.query.filter_by(item_id=item_id)
        return query.first()

    @classmethod
    def get_or_create(cls, item_id):
        """Return the Scrape row for ``item_id``, creating and saving it if missing."""
        row = cls.get(item_id)
        if row:
            return row
        row = cls(item_id=item_id)
        row.added = datetime.utcnow()
        row.save()
        return row

    def save(self):
        """Add the row to the session and commit."""
        session = state.db.session
        session.add(self)
        session.commit()

    def remove(self):
        """Delete the row and commit."""
        session = state.db.session
        session.delete(self)
        session.commit()
class Transfer(db.Model):
    """Download state of an item; one row per item.

    ``progress`` starts at 0 and is set to 1 once the file has been saved.
    """
    __tablename__ = 'transfer'

    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
    item = sa.orm.relationship('Item', backref=sa.orm.backref('transfer', lazy='dynamic'))

    added = sa.Column(sa.DateTime())
    progress = sa.Column(sa.Float())

    def __repr__(self):
        return '='.join(str(part) for part in (self.item_id, self.progress))

    @classmethod
    def get(cls, item_id):
        """Return the Transfer row for ``item_id`` or None."""
        query = cls.query.filter_by(item_id=item_id)
        return query.first()

    @classmethod
    def get_or_create(cls, item_id):
        """Return the Transfer row for ``item_id``, creating and saving it if missing."""
        row = cls.get(item_id)
        if row:
            return row
        row = cls(item_id=item_id)
        row.added = datetime.utcnow()
        row.progress = 0
        row.save()
        return row

    def save(self):
        """Add the row to the session and commit."""
        session = state.db.session
        session.add(self)
        session.commit()
class Metadata(db.Model):
    """Shared metadata stored for a (key, value) pair.

    ``data`` is a JSON-pickled dict of the metadata values.
    """
    __tablename__ = 'metadata'

    created = sa.Column(sa.DateTime())
    modified = sa.Column(sa.DateTime())

    id = sa.Column(sa.Integer(), primary_key=True)

    key = sa.Column(sa.String(256))
    value = sa.Column(sa.String(256))

    data = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))

    def __repr__(self):
        # str() keeps repr usable while key or value are still None
        return '='.join(map(str, [self.key, self.value]))

    @property
    def timestamp(self):
        """``modified`` converted with utils.datetime2ts."""
        return utils.datetime2ts(self.modified)

    @classmethod
    def get(cls, key, value):
        """Return the row for this key/value pair or None."""
        return cls.query.filter_by(key=key, value=value).first()

    @classmethod
    def get_or_create(cls, key, value):
        """Return the row for this key/value pair, creating and saving it if missing."""
        m = cls.get(key, value)
        if not m:
            m = cls(key=key, value=value)
            m.created = datetime.utcnow()
            m.data = {}
            m.save()
        return m

    def save(self):
        """Set ``modified`` to now, add the row to the session and commit."""
        self.modified = datetime.utcnow()
        state.db.session.add(self)
        state.db.session.commit()

    def reset(self):
        """Record a 'resetmeta' change, delete the row and update affected items."""
        user = state.user()
        Changelog.record(user, 'resetmeta', self.key, self.value)
        state.db.session.delete(self)
        state.db.session.commit()
        self.update_items()

    def edit(self, data, record=True):
        """Merge ``data`` into ``self.data`` and return the changed values.

        The 'id' key of ``data`` is ignored. If anything changed the row is
        saved and, unless ``record`` is false, an 'editmeta' change is
        recorded for the current user.
        """
        changed = {}
        for key in data:
            if key == 'id':
                continue
            if data[key] != self.data.get(key):
                self.data[key] = data[key]
                changed[key] = data[key]
        if changed:
            self.save()
            if record:
                user = state.user()
                Changelog.record(user, 'editmeta', self.key, self.value, changed)
        return changed

    def update_items(self):
        """Update every item that has a Find row matching this key/value pair."""
        for f in Find.query.filter_by(key=self.key, value=self.value):
            if f.item:
                f.item.update()

    @classmethod
    def load(cls, key, value):
        """Return the stored data for this key/value pair, {} if there is none.

        An 'id' entry is removed from the stored data before it is returned.
        """
        m = cls.get(key, value)
        if m:
            if 'id' in m.data:
                del m.data['id']
            return m.data
        return {}
def remove_unused_names():
    """Delete every Person whose sort name is not used by any item author."""
    used = set()
    for item in Item.query:
        for author in item.meta.get('author', []):
            used.add(get_sort_name(author))
    unused = Person.query.filter(Person.sortname.notin_(list(used)))
    for person in unused:
        state.db.session.delete(person)
    state.db.session.commit()
def update_sort_table():
    """Bring the 'sort' table in line with Item.sort_keys.

    Drops columns that are no longer sort keys, adds missing columns (taking
    their definition from the CREATE TABLE statement of Sort) together with
    their index, and finally creates any index that is still missing.
    """
    def run(statements):
        # execute all statements in one session, then commit
        with db.session() as s:
            for q in statements:
                s.connection().execute(q)
            s.commit()

    index_sql = 'CREATE INDEX ix_sort_{col} ON sort ({col})'

    current = db.get_table_columns('sort')
    wanted = set(Item.sort_keys + ['item_id'])
    drop_columns = list(set(current) - wanted)
    if drop_columns:
        db.drop_columns('sort', drop_columns)

    add_columns = list(set(Item.sort_keys) - set(current + ['item_id']))
    if add_columns:
        create_table = str(CreateTable(Sort.__table__).compile(db.engine)).split('\n')
        sql = []
        for col in add_columns:
            marker = '\t%s ' % col
            matching = [r for r in create_table if marker in r]
            # column definition without the last character (the separating comma)
            add = matching[0].strip()[:-1]
            sql.append('ALTER TABLE sort ADD ' + add)
            sql.append(index_sql.format(col=col))
        run(sql)

    layout = db.get_layout()
    offset = len('ix_sort_')
    sort_indexes = [i[offset:] for i in layout['indexes'] if i.startswith('ix_sort_')]
    sql = [
        index_sql.format(col=col)
        for col in set(Item.sort_keys) - set(sort_indexes)
    ]
    if sql:
        run(sql)