# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
'''
Database models for library items (Item), their sort and find index rows
(Sort, Find) and the files backing them (File), plus the background task
helpers that refresh covers, previews and peer metadata.
'''

from datetime import datetime
import base64
import hashlib
import os
import re
import shutil
import stat
import unicodedata
import time

import ox
from sqlalchemy.orm import load_only
from sqlalchemy.schema import CreateTable
import sqlalchemy as sa

from changelog import Changelog
from db import MutableDict
import json_pickler
from .icons import icons
from .person import get_sort_name, Person
from queryparser import Parser
from settings import config
from utils import remove_empty_folders, get_ratio
from websocket import trigger_event
import db
import media
import meta
import settings
import state
import utils

import logging
logger = logging.getLogger(__name__)

# association table linking users to the items they have
user_items = sa.Table('useritem', db.metadata,
                      sa.Column('user_id', sa.String(43), sa.ForeignKey('user.id')),
                      sa.Column('item_id', sa.String(32), sa.ForeignKey('item.id')))


def _item_id(value):
    '''
        Return the item id for value.

        A list is joined, hashed with sha1 and base32 encoded; the text is
        encoded to UTF-8 before hashing and the result is decoded back to str
        so it can be stored in the String(32) id column. Any other value is
        returned unchanged.
    '''
    if isinstance(value, list):
        digest = hashlib.sha1(''.join(value).encode('utf-8')).digest()
        return base64.b32encode(digest).decode('ascii')
    return value


def _library_prefix():
    '''
        Return the "Books" folder below the configured libraryPath,
        using the separator of the current platform.
    '''
    prefs = settings.preferences
    return os.sep.join(os.path.join(os.path.expanduser(prefs['libraryPath']), 'Books/').split('/'))


class Item(db.Model):
    '''
        A library item: technical data lives in info, editable
        metadata (see meta_keys) lives in meta.
    '''
    __tablename__ = 'item'

    created = sa.Column(sa.DateTime())
    modified = sa.Column(sa.DateTime())

    id = sa.Column(sa.String(32), primary_key=True)

    info = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))
    meta = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))

    # why is this in db and not in i.e. info?
    added = sa.Column(sa.DateTime())  # added to local library
    accessed = sa.Column(sa.DateTime())
    timesaccessed = sa.Column(sa.Integer())

    users = sa.orm.relationship('User', secondary=user_items,
                                backref=sa.orm.backref('items', lazy='dynamic'))

    @property
    def timestamp(self):
        '''modified converted with utils.datetime2ts'''
        return utils.datetime2ts(self.modified)

    def __repr__(self):
        return self.id

    def __init__(self, id):
        '''
            Create an item with empty info/meta; a list id is hashed
            into an item id first.
        '''
        self.id = _item_id(id)
        self.created = datetime.utcnow()
        self.modified = datetime.utcnow()
        self.info = {}
        self.meta = {}

    @classmethod
    def get(cls, id):
        '''Return the item with the given id (or hashed list id), or None.'''
        return cls.query.filter_by(id=_item_id(id)).first()

    @classmethod
    def get_or_create(cls, id, info=None):
        '''
            Return the item with the given id; if it does not exist yet it is
            created (with info, if given), added to the session and committed.
        '''
        id = _item_id(id)
        item = cls.query.filter_by(id=id).first()
        if not item:
            item = cls(id=id)
            if info:
                item.info = info
            state.db.session.add(item)
            state.db.session.commit()
        return item

    @classmethod
    def find(cls, data):
        '''Hand the query described by data to the query Parser.'''
        from user.models import list_items
        return Parser(cls, user_items, list_items, Find, Sort).find(data)

    @classmethod
    def remove_many(cls, ids):
        '''
            Bulk delete the items with the given ids together with their
            Find and Sort rows; does not commit.
        '''
        Find.query.filter(Find.item_id.in_(ids)).delete(synchronize_session=False)
        Sort.query.filter(Sort.item_id.in_(ids)).delete(synchronize_session=False)
        cls.query.filter(cls.id.in_(ids)).delete(synchronize_session=False)
        # bulk deletes bypass the session, drop whatever it still has cached
        state.db.session.expire_all()
        Sort.query.filter_by(item_id=None).delete()
        Find.query.filter_by(item_id=None).delete()

    @classmethod
    def remove_without_user(cls):
        '''Remove all items that are not referenced in the useritem table.'''
        q = user_items.select()
        owned_ids = {i['item_id'] for i in state.db.session.execute(q)}
        ids = {i.id for i in cls.query.options(load_only('id'))}
        remove = ids - owned_ids
        if remove:
            cls.remove_many(remove)

    def add_user(self, user):
        '''Link user to this item and add the item to the user's library list.'''
        from user.models import list_items
        if user not in self.users:
            self.users.append(user)
        library = user.library
        if self not in library.items:
            q = list_items.insert({'item_id': self.id, 'list_id': library.id})
            state.db.session.execute(q)

    def json(self, keys=None):
        '''
            Return a dict describing the item.

            keys, if given, limits the result to those keys. Values from meta
            take precedence over values from info, keys listed in array_keys
            are always returned as lists.
        '''
        j = {}
        j['id'] = self.id
        j['created'] = self.created
        j['modified'] = self.modified
        j['timesaccessed'] = self.timesaccessed
        j['accessed'] = self.accessed
        j['added'] = self.added
        if (not keys or 'transferadded' in keys or 'transferprogress' in keys) \
                and state.downloads and not state.shutdown:
            t = state.downloads.transfers.get(self.id)
            if t:
                j['transferadded'] = t['added']
                j['transferprogress'] = t['progress']

        # unused and slow
        # j['users'] = list(map(str, list(self.users)))

        if self.info:
            # metadata keys are only taken from meta, except for pages
            meta_keys = [k for k in self.meta_keys if k != 'pages']
            for key in self.info:
                if (not keys or key in keys) and key not in meta_keys:
                    j[key] = self.info[key]

        if self.meta:
            for key in self.meta:
                if not keys or key in keys:
                    j[key] = self.meta[key]

        for key in self.id_keys:
            if key not in self.meta and key in j:
                del j[key]
        if keys:
            for k in list(j):
                if k not in keys:
                    del j[k]
        for key in self.array_keys:
            if key in j and not isinstance(j[key], list):
                j[key] = [j[key]]
        if keys is None or 'sharemetadata' in keys:
            # always return a real boolean False if sharing is not enabled
            j['sharemetadata'] = j.get('sharemetadata', False)
            if not j['sharemetadata']:
                j['sharemetadata'] = False
        return j

    def get_path(self):
        '''Return the full path of the first file of this item, or None.'''
        f = self.files.first()
        return f.fullpath() if f else None

    def update_sort(self, commit=True):
        '''
            Recompute the Sort row of this item from its current values.

            The row is added to the session if any column changed; commit is
            passed on to Sort.get_or_create and get_sort_name.
        '''
        update = False
        s = Sort.get_or_create(self.id, commit=commit)
        # building the json representation is slow, do it once for all keys
        data = self.json()
        for key in config['itemKeys']:
            if not key.get('sort'):
                continue
            value = data.get(key['id'], None)
            sort_type = key.get('sortType', key['type'])
            if value:
                if sort_type == 'integer':
                    if isinstance(value, str):
                        value = int(re.sub('[^0-9]', '', value))
                    else:
                        value = int(value)
                elif sort_type == 'float':
                    value = float(value)
                elif sort_type in ('date', 'boolean'):
                    # stored as is
                    pass
                elif sort_type == 'person':
                    if not isinstance(value, list):
                        value = [value]
                    value = [get_sort_name(v, commit=commit) for v in value]
                    value = ox.sort_string('\n'.join(value)).lower()
                elif sort_type == 'title':
                    value = self.get_sorttitle().lower()
                    value = utils.sort_title(value)
                else:
                    if isinstance(value, list):
                        value = '\n'.join(value)
                    if value:
                        value = str(value)
                        value = ox.sort_string(value).lower()
            elif isinstance(value, list):  # empty list
                value = None
            if not value and sort_type != 'boolean':
                value = None
            if getattr(s, key['id']) != value:
                setattr(s, key['id'], value)
                update = True
        if update:
            state.db.session.add(s)

    def update_find(self, commit=True):
        '''
            Synchronize the Find rows of this item with its current values:
            rows are added or updated for current values and deleted for
            values and keys that are gone. Does not commit by itself.
        '''
        current_values = {}
        for f in Find.query.filter_by(item_id=self.id):
            if f.key not in current_values:
                current_values[f.key] = set()
            current_values[f.key].add(f.value)

        def add(k, v):
            # reuse the existing row for this key/value pair if there is one
            if k in current_values and v in current_values[k]:
                f = Find.query.filter_by(item_id=self.id, key=k, value=v).first()
            else:
                f = Find(item_id=self.id, key=k)
            if f.value != v:
                f.findvalue = unicodedata.normalize('NFKD', v).lower()
                f.value = v
            if k in self.filter_keys:
                sort_type = utils.get_by_id(settings.config['itemKeys'], k).get('sortType')
                if sort_type == 'person':
                    f.sortvalue = get_sort_name(f.value, commit=commit)
                else:
                    f.sortvalue = f.value
                if f.sortvalue:
                    f.sortvalue = ox.sort_string(unicodedata.normalize('NFKD', f.sortvalue)).lower()
            else:
                f.sortvalue = None
            state.db.session.add(f)

        keys = []
        # building the json representation is slow, do it once for all keys
        data = self.json()
        for key in config['itemKeys']:
            if key.get('find') or \
                    key.get('filter') or key.get('type') in [['string'], 'string'] or \
                    (key.get('type') == 'boolean' and key.get('sort')):
                value = data.get(key['id'], None)
                if key.get('filterMap') and value:
                    value = re.compile(key.get('filterMap')).findall(value)
                    if value:
                        value = value[0]
                if key.get('type') == 'boolean':
                    value = True if value else False
                    value = str(value).lower()
                if value:
                    keys.append(key['id'])
                    if isinstance(value, dict):
                        value = ' '.join(list(value.values()))
                    if not isinstance(value, list):
                        value = [value]
                    value = [
                        v.decode('utf-8') if isinstance(v, bytes) else v
                        for v in value
                    ]
                    for v in value:
                        add(key['id'], v)
                    if key['id'] in current_values:
                        removed_values = current_values[key['id']] - set(value)
                        if removed_values:
                            for f in Find.query.filter_by(item_id=self.id, key=key['id']).filter(Find.value.in_(removed_values)):
                                state.db.session.delete(f)
        removed_keys = set(current_values) - set(keys)
        if removed_keys:
            for f in Find.query.filter_by(item_id=self.id).filter(Find.key.in_(removed_keys)):
                state.db.session.delete(f)

    def update_mediastate(self):
        '''
            Set info['mediastate'] to one of
            available, unavailable, transferring
        '''
        transfer = state.downloads.transfers.get(self.id) if state.downloads else None
        if transfer and transfer.get('added') and transfer.get('progress', 0) < 1:
            mediastate = 'transferring'
        elif self.files.count():
            mediastate = 'available'
        else:
            mediastate = 'unavailable'
        self.info['mediastate'] = mediastate

    def update(self, modified=None, commit=True):
        '''
            Refresh mediastate, modified and the sort/find rows.

            modified defaults to the current UTC time; with commit=False the
            item is only added to the session.
        '''
        self.update_mediastate()
        if modified:
            self.modified = modified
        else:
            self.modified = datetime.utcnow()
        self.update_sort(commit=commit)
        self.update_find(commit=commit)
        if commit:
            self.save()
        else:
            state.db.session.add(self)

    def save(self):
        '''Add the item to the session and commit.'''
        state.db.session.add(self)
        state.db.session.commit()

    def delete(self, commit=True):
        '''
            Delete the item, its Sort row, its pending transfer and its
            cached cover and preview icons.
        '''
        Sort.query.filter_by(item_id=self.id).delete()
        if state.downloads and self.id in state.downloads.transfers:
            del state.downloads.transfers[self.id]
        state.db.session.delete(self)
        icons.clear('cover:%s' % self.id)
        icons.clear('preview:%s' % self.id)
        if commit:
            state.db.session.commit()

    # keys that are stored in meta, everything else is dropped from meta
    meta_keys = (
        'author',
        'categories',
        'cover',
        'date',
        'description',
        'edition',
        'isbn',
        'language',
        'pages',
        'place',
        'publisher',
        'series',
        'sharemetadata',
        'tableofcontents',
        'title',
        'sorttitle'
    )

    def update_metadata(self, data, modified=None):
        '''
            Apply the values for meta_keys found in data to meta and remove
            keys from meta that are not in meta_keys.

            If anything changed the item is updated and saved, the cover is
            refreshed if it changed and the changed values are recorded in the
            Changelog if the current user has the item. Turning sharemetadata
            off triggers sync_metadata.
        '''
        update = False
        record = {}
        for key in self.meta_keys:
            if key in data:
                if self.meta.get(key) != data[key]:
                    record[key] = data[key]
                    self.meta[key] = data[key]
                    update = True
        for key in list(self.meta):
            if key not in self.meta_keys:
                del self.meta[key]
                update = True
        if update:
            self.update(modified)
            self.save()
            if 'cover' in record:
                self.update_cover()
            user = state.user()
            if record and user in self.users:
                Changelog.record(user, 'edititem', self.id, record, _ts=modified)
        if 'sharemetadata' in record and not record['sharemetadata']:
            self.sync_metadata()

    def edit(self, data, modified=None):
        '''Update metadata from data and move the files to their new names.'''
        self.update_metadata(data, modified)
        for f in self.files.all():
            f.move()

    def get_hash(self):
        '''Return utils.get_meta_hash of meta.'''
        return utils.get_meta_hash(self.meta)

    def get_sorttitle(self):
        '''
            Return meta['sorttitle'] if it is set, otherwise the sort title
            derived from the title (or from 'Untitled').
        '''
        title = self.meta.get('sorttitle')
        if title is None:
            title = self.meta.get('title', 'Untitled')
            title = ox.get_sort_title(title)
        return title

    def sync_metadata(self):
        '''
            Take over the metadata of a peer, unless sharemetadata is set
            for this item locally.

            The first peer (ordered by utils.user_sort_key) that shares its
            metadata for the item is used; if the item is not available
            locally the first peer that has metadata for it is used instead.
        '''
        if self.meta.get('sharemetadata'):
            return
        peers = [u for u in self.users if u.id != settings.USER_ID]
        peers.sort(key=lambda u: utils.user_sort_key(u.json()))
        sync_from = None
        first_peer = None
        # get first peer with sharemetadata set
        for u in peers:
            peer = utils.get_peer(u.id)
            if self.id in peer.library:
                m = peer.library[self.id].get('meta')
            else:
                m = None
            if m:
                if m.get('sharemetadata'):
                    sync_from = u.id
                    break
                if not first_peer:
                    first_peer = u.id
        # or fall back to first peer that has this item
        # in case it's not available locally
        if not sync_from and self.info.get('mediastate') != 'available' and first_peer:
            # logger.debug('syncing from first peer that has item %s', first_peer)
            sync_from = first_peer
        if not sync_from:
            return
        peer = utils.get_peer(sync_from)
        data_hash = peer.get_metahash(self.id)
        item = peer.library[self.id]
        sync_meta = item['meta']
        sync_modified = item.get('modified')
        if self.get_hash() == data_hash:
            return
        logger.debug('update %s with metadata from %s', self, sync_from)
        record = {}
        for key in sync_meta:
            if key != 'sharemetadata' and self.meta.get(key) != sync_meta[key]:
                record[key] = self.meta[key] = sync_meta[key]
        # keys the peer does not have are reset to an empty value
        for key in set(self.meta)-set(sync_meta):
            record[key] = self.meta[key] = [] if key in self.array_keys else ''
        self.update(sync_modified)
        self.save()
        for f in self.files.all():
            f.move()
        user = state.user()
        if record and user in self.users:
            Changelog.record(user, 'edititem', self.id, record, _ts=self.modified)
        if 'cover' in record:
            if state.tasks:
                state.tasks.queue('getcover', self.id)

    def extract_preview(self):
        '''
            Return the cover extracted from the local file.

            Without a local file a download of the preview is requested from
            the other users of the item (stopping at the first one for which
            download_preview returns a true value) and None is returned.
        '''
        path = self.get_path()
        if path:
            return getattr(media, self.info['extension']).cover(path)
        for u in self.users:
            if u.id != settings.USER_ID:
                if state.nodes.download_preview(u.id, self.id):
                    break

    def update_cover(self):
        '''
            Download the cover from meta['cover'] and cache it in icons.

            If there is no cover (or it can not be read) the cached cover is
            removed and coverRatio falls back to previewRatio.
        '''
        logger.debug('%s update cover', self.id)
        key = 'cover:%s' % self.id
        cover = None
        if 'cover' in self.meta and self.meta['cover']:
            logger.debug('download cover %s %s', self.id, self.meta['cover'])
            try:
                cover = ox.cache.read_url(self.meta['cover'])
            except Exception:
                # best effort, treat an unreadable url like a missing cover
                logger.debug('unable to read cover url %s', self.meta['cover'])
                cover = None
        if cover:
            icons[key] = cover
            self.info['coverRatio'] = get_ratio(cover)
        else:
            del icons[key]
            if 'previewRatio' in self.info:
                self.info['coverRatio'] = self.info['previewRatio']
            elif 'coverRatio' in self.info:
                del self.info['coverRatio']
        # remove cached variants of the cover
        icons.clear('cover:%s:' % self.id)
        logger.debug('%s update_cover done', self.id)

    def get_preview(self):
        '''Extract and cache the preview if it is not in icons yet.'''
        key = 'preview:%s' % self.id
        data = icons[key]
        if not data:
            preview = self.extract_preview()
            if preview:
                icons[key] = preview

    def update_preview(self):
        '''
            Extract the preview again and cache it in icons.

            Without a preview the cached preview is removed and previewRatio
            falls back to coverRatio.
        '''
        logger.debug('%s update_preview', self.id)
        key = 'preview:%s' % self.id
        preview = self.extract_preview()
        if preview:
            icons[key] = preview
            self.info['previewRatio'] = get_ratio(preview)
            if 'coverRatio' not in self.info:
                self.info['coverRatio'] = self.info['previewRatio']
        else:
            del icons[key]
            if 'coverRatio' in self.info:
                self.info['previewRatio'] = self.info['coverRatio']
            elif 'previewRatio' in self.info:
                del self.info['previewRatio']
        # remove cached variants of the preview
        icons.clear('preview:%s:' % self.id)
        logger.debug('%s update_preview done', self.id)

    def update_icons(self):
        '''
            Update the cover (queued as 'getcover' task while offline)
            and the preview.
        '''
        if state.online:
            self.update_cover()
        elif state.tasks:
            state.tasks.queue('getcover', self.id)
        self.update_preview()

    def load_metadata(self):
        '''
            load metadata from user_metadata or get via isbn?
        '''
        # move metadata keys from info to meta, pages is kept in both
        for key in self.meta_keys:
            if key in self.info:
                if key not in self.meta:
                    self.meta[key] = self.info[key]
                if key != 'pages':
                    del self.info[key]

        # FIXME get from user_meta
        if state.online:
            if 'isbn' in self.meta:
                data = meta.lookup_isbn(self.meta['isbn'])
                if data:
                    for key in data:
                        self.meta[key] = data[key]

    def queue_download(self):
        '''
            Register a transfer for the item unless there is one already
            and add the item to the current user.
        '''
        u = state.user()
        if self.id not in state.downloads.transfers:
            state.downloads.transfers[self.id] = {
                'added': datetime.utcnow(),
                'progress': 0
            }
            logger.debug('queue %s for download', self.id)
        if u not in self.users:
            self.add_user(u)

    def save_file(self, content):
        '''
            Store content as the file of this item.

            Returns False if the id computed for content does not match the
            item id, or if there is no File yet but the target path already
            exists; returns True if the file was written or a File for this
            item already exists.
        '''
        u = state.user()
        f = File.get(self.id)
        content_id = media.get_id(data=content)
        if content_id != self.id:
            logger.debug('INVALID CONTENT %s vs %s', self.id, content_id)
            return False
        if not f:
            path = '.import/%s.%s' % (self.id, self.info['extension'])
            info = self.info.copy()
            for key in ('mediastate', 'coverRatio', 'previewRatio'):
                if key in info:
                    del info[key]
            f = File.get_or_create(self.id, info, path=path)
            path = self.get_path()
            if not os.path.exists(path):
                ox.makedirs(os.path.dirname(path))
                with open(path, 'wb') as fd:
                    fd.write(content)
                f.info = media.metadata(path)
                f.save()
                for key in ('tableofcontents', ):
                    if key not in self.meta and key in f.info:
                        self.meta[key] = f.info[key]
                if u not in self.users:
                    self.add_user(u)
                if state.downloads and self.id in state.downloads.transfers:
                    del state.downloads.transfers[self.id]
                self.added = datetime.utcnow()
                Changelog.record(u, 'additem', self.id, f.info)
                Changelog.record(u, 'edititem', self.id, self.meta)
                for lst in self.lists.filter_by(user_id=settings.USER_ID):
                    if lst.name != '':
                        Changelog.record(lst.user, 'addlistitems', lst.name, [self.id])
                self.update()
                f.move()
                self.update_icons()
                self.save()
                trigger_event('transfer', {
                    'id': self.id, 'progress': 1
                })
                return True
        else:
            logger.debug('TRIED TO SAVE EXISTING FILE!!!')
            if state.downloads and self.id in state.downloads.transfers:
                del state.downloads.transfers[self.id]
            self.update()
            return True
        return False

    def remove_file(self):
        '''
            Remove the files of the item from disk and database and remove
            the item from the current user and the user's lists.

            The item itself is deleted once no user is left. Failing to
            remove a file from disk is logged and otherwise ignored.
        '''
        for f in self.files.all():
            path = f.fullpath()
            if os.path.exists(path):
                try:
                    # files are kept read-only, see File.make_readonly
                    os.chmod(path, stat.S_IWRITE)
                    os.unlink(path)
                    remove_empty_folders(os.path.dirname(path))
                except Exception:
                    logger.debug('failed to remove %s', path, exc_info=True)
            state.db.session.delete(f)
        user = state.user()
        if user in self.users:
            self.users.remove(user)
        for lst in self.lists.filter_by(user_id=user.id):
            lst.items.remove(self)
        if self.meta.get('sharemetadata'):
            self.meta['sharemetadata'] = False
        if not self.users:
            self.delete()
        else:
            self.added = None
            self.update()
        if state.downloads:
            if self.id in state.downloads.transfers:
                del state.downloads.transfers[self.id]
        Changelog.record(user, 'removeitem', self.id)


class Sort(db.Model):
    '''
        One row per item with one column for each sortable item key,
        the columns are added below based on config['itemKeys'].
    '''
    __tablename__ = 'sort'

    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
    item = sa.orm.relationship('Item', backref=sa.orm.backref('sort', lazy='dynamic'))

    def __repr__(self):
        return '%s_sort' % self.item_id

    @classmethod
    def get(cls, item_id):
        '''Return the Sort row of the given item, or None.'''
        return cls.query.filter_by(item_id=item_id).first()

    @classmethod
    def get_or_create(cls, item_id, commit=True):
        '''
            Return the Sort row of the given item; a missing row is created,
            added to the session and committed unless commit is False.
        '''
        f = cls.get(item_id)
        if not f:
            f = cls(item_id=item_id)
            state.db.session.add(f)
            if commit:
                state.db.session.commit()
        return f


# add one indexed column to Sort for each sortable key in the config
Item.sort_keys = []
for key in config['itemKeys']:
    if key.get('sort'):
        sort_type = key.get('sortType', key['type'])
        if sort_type == 'integer':
            col = sa.Column(sa.BigInteger(), index=True)
        elif sort_type == 'float':
            col = sa.Column(sa.Float(), index=True)
        elif sort_type == 'date':
            col = sa.Column(sa.DateTime(), index=True)
        elif sort_type == 'boolean':
            col = sa.Column(sa.Boolean(), index=True)
        else:
            col = sa.Column(sa.String(1000), index=True)
        setattr(Sort, '%s' % key['id'], col)
        Item.sort_keys.append(key['id'])

Item.id_keys = ['isbn', 'lccn', 'olid', 'oclc', 'asin']
Item.item_keys = config['itemKeys']
Item.filter_keys = [k['id'] for k in config['itemKeys'] if k.get('filter')]
Item.array_keys = [k['id'] for k in config['itemKeys'] if isinstance(k['type'], list)]


class Find(db.Model):
    '''
        One row per item, key and value: value is the original value,
        findvalue and sortvalue are its normalized forms (see
        Item.update_find).
    '''
    __tablename__ = 'find'

    id = sa.Column(sa.Integer(), primary_key=True)
    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'))
    item = sa.orm.relationship('Item', backref=sa.orm.backref('find_', lazy='dynamic'))
    key = sa.Column(sa.String(200), index=True)
    value = sa.Column(sa.Text())
    findvalue = sa.Column(sa.Text(), index=True)
    sortvalue = sa.Column(sa.Text())

    def __repr__(self):
        return '%s=%s' % (self.key, self.findvalue)

    @classmethod
    def get(cls, item, key):
        '''Return the first Find row for the given item id and key, or None.'''
        return cls.query.filter_by(item_id=item, key=key).first()

    @classmethod
    def get_or_create(cls, item, key, commit=True):
        '''
            Return the first Find row for the given item id and key; a missing
            row is created, added to the session and committed unless commit
            is False.
        '''
        f = cls.get(item, key)
        if not f:
            f = cls(item_id=item, key=key)
            state.db.session.add(f)
            if commit:
                state.db.session.commit()
        return f


class File(db.Model):
    '''
        A file in the library, identified by its sha1; path is relative
        to the Books folder of the library.
    '''
    __tablename__ = 'file'

    created = sa.Column(sa.DateTime())
    modified = sa.Column(sa.DateTime())

    sha1 = sa.Column(sa.String(32), primary_key=True)
    path = sa.Column(sa.String(2048))

    info = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))

    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'))
    item = sa.orm.relationship('Item', backref=sa.orm.backref('files', lazy='dynamic'))

    @classmethod
    def get(cls, sha1):
        '''Return the file with the given sha1, or None.'''
        return cls.query.filter_by(sha1=sha1).first()

    @classmethod
    def get_or_create(cls, sha1, info=None, path=None):
        '''
            Return the file with the given sha1; a missing file is created
            with info and path, linked to the item with the same id (created
            if needed) and committed.
        '''
        f = cls.get(sha1)
        if not f:
            f = cls(sha1=sha1)
            if info:
                f.info = info
            if path:
                f.path = path
            f.item_id = Item.get_or_create(id=sha1, info=info).id
            state.db.session.add(f)
            state.db.session.commit()
        return f

    def __repr__(self):
        return self.sha1

    def __init__(self, sha1):
        self.sha1 = sha1
        self.created = datetime.utcnow()
        self.modified = datetime.utcnow()

    def fullpath(self):
        '''Return the normalized absolute path of the file in the library.'''
        return os.path.normpath(os.path.join(_library_prefix(), self.path))

    def make_readonly(self):
        '''Remove the write permission bits of the file, if it exists.'''
        current_path = self.fullpath()
        if os.path.exists(current_path):
            mode = os.stat(current_path)[stat.ST_MODE]
            readonly = mode & ~stat.S_IWUSR & ~stat.S_IWGRP & ~stat.S_IWOTH
            if mode != readonly:
                os.chmod(current_path, readonly)

    def move(self):
        '''
            Move the file to the path derived from the metadata of its item:
            <first letter of author>/<author>/<title> (<publisher> <year>).<extension>

            If that path is taken by another file a growing prefix of the
            sha1 is inserted before the extension. Nothing happens if the
            file has no item or is missing on disk; a failed move is logged
            and leaves path unchanged. Afterwards the file is made read-only.
        '''
        def format_underscores(string):
            return re.sub(r'^\.|\.$|:|/|\?|<|>|\\|\*', '_', string)

        prefix = _library_prefix()
        if not self.item:
            return
        j = self.item.json(keys=['title', 'author', 'publisher', 'date', 'extension'])

        current_path = self.fullpath()
        if not os.path.exists(current_path):
            logger.debug('file is missing. %s', current_path)
            return

        author = '; '.join([get_sort_name(a) for a in j.get('author', [])])
        if not author:
            author = 'Unknown Author'
        if ' (Ed.)' in author:
            author = author.replace(' (Ed.)', '') + ' (Ed.)'
        if len(author) > 255:
            author = 'Various Authors'
        title = j.get('title', 'Untitled')
        extension = j['extension']

        if len(title) > 100:
            title = title[:100]

        title = format_underscores(title)
        author = format_underscores(author)
        publisher = j.get('publisher')
        if publisher:
            extra = ', '.join(publisher)
        else:
            extra = ''
        date = j.get('date')
        if date and len(date) >= 4:
            extra += ' ' + date[:4]
        if extra:
            extra = format_underscores(extra)
            title = '%s (%s)' % (title, extra.strip())
        # folder name: first character of the author without diacritics
        first = unicodedata.normalize('NFD', author[0].upper())[0].upper()

        def build_path(filename):
            # every candidate path gets the same cleanup
            path = os.path.join(first, author, filename)
            return ox.decode_html(path.replace('\x00', ''))

        new_path = build_path('%s.%s' % (title, extension))
        if self.path == new_path:
            return
        h = ''
        while os.path.exists(os.path.join(prefix, new_path)):
            if len(h) >= len(self.sha1):
                # all prefixes of the sha1 are taken, give up instead of looping forever
                logger.debug('no free path for %s, keeping %s', self.sha1, current_path)
                return
            h = self.sha1[:len(h)+1]
            new_path = build_path('%s.%s.%s' % (title, h, extension))
            if current_path == os.path.join(prefix, new_path):
                break
        if self.path != new_path:
            path = os.path.join(prefix, new_path)
            ox.makedirs(os.path.dirname(path))
            try:
                # files are kept read-only, make it writable before moving
                os.chmod(current_path, stat.S_IWRITE)
                shutil.move(current_path, path)
            except Exception:
                logger.debug('failed to move %s to %s', current_path, path, exc_info=True)
                return
            self.path = new_path
            self.save()
            for folder in set(os.path.dirname(p) for p in [current_path, path]):
                remove_empty_folders(folder)
        self.make_readonly()

    def save(self):
        '''Add the file to the session and commit.'''
        state.db.session.add(self)
        state.db.session.commit()


def remove_unused_names():
    '''Delete all Person rows whose sortname is not used by any item author.'''
    used = list(set(
        get_sort_name(a)
        for i in Item.query
        for a in i.meta.get('author', [])
    ))
    for p in Person.query.filter(Person.sortname.notin_(used)):
        state.db.session.delete(p)
    state.db.session.commit()


def _column_definition(model, col):
    '''
        Return the definition of column col as found in the CREATE TABLE
        statement of model, without the trailing comma.
    '''
    create_table = str(CreateTable(model.__table__).compile(db.engine)).split('\n')
    return [r for r in create_table if '\t%s ' % col in r][0].strip()[:-1]


def _execute_statements(sql):
    '''Execute the given SQL statements in one session and commit.'''
    with db.session() as s:
        for q in sql:
            s.connection().execute(q)
        s.commit()


def update_sort_table():
    '''
        Bring the sort and find tables in line with the models: drop sort
        columns that are no longer sort keys, add missing sort columns and
        indexes and add the sortvalue column to find if it is missing.
    '''
    current = db.get_table_columns('sort')
    drop_columns = list(set(current) - set(Item.sort_keys+['item_id']))
    if drop_columns:
        db.drop_columns('sort', drop_columns)
    add_columns = list(set(Item.sort_keys)-set(current+['item_id']))
    if add_columns:
        sql = []
        for col in add_columns:
            sql.append('ALTER TABLE sort ADD '+_column_definition(Sort, col))
            sql.append('CREATE INDEX ix_sort_{col} ON sort ({col})'.format(col=col))
        _execute_statements(sql)
    sql = []
    layout = db.get_layout()
    sort_indexes = [i[len('ix_sort_'):] for i in layout['indexes'] if i.startswith('ix_sort_')]
    for col in set(Item.sort_keys)-set(sort_indexes):
        sql.append('CREATE INDEX ix_sort_{col} ON sort ({col})'.format(col=col))
    if 'sortvalue' not in db.get_table_columns('find'):
        sql.append('ALTER TABLE find ADD '+_column_definition(Find, 'sortvalue'))
    if sql:
        _execute_statements(sql)


def get_cover(id):
    '''
        Update the cover of the item with the given id; while offline
        a 'getcover' task is queued again after a delay instead.
    '''
    delay = 60
    if state.online:
        # logger.debug('get_cover(%s)', id)
        with db.session():
            i = Item.get(id)
            if i:
                i.update_cover()
    else:
        state.main.call_later(delay, lambda: state.tasks.queue('getcover', id))


def get_preview(id):
    '''
        Make sure the preview of the item with the given id is cached;
        while offline the 'getpreview' task is queued again.
    '''
    if state.online:
        # logger.debug('get_preview(%s)', id)
        with db.session():
            i = Item.get(id)
            if i:
                i.get_preview()
    else:
        state.tasks.queue('getpreview', id)
        time.sleep(0.5)


def sync_metadata(ids=None):
    '''
        Run Item.sync_metadata for the items with the given ids
        (all items if ids is empty).

        At most step items are handled per call, the rest is queued as a
        'syncmetadata' task after delay seconds. On shutdown processing stops
        and the ids that were not handled yet are queued right away.
    '''
    # logger.debug('sync_metadata(%s)', len(ids) if len(ids) > 10 else ids)
    step = 1000
    delay = 10
    with db.session():
        if not ids:
            ids = [i.id for i in Item.query.options(load_only('id'))]
        if len(ids) > step:
            later = ids[step:]
            ids = ids[:step]
        else:
            later = []
        if ids:
            done = set()
            for i in Item.query.filter(Item.id.in_(ids)):
                i.sync_metadata()
                done.add(i.id)
                if state.shutdown:
                    later = list((set(later) | set(ids)) - done)
                    if later and state.tasks:
                        state.tasks.queue('syncmetadata', later)
                    # already queued, nothing left to schedule below
                    later = None
                    break
        if later:
            if state.main and state.tasks:
                state.main.call_later(delay, lambda: state.tasks.queue('syncmetadata', later))
        # else:
        #     logger.debug('sync_metadata done')