openmedialibrary/oml/item/models.py

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import base64
import hashlib
import json
import os
import re
import shutil
import stat
import unicodedata
import ox
from sqlalchemy.orm import load_only
from sqlalchemy.schema import CreateTable
import sqlalchemy as sa
from changelog import Changelog
from db import MutableDict
import json_pickler
from .icons import icons
from .person import get_sort_name, Person
from queryparser import Parser
from settings import config
from utils import remove_empty_folders, get_ratio
from websocket import trigger_event
import db
import media
#import metaremote as meta
import meta
import settings
import state
import utils
import logging
logger = logging.getLogger(__name__)
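# many-to-many association between users and the items in their libraries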
user_items = sa.Table('useritem', db.metadata,
sa.Column('user_id', sa.String(43), sa.ForeignKey('user.id')),
sa.Column('item_id', sa.String(32), sa.ForeignKey('item.id'))
)
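# An Item is one book in the library. Its id is derived from the file's content
# hash (base32-encoded, 32 characters); 'info' holds file/derived metadata,
# 'meta' holds the editable metadata that is shared with peers.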
class Item(db.Model):
__tablename__ = 'item'
created = sa.Column(sa.DateTime())
modified = sa.Column(sa.DateTime())
id = sa.Column(sa.String(32), primary_key=True)
info = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))
meta = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))
# why is this in the db and not in e.g. info?
added = sa.Column(sa.DateTime()) # added to local library
accessed = sa.Column(sa.DateTime())
timesaccessed = sa.Column(sa.Integer())
users = sa.orm.relationship('User', secondary=user_items,
backref=sa.orm.backref('items', lazy='dynamic'))
@property
def timestamp(self):
return utils.datetime2ts(self.modified)
def __repr__(self):
return self.id
def __init__(self, id):
if isinstance(id, list):
id = base64.b32encode(hashlib.sha1(''.join(id).encode()).digest()).decode()
self.id = id
self.created = datetime.utcnow()
self.modified = datetime.utcnow()
self.info = {}
self.meta = {}
@classmethod
def get(cls, id):
if isinstance(id, list):
id = base64.b32encode(hashlib.sha1(''.join(id).encode()).digest()).decode()
return cls.query.filter_by(id=id).first()
@classmethod
def get_or_create(cls, id, info=None):
if isinstance(id, list):
id = base64.b32encode(hashlib.sha1(''.join(id).encode()).digest()).decode()
item = cls.query.filter_by(id=id).first()
if not item:
item = cls(id=id)
if info:
item.info = info
state.db.session.add(item)
state.db.session.commit()
return item
@classmethod
def find(cls, data):
return Parser(cls, user_items, Find, Sort).find(data)
def add_user(self, user):
if user not in self.users:
self.users.append(user)
l = user.library
if self not in l.items:
l.items.append(self)
state.db.session.add(l)
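# serialize the item for the API/UI: merge info and meta (meta wins), optionally
# restricted to 'keys', with array keys normalized to lists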
def json(self, keys=None):
j = {}
j['id'] = self.id
j['created'] = self.created
j['modified'] = self.modified
j['timesaccessed'] = self.timesaccessed
j['accessed'] = self.accessed
j['added'] = self.added
t = Transfer.get(self.id)
if t:
j['transferadded'] = t.added
j['transferprogress'] = t.progress
# unused and slow
#j['users'] = list(map(str, list(self.users)))
if self.info:
meta_keys = [k for k in self.meta_keys if k != 'pages']
for key in self.info:
if (not keys or key in keys) and key not in meta_keys:
j[key] = self.info[key]
if self.meta:
j.update(self.meta)
for key in self.id_keys:
if key not in self.meta and key in j:
del j[key]
if keys:
for k in list(j):
if k not in keys:
del j[k]
for key in self.array_keys:
if key in j and not isinstance(j[key], list):
j[key] = [j[key]]
j['sharemetadata'] = j.get('sharemetadata', False)
return j
def get_path(self):
f = self.files.first()
return f.fullpath() if f else None
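# denormalize the sortable values defined in config['itemKeys'] into this item's Sort row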
def update_sort(self):
update = False
s = Sort.get_or_create(self.id)
for key in config['itemKeys']:
if key.get('sort'):
value = self.json().get(key['id'], None)
sort_type = key.get('sortType', key['type'])
if value:
if sort_type == 'integer':
if isinstance(value, str):
value = int(re.sub('[^0-9]', '', value))
else:
value = int(value)
elif sort_type == 'float':
value = float(value)
elif sort_type == 'date':
pass
elif sort_type == 'person':
if not isinstance(value, list):
value = [value]
value = list(map(get_sort_name, value))
value = ox.sort_string('\n'.join(value)).lower()
elif sort_type == 'title':
if isinstance(value, dict):
value = list(value.values())
if isinstance(value, list):
value = ''.join(value)
value = ox.get_sort_title(value)
value = utils.sort_title(value).lower()
elif sort_type == 'boolean':
pass
else:
if isinstance(value, list):
value = '\n'.join(value)
if value:
value = str(value)
value = ox.sort_string(value).lower()
elif isinstance(value, list): #empty list
value = None
if not value and sort_type != 'boolean':
value = None
if getattr(s, key['id']) != value:
setattr(s, key['id'], value)
update = True
if update:
state.db.session.add(s)
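# rebuild the Find rows (search index) for this item: one row per key/value with a
# lowercased, NFKD-normalized 'findvalue'; rows for removed values/keys are deleted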
def update_find(self):
def add(k, v):
f = Find.query.filter_by(item_id=self.id, key=k, value=v).first()
if not f:
f = Find(item_id=self.id, key=k)
if f.value != v:
f.findvalue = unicodedata.normalize('NFKD', v).lower()
f.value = v
state.db.session.add(f)
keys = []
for key in config['itemKeys']:
if key.get('find') or \
key.get('filter') or key.get('type') in [['string'], 'string'] or \
(key.get('type') == 'boolean' and key.get('sort')):
value = self.json().get(key['id'], None)
if key.get('filterMap') and value:
value = re.compile(key.get('filterMap')).findall(value)
if value: value = value[0]
if key.get('type') == 'boolean':
value = True if value else False
value = str(value).lower()
if value:
keys.append(key['id'])
if isinstance(value, dict):
value = ' '.join(list(value.values()))
if not isinstance(value, list):
value = [value]
value = [
v.decode('utf-8') if isinstance(v, bytes) else v
for v in value
]
for v in value:
add(key['id'], v)
for f in Find.query.filter_by(item_id=self.id,
key=key['id']).filter(Find.value.notin_(value)):
state.db.session.delete(f)
for f in Find.query.filter_by(item_id=self.id).filter(Find.key.notin_(keys)):
state.db.session.delete(f)
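# recompute derived info (mediastate), bump 'modified' and refresh sort/find tables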
def update(self, modified=None):
for key in ('mediastate', 'coverRatio', 'previewRatio'):
if key in self.meta:
if key not in self.info:
self.info[key] = self.meta[key]
del self.meta[key]
users = list(map(str, list(self.users)))
self.info['mediastate'] = 'available' # available, unavailable, transferring
t = Transfer.get(self.id)
if t and t.added and t.progress < 1:
self.info['mediastate'] = 'transferring'
else:
self.info['mediastate'] = 'available' if settings.USER_ID in users else 'unavailable'
if modified:
self.modified = modified
else:
self.modified = datetime.utcnow()
self.update_sort()
self.update_find()
self.save()
def save(self):
state.db.session.add(self)
state.db.session.commit()
def delete(self, commit=True):
Sort.query.filter_by(item_id=self.id).delete()
Transfer.query.filter_by(item_id=self.id).delete()
Scrape.query.filter_by(item_id=self.id).delete()
state.db.session.delete(self)
icons.clear('cover:%s' % self.id)
icons.clear('preview:%s' % self.id)
if commit:
state.db.session.commit()
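# metadata keys that can be edited by users and are propagated via the changelog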
meta_keys = (
'author',
'categories',
'cover',
'date',
'description',
'edition',
'isbn',
'language',
'pages',
'place',
'publisher',
'series',
'sharemetadata',
'tableofcontents',
'title'
)
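# apply edited metadata; changed keys are recorded as an 'edititem' changelog entry
# if the item is in the current user's library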
def update_metadata(self, data, modified=None):
update = False
record = {}
for key in self.meta_keys:
if key in data:
if self.meta.get(key) != data[key]:
record[key] = data[key]
self.meta[key] = data[key]
update = True
for key in list(self.meta):
if key not in self.meta_keys:
del self.meta[key]
update = True
if update:
self.update(modified)
self.save()
if 'cover' in record:
self.update_cover()
user = state.user()
if record and user in self.users:
Changelog.record_ts(user, modified, 'edititem', self.id, record)
if 'sharemetadata' in record and not record['sharemetadata']:
self.sync_metadata()
def edit(self, data, modified=None):
Scrape.query.filter_by(item_id=self.id).delete()
self.update_metadata(data, modified)
for f in self.files.all():
f.move()
def get_hash(self):
return utils.get_meta_hash(self.meta.copy())
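# if this item's metadata is not marked as shared, adopt the metadata of the first
# peer that does share theirs (or, if the item is not available locally, of the
# first peer that has it at all)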
def sync_metadata(self):
from user.models import Metadata
if self.meta.get('sharemetadata'):
return
peers = [u for u in self.users if u.id != settings.USER_ID]
peers.sort(key=lambda u: ox.sort_string(str(u.info.get('index', ''))
+ 'Z' + (u.info.get('name') or '')))
sync_from = None
first_peer = None
# get first peer with sharemetadata set
for u in peers:
m = Metadata.get(u.id, self.id)
if m:
if m.data.get('sharemetadata'):
sync_from = m
break
if not first_peer:
first_peer = m
# or fall back to the first peer that has this item,
# in case it's not available locally
if not sync_from and self.info.get('mediastate') != 'available' and first_peer:
#logger.debug('syncing from first peer that has item %s', first_peer)
sync_from = first_peer
if sync_from:
if self.get_hash() != sync_from.data_hash:
logger.debug('update %s with metadata from %s', self, sync_from.user_id)
record = {}
for key in sync_from.data:
if key != 'sharemetadata' and self.meta.get(key) != sync_from.data[key]:
record[key] = self.meta[key] = sync_from.data[key]
for key in set(self.meta)-set(sync_from.data):
record[key] = self.meta[key] = [] if key in self.array_keys else ''
self.update(sync_from.modified)
self.save()
user = state.user()
if record and user in self.users:
Changelog.record_ts(user, self.modified, 'edititem', self.id, record)
if 'cover' in record:
self.update_cover()
def extract_preview(self):
path = self.get_path()
if path:
return getattr(media, self.info['extension']).cover(path)
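# fetch the cover given in meta['cover'], cache it under 'cover:<id>' and keep its
# aspect ratio in info['coverRatio'] (falling back to the preview ratio)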
def update_cover(self):
key = 'cover:%s' % self.id
cover = None
if 'cover' in self.meta and self.meta['cover']:
try:
cover = ox.cache.read_url(self.meta['cover'])
except Exception:
logger.debug('unable to read cover url %s', self.meta['cover'])
cover = None
if cover:
icons[key] = cover
self.info['coverRatio'] = get_ratio(cover)
else:
del icons[key]
if not cover:
if 'previewRatio' in self.info:
self.info['coverRatio'] = self.info['previewRatio']
elif 'coverRatio' in self.info:
del self.info['coverRatio']
icons.clear('cover:%s:' % self.id)
def update_preview(self):
key = 'preview:%s' % self.id
preview = self.extract_preview()
if preview:
icons[key] = preview
self.info['previewRatio'] = get_ratio(preview)
if 'coverRatio' not in self.info:
self.info['coverRatio'] = self.info['previewRatio']
else:
del icons[key]
if 'previewRatio' in self.info:
del self.info['previewRatio']
if not preview:
if 'coverRatio' in self.info:
self.info['previewRatio'] = self.info['coverRatio']
elif 'previewRatio' in self.info:
del self.info['previewRatio']
icons.clear('preview:%s:' % self.id)
def update_icons(self):
self.update_cover()
self.update_preview()
def load_metadata(self):
'''
load metadata from user_metadata or get via isbn?
'''
for key in self.meta_keys:
if key in self.info:
if key not in self.meta:
self.meta[key] = self.info[key]
if key != 'pages':
del self.info[key]
#FIXME get from user_meta
if state.online:
if 'isbn' in self.meta:
data = meta.lookup('isbn', self.meta['isbn'])
if data:
self.meta.update(data)
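# queue the item for download: create a Transfer row at progress 0 and add it to the
# current user's library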
def queue_download(self):
u = state.user()
if u not in self.users:
t = Transfer.get_or_create(self.id)
if not t.added:
t.added = datetime.utcnow()
t.progress = 0
t.save()
logger.debug('queue %s for download', self.id)
self.add_user(u)
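# store downloaded content: verify that its content id matches this item, write the
# file into the library and record additem/edititem changelog entries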
def save_file(self, content):
u = state.user()
f = File.get(self.id)
content_id = media.get_id(data=content)
if content_id != self.id:
logger.debug('INVALID CONTENT %s vs %s', self.id, content_id)
return False
if not f:
path = '.import/%s.%s' % (self.id, self.info['extension'])
info = self.info.copy()
for key in ('mediastate', 'coverRatio', 'previewRatio'):
if key in info:
del info[key]
f = File.get_or_create(self.id, info, path=path)
path = self.get_path()
if not os.path.exists(path):
ox.makedirs(os.path.dirname(path))
with open(path, 'wb') as fd:
fd.write(content)
f.info = media.metadata(path)
f.save()
for key in ('tableofcontents', ):
if key not in self.meta and key in f.info:
self.meta[key] = f.info[key]
if u not in self.users:
self.add_user(u)
t = Transfer.get_or_create(self.id)
t.progress = 1
t.save()
self.added = datetime.utcnow()
Changelog.record(u, 'additem', self.id, f.info)
Changelog.record(u, 'edititem', self.id, self.meta)
self.update()
f.move()
self.update_icons()
self.save()
trigger_event('transfer', {
'id': self.id, 'progress': 1
})
return True
else:
logger.debug('TRIED TO SAVE EXISTING FILE!!!')
t = Transfer.get_or_create(self.id)
t.progress = 1
t.save()
self.update()
return False
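# delete the local file(s) and take the item out of the current user's library;
# the item itself is deleted once no users are left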
def remove_file(self):
for f in self.files.all():
path = f.fullpath()
if os.path.exists(path):
os.unlink(path)
remove_empty_folders(os.path.dirname(path))
state.db.session.delete(f)
user = state.user()
if user in self.users:
self.users.remove(user)
for l in self.lists.filter_by(user_id=user.id):
l.items.remove(self)
if not self.users:
self.delete()
else:
self.added = None
self.update()
Transfer.query.filter_by(item_id=self.id).delete()
Changelog.record(user, 'removeitem', self.id)
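# one row per item holding denormalized sort values; one indexed column per sortable
# key is added dynamically below from config['itemKeys']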
class Sort(db.Model):
__tablename__ = 'sort'
item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
item = sa.orm.relationship('Item', backref=sa.orm.backref('sort', lazy='dynamic'))
def __repr__(self):
return '%s_sort' % self.item_id
@classmethod
def get(cls, item_id):
return cls.query.filter_by(item_id=item_id).first()
@classmethod
def get_or_create(cls, item_id):
f = cls.get(item_id)
if not f:
f = cls(item_id=item_id)
state.db.session.add(f)
state.db.session.commit()
return f
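# add one indexed column to Sort for every sortable key in config['itemKeys']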
Item.sort_keys = []
for key in config['itemKeys']:
if key.get('sort'):
sort_type = key.get('sortType', key['type'])
if sort_type == 'integer':
col = sa.Column(sa.BigInteger(), index=True)
elif sort_type == 'float':
col = sa.Column(sa.Float(), index=True)
elif sort_type == 'date':
col = sa.Column(sa.DateTime(), index=True)
elif sort_type == 'boolean':
col = sa.Column(sa.Boolean(), index=True)
else:
col = sa.Column(sa.String(1000), index=True)
setattr(Sort, '%s' % key['id'], col)
Item.sort_keys.append(key['id'])
Item.id_keys = ['isbn', 'lccn', 'olid', 'oclc', 'asin']
Item.item_keys = config['itemKeys']
Item.filter_keys = [k['id'] for k in config['itemKeys'] if k.get('filter')]
Item.array_keys = [k['id'] for k in config['itemKeys'] if isinstance(k['type'], list)]
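# inverted index over item metadata, used by the query parser (one row per item/key/value)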
class Find(db.Model):
__tablename__ = 'find'
id = sa.Column(sa.Integer(), primary_key=True)
item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'))
item = sa.orm.relationship('Item', backref=sa.orm.backref('find_', lazy='dynamic'))
key = sa.Column(sa.String(200), index=True)
value = sa.Column(sa.Text())
findvalue = sa.Column(sa.Text(), index=True)
def __repr__(self):
return '%s=%s' % (self.key, self.findvalue)
@classmethod
def get(cls, item, key):
return cls.query.filter_by(item_id=item, key=key).first()
@classmethod
def get_or_create(cls, item, key):
f = cls.get(item, key)
if not f:
f = cls(item_id=item, key=key)
state.db.session.add(f)
state.db.session.commit()
return f
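# a local file belonging to an item, keyed by its content id ('sha1') and stored
# relative to the library's Books/ folder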
class File(db.Model):
__tablename__ = 'file'
created = sa.Column(sa.DateTime())
modified = sa.Column(sa.DateTime())
sha1 = sa.Column(sa.String(32), primary_key=True)
path = sa.Column(sa.String(2048))
info = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))
item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'))
item = sa.orm.relationship('Item', backref=sa.orm.backref('files', lazy='dynamic'))
@classmethod
def get(cls, sha1):
return cls.query.filter_by(sha1=sha1).first()
@classmethod
def get_or_create(cls, sha1, info=None, path=None):
f = cls.get(sha1)
if not f:
f = cls(sha1=sha1)
if info:
f.info = info
if path:
f.path = path
f.item_id = Item.get_or_create(id=sha1, info=info).id
state.db.session.add(f)
state.db.session.commit()
return f
def __repr__(self):
return self.sha1
def __init__(self, sha1):
self.sha1 = sha1
self.created = datetime.utcnow()
self.modified = datetime.utcnow()
def fullpath(self):
prefs = settings.preferences
prefix = os.path.join(os.path.expanduser(prefs['libraryPath']), 'Books/')
return os.path.join(prefix, self.path)
def make_readonly(self):
current_path = self.fullpath()
if os.path.exists(current_path):
mode = os.stat(current_path)[stat.ST_MODE]
readonly = mode & ~stat.S_IWUSR & ~stat.S_IWGRP & ~stat.S_IWOTH
if mode != readonly:
os.chmod(current_path, readonly)
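# move the file to its canonical library path, roughly
# <Initial>/<Author>/<Title> (<Publisher Year>).<extension>,
# disambiguating name collisions with a growing prefix of the file id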
def move(self):
def format_underscores(string):
return re.sub(r'^\.|\.$|:|/|\?|<|>', '_', string)
prefs = settings.preferences
prefix = os.path.join(os.path.expanduser(prefs['libraryPath']), 'Books/')
j = self.item.json()
current_path = self.fullpath()
if not os.path.exists(current_path):
logger.debug('file is missing. %s', current_path)
return
self.make_readonly()
author = '; '.join([get_sort_name(a) for a in j.get('author', [])])
if not author:
author = 'Unknown Author'
if ' (Ed.)' in author:
author = author.replace(' (Ed.)', '') + ' (Ed.)'
title = j.get('title', 'Untitled')
extension = j['extension']
if len(title) > 100:
title = title[:100]
title = format_underscores(title)
author = format_underscores(author)
publisher = j.get('publisher')
if publisher:
extra = ', '.join(publisher)
else:
extra = ''
date = j.get('date')
if date and len(date) >= 4:
extra += ' ' + date[:4]
if extra:
extra = format_underscores(extra)
title = '%s (%s)' % (title, extra.strip())
filename = '%s.%s' % (title, extension)
first = unicodedata.normalize('NFD', author[0].upper())[0].upper()
new_path = os.path.join(first, author, filename)
new_path = new_path.replace('\x00', '')
new_path = ox.decode_html(new_path)
if self.path == new_path:
return
h = ''
while os.path.exists(os.path.join(prefix, new_path)):
h = self.sha1[:len(h)+1]
filename = '%s.%s.%s' % (title, h, extension)
first = unicodedata.normalize('NFD', author[0].upper())[0].upper()
new_path = os.path.join(first, author, filename)
if current_path == os.path.join(prefix, new_path):
break
if self.path != new_path:
path = os.path.join(prefix, new_path)
ox.makedirs(os.path.dirname(path))
shutil.move(current_path, path)
self.path = new_path
self.save()
for folder in set(os.path.dirname(p) for p in [current_path, path]):
remove_empty_folders(folder)
def save(self):
state.db.session.add(self)
state.db.session.commit()
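# items queued for remote metadata lookup; entries are removed once an item is edited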
class Scrape(db.Model):
__tablename__ = 'scrape'
item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
item = sa.orm.relationship('Item', backref=sa.orm.backref('scraping', lazy='dynamic'))
added = sa.Column(sa.DateTime())
def __repr__(self):
return '='.join(map(str, [self.item_id, self.added]))
@classmethod
def get(cls, item_id):
return cls.query.filter_by(item_id=item_id).first()
@classmethod
def get_or_create(cls, item_id):
t = cls.get(item_id)
if not t:
t = cls(item_id=item_id)
t.added = datetime.utcnow()
t.save()
return t
def save(self):
state.db.session.add(self)
state.db.session.commit()
def remove(self):
state.db.session.delete(self)
state.db.session.commit()
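# per-item download state: when the transfer was queued and its progress (0..1)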
class Transfer(db.Model):
__tablename__ = 'transfer'
item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
item = sa.orm.relationship('Item', backref=sa.orm.backref('transfer', lazy='dynamic'))
added = sa.Column(sa.DateTime())
progress = sa.Column(sa.Float())
def __repr__(self):
return '='.join(map(str, [self.item_id, self.progress]))
@classmethod
def get(cls, item_id):
return cls.query.filter_by(item_id=item_id).first()
@classmethod
def get_or_create(cls, item_id):
t = cls.get(item_id)
if not t:
t = cls(item_id=item_id)
t.added = datetime.utcnow()
t.progress = 0
t.save()
return t
def save(self):
state.db.session.add(self)
state.db.session.commit()
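# cached metadata keyed by an identifier key/value pair; edits are shared as
# 'editmeta' changelog entries and applied to all items matching that identifier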
class Metadata(db.Model):
__tablename__ = 'metadata'
created = sa.Column(sa.DateTime())
modified = sa.Column(sa.DateTime())
id = sa.Column(sa.Integer(), primary_key=True)
key = sa.Column(sa.String(256))
value = sa.Column(sa.String(256))
data = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))
def __repr__(self):
return '='.join([self.key, self.value])
@property
def timestamp(self):
return utils.datetime2ts(self.modified)
@classmethod
def get(cls, key, value):
return cls.query.filter_by(key=key, value=value).first()
@classmethod
def get_or_create(cls, key, value):
m = cls.get(key, value)
if not m:
m = cls(key=key, value=value)
m.created = datetime.utcnow()
m.data = {}
m.save()
return m
def save(self):
self.modified = datetime.utcnow()
state.db.session.add(self)
state.db.session.commit()
def reset(self):
user = state.user()
Changelog.record(user, 'resetmeta', self.key, self.value)
state.db.session.delete(self)
state.db.session.commit()
self.update_items()
def edit(self, data, record=True):
changed = {}
for key in data:
if key == 'id':
continue
if data[key] != self.data.get(key):
self.data[key] = data[key]
changed[key] = data[key]
if changed:
self.save()
if record:
user = state.user()
Changelog.record(user, 'editmeta', self.key, self.value, changed)
return changed
def update_items(self):
for f in Find.query.filter_by(key=self.key, value=self.value):
if f.item:
f.item.update()
@classmethod
def load(cls, key, value):
m = cls.get(key, value)
if m:
if 'id' in m.data:
del m.data['id']
return m.data
return {}
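# garbage-collect Person rows whose sort name is no longer used by any item's authors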
def remove_unused_names():
used = list(set(
get_sort_name(a)
for i in Item.query
for a in i.meta.get('author', [])
))
for p in Person.query.filter(Person.sortname.notin_(used)):
state.db.session.delete(p)
state.db.session.commit()
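# migrate the 'sort' table after config['itemKeys'] changes: drop obsolete columns,
# add missing ones and (re)create their indexes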
def update_sort_table():
current = db.get_table_columns('sort')
drop_columns = list(set(current) - set(Item.sort_keys+['item_id']))
if drop_columns:
db.drop_columns('sort', drop_columns)
add_columns = list(set(Item.sort_keys)-set(current+['item_id']))
if add_columns:
create_table = str(CreateTable(Sort.__table__).compile(db.engine)).split('\n')
sql = []
for col in add_columns:
add = [r for r in create_table if '\t%s ' % col in r][0].strip()[:-1]
sql.append('ALTER TABLE sort ADD '+add)
sql.append('CREATE INDEX ix_sort_{col} ON sort ({col})'.format(col=col))
with db.session() as s:
for q in sql:
s.connection().execute(q)
s.commit()
layout = db.get_layout()
sort_indexes = [i[len('ix_sort_'):] for i in layout['indexes'] if i.startswith('ix_sort_')]
sql = []
for col in set(Item.sort_keys)-set(sort_indexes):
sql.append('CREATE INDEX ix_sort_{col} ON sort ({col})'.format(col=col))
if sql:
with db.session() as s:
for q in sql:
s.connection().execute(q)
s.commit()
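# sync metadata for the given item ids (or all items) in batches of 'step',
# re-queuing the remainder via the task queue after 'delay' seconds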
def sync_metadata(ids=None):
#logger.debug('sync_metadata(%s)', len(ids) if ids else ids)
step = 1000
delay = 10
with db.session():
if not ids:
ids = [i.id for i in Item.query.options(load_only('id'))]
if len(ids) > step:
later = ids[step:]
ids = ids[:step]
else:
later = []
if ids:
for i in Item.query.filter(Item.id.in_(ids)):
i.sync_metadata()
if later:
if state.main and state.tasks:
state.main.call_later(delay, lambda: state.tasks.queue('syncmetadata', [later]))
#else:
# logger.debug('sync_metadata done')