store metadata per user. remove primaryid. only store isbn13
This commit is contained in:
parent
90648f9e65
commit
02e040d9f5
16 changed files with 245 additions and 192 deletions
|
@ -159,44 +159,20 @@ class Changelog(db.Model):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def action_edititem(self, user, timestamp, itemid, meta):
|
def action_edititem(self, user, timestamp, itemid, meta):
|
||||||
|
from user.models import Metadata
|
||||||
|
m = Metadata.get_or_create(user.id, itemid)
|
||||||
|
m.edit(meta)
|
||||||
|
'''
|
||||||
|
FIXME: "sometimes" update item too...
|
||||||
from item.models import Item
|
from item.models import Item
|
||||||
i = Item.get(itemid)
|
i = Item.get(itemid)
|
||||||
if itemid == 'RDWQN35QAY6VW2UQEINOTQXLUCOFKIZK':
|
i.edit(meta, ts2datetime(timestamp))
|
||||||
return True
|
'''
|
||||||
if not i:
|
|
||||||
logger.debug('ignore edititem for unknown item %s %s', timestamp, itemid)
|
|
||||||
return True
|
|
||||||
if i.timestamp > timestamp:
|
|
||||||
logger.debug('ignore edititem change %s %s %s', timestamp, itemid, meta)
|
|
||||||
return True
|
|
||||||
if state.user() in i.users:
|
|
||||||
logger.debug('ignore edititem events for own items %s %s %s', timestamp, itemid, meta)
|
|
||||||
logger.debug('----------------------item.modified: %s', i.modified)
|
|
||||||
return True
|
|
||||||
primary = None
|
|
||||||
if 'primaryid' in meta:
|
|
||||||
primary = meta['primaryid']
|
|
||||||
key = primary[0]
|
|
||||||
else:
|
|
||||||
keys = [k for k in meta if k in Item.id_keys]
|
|
||||||
if keys:
|
|
||||||
key = keys[0]
|
|
||||||
primary = [key, meta[key]]
|
|
||||||
i.modified = ts2datetime(timestamp)
|
|
||||||
if primary:
|
|
||||||
if not meta[key] and i.meta.get('primaryid', [''])[0] == key:
|
|
||||||
logger.debug('remove id mapping %s %s', i.id, primary)
|
|
||||||
i.update_primaryid(*primary, scrape=False, modified=i.modified)
|
|
||||||
elif meta[key] and i.meta.get('primaryid') != primary:
|
|
||||||
logger.debug('edit mapping %s %s', i.id, primary)
|
|
||||||
i.update_primaryid(*primary, scrape=False, modified=i.modified)
|
|
||||||
else:
|
|
||||||
i.update_meta(meta, modified=i.modified)
|
|
||||||
i.save()
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def action_removeitem(self, user, timestamp, itemid):
|
def action_removeitem(self, user, timestamp, itemid):
|
||||||
from item.models import Item
|
from item.models import Item
|
||||||
|
from user.models import Metadata
|
||||||
i = Item.get(itemid)
|
i = Item.get(itemid)
|
||||||
if i:
|
if i:
|
||||||
if user in i.users:
|
if user in i.users:
|
||||||
|
@ -205,6 +181,7 @@ class Changelog(db.Model):
|
||||||
i.update()
|
i.update()
|
||||||
else:
|
else:
|
||||||
i.delete()
|
i.delete()
|
||||||
|
Metadata.query(user_id=user.id, item_id=itemid).delete()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def action_addlist(self, user, timestamp, name, query=None):
|
def action_addlist(self, user, timestamp, name, query=None):
|
||||||
|
@ -289,6 +266,8 @@ class Changelog(db.Model):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def action_editmeta(self, user, timestamp, key, value, data):
|
def action_editmeta(self, user, timestamp, key, value, data):
|
||||||
|
return True
|
||||||
|
'''>> Metadata no longer tracked per isbn'''
|
||||||
from item.models import Metadata
|
from item.models import Metadata
|
||||||
m = Metadata.get(key, value)
|
m = Metadata.get(key, value)
|
||||||
if not m or m.timestamp < timestamp:
|
if not m or m.timestamp < timestamp:
|
||||||
|
@ -299,6 +278,7 @@ class Changelog(db.Model):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def action_resetmeta(self, user, timestamp, key, value):
|
def action_resetmeta(self, user, timestamp, key, value):
|
||||||
|
return True
|
||||||
from item.models import Metadata
|
from item.models import Metadata
|
||||||
m = Metadata.get(key, value)
|
m = Metadata.get(key, value)
|
||||||
if m and m.timestamp < timestamp:
|
if m and m.timestamp < timestamp:
|
||||||
|
|
|
@ -132,17 +132,7 @@ def edit(data):
|
||||||
for id in ids:
|
for id in ids:
|
||||||
item = models.Item.get(id)
|
item = models.Item.get(id)
|
||||||
if item and item.json()['mediastate'] == 'available':
|
if item and item.json()['mediastate'] == 'available':
|
||||||
if 'primaryid' in data:
|
item.edit(data)
|
||||||
if data['primaryid']:
|
|
||||||
key, value = data['primaryid']
|
|
||||||
logger.debug('update primaryid %s %s', key, value)
|
|
||||||
value = cleanup_id(key, value)
|
|
||||||
item.update_primaryid(key, value)
|
|
||||||
else:
|
|
||||||
item.update_primaryid()
|
|
||||||
response = item.json()
|
|
||||||
else:
|
|
||||||
item.edit_metadata(data)
|
|
||||||
response = item.json()
|
response = item.json()
|
||||||
edited.append(id)
|
edited.append(id)
|
||||||
else:
|
else:
|
||||||
|
@ -264,8 +254,6 @@ def getMetadata(data):
|
||||||
for key in [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]:
|
for key in [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]:
|
||||||
if key in response and not isinstance(response[key], list):
|
if key in response and not isinstance(response[key], list):
|
||||||
response[key] = [response[key]]
|
response[key] = [response[key]]
|
||||||
if response:
|
|
||||||
response['primaryid'] = [key, value]
|
|
||||||
return response
|
return response
|
||||||
actions.register(getMetadata)
|
actions.register(getMetadata)
|
||||||
|
|
||||||
|
|
|
@ -121,11 +121,14 @@ class Item(db.Model):
|
||||||
#j['users'] = list(map(str, list(self.users)))
|
#j['users'] = list(map(str, list(self.users)))
|
||||||
|
|
||||||
if self.info:
|
if self.info:
|
||||||
j.update(self.info)
|
for key in self.info:
|
||||||
|
if (not keys or key in keys) and key not in self.meta_keys:
|
||||||
|
j[key] = self.info[key]
|
||||||
|
|
||||||
if self.meta:
|
if self.meta:
|
||||||
j.update(self.meta)
|
j.update(self.meta)
|
||||||
|
|
||||||
for key in self.id_keys + ['primaryid']:
|
for key in self.id_keys:
|
||||||
if key not in self.meta and key in j:
|
if key not in self.meta and key in j:
|
||||||
del j[key]
|
del j[key]
|
||||||
if keys:
|
if keys:
|
||||||
|
@ -220,7 +223,7 @@ class Item(db.Model):
|
||||||
for f in Find.query.filter_by(item_id=self.id).filter(Find.key.notin_(keys)):
|
for f in Find.query.filter_by(item_id=self.id).filter(Find.key.notin_(keys)):
|
||||||
state.db.session.delete(f)
|
state.db.session.delete(f)
|
||||||
|
|
||||||
def update(self):
|
def update(self, modified=None):
|
||||||
for key in ('mediastate', 'coverRatio', 'previewRatio'):
|
for key in ('mediastate', 'coverRatio', 'previewRatio'):
|
||||||
if key in self.meta:
|
if key in self.meta:
|
||||||
if key not in self.info:
|
if key not in self.info:
|
||||||
|
@ -233,17 +236,12 @@ class Item(db.Model):
|
||||||
self.info['mediastate'] = 'transferring'
|
self.info['mediastate'] = 'transferring'
|
||||||
else:
|
else:
|
||||||
self.info['mediastate'] = 'available' if settings.USER_ID in users else 'unavailable'
|
self.info['mediastate'] = 'available' if settings.USER_ID in users else 'unavailable'
|
||||||
if 'primaryid' in self.meta:
|
if modified:
|
||||||
# self.meta.update does not trigger db update!
|
self.modified = modified
|
||||||
m = Metadata.load(*self.meta['primaryid'])
|
else:
|
||||||
for key in m:
|
|
||||||
if key == 'id':
|
|
||||||
continue
|
|
||||||
self.meta[key] = m[key]
|
|
||||||
self.modified = datetime.utcnow()
|
self.modified = datetime.utcnow()
|
||||||
self.update_sort()
|
self.update_sort()
|
||||||
self.update_find()
|
self.update_find()
|
||||||
#self.modified = datetime.utcnow()
|
|
||||||
self.save()
|
self.save()
|
||||||
|
|
||||||
def save(self):
|
def save(self):
|
||||||
|
@ -260,14 +258,18 @@ class Item(db.Model):
|
||||||
|
|
||||||
meta_keys = (
|
meta_keys = (
|
||||||
'author',
|
'author',
|
||||||
'classification',
|
'categories',
|
||||||
|
'cover',
|
||||||
'date',
|
'date',
|
||||||
'description',
|
'description',
|
||||||
'edition',
|
'edition',
|
||||||
|
'isbn',
|
||||||
'language',
|
'language',
|
||||||
'pages',
|
'pages',
|
||||||
'place',
|
'place',
|
||||||
'publisher',
|
'publisher',
|
||||||
|
'series',
|
||||||
|
'tableofcontents',
|
||||||
'title'
|
'title'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -285,64 +287,17 @@ class Item(db.Model):
|
||||||
del self.meta[key]
|
del self.meta[key]
|
||||||
update = True
|
update = True
|
||||||
if update:
|
if update:
|
||||||
self.update()
|
self.update(modified)
|
||||||
if not modified:
|
|
||||||
modified = datetime.utcnow()
|
|
||||||
self.modified = modified
|
|
||||||
self.save()
|
self.save()
|
||||||
|
if 'cover' in record:
|
||||||
|
self.update_icons()
|
||||||
user = state.user()
|
user = state.user()
|
||||||
if record and user in self.users:
|
if record and user in self.users:
|
||||||
Changelog.record_ts(user, modified, 'edititem', self.id, record)
|
Changelog.record_ts(user, modified, 'edititem', self.id, record)
|
||||||
|
|
||||||
def update_primaryid(self, key=None, id=None, scrape=True, modified=None):
|
def edit(self, data, modified=None):
|
||||||
if key is None and id is None:
|
|
||||||
if 'primaryid' not in self.meta:
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
key = self.meta['primaryid'][0]
|
|
||||||
record = {}
|
|
||||||
if id:
|
|
||||||
if not key in self.meta or not key in self.meta[key]:
|
|
||||||
self.meta[key] = list(set([id] + self.meta.get(key, [])))
|
|
||||||
self.meta['primaryid'] = [key, id]
|
|
||||||
record[key] = id
|
|
||||||
else:
|
|
||||||
if key in self.meta:
|
|
||||||
del self.meta[key]
|
|
||||||
if 'primaryid' in self.meta:
|
|
||||||
del self.meta['primaryid']
|
|
||||||
record[key] = ''
|
|
||||||
for k in self.id_keys:
|
|
||||||
if k != key:
|
|
||||||
if k in self.meta:
|
|
||||||
del self.meta[k]
|
|
||||||
logger.debug('set primaryid %s %s', key, id)
|
|
||||||
|
|
||||||
# get metadata from external resources
|
|
||||||
if scrape:
|
|
||||||
self.scrape()
|
|
||||||
self.update_icons()
|
|
||||||
if not modified:
|
|
||||||
modified = datetime.utcnow()
|
|
||||||
self.modified = modified
|
|
||||||
self.save()
|
|
||||||
#if not scrape:
|
|
||||||
# Scrape.get_or_create(self.id)
|
|
||||||
for f in self.files.all():
|
|
||||||
f.move()
|
|
||||||
user = state.user()
|
|
||||||
if user in self.users:
|
|
||||||
Changelog.record_ts(user, modified, 'edititem', self.id, record)
|
|
||||||
|
|
||||||
def edit_metadata(self, data):
|
|
||||||
Scrape.query.filter_by(item_id=self.id).delete()
|
Scrape.query.filter_by(item_id=self.id).delete()
|
||||||
if 'primaryid' in self.meta:
|
self.update_meta(data, modified)
|
||||||
logger.debug('m: %s', self.meta['primaryid'])
|
|
||||||
m = Metadata.get_or_create(*self.meta['primaryid'])
|
|
||||||
if m.edit(data):
|
|
||||||
self.update()
|
|
||||||
else:
|
|
||||||
self.update_meta(data)
|
|
||||||
for f in self.files.all():
|
for f in self.files.all():
|
||||||
f.move()
|
f.move()
|
||||||
|
|
||||||
|
@ -388,23 +343,19 @@ class Item(db.Model):
|
||||||
for resolution in (128, 256, 512):
|
for resolution in (128, 256, 512):
|
||||||
del icons['%s:%s' % (key, resolution)]
|
del icons['%s:%s' % (key, resolution)]
|
||||||
|
|
||||||
def scrape(self):
|
def load_metadata(self):
|
||||||
primaryid = self.meta.get('primaryid')
|
'''
|
||||||
logger.debug('scrape %s', primaryid)
|
load metadata from user_metadata or get via isbn?
|
||||||
if primaryid:
|
'''
|
||||||
try:
|
for key in self.meta_keys:
|
||||||
m = meta.lookup(*primaryid)
|
if key not in self.meta and key in self.info:
|
||||||
except:
|
self.meta[key] = self.info[key]
|
||||||
logger.debug('meta.lookup %s failed:', primaryid, exc_info=True)
|
#FIXME get from user_meta
|
||||||
m = None
|
if state.online:
|
||||||
if m:
|
if 'isbn' in self.meta:
|
||||||
m['primaryid'] = primaryid
|
data = meta.lookup('isbn', self.meta['isbn'])
|
||||||
self.meta = m
|
if data:
|
||||||
self.modified = datetime.utcnow()
|
self.meta.update(data)
|
||||||
self.update()
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def queue_download(self):
|
def queue_download(self):
|
||||||
u = state.user()
|
u = state.user()
|
||||||
|
|
|
@ -46,23 +46,14 @@ def add_file(id, f, prefix, from_=None):
|
||||||
data = media.metadata(f, from_)
|
data = media.metadata(f, from_)
|
||||||
file = File.get_or_create(id, data, path)
|
file = File.get_or_create(id, data, path)
|
||||||
item = file.item
|
item = file.item
|
||||||
if 'primaryid' in file.info:
|
|
||||||
del file.info['primaryid']
|
|
||||||
state.db.session.add(file)
|
|
||||||
if 'primaryid' in item.info:
|
|
||||||
item.meta['primaryid'] = item.info.pop('primaryid')
|
|
||||||
state.db.session.add(item)
|
|
||||||
item.add_user(user)
|
item.add_user(user)
|
||||||
Changelog.record(user, 'additem', item.id, file.info)
|
|
||||||
item.added = datetime.utcnow()
|
item.added = datetime.utcnow()
|
||||||
if state.online:
|
item.load_metadata()
|
||||||
item.scrape()
|
Changelog.record(user, 'additem', item.id, file.info)
|
||||||
#Changelog.record(user, 'edititem', item.id, dict([item.meta['primaryid']]))
|
|
||||||
Changelog.record(user, 'edititem', item.id, item.meta)
|
Changelog.record(user, 'edititem', item.id, item.meta)
|
||||||
item.update_icons()
|
item.update_icons()
|
||||||
item.modified = datetime.utcnow()
|
item.modified = datetime.utcnow()
|
||||||
item.update()
|
item.update()
|
||||||
#Scrape.get_or_create(item.id)
|
|
||||||
return file
|
return file
|
||||||
|
|
||||||
def run_scan():
|
def run_scan():
|
||||||
|
|
|
@ -15,7 +15,7 @@ from . import epub
|
||||||
from . import txt
|
from . import txt
|
||||||
from . import opf
|
from . import opf
|
||||||
|
|
||||||
from meta.utils import decode_html_data
|
from meta.utils import decode_html_data, to_isbn13
|
||||||
|
|
||||||
def get_id(f=None, data=None):
|
def get_id(f=None, data=None):
|
||||||
if data:
|
if data:
|
||||||
|
@ -23,7 +23,6 @@ def get_id(f=None, data=None):
|
||||||
else:
|
else:
|
||||||
return base64.b32encode(codecs.decode(ox.sha1sum(f, cached=True), 'hex')).decode()
|
return base64.b32encode(codecs.decode(ox.sha1sum(f, cached=True), 'hex')).decode()
|
||||||
|
|
||||||
|
|
||||||
def metadata(f, from_=None):
|
def metadata(f, from_=None):
|
||||||
ext = f.split('.')[-1]
|
ext = f.split('.')[-1]
|
||||||
data = {}
|
data = {}
|
||||||
|
@ -64,10 +63,17 @@ def metadata(f, from_=None):
|
||||||
data[key] = data[key].replace('\x00', '')
|
data[key] = data[key].replace('\x00', '')
|
||||||
elif isinstance(data[key], list):
|
elif isinstance(data[key], list):
|
||||||
data[key] = [e.replace('\x00', '') if isinstance(e, str) else e for e in data[key]]
|
data[key] = [e.replace('\x00', '') if isinstance(e, str) else e for e in data[key]]
|
||||||
if 'isbn' in data:
|
if 'isbn' in data and isinstance(data['isbn'], list):
|
||||||
data['primaryid'] = ['isbn', data['isbn'][0]]
|
isbns = set()
|
||||||
elif 'asin' in data:
|
for i in data['isbn']:
|
||||||
data['primaryid'] = ['asin', data['asin'][0]]
|
i = to_isbn13(i)
|
||||||
|
if i:
|
||||||
|
isbns.add(i)
|
||||||
|
if isbns:
|
||||||
|
data['isbn'] = list(isbns)[0]
|
||||||
|
else:
|
||||||
|
del data['isbn']
|
||||||
|
|
||||||
if 'author' in data:
|
if 'author' in data:
|
||||||
if isinstance(data['author'], str):
|
if isinstance(data['author'], str):
|
||||||
if data['author'].strip():
|
if data['author'].strip():
|
||||||
|
|
|
@ -5,14 +5,13 @@
|
||||||
import os
|
import os
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import zipfile
|
import zipfile
|
||||||
from io import BytesIO
|
|
||||||
import re
|
import re
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
import lxml.html
|
||||||
|
|
||||||
import stdnum.isbn
|
|
||||||
from ox import strip_tags, decode_html
|
from ox import strip_tags, decode_html
|
||||||
|
|
||||||
from utils import normalize_isbn, find_isbns, get_language
|
from utils import find_isbns, get_language, to_isbn13
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -104,13 +103,24 @@ def info(epub):
|
||||||
}.get(key, key)
|
}.get(key, key)
|
||||||
value = e.text.strip()
|
value = e.text.strip()
|
||||||
if key == 'identifier':
|
if key == 'identifier':
|
||||||
value = normalize_isbn(value)
|
value = to_isbn13(value)
|
||||||
if stdnum.isbn.is_valid(value):
|
if value:
|
||||||
data['isbn'] = [value]
|
data['isbn'] = value
|
||||||
elif key == 'author':
|
elif key == 'author':
|
||||||
data[key] = value.split(', ')
|
data[key] = value.split(', ')
|
||||||
else:
|
else:
|
||||||
data[key] = value
|
data[key] = value
|
||||||
|
guide = info.findall('{http://www.idpf.org/2007/opf}guide')
|
||||||
|
if guide:
|
||||||
|
for ref in guide[0].findall('{http://www.idpf.org/2007/opf}reference'):
|
||||||
|
if ref.attrib.get('type') == 'toc':
|
||||||
|
filename = unquote(ref.attrib['href'])
|
||||||
|
filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename))
|
||||||
|
toc = z.read(filename)
|
||||||
|
if toc:
|
||||||
|
doc = lxml.html.document_fromstring(toc)
|
||||||
|
data['tableofcontents'] = '\n'.join([a.text_content() for a in doc.xpath('//a')])
|
||||||
|
|
||||||
if 'description' in data:
|
if 'description' in data:
|
||||||
data['description'] = strip_tags(decode_html(data['description']))
|
data['description'] = strip_tags(decode_html(data['description']))
|
||||||
text = extract_text(epub)
|
text = extract_text(epub)
|
||||||
|
@ -118,7 +128,7 @@ def info(epub):
|
||||||
if not 'isbn' in data:
|
if not 'isbn' in data:
|
||||||
isbn = extract_isbn(text)
|
isbn = extract_isbn(text)
|
||||||
if isbn:
|
if isbn:
|
||||||
data['isbn'] = [isbn]
|
data['isbn'] = isbn
|
||||||
if 'date' in data and 'T' in data['date']:
|
if 'date' in data and 'T' in data['date']:
|
||||||
data['date'] = data['date'].split('T')[0]
|
data['date'] = data['date'].split('T')[0]
|
||||||
if 'language' in data and isinstance(data['language'], str):
|
if 'language' in data and isinstance(data['language'], str):
|
||||||
|
@ -139,4 +149,3 @@ def extract_isbn(data):
|
||||||
isbns = find_isbns(data)
|
isbns = find_isbns(data)
|
||||||
if isbns:
|
if isbns:
|
||||||
return isbns[0]
|
return isbns[0]
|
||||||
|
|
||||||
|
|
|
@ -4,11 +4,9 @@
|
||||||
|
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
import stdnum.isbn
|
|
||||||
|
|
||||||
from utils import normalize_isbn, get_language
|
from utils import get_language, to_isbn13
|
||||||
from ox import strip_tags
|
from ox import strip_tags
|
||||||
import ox.iso
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -31,12 +29,9 @@ def info(opf):
|
||||||
}.get(key, key)
|
}.get(key, key)
|
||||||
value = e.text
|
value = e.text
|
||||||
if key == 'identifier':
|
if key == 'identifier':
|
||||||
isbn = normalize_isbn(value)
|
isbn = to_isbn13(value)
|
||||||
if stdnum.isbn.is_valid(isbn):
|
if isbn:
|
||||||
if not 'isbn' in data:
|
data['isbn'] = isbn
|
||||||
data['isbn'] = [isbn]
|
|
||||||
else:
|
|
||||||
data['isbn'].append(isbn)
|
|
||||||
if e.attrib.get(ns + 'scheme') == 'AMAZON':
|
if e.attrib.get(ns + 'scheme') == 'AMAZON':
|
||||||
if not 'asin' in data:
|
if not 'asin' in data:
|
||||||
data['asin'] = [value]
|
data['asin'] = [value]
|
||||||
|
|
|
@ -11,11 +11,10 @@ from glob import glob
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from PyPDF2 import PdfFileReader
|
from PyPDF2 import PdfFileReader
|
||||||
import stdnum.isbn
|
|
||||||
import ox
|
import ox
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
from utils import normalize_isbn, find_isbns, get_language
|
from utils import get_language, to_isbn13, find_isbns
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -151,9 +150,9 @@ def info(pdf):
|
||||||
del data[key]
|
del data[key]
|
||||||
'''
|
'''
|
||||||
if 'identifier' in data:
|
if 'identifier' in data:
|
||||||
value = normalize_isbn(data['identifier'])
|
value = to_isbn13(data['identifier'])
|
||||||
if stdnum.isbn.is_valid(value):
|
if value:
|
||||||
data['isbn'] = [value]
|
data['isbn'] = value
|
||||||
del data['identifier']
|
del data['identifier']
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
|
@ -170,9 +169,7 @@ def info(pdf):
|
||||||
if not 'isbn' in data:
|
if not 'isbn' in data:
|
||||||
isbn = extract_isbn(text)
|
isbn = extract_isbn(text)
|
||||||
if isbn:
|
if isbn:
|
||||||
data['isbn'] = [isbn]
|
data['isbn'] = isbn
|
||||||
if 'isbn' in data and isinstance(data['isbn'], str):
|
|
||||||
data['isbn'] = [data['isbn']]
|
|
||||||
if 'date' in data and len(data['date']) == 8 and data['date'].isdigit():
|
if 'date' in data and len(data['date']) == 8 and data['date'].isdigit():
|
||||||
d = data['date']
|
d = data['date']
|
||||||
data['date'] = '%s-%s-%s' % (d[:4], d[4:6], d[6:])
|
data['date'] = '%s-%s-%s' % (d[:4], d[4:6], d[6:])
|
||||||
|
|
|
@ -23,7 +23,7 @@ def info(path):
|
||||||
text = extract_text(path)
|
text = extract_text(path)
|
||||||
isbn = extract_isbn(text)
|
isbn = extract_isbn(text)
|
||||||
if isbn:
|
if isbn:
|
||||||
data['isbn'] = [isbn]
|
data['isbn'] = isbn
|
||||||
data['textsize'] = len(text)
|
data['textsize'] = len(text)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
@ -46,11 +46,10 @@ def info(key, value):
|
||||||
info['publisher'], info['edition'] = info['publisher'].split('; ', 1)
|
info['publisher'], info['edition'] = info['publisher'].split('; ', 1)
|
||||||
|
|
||||||
if 'ISBN-13' in content_info:
|
if 'ISBN-13' in content_info:
|
||||||
if not 'isbn' in info: info['isbn'] = []
|
info['isbn'] = content_info['ISBN-13'].replace('-', '')
|
||||||
info['isbn'].append(content_info['ISBN-13'].replace('-', ''))
|
info['isbn'].append(content_info['ISBN-13'].replace('-', ''))
|
||||||
if 'ISBN-10' in content_info:
|
elif 'ISBN-10' in content_info:
|
||||||
if not 'isbn' in info: info['isbn'] = []
|
info['isbn'] = stdnum.isbn.to_isbn13(content_info['ISBN-10'])
|
||||||
info['isbn'].append(content_info['ISBN-10'])
|
|
||||||
|
|
||||||
a = doc.xpath('//span[@class="a-size-medium"]')
|
a = doc.xpath('//span[@class="a-size-medium"]')
|
||||||
if a:
|
if a:
|
||||||
|
|
|
@ -21,14 +21,13 @@ def find(query):
|
||||||
done = set()
|
done = set()
|
||||||
for isbn in isbns:
|
for isbn in isbns:
|
||||||
if isbn not in done:
|
if isbn not in done:
|
||||||
|
isbn = stdnum.isbn.to_isbn13(isbn)
|
||||||
r = {
|
r = {
|
||||||
'isbn': [isbn],
|
'isbn': [isbn],
|
||||||
'primaryid': ['isbn', isbn]
|
'primaryid': ['isbn', isbn]
|
||||||
}
|
}
|
||||||
results.append(r)
|
results.append(r)
|
||||||
done.add(isbn)
|
done.add(isbn)
|
||||||
if len(isbn) == 10:
|
|
||||||
done.add(stdnum.isbn.to_isbn13(isbn))
|
|
||||||
if len(isbn) == 13 and isbn.startswith('978'):
|
if len(isbn) == 13 and isbn.startswith('978'):
|
||||||
done.add(stdnum.isbn.to_isbn10(isbn))
|
done.add(stdnum.isbn.to_isbn10(isbn))
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -6,7 +6,7 @@ from ox.cache import get_json, store
|
||||||
import ox.web.google
|
import ox.web.google
|
||||||
import stdnum.isbn
|
import stdnum.isbn
|
||||||
|
|
||||||
from .utils import find_isbns, get_language, decode_html_data
|
from .utils import find_isbns, get_language, decode_html_data, to_isbn13
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -51,6 +51,7 @@ def info(key, value):
|
||||||
data = {}
|
data = {}
|
||||||
for key in [
|
for key in [
|
||||||
'authors',
|
'authors',
|
||||||
|
'categories',
|
||||||
'description',
|
'description',
|
||||||
'pageCount',
|
'pageCount',
|
||||||
'publishedDate',
|
'publishedDate',
|
||||||
|
@ -83,6 +84,9 @@ def info(key, value):
|
||||||
data['isbn'].append(k['identifier'])
|
data['isbn'].append(k['identifier'])
|
||||||
else:
|
else:
|
||||||
print('unknown identifier', k)
|
print('unknown identifier', k)
|
||||||
|
if 'isbn' in data:
|
||||||
|
data['isbn'] = [to_isbn13(i) for i in data['isbn']][0]
|
||||||
|
|
||||||
if 'publisher' in data and isinstance(data['publisher'], str):
|
if 'publisher' in data and isinstance(data['publisher'], str):
|
||||||
data['publisher'] = [data['publisher']]
|
data['publisher'] = [data['publisher']]
|
||||||
if 'language' in _data:
|
if 'language' in _data:
|
||||||
|
|
|
@ -6,6 +6,16 @@ import re
|
||||||
import stdnum.isbn
|
import stdnum.isbn
|
||||||
|
|
||||||
import ox
|
import ox
|
||||||
|
import ox.iso
|
||||||
|
|
||||||
|
def to_isbn13(isbn):
    """Return *isbn* as a validated ISBN-13 string, or None if it is unusable.

    The value is handed to ``stdnum.isbn.validate`` with conversion enabled,
    so a valid ISBN-10 comes back in its ISBN-13 form. Anything that fails
    validation (including non-string input such as ``None``) and any result
    that does not start with ``97`` yields ``None``.
    """
    try:
        isbn13 = stdnum.isbn.validate(isbn, True)
    except Exception:
        # Best effort: an invalid number and a wrong input type both mean
        # "no ISBN". Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    if isbn13[:2] != '97':
        return None
    return isbn13
|
||||||
|
|
||||||
def normalize_isbn(value):
    """Reduce *value* to the characters an ISBN can contain.

    Keeps digits and the ``X`` check digit and drops everything else
    (hyphens, spaces, prefixes). The input is upper-cased first so that an
    ISBN-10 written with a lower-case ``x`` check digit keeps that digit
    instead of losing it.
    """
    return ''.join(c for c in value.upper() if c.isdigit() or c == 'X')
|
||||||
|
@ -13,14 +23,11 @@ def normalize_isbn(value):
|
||||||
def find_isbns(text):
    """Return the distinct ISBN-13 numbers found in *text*.

    *text* may be ``str`` or ``bytes`` (bytes are decoded with the default
    codec). Every run of digits, hyphens, en dashes, spaces and ``X`` is
    normalized and converted with ``to_isbn13``; candidates that do not
    validate are dropped. The result keeps the order of first appearance,
    so callers that take ``isbns[0]`` get the first ISBN in the text.
    """
    if isinstance(text, bytes):
        text = text.decode()
    # Raw string: keeps \d and \- as regex escapes rather than (invalid)
    # string escapes. \u2013 is the en dash, resolved by the re module.
    candidates = re.findall(r'\d[\d\-X\u2013 ]+', text)
    isbns = []
    seen = set()
    for candidate in candidates:
        isbn = to_isbn13(normalize_isbn(candidate))
        # Skip invalid candidates and duplicates while preserving order.
        if isbn and isbn not in seen:
            seen.add(isbn)
            isbns.append(isbn)
    return isbns
|
|
||||||
|
|
||||||
def get_language(lang):
|
def get_language(lang):
|
||||||
return ox.iso.codeToLang(lang.split('-')[0]) or lang
|
return ox.iso.codeToLang(lang.split('-')[0]) or lang
|
||||||
|
|
61
oml/setup.py
61
oml/setup.py
|
@ -111,7 +111,6 @@ CREATE TABLE sort (
|
||||||
date VARCHAR(1000),
|
date VARCHAR(1000),
|
||||||
language VARCHAR(1000),
|
language VARCHAR(1000),
|
||||||
pages BIGINT,
|
pages BIGINT,
|
||||||
classification VARCHAR(1000),
|
|
||||||
extension VARCHAR(1000),
|
extension VARCHAR(1000),
|
||||||
size BIGINT,
|
size BIGINT,
|
||||||
created DATETIME,
|
created DATETIME,
|
||||||
|
@ -136,7 +135,6 @@ CREATE INDEX ix_sort_accessed ON sort (accessed);
|
||||||
CREATE INDEX ix_sort_added ON sort (added);
|
CREATE INDEX ix_sort_added ON sort (added);
|
||||||
CREATE INDEX ix_sort_asin ON sort (asin);
|
CREATE INDEX ix_sort_asin ON sort (asin);
|
||||||
CREATE INDEX ix_sort_author ON sort (author);
|
CREATE INDEX ix_sort_author ON sort (author);
|
||||||
CREATE INDEX ix_sort_classification ON sort (classification);
|
|
||||||
CREATE INDEX ix_sort_country ON sort (country);
|
CREATE INDEX ix_sort_country ON sort (country);
|
||||||
CREATE INDEX ix_sort_created ON sort (created);
|
CREATE INDEX ix_sort_created ON sort (created);
|
||||||
CREATE INDEX ix_sort_date ON sort (date);
|
CREATE INDEX ix_sort_date ON sort (date);
|
||||||
|
@ -274,7 +272,64 @@ def upgrade_db(old, new=None):
|
||||||
i.update_sort()
|
i.update_sort()
|
||||||
i.update_find()
|
i.update_find()
|
||||||
session.commit()
|
session.commit()
|
||||||
|
if old <= '20160111-603-90648f9' and new > '20160111-603-90648f9':
|
||||||
|
for f in settings.ui['filters']:
|
||||||
|
if f['id'] == 'classification':
|
||||||
|
f['id'] = 'categories'
|
||||||
|
settings.ui._save()
|
||||||
|
run_sql('ALTER TABLE sort ADD categories VARCHAR(1000)')
|
||||||
|
run_sql('ALTER TABLE sort ADD series VARCHAR(1000)')
|
||||||
|
run_sql('CREATE INDEX ix_sort_categories ON sort (categories)')
|
||||||
|
run_sql('''CREATE TABLE user_metadata (
|
||||||
|
created DATETIME,
|
||||||
|
modified DATETIME,
|
||||||
|
id INTEGER NOT NULL,
|
||||||
|
item_id VARCHAR(32),
|
||||||
|
user_id VARCHAR(43),
|
||||||
|
data_hash VARCHAR(40),
|
||||||
|
data BLOB,
|
||||||
|
PRIMARY KEY (id),
|
||||||
|
FOREIGN KEY(user_id) REFERENCES user (id)
|
||||||
|
)''')
|
||||||
|
run_sql('CREATE UNIQUE INDEX IF NOT EXISTS user_metadata_index ON user_metadata(id, user_id)')
|
||||||
|
run_sql('CREATE INDEX ix_user_metadata_data_hash ON user_metadata (data_hash)')
|
||||||
|
from meta.utils import to_isbn13
|
||||||
|
from item.models import Item
|
||||||
|
from user.models import Metadata
|
||||||
|
with db.session() as session:
|
||||||
|
for i in Item.query:
|
||||||
|
update = False
|
||||||
|
if 'primaryid' in i.meta:
|
||||||
|
del i.meta['primaryid']
|
||||||
|
update = True
|
||||||
|
if 'primaryid' in i.info:
|
||||||
|
del i.info['primaryid']
|
||||||
|
update = True
|
||||||
|
for key in i.meta_keys:
|
||||||
|
if key not in i.meta and key in i.info:
|
||||||
|
i.meta[key] = i.info[key]
|
||||||
|
update = True
|
||||||
|
if 'isbn' in i.meta and isinstance(i.meta['isbn'], list):
|
||||||
|
isbns = [to_isbn13(isbn) for isbn in i.meta['isbn']]
|
||||||
|
isbns = [isbn for isbn in isbns if isbn]
|
||||||
|
if isbns:
|
||||||
|
i.meta['isbn'] = isbns[0]
|
||||||
|
if 'isbn' in i.info:
|
||||||
|
i.info['isbn'] = i.meta['isbn']
|
||||||
|
else:
|
||||||
|
del i.meta['isbn']
|
||||||
|
if 'isbn' in i.info:
|
||||||
|
del i.info['isbn']
|
||||||
|
update = True
|
||||||
|
if 'isbn' in i.meta and not i.meta['isbn']:
|
||||||
|
del i.meta['isbn']
|
||||||
|
update = True
|
||||||
|
if update:
|
||||||
|
session.add(i)
|
||||||
|
for u in i.users:
|
||||||
|
if u.id != settings.USER_ID:
|
||||||
|
Metadata.get_or_create(u.id, i.id, i.meta, False)
|
||||||
|
session.commit()
|
||||||
|
|
||||||
if old <= '20140527-120-3cb9819':
|
if old <= '20140527-120-3cb9819':
|
||||||
run_sql('CREATE INDEX ix_find_findvalue ON find (findvalue)')
|
run_sql('CREATE INDEX ix_find_findvalue ON find (findvalue)')
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# vi:si:et:sw=4:sts=4:ts=4
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
|
import hashlib
|
||||||
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
@ -11,6 +12,7 @@ import db
|
||||||
import json_pickler
|
import json_pickler
|
||||||
import settings
|
import settings
|
||||||
import state
|
import state
|
||||||
|
import utils
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -118,8 +120,10 @@ class User(db.Model):
|
||||||
if not i.users:
|
if not i.users:
|
||||||
i.delete()
|
i.delete()
|
||||||
Changelog.query.filter_by(user_id=self.id).delete()
|
Changelog.query.filter_by(user_id=self.id).delete()
|
||||||
if self.id in settings.ui['showFolder']:
|
Metadata.query.filter_by(user_id=self.id).delete()
|
||||||
del settings.ui['showFolder'][self.id]
|
if self.name in settings.ui['showFolder']:
|
||||||
|
del settings.ui['showFolder'][self.name]
|
||||||
|
settings.ui._save()
|
||||||
self.save()
|
self.save()
|
||||||
if was_peering:
|
if was_peering:
|
||||||
Changelog.record(state.user(), 'removepeer', self.id)
|
Changelog.record(state.user(), 'removepeer', self.id)
|
||||||
|
@ -318,3 +322,71 @@ class List(db.Model):
|
||||||
def save(self):
|
def save(self):
|
||||||
state.db.session.add(self)
|
state.db.session.add(self)
|
||||||
state.db.session.commit()
|
state.db.session.commit()
|
||||||
|
|
||||||
|
class Metadata(db.Model):
    """Metadata for one item as stored for one user (table ``user_metadata``)."""
    __tablename__ = 'user_metadata'

    created = sa.Column(sa.DateTime())
    modified = sa.Column(sa.DateTime())

    id = sa.Column(sa.Integer(), primary_key=True)
    item_id = sa.Column(sa.String(32))
    user_id = sa.Column(sa.String(43), sa.ForeignKey('user.id'))
    # SHA-1 hex digest of ``data`` dumped as JSON with sorted keys; see save().
    data_hash = sa.Column(sa.String(40), index=True)
    data = sa.Column(MutableDict.as_mutable(sa.PickleType(pickler=json_pickler)))

    def __repr__(self):
        return '{item}/{user}'.format(item=self.item_id, user=self.user_id)

    @property
    def timestamp(self):
        """Modification time converted with ``utils.datetime2ts``."""
        return utils.datetime2ts(self.modified)

    @classmethod
    def get(cls, user_id, item_id):
        """Return the record for this user/item pair, or None if there is none."""
        return cls.query.filter_by(user_id=user_id, item_id=item_id).first()

    @classmethod
    def get_or_create(cls, user_id, item_id, data=None, commit=True):
        """Return the record for this user/item pair, creating it if needed.

        A new record is saved with *data* (or an empty dict). An existing
        record is updated through ``edit`` when *data* is given. *commit*
        is passed on to ``save`` / ``edit``.
        """
        m = cls.get(user_id=user_id, item_id=item_id)
        if not m:
            m = cls(user_id=user_id, item_id=item_id)
            m.created = datetime.utcnow()
            if data:
                m.data = data
            else:
                m.data = {}
            m.save(commit)
        elif data:
            m.edit(data, commit)
        return m

    def save(self, commit=True):
        """Refresh ``modified`` and ``data_hash``, add to the session, optionally commit."""
        self.modified = datetime.utcnow()
        # sort_keys makes the hash independent of dict ordering.
        self.data_hash = hashlib.sha1(json.dumps(self.data, ensure_ascii=False, sort_keys=True).encode()).hexdigest()
        state.db.session.add(self)
        if commit:
            state.db.session.commit()

    def edit(self, data, commit=True):
        """Apply *data* to the stored metadata and return the changed keys.

        A list-valued ``isbn`` is reduced to its first valid ISBN-13, or
        dropped when none validates. The ``id`` key is ignored. The record
        is saved only when at least one value actually changed.

        Returns a dict of the key/value pairs that were changed (empty when
        nothing changed).
        """
        changes = {}
        # Work on a shallow copy so the isbn clean-up below does not alter
        # the caller's dict.
        data = dict(data)
        if 'isbn' in data and isinstance(data['isbn'], list):
            isbns = [utils.to_isbn13(isbn) for isbn in data['isbn']]
            isbns = [isbn for isbn in isbns if isbn]
            # Must test the filtered list: the comprehension variable
            # ``isbn`` does not exist in this scope in Python 3.
            if isbns:
                data['isbn'] = isbns[0]
            else:
                del data['isbn']
        for key in data:
            if key == 'id':
                continue
            if data[key] != self.data.get(key):
                self.data[key] = data[key]
                changes[key] = data[key]
        if changes:
            self.save(commit)
        return changes

    def delete(self):
        """Delete this record and commit the session."""
        state.db.session.delete(self)
        state.db.session.commit()
|
||||||
|
|
|
@ -29,7 +29,7 @@ from Crypto.PublicKey import RSA
|
||||||
from Crypto.Util.asn1 import DerSequence
|
from Crypto.Util.asn1 import DerSequence
|
||||||
|
|
||||||
|
|
||||||
from meta.utils import normalize_isbn, find_isbns, get_language
|
from meta.utils import normalize_isbn, find_isbns, get_language, to_isbn13
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
Loading…
Reference in a new issue