find
This commit is contained in:
parent
a9c5fb43fe
commit
e41942ea99
28 changed files with 240 additions and 84 deletions
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
from flask import Flask
|
||||
from flask.ext.script import Manager
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
from flask.ext.script import Command
|
||||
|
||||
|
|
@ -10,7 +11,10 @@ class Setup(Command):
|
|||
"""
|
||||
def run(self):
    """Create the default lists, then issue ``PRAGMA journal_mode=WAL``.

    The pragma is executed on the current session's connection and the
    session is committed afterwards.
    """
    # Imports are kept function-local, in the same order as before.
    import setup
    import settings

    setup.create_default_lists()

    db = settings.db
    db.session.connection().execute("PRAGMA journal_mode=WAL")
    db.session.commit()
|
||||
|
||||
class UpdateStatic(Command):
|
||||
"""
|
||||
|
|
@ -28,8 +32,8 @@ class UpdateStatic(Command):
|
|||
oxjs = os.path.join(settings.static_path, 'oxjs')
|
||||
if not os.path.exists(oxjs):
|
||||
r('git', 'clone', 'https://git.0x2620.org/oxjs.git', oxjs)
|
||||
r('python', os.path.join(oxjs, 'tools', 'build', 'build.py'))
|
||||
r('python', os.path.join(settings.static_path, 'py', 'build.py'))
|
||||
r('python2', os.path.join(oxjs, 'tools', 'build', 'build.py'))
|
||||
r('python2', os.path.join(settings.static_path, 'py', 'build.py'))
|
||||
|
||||
class Release(Command):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
# DHT placeholder
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import ed25519
|
||||
ENCODING='base64'
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import base64
|
||||
import models
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from flask import json
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import sqlite3
|
||||
import Image
|
||||
|
|
|
|||
|
|
@ -1,3 +1,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import models
|
||||
from copy import deepcopy
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import os
|
||||
import re
|
||||
|
|
@ -198,6 +199,8 @@ class Item(db.Model):
|
|||
if value:
|
||||
value = unicode(value)
|
||||
value = ox.sort_string(value).lower()
|
||||
elif isinstance(value, list): #empty list
|
||||
value = ''
|
||||
setattr(self, 'sort_%s' % key['id'], value)
|
||||
|
||||
def update_find(self):
|
||||
|
|
@ -295,11 +298,14 @@ class Item(db.Model):
|
|||
|
||||
def update_cover(self):
|
||||
cover = None
|
||||
if 'cover' in self.meta:
|
||||
if 'cover' in self.meta and self.meta['cover']:
|
||||
cover = ox.cache.read_url(self.meta['cover'])
|
||||
#covers[self.id] = requests.get(self.meta['cover']).content
|
||||
if cover:
|
||||
covers[self.id] = cover
|
||||
else:
|
||||
if covers[self.id]:
|
||||
del covers[self.id]
|
||||
path = self.get_path()
|
||||
if not cover and path:
|
||||
cover = self.extract_cover()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import unicodedata
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import settings
|
||||
import models
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
|
@ -77,7 +78,7 @@ def cover(id, size=None):
|
|||
if not 'coverRatio' in item.meta:
|
||||
#img = Image.open(StringIO(str(covers[id])))
|
||||
img = Image.open(StringIO(data))
|
||||
item.meta['coverRatio'] = float(img.size[0])/img.size[1]
|
||||
item.meta['coverRatio'] = img.size[0]/img.size[1]
|
||||
db.session.add(item)
|
||||
db.session.commit()
|
||||
resp = make_response(data)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import socket
|
||||
import thread
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import loc
|
|||
import lookupbyisbn
|
||||
import openlibrary
|
||||
import worldcat
|
||||
import google
|
||||
|
||||
providers = [
|
||||
('openlibrary', 'olid'),
|
||||
|
|
@ -17,9 +18,12 @@ providers = [
|
|||
]
|
||||
|
||||
def find(title, author=None, publisher=None, date=None):
    """Search for metadata records matching the given bibliographic details.

    All arguments are forwarded unchanged, as keyword arguments, to
    ``google.find`` and its result is returned as-is.
    """
    # NOTE: the earlier openlibrary-based search is disabled. It called
    # openlibrary.find() with the same keyword arguments and tagged every
    # result with r['mainid'] = 'olid' before returning the results.
    return google.find(title=title, author=author, publisher=publisher, date=date)
|
||||
|
||||
def lookup(key, value):
|
||||
|
|
|
|||
38
oml/meta/google.py
Normal file
38
oml/meta/google.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import ox.web.google
|
||||
import stdnum.isbn
|
||||
|
||||
from .utils import find_isbns
|
||||
|
||||
|
||||
def find(title, author=None, publisher=None, date=None):
    """Look up candidate ISBNs for a book through a Google web search.

    The query is ``title``, optionally followed by the author(s), followed
    by the word ``isbn``. Every search result is joined into one string and
    scanned with ``find_isbns``.

    title     -- search title (string)
    author    -- optional author; a list is joined with spaces
    publisher -- accepted for interface compatibility, not used in the query
    date      -- accepted for interface compatibility, not used in the query

    Returns a list of dicts shaped like ``{'isbn13': isbn, 'mainid': 'isbn13'}``
    (or ``isbn10``), in the order the ISBNs were first seen. An ISBN-10 and
    its ISBN-13 equivalent count as the same book, whichever appears first.
    """
    # Function-local import so this block does not depend on a module-level
    # import being present; replaces the former debug print to stdout.
    import logging
    logging.getLogger(__name__).debug(
        'google.find %r %r %r %r', title, author, publisher, date)

    query = title
    if author:
        if isinstance(author, list):
            author = ' '.join(author)
        query += ' ' + author
    query += ' isbn'

    isbns = []
    for r in ox.web.google.find(query):
        isbns += find_isbns(' '.join(r))

    results = []
    seen = set()
    for isbn in isbns:
        # De-duplicate on the ISBN-13 form so that the 10 and 13 digit
        # spellings of one book collapse regardless of their order.
        if len(isbn) == 10:
            canonical = stdnum.isbn.to_isbn13(isbn)
        else:
            canonical = isbn
        if canonical in seen:
            continue
        seen.add(canonical)
        key = 'isbn%d' % len(isbn)
        results.append({
            key: isbn,
            'mainid': key,
        })
    return results
|
||||
|
|
@ -33,7 +33,10 @@ def lookup(id):
|
|||
info = {
|
||||
'lccn': id
|
||||
}
|
||||
info['title'] = ''.join([e.text for e in mods.findall(ns + 'titleInfo')[0]])
|
||||
title = mods.findall(ns + 'titleInfo')
|
||||
if not title:
|
||||
return {}
|
||||
info['title'] = ''.join([e.text for e in title[0]])
|
||||
origin = mods.findall(ns + 'originInfo')
|
||||
if origin:
|
||||
info['place'] = []
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ def get_ids(key, value):
|
|||
if m:
|
||||
asin = m[0].split('/')[-3]
|
||||
ids.append(('asin', asin))
|
||||
if key == 'isbn10':
|
||||
ids.append(('isbn13', stdnum.isbn.to_isbn13(value)))
|
||||
if key == 'asin':
|
||||
if stdnum.isbn.is_valid(value):
|
||||
ids.append(('isbn10', value))
|
||||
|
|
@ -47,14 +49,16 @@ def lookup(id):
|
|||
r[key] = int(r[key])
|
||||
desc = find_re(data, '<h2>Description:<\/h2>(.*?)<div ')
|
||||
desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')
|
||||
r['description'] = desc
|
||||
if r['description'] == u'Description of this item is not available at this time.':
|
||||
r['description'] = ''
|
||||
r['description'] = decode_html(strip_tags(desc))
|
||||
r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
|
||||
for key in r:
|
||||
if isinstance(r[key], basestring):
|
||||
r[key] = decode_html(strip_tags(r[key])).strip()
|
||||
if 'author' in r and isinstance(r['author'], basestring):
|
||||
if 'author' in r and isinstance(r['author'], basestring) and r['author']:
|
||||
r['author'] = [r['author']]
|
||||
else:
|
||||
r['author'] = []
|
||||
if r['description'].lower() == u'Description of this item is not available at this time.'.lower():
|
||||
r['description'] = ''
|
||||
return r
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,16 @@
|
|||
|
||||
import re
|
||||
import stdnum.isbn
|
||||
|
||||
def normalize_isbn(value):
    """Strip *value* down to the characters that can make up an ISBN.

    Keeps digits and the ``X`` check digit and drops everything else
    (hyphens, spaces, labels). A lowercase ``x`` is accepted and returned
    as uppercase ``X`` so that inputs such as ``0-8044-2957-x`` are not
    truncated to nine characters.
    """
    return ''.join([c for c in value.upper() if c.isdigit() or c == 'X'])
|
||||
|
||||
def find_isbns(text):
    """Return the valid ISBNs found in *text*, in order of appearance.

    Candidates are runs that start with a digit and continue with digits,
    hyphens, ``X`` or spaces. Each candidate is passed through
    ``normalize_isbn`` and kept only if ``stdnum.isbn.is_valid`` accepts it,
    it is 10 or 13 characters long, and it is not an all-zero placeholder.
    Duplicates are not removed.
    """
    placeholders = ('0' * 10, '0' * 13)
    found = []
    for candidate in re.findall(r'\d[\d\-X\ ]+', text):
        isbn = normalize_isbn(candidate)
        if not stdnum.isbn.is_valid(isbn):
            continue
        if len(isbn) not in (10, 13):
            continue
        if isbn in placeholders:
            continue
        found.append(isbn)
    return found
|
||||
|
||||
|
|
|
|||
12
oml/utils.py
12
oml/utils.py
|
|
@ -8,7 +8,7 @@ import stdnum.isbn
|
|||
|
||||
import ox
|
||||
|
||||
from meta.utils import normalize_isbn
|
||||
from meta.utils import normalize_isbn, find_isbns
|
||||
|
||||
def valid_olid(id):
    """Return True when *id* begins with ``OL`` and ends with ``M``."""
    has_prefix = id.startswith('OL')
    return has_prefix and id.endswith('M')
|
||||
|
|
@ -76,16 +76,6 @@ def sort_title(title):
|
|||
title = re.sub(u'[\'!¿¡,\.;\-"\:\*\[\]]', '', title)
|
||||
return title.strip()
|
||||
|
||||
def find_isbns(text):
|
||||
matches = re.compile('\d[\d\-X\ ]+').findall(text)
|
||||
matches = [normalize_isbn(value) for value in matches]
|
||||
return [isbn for isbn in matches if stdnum.isbn.is_valid(isbn)
|
||||
and len(isbn) in (10, 13)
|
||||
and isbn not in (
|
||||
'0' * 10,
|
||||
'0' * 13,
|
||||
)]
|
||||
|
||||
def get_position_by_id(list, key):
|
||||
for i in range(0, len(list)):
|
||||
if list[i]['id'] == key:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue