find

2014-05-16 10:06:11 +02:00 · 2014-05-16 10:06:11 +02:00 · e41942ea99
commit e41942ea99
parent a9c5fb43fe
28 changed files with 240 additions and 84 deletions
--- a/oml/main.py
+++ b/oml/main.py
@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import os
 import sys
--- a/oml/app.py
+++ b/oml/app.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 from flask import Flask
 from flask.ext.script import Manager
--- a/oml/changelog.py
+++ b/oml/changelog.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import json
 from datetime import datetime
--- a/oml/commands.py
+++ b/oml/commands.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 from flask.ext.script import Command

@ -10,7 +11,10 @@ class Setup(Command):
        """
        def run(self):
            import setup
+            import settings
            setup.create_default_lists()
+            settings.db.session.connection().execute("PRAGMA journal_mode=WAL")
+            settings.db.session.commit()

 class UpdateStatic(Command):
        """
@ -28,8 +32,8 @@ class UpdateStatic(Command):
            oxjs = os.path.join(settings.static_path, 'oxjs')
            if not os.path.exists(oxjs):
                r('git', 'clone', 'https://git.0x2620.org/oxjs.git', oxjs)
-            r('python', os.path.join(oxjs, 'tools', 'build', 'build.py'))
-            r('python', os.path.join(settings.static_path, 'py', 'build.py'))
+            r('python2', os.path.join(oxjs, 'tools', 'build', 'build.py'))
+            r('python2', os.path.join(settings.static_path, 'py', 'build.py'))

 class Release(Command):
        """
--- a/oml/directory.py
+++ b/oml/directory.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 # DHT placeholder

--- a/oml/ed25519_utils.py
+++ b/oml/ed25519_utils.py
@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division
+
 import ed25519
 ENCODING='base64'

--- a/oml/item/add.py
+++ b/oml/item/add.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import base64
 import models
--- a/oml/item/api.py
+++ b/oml/item/api.py
@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division
+
 from datetime import datetime

 from flask import json
--- a/oml/item/covers.py
+++ b/oml/item/covers.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import sqlite3
 import Image
--- a/oml/item/migrate.py
+++ b/oml/item/migrate.py
@ -1,3 +1,7 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division
+
 import models
 from copy import deepcopy

--- a/oml/item/models.py
+++ b/oml/item/models.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import os
 import re
@ -198,6 +199,8 @@ class Item(db.Model):
                        if value:
                            value = unicode(value)
                            value = ox.sort_string(value).lower()
+                elif isinstance(value, list): #empty list
+                    value = ''
                setattr(self, 'sort_%s' % key['id'], value)

    def update_find(self):
@ -295,11 +298,14 @@ class Item(db.Model):

    def update_cover(self):
        cover = None
-        if 'cover' in self.meta:
+        if 'cover' in self.meta and self.meta['cover']:
            cover = ox.cache.read_url(self.meta['cover'])
            #covers[self.id] = requests.get(self.meta['cover']).content
            if cover:
                covers[self.id] = cover
+        else:
+            if covers[self.id]:
+                del covers[self.id]
        path = self.get_path()
        if not cover and path:
            cover = self.extract_cover()
--- a/oml/item/person.py
+++ b/oml/item/person.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import unicodedata

--- a/oml/item/query.py
+++ b/oml/item/query.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import settings
 import models
--- a/oml/item/views.py
+++ b/oml/item/views.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import os
 from datetime import datetime
@ -77,7 +78,7 @@ def cover(id, size=None):
    if not 'coverRatio' in item.meta:
        #img = Image.open(StringIO(str(covers[id])))
        img = Image.open(StringIO(data))
-        item.meta['coverRatio'] = float(img.size[0])/img.size[1]
+        item.meta['coverRatio'] = img.size[0]/img.size[1]
        db.session.add(item)
        db.session.commit()
    resp = make_response(data)
--- a/oml/localnodes.py
+++ b/oml/localnodes.py
@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division

 import socket
 import thread
--- a/oml/meta/init.py
+++ b/oml/meta/init.py
@ -7,6 +7,7 @@ import loc
 import lookupbyisbn
 import openlibrary
 import worldcat
+import google

 providers = [
    ('openlibrary', 'olid'),
@ -17,9 +18,12 @@ providers = [
 ]

 def find(title, author=None, publisher=None, date=None):
+    results = google.find(title=title, author=author, publisher=publisher, date=date)
+    '''
    results = openlibrary.find(title=title, author=author, publisher=publisher, date=date)
    for r in results:
        r['mainid'] = 'olid'
+    '''
    return results

 def lookup(key, value):
--- a/oml/meta/google.py
+++ b/oml/meta/google.py
@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+from __future__ import division
+
+import ox.web.google
+import stdnum.isbn
+
+from .utils import find_isbns
+
+
+def find(title, author=None, publisher=None, date=None):
+    """Search Google for ISBNs matching a title and optional author.
+
+    The query is the title, then the author(s), then the literal word
+    'isbn'.  Each search result is joined with spaces and scanned with
+    find_isbns().
+
+    title: search string.
+    author: optional string or list of strings; a list is joined with
+        spaces before it is appended to the query.
+    publisher, date: accepted but not used to build the query.
+
+    Returns a list of dicts {key: isbn, 'mainid': key}, where key is
+    'isbn%d' % len(isbn), one per distinct book and in the order the
+    ISBNs were first seen.
+    """
+    # Trace of the incoming search on stdout.
+    print 'google.find', title, author, publisher, date
+    query = title
+    if author:
+        if isinstance(author, list):
+            author = ' '.join(author)
+        query += ' ' + author
+    query += ' isbn'
+    isbns = []
+    for r in ox.web.google.find(query):
+        isbns += find_isbns(' '.join(r))
+
+    results = []
+    # Track the ISBN-13 form of everything already returned, so the ISBN-10
+    # and ISBN-13 of one book collapse into a single result whichever of
+    # the two was found first (to_isbn13 leaves ISBN-13 input unchanged).
+    done = set()
+    for isbn in isbns:
+        canonical = stdnum.isbn.to_isbn13(isbn)
+        if canonical in done:
+            continue
+        done.add(canonical)
+        key = 'isbn%d' % len(isbn)
+        results.append({
+            key: isbn,
+            'mainid': key,
+        })
+    return results
--- a/oml/meta/loc.py
+++ b/oml/meta/loc.py
@ -33,7 +33,10 @@ def lookup(id):
    info = {
        'lccn': id
    }
-    info['title'] = ''.join([e.text for e in mods.findall(ns + 'titleInfo')[0]])
+    title = mods.findall(ns + 'titleInfo')
+    if not title:
+        return {}
+    info['title'] = ''.join([e.text for e in title[0]])
    origin = mods.findall(ns + 'originInfo')
    if origin:
        info['place'] = []
--- a/oml/meta/lookupbyisbn.py
+++ b/oml/meta/lookupbyisbn.py
@ -14,6 +14,8 @@ def get_ids(key, value):
        if m:
            asin = m[0].split('/')[-3]
            ids.append(('asin', asin))
+    if key == 'isbn10':
+        ids.append(('isbn13', stdnum.isbn.to_isbn13(value)))
    if key == 'asin':
        if stdnum.isbn.is_valid(value):
            ids.append(('isbn10', value))
@ -47,14 +49,16 @@ def lookup(id):
            r[key] = int(r[key])
    desc = find_re(data, '<h2>Description:<\/h2>(.*?)<div ')
    desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')
-    r['description'] = desc
-    if r['description'] == u'Description of this item is not available at this time.':
-        r['description'] = ''
+    r['description'] = decode_html(strip_tags(desc))
    r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
    for key in r:
        if isinstance(r[key], basestring):
            r[key] = decode_html(strip_tags(r[key])).strip()
-    if 'author' in r and isinstance(r['author'], basestring):
+    if 'author' in r and isinstance(r['author'], basestring) and r['author']:
        r['author'] = [r['author']]
+    else:
+        r['author'] = []
+    if r['description'].lower() == u'Description of this item is not available at this time.'.lower():
+        r['description'] = ''
    return r

--- a/oml/meta/utils.py
+++ b/oml/meta/utils.py
@ -1,5 +1,16 @@
-
+import re
+import stdnum.isbn

 def normalize_isbn(value):
    return ''.join([s for s in value if s.isdigit() or s == 'X'])

+def find_isbns(text):
+    """Return the valid ISBN-10/ISBN-13 numbers found in text.
+
+    Candidates are runs that start with a digit and continue with digits,
+    hyphens, 'X' or spaces.  Each run is reduced with normalize_isbn() and
+    kept only if stdnum accepts it, it is 10 or 13 characters long and it
+    is not all zeros.  Matches are returned in order of appearance;
+    duplicates are not removed.
+    """
+    # Raw string: \d, \- and "\ " are regex escapes, not Python string
+    # escapes, so the pattern must not be an ordinary string literal.
+    matches = re.findall(r'\d[\d\-X\ ]+', text)
+    matches = [normalize_isbn(value) for value in matches]
+    placeholders = ('0' * 10, '0' * 13)
+    return [
+        isbn for isbn in matches
+        if stdnum.isbn.is_valid(isbn)
+        and len(isbn) in (10, 13)
+        and isbn not in placeholders
+    ]
+
--- a/oml/utils.py
+++ b/oml/utils.py
@ -8,7 +8,7 @@ import stdnum.isbn

 import ox

-from meta.utils import normalize_isbn
+from meta.utils import normalize_isbn, find_isbns

 def valid_olid(id):
    return id.startswith('OL') and id.endswith('M')
@ -76,16 +76,6 @@ def sort_title(title):
    title = re.sub(u'[\'!¿¡,\.;\-"\:\*\[\]]', '', title)
    return title.strip()

-def find_isbns(text):
-    matches = re.compile('\d[\d\-X\ ]+').findall(text)
-    matches = [normalize_isbn(value) for value in matches]
-    return [isbn for isbn in matches if stdnum.isbn.is_valid(isbn)
-        and len(isbn) in (10, 13)
-        and isbn not in (
-        '0' * 10,
-        '0' * 13,
-    )]
-
 def get_position_by_id(list, key):
    for i in range(0, len(list)):
        if list[i]['id'] == key: