class ScrapeThread(Thread):
    """Background worker that drains the metadata scrape queue.

    Items queued in item.models.Scrape are scraped oldest-first; after
    each successful scrape the queue row is removed and a 'change'
    websocket event is fired so connected clients refresh. The thread
    starts itself on construction (daemon) and stops via join().
    """

    def __init__(self):
        Thread.__init__(self)
        self.daemon = True
        # set the stop flag only after the Thread machinery is initialized
        self._running = True
        self.start()

    def scrape_queue(self):
        """Process every currently queued item once.

        Returns:
            bool: True if at least one item was scraped (caller loops
            again immediately), False if the queue was empty or the
            thread was asked to stop (caller sleeps before retrying).
        """
        import item.models
        scraped = False
        # NOTE: SQLAlchemy needs `!= None` (not `is not None`) to emit IS NOT NULL
        for s in item.models.Scrape.query.filter(
            item.models.Scrape.added != None,
        ).order_by(item.models.Scrape.added):
            if not self._running:
                return False
            logger.debug('scrape %s', s.item)
            try:
                s.item.scrape()
                s.remove()
                trigger_event('change', {})
                scraped = True
            except Exception:
                # keep draining the rest of the queue if one item fails;
                # a bare `except:` would also swallow SystemExit/KeyboardInterrupt
                logger.debug('scrape failed %s', s.item, exc_info=True)
        return scraped

    def run(self):
        # small delay so startup (db, node, websocket) settles first
        time.sleep(2)
        with db.session():
            while self._running:
                if not self.scrape_queue():
                    # queue empty: poll again in 10s
                    time.sleep(10)

    def join(self):
        # signal the loop to stop, then wait for the thread to exit
        self._running = False
        return Thread.join(self)
Scrape(db.Model): + + __tablename__ = 'scrape' + + item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True) + item = sa.orm.relationship('Item', backref=sa.orm.backref('scraping', lazy='dynamic')) + + added = sa.Column(sa.DateTime()) + + def __repr__(self): + return '='.join(map(str, [self.item_id, self.added])) + + @classmethod + def get(cls, item_id): + return cls.query.filter_by(item_id=item_id).first() + + @classmethod + def get_or_create(cls, item_id): + t = cls.get(item_id) + if not t: + t = cls(item_id=item_id) + t.added = datetime.utcnow() + t.save() + return t + + def save(self): + state.db.session.add(self) + state.db.session.commit() + + def remove(self): + state.db.session.delete(self) + state.db.session.commit() class Transfer(db.Model): __tablename__ = 'transfer' diff --git a/oml/item/scan.py b/oml/item/scan.py index 69fa406..cefe8ab 100644 --- a/oml/item/scan.py +++ b/oml/item/scan.py @@ -10,7 +10,7 @@ import time import ox from changelog import Changelog -from item.models import File +from item.models import File, Scrape from user.models import List from utils import remove_empty_folders from websocket import trigger_event @@ -56,9 +56,10 @@ def add_file(id, f, prefix, from_=None): if item.meta.get('primaryid'): Changelog.record(user, 'edititem', item.id, dict([item.meta['primaryid']])) item.added = datetime.utcnow() - item.scrape() item.update_icons() - item.save() + item.modified = datetime.utcnow() + item.update() + Scrape.get_or_create(item.id) return file def run_scan(): diff --git a/oml/server.py b/oml/server.py index d80d5a4..e39e73a 100644 --- a/oml/server.py +++ b/oml/server.py @@ -90,6 +90,7 @@ def run(): state.node = node.server.start() state.nodes = nodes.Nodes() state.downloads = downloads.Downloads() + state.scraping = downloads.ScrapeThread() def add_users(): with db.session(): for p in user.models.User.query.filter_by(peered=True): @@ -112,6 +113,8 @@ def run(): state.tasks.join() if state.nodes: 
state.nodes.join() + if state.scraping: + state.scraping.join() http_server.stop() if PID and os.path.exists(PID): os.unlink(PID) diff --git a/oml/setup.py b/oml/setup.py index bc11f6c..c830094 100644 --- a/oml/setup.py +++ b/oml/setup.py @@ -203,6 +203,15 @@ def upgrade_db(old, new=None): if old <= '20140527-120-3cb9819': run_sql('CREATE INDEX ix_find_findvalue ON find (findvalue)') + if old <= '20150307-272-557f4d3': + run_sql('''CREATE TABLE scrape ( + item_id VARCHAR(32) NOT NULL, + added DATETIME, + PRIMARY KEY (item_id), + FOREIGN KEY(item_id) REFERENCES item (id) +)''') + run_sql('CREATE INDEX idx_scrape_added ON scrape (added)') + def create_default_lists(user_id=None): with db.session(): user_id = user_id or settings.USER_ID diff --git a/static/js/folders.js b/static/js/folders.js index bedf813..c5a4b6d 100644 --- a/static/js/folders.js +++ b/static/js/folders.js @@ -333,6 +333,7 @@ oml.ui.folders = function() { }, change: function(data) { Ox.print('got change event') + Ox.Request.clearCache(); }, 'peering.accept': function(data) { Ox.print('peering.accept reload list')