scrape metadata in background

j 2015-03-07 21:54:07 +05:30
parent e55df181db
commit 2711fbb0c1
6 changed files with 89 additions and 3 deletions


@@ -10,6 +10,8 @@ import state
 import settings
 import update
+from websocket import trigger_event
 import logging
 logger = logging.getLogger('oml.downloads')
@@ -51,3 +53,40 @@ class Downloads(Thread):
     def join(self):
         self._running = False
         return Thread.join(self)
+
+class ScrapeThread(Thread):
+
+    def __init__(self):
+        self._running = True
+        Thread.__init__(self)
+        self.daemon = True
+        self.start()
+
+    def scrape_queue(self):
+        import item.models
+        scraped = False
+        for s in item.models.Scrape.query.filter(
+            item.models.Scrape.added != None,
+        ).order_by(item.models.Scrape.added):
+            if not self._running:
+                return False
+            logger.debug('scrape %s', s.item)
+            try:
+                s.item.scrape()
+                s.remove()
+                trigger_event('change', {})
+                scraped = True
+            except:
+                logger.debug('scrape failed %s', s.item, exc_info=1)
+        return scraped
+
+    def run(self):
+        time.sleep(2)
+        with db.session():
+            while self._running:
+                if not self.scrape_queue():
+                    time.sleep(10)
+
+    def join(self):
+        self._running = False
+        return Thread.join(self)
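ScrapeThread runs as a daemon worker: each pass drains whatever Scrape rows are queued, a pass that scrapes nothing is followed by a ten-second sleep, and join() flips _running so the loop winds down on shutdown. Below is a self-contained sketch of that poll-and-drain pattern; the PollWorker class, the in-memory list standing in for the Scrape table, and the handle callback standing in for item.scrape() are illustrative only, not part of this commit.

import time
from threading import Thread

class PollWorker(Thread):
    """Background worker that drains a queue and sleeps while idle."""

    def __init__(self, queue, handle):
        self._running = True
        self.queue = queue      # stand-in for the Scrape table
        self.handle = handle    # stand-in for item.scrape()
        Thread.__init__(self)
        self.daemon = True
        self.start()

    def drain(self):
        # process everything currently queued; report whether any work was done
        done = False
        while self.queue and self._running:
            self.handle(self.queue.pop(0))
            done = True
        return done

    def run(self):
        while self._running:
            if not self.drain():
                time.sleep(1)   # idle, poll again shortly

    def join(self):
        self._running = False
        return Thread.join(self)

if __name__ == '__main__':
    jobs = ['item-1', 'item-2']
    worker = PollWorker(jobs, lambda job: print('scraped', job))
    time.sleep(2)
    worker.join()

The real worker differs mainly in that its queue lives in the scrape table, so each call to scrape_queue() re-runs the query and picks up rows added while the previous pass was still running.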


@@ -324,6 +324,7 @@ class Item(db.Model):
         if m:
             m['primaryid'] = primaryid
         self.meta = m
+        self.modified = datetime.utcnow()
         self.update()

     def queue_download(self):
@@ -549,6 +550,38 @@ class File(db.Model):
         state.db.session.add(self)
         state.db.session.commit()
+
+class Scrape(db.Model):
+    __tablename__ = 'scrape'
+
+    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
+    item = sa.orm.relationship('Item', backref=sa.orm.backref('scraping', lazy='dynamic'))
+    added = sa.Column(sa.DateTime())
+
+    def __repr__(self):
+        return '='.join(map(str, [self.item_id, self.added]))
+
+    @classmethod
+    def get(cls, item_id):
+        return cls.query.filter_by(item_id=item_id).first()
+
+    @classmethod
+    def get_or_create(cls, item_id):
+        t = cls.get(item_id)
+        if not t:
+            t = cls(item_id=item_id)
+            t.added = datetime.utcnow()
+            t.save()
+        return t
+
+    def save(self):
+        state.db.session.add(self)
+        state.db.session.commit()
+
+    def remove(self):
+        state.db.session.delete(self)
+        state.db.session.commit()
+
 class Transfer(db.Model):
     __tablename__ = 'transfer'
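The Scrape table is the persistent work queue behind the ScrapeThread added above: add_file enqueues an item with Scrape.get_or_create(item.id), and the worker deletes the row with remove() once the item has been scraped, so pending scrapes survive a restart. A minimal standalone sketch of the get_or_create/remove pattern against a throwaway in-memory SQLite database; the ScrapeRow model and module-level session are illustrative stand-ins, not the project's db or state modules.

from datetime import datetime

import sqlalchemy as sa
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class ScrapeRow(Base):
    # stripped-down stand-in for the Scrape model above
    __tablename__ = 'scrape'
    item_id = sa.Column(sa.String(32), primary_key=True)
    added = sa.Column(sa.DateTime())

engine = sa.create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def get_or_create(item_id):
    # only the first call inserts a row; repeated calls return the existing one
    row = session.query(ScrapeRow).filter_by(item_id=item_id).first()
    if not row:
        row = ScrapeRow(item_id=item_id, added=datetime.utcnow())
        session.add(row)
        session.commit()
    return row

queued = get_or_create('0123456789abcdef')
assert get_or_create('0123456789abcdef') is queued   # enqueueing is idempotent

session.delete(queued)   # what remove() does after a successful scrape
session.commit()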


@@ -10,7 +10,7 @@ import time
 import ox
 from changelog import Changelog
-from item.models import File
+from item.models import File, Scrape
 from user.models import List
 from utils import remove_empty_folders
 from websocket import trigger_event
@@ -56,9 +56,10 @@ def add_file(id, f, prefix, from_=None):
     if item.meta.get('primaryid'):
         Changelog.record(user, 'edititem', item.id, dict([item.meta['primaryid']]))
     item.added = datetime.utcnow()
-    item.scrape()
     item.update_icons()
-    item.save()
+    item.modified = datetime.utcnow()
+    item.update()
+    Scrape.get_or_create(item.id)
     return file

 def run_scan():


@@ -90,6 +90,7 @@ def run():
     state.node = node.server.start()
     state.nodes = nodes.Nodes()
     state.downloads = downloads.Downloads()
+    state.scraping = downloads.ScrapeThread()

     def add_users():
         with db.session():
             for p in user.models.User.query.filter_by(peered=True):
@@ -112,6 +113,8 @@ def run():
     state.tasks.join()
     if state.nodes:
         state.nodes.join()
+    if state.scraping:
+        state.scraping.join()
     http_server.stop()
     if PID and os.path.exists(PID):
         os.unlink(PID)


@@ -203,6 +203,15 @@ def upgrade_db(old, new=None):
     if old <= '20140527-120-3cb9819':
         run_sql('CREATE INDEX ix_find_findvalue ON find (findvalue)')
+    if old <= '20150307-272-557f4d3':
+        run_sql('''CREATE TABLE scrape (
+            item_id VARCHAR(32) NOT NULL,
+            added DATETIME,
+            PRIMARY KEY (item_id),
+            FOREIGN KEY(item_id) REFERENCES item (id)
+        )''')
+        run_sql('CREATE INDEX idx_scrape_added ON scrape (added)')

 def create_default_lists(user_id=None):
     with db.session():
         user_id = user_id or settings.USER_ID
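The upgrade step adds the same scrape table (plus an index on added) to databases created before this commit, matching what the new Scrape model maps to. For reference, a throwaway sketch of the resulting schema using the stdlib sqlite3 module against an in-memory database; the one-column item table is only a stub so the foreign key resolves, not the real item schema.

import sqlite3

con = sqlite3.connect(':memory:')
# stub parent table so the foreign key has something to reference
con.execute('CREATE TABLE item (id VARCHAR(32) PRIMARY KEY)')
con.execute('''CREATE TABLE scrape (
    item_id VARCHAR(32) NOT NULL,
    added DATETIME,
    PRIMARY KEY (item_id),
    FOREIGN KEY(item_id) REFERENCES item (id)
)''')
con.execute('CREATE INDEX idx_scrape_added ON scrape (added)')
print(con.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall())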


@@ -333,6 +333,7 @@ oml.ui.folders = function() {
         },
         change: function(data) {
             Ox.print('got change event')
+            Ox.Request.clearCache();
         },
         'peering.accept': function(data) {
             Ox.print('peering.accept reload list')