scrape metadata in background

j committed 2015-03-07 21:54:07 +05:30
parent e55df181db
commit 2711fbb0c1
6 changed files with 89 additions and 3 deletions

View file

@@ -10,6 +10,8 @@ import state
import settings
import update
from websocket import trigger_event
import logging
logger = logging.getLogger('oml.downloads')
@@ -51,3 +53,40 @@ class Downloads(Thread):
    def join(self):
        self._running = False
        return Thread.join(self)


class ScrapeThread(Thread):

    def __init__(self):
        self._running = True
        Thread.__init__(self)
        self.daemon = True
        self.start()

    def scrape_queue(self):
        import item.models
        scraped = False
        for s in item.models.Scrape.query.filter(
            item.models.Scrape.added!=None,
        ).order_by(item.models.Scrape.added):
            if not self._running:
                return False
            logger.debug('scrape %s', s.item)
            try:
                s.item.scrape()
                s.remove()
                trigger_event('change', {})
                scraped = True
            except:
                logger.debug('scrape failed %s', s.item, exc_info=1)
        return scraped

    def run(self):
        time.sleep(2)
        with db.session():
            while self._running:
                if not self.scrape_queue():
                    time.sleep(10)

    def join(self):
        self._running = False
        return Thread.join(self)
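The new ScrapeThread is a poll-and-sleep worker: it walks the scrape table in `added` order, scrapes each item inside one long-lived `db.session()`, and sleeps for ten seconds whenever the queue runs dry. A minimal, self-contained sketch of the same pattern with the database factored out (`PollingWorker`, `fetch_jobs` and `process` are illustrative names, not part of this codebase):

    import time
    from threading import Thread

    class PollingWorker(Thread):
        """Drain a job queue in the background; sleep whenever it runs dry."""

        def __init__(self, fetch_jobs, process, idle_seconds=10):
            self._running = True
            self.fetch_jobs = fetch_jobs    # callable returning an iterable of pending jobs
            self.process = process          # callable handling one job
            self.idle_seconds = idle_seconds
            Thread.__init__(self)
            self.daemon = True              # never keeps the interpreter alive on its own
            self.start()

        def run(self):
            while self._running:
                worked = False
                for job in self.fetch_jobs():
                    if not self._running:
                        return
                    try:
                        self.process(job)
                        worked = True
                    except Exception:
                        pass                # one bad job must not kill the thread
                if not worked:
                    time.sleep(self.idle_seconds)

        def join(self):
            self._running = False           # ask run() to stop...
            return Thread.join(self)        # ...then wait until it actually has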

View file

@@ -324,6 +324,7 @@ class Item(db.Model):
        if m:
            m['primaryid'] = primaryid
        self.meta = m
        self.modified = datetime.utcnow()
        self.update()

    def queue_download(self):
@@ -549,6 +550,38 @@ class File(db.Model):
        state.db.session.add(self)
        state.db.session.commit()


class Scrape(db.Model):
    __tablename__ = 'scrape'

    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
    item = sa.orm.relationship('Item', backref=sa.orm.backref('scraping', lazy='dynamic'))

    added = sa.Column(sa.DateTime())

    def __repr__(self):
        return '='.join(map(str, [self.item_id, self.added]))

    @classmethod
    def get(cls, item_id):
        return cls.query.filter_by(item_id=item_id).first()

    @classmethod
    def get_or_create(cls, item_id):
        t = cls.get(item_id)
        if not t:
            t = cls(item_id=item_id)
            t.added = datetime.utcnow()
            t.save()
        return t

    def save(self):
        state.db.session.add(self)
        state.db.session.commit()

    def remove(self):
        state.db.session.delete(self)
        state.db.session.commit()


class Transfer(db.Model):
    __tablename__ = 'transfer'
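Because `item_id` is the primary key, an item can sit in the queue at most once, and `get_or_create` makes enqueueing idempotent: re-requesting a scrape for an already-queued item returns the existing row instead of inserting a duplicate. A hypothetical usage, assuming an open `db.session()` and a made-up item id:

    from item.models import Scrape

    s = Scrape.get_or_create('0123456789abcdef')      # enqueue (id is made up)
    again = Scrape.get_or_create('0123456789abcdef')  # no-op: the same row comes back
    assert again.item_id == s.item_id
    s.remove()                                        # dequeue after a successful scrape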

View file

@@ -10,7 +10,7 @@ import time
import ox
from changelog import Changelog
from item.models import File
from item.models import File, Scrape
from user.models import List
from utils import remove_empty_folders
from websocket import trigger_event
@@ -56,9 +56,10 @@ def add_file(id, f, prefix, from_=None):
    if item.meta.get('primaryid'):
        Changelog.record(user, 'edititem', item.id, dict([item.meta['primaryid']]))
    item.added = datetime.utcnow()
    item.scrape()
    item.update_icons()
    item.save()
    item.modified = datetime.utcnow()
    item.update()
    Scrape.get_or_create(item.id)
    return file


def run_scan():
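This hunk is the behavioural core of the commit: judging from the old/new line counts, `add_file` previously fetched metadata inline via `item.scrape()`, and now it only stamps the item and drops an entry into the scrape queue, so a large scan is no longer blocked on network lookups. Schematically:

    # before: metadata fetched synchronously while scanning
    item.scrape()

    # after: the scan only records the intent; ScrapeThread does the
    # network work later, in the background
    Scrape.get_or_create(item.id)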

View file

@@ -90,6 +90,7 @@ def run():
    state.node = node.server.start()
    state.nodes = nodes.Nodes()
    state.downloads = downloads.Downloads()
    state.scraping = downloads.ScrapeThread()

    def add_users():
        with db.session():
            for p in user.models.User.query.filter_by(peered=True):
@@ -112,6 +113,8 @@ def run():
    state.tasks.join()
    if state.nodes:
        state.nodes.join()
    if state.scraping:
        state.scraping.join()
    http_server.stop()
    if PID and os.path.exists(PID):
        os.unlink(PID)
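ScrapeThread is both a daemon thread and explicitly joined at shutdown, and the two are complementary: `daemon = True` means a crashed shutdown path can never hang the interpreter on exit, while the overridden `join()` flips `_running` before waiting, so a clean shutdown lets the loop finish its current item and return. A stripped-down sketch of that join semantics:

    import time
    from threading import Thread

    class Worker(Thread):
        def __init__(self):
            self._running = True
            Thread.__init__(self)
            self.daemon = True          # safety net: never blocks interpreter exit
            self.start()

        def run(self):
            while self._running:
                time.sleep(0.1)         # stand-in for one unit of work

        def join(self):
            self._running = False       # signal the loop to exit...
            return Thread.join(self)    # ...then wait for run() to return

    w = Worker()
    w.join()   # returns within ~0.1s: the flag is set before we wait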

View file

@@ -203,6 +203,15 @@ def upgrade_db(old, new=None):
    if old <= '20140527-120-3cb9819':
        run_sql('CREATE INDEX ix_find_findvalue ON find (findvalue)')
    if old <= '20150307-272-557f4d3':
        run_sql('''CREATE TABLE scrape (
            item_id VARCHAR(32) NOT NULL,
            added DATETIME,
            PRIMARY KEY (item_id),
            FOREIGN KEY(item_id) REFERENCES item (id)
        )''')
        run_sql('CREATE INDEX idx_scrape_added ON scrape (added)')


def create_default_lists(user_id=None):
    with db.session():
        user_id = user_id or settings.USER_ID
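The hand-written migration mirrors the new Scrape model one-to-one. For comparison, a sketch of the same table declared with SQLAlchemy Core (illustrative only; the codebase uses the declarative model above):

    import sqlalchemy as sa

    metadata = sa.MetaData()

    scrape = sa.Table(
        'scrape', metadata,
        sa.Column('item_id', sa.String(32), sa.ForeignKey('item.id'), primary_key=True),
        sa.Column('added', sa.DateTime()),
    )
    sa.Index('idx_scrape_added', scrape.c.added)

    # metadata.create_all(engine) would emit CREATE TABLE / CREATE INDEX
    # statements equivalent to the run_sql() calls above.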

View file

@@ -333,6 +333,7 @@ oml.ui.folders = function() {
        },
        change: function(data) {
            Ox.print('got change event')
            Ox.Request.clearCache();
        },
        'peering.accept': function(data) {
            Ox.print('peering.accept reload list')
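This handler is the client half of the `trigger_event('change', {})` call in `scrape_queue` above: each successfully scraped item pushes a change event over the websocket, and the UI clears its request cache so the fresh metadata is refetched on the next render.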