scrape metadata in background
commit 2711fbb0c1
parent e55df181db

6 changed files with 89 additions and 3 deletions
@@ -10,6 +10,8 @@ import state
 import settings
 import update
 
+from websocket import trigger_event
+
 import logging
 logger = logging.getLogger('oml.downloads')
@@ -51,3 +53,40 @@ class Downloads(Thread):
     def join(self):
         self._running = False
         return Thread.join(self)
+
+class ScrapeThread(Thread):
+
+    def __init__(self):
+        self._running = True
+        Thread.__init__(self)
+        self.daemon = True
+        self.start()
+
+    def scrape_queue(self):
+        import item.models
+        scraped = False
+        for s in item.models.Scrape.query.filter(
+            item.models.Scrape.added!=None,
+        ).order_by(item.models.Scrape.added):
+            if not self._running:
+                return False
+            logger.debug('scrape %s', s.item)
+            try:
+                s.item.scrape()
+                s.remove()
+                trigger_event('change', {})
+                scraped = True
+            except:
+                logger.debug('scrape failed %s', s.item, exc_info=1)
+        return scraped
+
+    def run(self):
+        time.sleep(2)
+        with db.session():
+            while self._running:
+                if not self.scrape_queue():
+                    time.sleep(10)
+
+    def join(self):
+        self._running = False
+        return Thread.join(self)
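
Note on the loop above: s.remove() runs only after a successful s.item.scrape(), so an item whose scrape raises stays in the queue and is retried on a later pass. A standalone sketch of this drain-with-retry pattern, with hypothetical names and a plain list standing in for the Scrape table:

    import time
    from threading import Thread

    class DrainThread(Thread):

        def __init__(self, jobs):
            self._running = True
            self.jobs = jobs                # list of (id, callable); stand-in for Scrape
            Thread.__init__(self)
            self.daemon = True
            self.start()

        def drain(self):
            # Mirror scrape_queue: report whether any work was done.
            done = False
            for job_id, job in list(self.jobs):
                if not self._running:
                    return False
                try:
                    job()
                    self.jobs.remove((job_id, job))  # only removed on success
                    done = True
                except Exception:
                    print('job failed, will retry:', job_id)
            return done

        def run(self):
            while self._running:
                if not self.drain():
                    time.sleep(1)           # empty (or all-failing) pass: back off

        def join(self):
            self._running = False           # let run() exit at its next check
            return Thread.join(self)

    if __name__ == '__main__':
        t = DrainThread([('a', lambda: print('scraped a'))])
        time.sleep(0.1)
        t.join()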
@@ -324,6 +324,7 @@ class Item(db.Model):
         if m:
             m['primaryid'] = primaryid
         self.meta = m
+        self.modified = datetime.utcnow()
         self.update()
 
     def queue_download(self):
@@ -549,6 +550,38 @@ class File(db.Model):
         state.db.session.add(self)
         state.db.session.commit()
 
+class Scrape(db.Model):
+    __tablename__ = 'scrape'
+
+    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
+    item = sa.orm.relationship('Item', backref=sa.orm.backref('scraping', lazy='dynamic'))
+
+    added = sa.Column(sa.DateTime())
+
+    def __repr__(self):
+        return '='.join(map(str, [self.item_id, self.added]))
+
+    @classmethod
+    def get(cls, item_id):
+        return cls.query.filter_by(item_id=item_id).first()
+
+    @classmethod
+    def get_or_create(cls, item_id):
+        t = cls.get(item_id)
+        if not t:
+            t = cls(item_id=item_id)
+            t.added = datetime.utcnow()
+            t.save()
+        return t
+
+    def save(self):
+        state.db.session.add(self)
+        state.db.session.commit()
+
+    def remove(self):
+        state.db.session.delete(self)
+        state.db.session.commit()
+
 class Transfer(db.Model):
     __tablename__ = 'transfer'
 
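
get_or_create makes enqueueing idempotent: the primary key on item_id allows at most one pending row per item, and ordering by added gives ScrapeThread a FIFO queue. A self-contained sketch of those semantics against an in-memory database (assumes SQLAlchemy 1.4+; the real model lives alongside Item and File):

    from datetime import datetime

    import sqlalchemy as sa
    from sqlalchemy.orm import declarative_base, Session

    Base = declarative_base()

    class Scrape(Base):
        __tablename__ = 'scrape'
        item_id = sa.Column(sa.String(32), primary_key=True)
        added = sa.Column(sa.DateTime())

    engine = sa.create_engine('sqlite://')
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        for _ in range(2):                  # enqueue twice; the second call is a no-op
            if not session.get(Scrape, 'a1'):
                session.add(Scrape(item_id='a1', added=datetime.utcnow()))
                session.commit()
        # the worker drains oldest-first, like scrape_queue above
        for s in session.query(Scrape).filter(
            Scrape.added != None,
        ).order_by(Scrape.added):
            print(s.item_id, s.added)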
@@ -10,7 +10,7 @@ import time
 import ox
 
 from changelog import Changelog
-from item.models import File
+from item.models import File, Scrape
 from user.models import List
 from utils import remove_empty_folders
 from websocket import trigger_event
@@ -56,9 +56,10 @@ def add_file(id, f, prefix, from_=None):
     if item.meta.get('primaryid'):
         Changelog.record(user, 'edititem', item.id, dict([item.meta['primaryid']]))
     item.added = datetime.utcnow()
-    item.scrape()
     item.update_icons()
-    item.save()
+    item.modified = datetime.utcnow()
+    item.update()
+    Scrape.get_or_create(item.id)
     return file
 
 def run_scan():
@@ -90,6 +90,7 @@ def run():
     state.node = node.server.start()
     state.nodes = nodes.Nodes()
     state.downloads = downloads.Downloads()
+    state.scraping = downloads.ScrapeThread()
     def add_users():
         with db.session():
             for p in user.models.User.query.filter_by(peered=True):
@@ -112,6 +113,8 @@ def run():
     state.tasks.join()
     if state.nodes:
         state.nodes.join()
+    if state.scraping:
+        state.scraping.join()
     http_server.stop()
     if PID and os.path.exists(PID):
         os.unlink(PID)
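
The two server hunks give the scraper the same lifecycle as Downloads and Nodes: the constructor starts the thread itself, and shutdown stops it through the cooperative join() before the HTTP server goes down. A hypothetical sketch of that wiring, with a plain namespace standing in for the real state module:

    import time
    import types
    from threading import Thread

    class ScrapeThread(Thread):
        def __init__(self):
            self._running = True
            Thread.__init__(self)
            self.daemon = True
            self.start()                    # started from the constructor

        def run(self):
            while self._running:
                time.sleep(0.1)

        def join(self):
            self._running = False           # cleared before waiting, so join returns
            return Thread.join(self)

    state = types.SimpleNamespace(scraping=None)

    state.scraping = ScrapeThread()         # startup

    if state.scraping:                      # shutdown
        state.scraping.join()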
@@ -203,6 +203,15 @@ def upgrade_db(old, new=None):
     if old <= '20140527-120-3cb9819':
         run_sql('CREATE INDEX ix_find_findvalue ON find (findvalue)')
+
+    if old <= '20150307-272-557f4d3':
+        run_sql('''CREATE TABLE scrape (
+            item_id VARCHAR(32) NOT NULL,
+            added DATETIME,
+            PRIMARY KEY (item_id),
+            FOREIGN KEY(item_id) REFERENCES item (id)
+        )''')
+        run_sql('CREATE INDEX idx_scrape_added ON scrape (added)')
 
 def create_default_lists(user_id=None):
     with db.session():
         user_id = user_id or settings.USER_ID
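
The version-guarded check applies the new table only to databases created before this commit; newer databases presumably get it from the model metadata. The DDL itself can be sanity-checked in isolation with sqlite3 (a hypothetical snippet; run_sql applies the same statements to the real database):

    import sqlite3

    con = sqlite3.connect(':memory:')
    con.executescript('''
        -- stub parent table so the foreign key resolves
        CREATE TABLE item (id VARCHAR(32) PRIMARY KEY);
        CREATE TABLE scrape (
            item_id VARCHAR(32) NOT NULL,
            added DATETIME,
            PRIMARY KEY (item_id),
            FOREIGN KEY(item_id) REFERENCES item (id)
        );
        CREATE INDEX idx_scrape_added ON scrape (added);
    ''')
    print([r[0] for r in con.execute(
        "SELECT name FROM sqlite_master WHERE type IN ('table', 'index')")])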
@@ -333,6 +333,7 @@ oml.ui.folders = function() {
         },
         change: function(data) {
+            Ox.print('got change event')
             Ox.Request.clearCache();
         },
         'peering.accept': function(data) {
             Ox.print('peering.accept reload list')