scrape metadata in background

commit 2711fbb0c1
parent e55df181db

6 changed files with 89 additions and 3 deletions
@@ -10,6 +10,8 @@ import state
 import settings
 import update
 
+from websocket import trigger_event
+
 import logging
 logger = logging.getLogger('oml.downloads')
 
@@ -51,3 +53,40 @@ class Downloads(Thread):
     def join(self):
         self._running = False
         return Thread.join(self)
+
+class ScrapeThread(Thread):
+
+    def __init__(self):
+        self._running = True
+        Thread.__init__(self)
+        self.daemon = True
+        self.start()
+
+    def scrape_queue(self):
+        import item.models
+        scraped = False
+        for s in item.models.Scrape.query.filter(
+            item.models.Scrape.added!=None,
+        ).order_by(item.models.Scrape.added):
+            if not self._running:
+                return False
+            logger.debug('scrape %s', s.item)
+            try:
+                s.item.scrape()
+                s.remove()
+                trigger_event('change', {})
+                scraped = True
+            except:
+                logger.debug('scrape failed %s', s.item, exc_info=1)
+        return scraped
+
+    def run(self):
+        time.sleep(2)
+        with db.session():
+            while self._running:
+                if not self.scrape_queue():
+                    time.sleep(10)
+
+    def join(self):
+        self._running = False
+        return Thread.join(self)
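Note: ScrapeThread follows a stop-flag worker pattern — run() polls for queued work, backs off with a sleep when the queue is empty, and join() flips the flag before delegating to Thread.join() so the loop exits cleanly instead of blocking forever. A minimal standalone sketch of the same pattern (PollingWorker and process_queue are illustrative names, not part of this commit):

    import time
    from threading import Thread

    class PollingWorker(Thread):

        def __init__(self):
            self._running = True       # stop flag, checked between units of work
            Thread.__init__(self)
            self.daemon = True         # don't keep the process alive on exit
            self.start()

        def process_queue(self):
            # return True if any work was done, False if the queue was empty
            return False

        def run(self):
            while self._running:
                if not self.process_queue():
                    time.sleep(10)     # idle back-off, like scrape_queue above

        def join(self):
            self._running = False      # ask run() to finish its current loop
            return Thread.join(self)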
@@ -324,6 +324,7 @@ class Item(db.Model):
         if m:
             m['primaryid'] = primaryid
         self.meta = m
+        self.modified = datetime.utcnow()
         self.update()
 
     def queue_download(self):
@@ -549,6 +550,38 @@ class File(db.Model):
         state.db.session.add(self)
         state.db.session.commit()
 
+
+class Scrape(db.Model):
+
+    __tablename__ = 'scrape'
+
+    item_id = sa.Column(sa.String(32), sa.ForeignKey('item.id'), primary_key=True)
+    item = sa.orm.relationship('Item', backref=sa.orm.backref('scraping', lazy='dynamic'))
+
+    added = sa.Column(sa.DateTime())
+
+    def __repr__(self):
+        return '='.join(map(str, [self.item_id, self.added]))
+
+    @classmethod
+    def get(cls, item_id):
+        return cls.query.filter_by(item_id=item_id).first()
+
+    @classmethod
+    def get_or_create(cls, item_id):
+        t = cls.get(item_id)
+        if not t:
+            t = cls(item_id=item_id)
+            t.added = datetime.utcnow()
+            t.save()
+        return t
+
+    def save(self):
+        state.db.session.add(self)
+        state.db.session.commit()
+
+    def remove(self):
+        state.db.session.delete(self)
+        state.db.session.commit()
+
 class Transfer(db.Model):
 
     __tablename__ = 'transfer'
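Note: the Scrape table acts as a persistent work queue keyed by item id. get_or_create() enqueues an item idempotently (re-adding an already queued item is a no-op), ScrapeThread drains entries ordered by added, and remove() dequeues an entry once scraping succeeds. A hedged usage sketch, assuming an open db session; item_id here is a placeholder for a real 32-character item id:

    from item.models import Scrape

    Scrape.get_or_create(item_id)   # enqueue; safe to call repeatedly
    s = Scrape.get(item_id)
    if s:
        s.remove()                  # dequeue after a successful scrape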
@@ -10,7 +10,7 @@ import time
 import ox
 
 from changelog import Changelog
-from item.models import File
+from item.models import File, Scrape
 from user.models import List
 from utils import remove_empty_folders
 from websocket import trigger_event
@@ -56,9 +56,10 @@ def add_file(id, f, prefix, from_=None):
     if item.meta.get('primaryid'):
         Changelog.record(user, 'edititem', item.id, dict([item.meta['primaryid']]))
     item.added = datetime.utcnow()
-    item.scrape()
     item.update_icons()
-    item.save()
+    item.modified = datetime.utcnow()
+    item.update()
+    Scrape.get_or_create(item.id)
     return file
 
 def run_scan():
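Note: this hunk carries the behavioral change named in the commit message. Instead of calling item.scrape() inline during the scan (which blocked the import on network lookups), add_file() now only records a queue entry via Scrape.get_or_create(item.id); the metadata is fetched later by the background ScrapeThread.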
@@ -90,6 +90,7 @@ def run():
     state.node = node.server.start()
     state.nodes = nodes.Nodes()
     state.downloads = downloads.Downloads()
+    state.scraping = downloads.ScrapeThread()
     def add_users():
         with db.session():
             for p in user.models.User.query.filter_by(peered=True):
@@ -112,6 +113,8 @@ def run():
         state.tasks.join()
     if state.nodes:
         state.nodes.join()
+    if state.scraping:
+        state.scraping.join()
     http_server.stop()
     if PID and os.path.exists(PID):
         os.unlink(PID)
@@ -203,6 +203,15 @@ def upgrade_db(old, new=None):
     if old <= '20140527-120-3cb9819':
         run_sql('CREATE INDEX ix_find_findvalue ON find (findvalue)')
 
+    if old <= '20150307-272-557f4d3':
+        run_sql('''CREATE TABLE scrape (
+            item_id VARCHAR(32) NOT NULL,
+            added DATETIME,
+            PRIMARY KEY (item_id),
+            FOREIGN KEY(item_id) REFERENCES item (id)
+        )''')
+        run_sql('CREATE INDEX idx_scrape_added ON scrape (added)')
+
 def create_default_lists(user_id=None):
     with db.session():
         user_id = user_id or settings.USER_ID
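Note: upgrade_db() guards each migration with a plain string comparison against the release tag of the database being upgraded. Because the tags start with an ISO date (YYYYMMDD), lexicographic order matches chronological order — e.g. '20140527-120-3cb9819' <= '20150307-272-557f4d3' is True — so the scrape table is only created for databases older than this release. The DDL mirrors the Scrape model above: item_id as primary key and foreign key to item, plus an index on added to keep ScrapeThread's ORDER BY added queue scan cheap.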
@@ -333,6 +333,7 @@ oml.ui.folders = function() {
         },
         change: function(data) {
             Ox.print('got change event')
+            Ox.Request.clearCache();
         },
         'peering.accept': function(data) {
             Ox.print('peering.accept reload list')