openmedialibrary/oml/item/scan.py

388 lines
13 KiB
Python
Raw Normal View History

2014-05-04 17:26:43 +00:00
# -*- coding: utf-8 -*-
2014-09-02 22:32:44 +00:00
2014-05-04 17:26:43 +00:00
2014-08-12 08:16:57 +00:00
from datetime import datetime
2014-05-04 17:26:43 +00:00
import os
import shutil
2016-02-23 11:57:15 +00:00
import stat
import time
2019-01-14 11:40:40 +00:00
import unicodedata
2014-05-04 17:26:43 +00:00
import ox
2017-06-03 20:50:14 +00:00
from changelog import add_record
2016-02-13 11:40:37 +00:00
from item.models import File, Item
2014-08-12 08:16:57 +00:00
from user.models import List
2019-01-14 15:02:34 +00:00
from utils import remove_empty_folders, same_path
2014-05-04 17:26:43 +00:00
from websocket import trigger_event
2014-08-12 08:16:57 +00:00
import db
import media
import settings
2014-05-17 00:14:15 +00:00
import state
2014-05-18 23:24:04 +00:00
import logging
2015-11-29 14:56:38 +00:00
logger = logging.getLogger(__name__)
2014-05-04 17:26:43 +00:00
2015-03-14 07:35:15 +00:00
# Lower-case file extensions (without the leading dot) that collect_books()
# accepts as books; 'kepub' is mapped onto 'epub' there before this lookup.
extensions = ['epub', 'pdf', 'txt', 'cbr', 'cbz']
2014-05-16 14:30:16 +00:00
2016-02-13 11:40:37 +00:00
def remove_missing(books=None):
    """Reconcile File/Item records with the books actually present on disk.

    ``books`` is the list of book paths found in the library; when it is
    ``None`` the library folder returned by ``get_prefix()`` is scanned with
    ``collect_books()``.

    Steps, all performed inside one ``db.session()``:

    1. Delete File rows that have no item attached.
    2. Every remaining File whose ``fullpath()`` is not among ``books``
       (compared after NFC normalisation) is treated as removed: its Item
       gets ``missing_file()`` called, and File rows whose sha1 matches no
       Item are deleted.
    3. Call ``move()`` on every File and remove empty folders.

    Returns ``None``; returns early when ``state.shutdown`` is set while
    iterating over all files.

    NOTE(review): the listing this block was restored from had lost its
    indentation. Steps 1-3 are assumed to live inside the "library folder
    exists" check, and step 3 is assumed to run whether or not anything was
    removed -- confirm against the repository.
    """
    dirty = False
    logger.debug('remove missing')
    prefix = get_prefix()
    oml_prefix = os.path.dirname(prefix)
    if books is None:
        books = collect_books(prefix)
    with db.session():
        # Only touch the database while the library folder is reachable,
        # otherwise every record would look like a removed file.
        if os.path.exists(prefix) and os.path.exists(oml_prefix):
            logger.debug('scan for removed files')
            db_paths = []
            items = {}
            for f in File.query:
                if state.shutdown:
                    return
                path = f.fullpath()
                db_paths.append(path)
                if f.item:
                    items[path] = f.sha1
                else:
                    logger.debug('remove orphaned file %s', f)
                    state.db.session.delete(f)
                    dirty = True
            if dirty:
                state.db.session.commit()
                dirty = False
            # Compare in NFC so that paths differing only in Unicode
            # normalisation form are not reported as removed.
            nfc_books = {unicodedata.normalize('NFC', path) for path in books}
            removed = [
                path for path in db_paths
                if unicodedata.normalize('NFC', path) not in nfc_books
            ]
            # The folders are checked again because the scan above can take
            # a while.
            if removed and os.path.exists(prefix) and os.path.exists(oml_prefix):
                logger.debug('%s files removed', len(removed))
                # Paths of the orphaned File rows deleted above are in
                # db_paths but not in items; skip them instead of raising
                # KeyError.
                ids = [items[path] for path in removed if path in items]
                if ids:
                    orphaned = set(ids)
                    for i in Item.query.filter(Item.id.in_(ids)):
                        if state.shutdown:
                            continue
                        i.missing_file()
                        orphaned.remove(i.id)
                        dirty = True
                    if orphaned:
                        logger.debug('%s files orphaned', len(orphaned))
                        for f in File.query.filter(File.sha1.in_(orphaned)):
                            if state.shutdown:
                                continue
                            state.db.session.delete(f)
                            dirty = True
            if dirty:
                state.db.session.commit()
                state.cache.clear('group:')
            logger.debug('update filenames')
            for f in File.query:
                if state.shutdown:
                    return
                f.move()
            logger.debug('remove empty folders')
            remove_empty_folders(prefix, True)
            logger.debug('remove missing done')
2014-05-04 17:26:43 +00:00
2016-02-11 06:15:17 +00:00
def add_file(id, f, prefix, from_=None, commit=True):
    """Create the File/Item records for the book at ``f`` and return the File.

    ``id`` is the identifier handed to ``File.get_or_create``; ``f`` is the
    path of the book and ``prefix`` the leading part stripped from it to get
    the stored path. ``from_`` is forwarded to ``media.metadata`` and
    ``commit`` to ``item.update``.
    """
    owner = state.user()
    relative = f[len(prefix):]

    logger.debug('%s extract metadata %s', id, relative)
    metadata = media.metadata(f, from_)

    logger.debug('%s create file %s', id, relative)
    record = File.get_or_create(id, metadata, relative)
    book = record.item
    book.add_user(owner)
    book.added = datetime.utcnow()

    logger.debug('%s load metadata %s', id, relative)
    book.load_metadata()
    # Write the changelog entries: first the new item, then its metadata.
    add_record('additem', book.id, record.info)
    add_record('edititem', book.id, book.meta)

    logger.debug('%s extract icons %s', id, relative)
    book.update_icons()
    book.modified = datetime.utcnow()

    logger.debug('%s save item', id)
    book.update(commit=commit)
    logger.debug('%s added', id)
    return record
2016-02-14 08:32:07 +00:00
def get_prefix():
    """Return the ``Books`` folder below the configured ``libraryPath``.

    The user's home directory is expanded and the result always ends with
    ``os.sep``.
    """
    library = os.path.expanduser(settings.preferences['libraryPath'])
    books_folder = os.path.join(library, 'Books' + os.sep)
    if not books_folder.endswith(os.sep):
        books_folder += os.sep
    assert isinstance(books_folder, str)
    return books_folder
2016-02-14 08:55:54 +00:00
def collect_books(prefix, status=None):
    """Walk ``prefix`` and return the normalised paths of all book files.

    Files whose name starts with a dot are skipped; a file counts as a book
    when its lower-cased extension is listed in ``extensions`` (``kepub`` is
    treated as ``epub``).

    ``status``, if given, is called with the running number of books found
    after each hit; a falsy result aborts the walk and ``None`` is returned.
    An empty list is returned when ``state.shutdown`` is set during the walk.
    """
    logger.debug('collect books')
    found = []
    for folder, _subfolders, names in os.walk(prefix):
        for name in names:
            if state.shutdown:
                return []
            if name.startswith('.'):
                continue
            full_path = os.path.join(folder, name)
            suffix = full_path.rsplit('.', 1)[-1].lower()
            if suffix == 'kepub':
                suffix = 'epub'
            if suffix not in extensions:
                continue
            found.append(os.path.normpath(full_path))
            # len(found) is the running count reported to the callback.
            if status and not status(len(found)):
                return None
    logger.debug('found %s books', len(found))
    return found
2016-02-13 11:40:37 +00:00
def run_scan():
    """Synchronise the database with the books found in the library folder.

    The library is collected with ``collect_books()``, stale records are
    handled by ``remove_missing()``, and then every book on disk is looked
    up by its media id:

    * a copy of a book that is already registered under a different, still
      existing path is deleted from disk,
    * unknown books are added with ``add_file()``,
    * known books not yet linked to the local user are linked,
    * a ``missing`` flag and an empty ``added`` date are repaired.

    Afterwards a ``change`` event is triggered if anything was added, and
    library items that have no file on disk (and are not being transferred)
    get ``remove_file()`` called. Returns ``None``; stops early when
    ``state.shutdown`` is set.

    NOTE(review): the listing this block was restored from had lost its
    indentation; the extent of the ``db.session()`` block (the per-book loop
    plus the library item count) is an inference -- confirm against the
    repository.
    """
    logger.debug('run_scan')
    prefix = get_prefix()
    books = collect_books(prefix)
    remove_missing(books)
    ids = set()
    added = 0

    with db.session():
        user = state.user()
        for f in ox.sorted_strings(books):
            if state.shutdown:
                break
            if not os.path.exists(f):
                continue
            book_id = media.get_id(f)
            file = File.get(book_id)
            if file:
                f1 = file.fullpath()
                f2 = os.path.join(prefix, f)
                if not same_path(f1, f2) and os.path.exists(f1) and os.path.exists(f2):
                    # Same content already registered elsewhere: drop this copy.
                    logger.debug('file exists in multiple locations %s', book_id)
                    logger.debug('"%s" vs "%s"', f1, f2)
                    os.chmod(f2, stat.S_IWRITE)
                    os.unlink(f2)
                    continue
            if book_id in ids:
                logger.debug('file exists in multiple locations %s', book_id)
                if file:
                    f1 = file.fullpath()
                    f2 = os.path.join(prefix, f)
                    if not same_path(f1, f2) and os.path.exists(f1) and os.path.exists(f2):
                        logger.debug('"%s" vs "%s"', f1, f2)
                        os.chmod(f2, stat.S_IWRITE)
                        os.unlink(f2)
                        continue
            else:
                ids.add(book_id)
            if not file:
                file = add_file(book_id, f, prefix, f)
                added += 1
            elif user not in file.item.users:
                item = file.item
                item.add_user(user)
                logger.debug('add %s to local user', book_id)
                add_record('additem', item.id, file.info)
                add_record('edititem', item.id, item.meta)
                item.update()
                added += 1
            if file and file.item.info.get('missing'):
                logger.debug('missing file showed up again %s: %s', book_id, file.fullpath())
                del file.item.info['missing']
                file.item.save()
            if file and not file.item.added:
                # Prefer the last access time over "now" when it is known.
                file.item.added = datetime.utcnow()
                if file.item.accessed:
                    file.item.added = file.item.accessed
                file.item.save()
        library_count = len(user.library.items)
    if state.shutdown:
        return
    if added:
        trigger_event('change', {})
        logger.debug('imported %s unknown books', added)
    if len(ids) != len(books):
        logger.debug('number of books %s vs number of ids %s', len(books), len(ids))
    if library_count != len(books):
        library_ids = {str(i) for i in user.library.items}
        first = True
        for gone_id in library_ids - ids:
            i = Item.get(gone_id)
            if i is None:
                # No item behind this library entry, nothing to clean up.
                continue
            if i.info.get('mediastate') == 'transferring':
                continue
            path = i.get_path()
            if not path or not os.path.exists(path):
                if first:
                    # Log the size of the library, not the whole id set.
                    logger.debug('number of books %s vs number of items in library %s', len(books), len(library_ids))
                    first = False
                logger.debug('cleaning orphaned record %s %s', i, path)
                i.remove_file()
        missing = ids - library_ids
        if missing:
            logger.debug('%s items in library without a record', len(missing))
2014-05-04 17:26:43 +00:00
def change_path(old, new):
    """Switch the library from folder ``old`` to folder ``new``.

    ``Metadata/icons.db`` is moved over when it exists in ``old`` and not in
    ``new``, and the icon store is reopened from the new location. If
    ``new`` has no ``Books`` folder yet, the one from ``old`` is moved there
    and empty folders left in ``old`` are removed; otherwise the existing
    folder is rescanned. A ``change`` event is triggered at the end.

    NOTE(review): the listing this block was restored from had lost its
    indentation; ``run_scan()`` is assumed to run only when ``new`` already
    has a ``Books`` folder -- confirm against the repository.
    """
    icons_location = ('Metadata', 'icons.db')
    icons_source = os.path.join(old, *icons_location)
    icons_target = os.path.join(new, *icons_location)
    if os.path.exists(icons_source) and not os.path.exists(icons_target):
        ox.makedirs(os.path.dirname(icons_target))
        shutil.move(icons_source, icons_target)
        import item.icons
        item.icons.icons = item.icons.Icons(icons_target)

    books_target = os.path.join(new, 'Books')
    if os.path.exists(books_target):
        ox.makedirs(books_target)
        run_scan()
    else:
        ox.makedirs(new)
        shutil.move(os.path.join(old, 'Books'), books_target)
        remove_empty_folders(old)
    trigger_event('change', {})
2014-05-16 14:30:16 +00:00
def run_import(options=None):
    """Import the books below a folder into the library.

    ``options`` is an optional dict with these keys:

    * ``path`` -- folder to import from (default: the ``importPath``
      preference),
    * ``mode`` -- ``'move'`` moves files into the library and deletes source
      files that are already known; any other value copies,
    * ``list`` -- name of a list the imported items are added to.

    Progress is published through ``state.activity`` and ``activity``
    events. An invalid path is reported with a status code 404 event. The
    import stops when ``state.activity`` carries a ``cancel`` flag or
    ``state.shutdown`` is set. Returns ``None``.

    NOTE(review): the listing this block was restored from had lost its
    indentation; the cancel/shutdown/progress checks are assumed to run once
    per book after the per-book ``db.session()`` block -- confirm against
    the repository.
    """
    options = options or {}

    logger.debug('run_import')
    if state.activity.get('cancel'):
        logger.debug('import canceled')
        state.activity = {}
        return
    state.activity = {}
    prefs = settings.preferences
    prefix = os.path.expanduser(options.get('path', prefs['importPath']))
    if os.path.islink(prefix):
        prefix = os.path.realpath(prefix)
    # endswith() also copes with an empty path, which prefix[-1] does not.
    if not prefix.endswith(os.sep):
        prefix += os.sep
    prefix_books = get_prefix()
    prefix_imported = os.path.join(prefix_books, '.import' + os.sep)
    # The import folder must neither contain nor be inside the library.
    if prefix_books.startswith(prefix) or prefix.startswith(prefix_books):
        error = 'invalid path'
    elif not os.path.exists(prefix):
        error = 'path not found'
    elif not os.path.isdir(prefix):
        error = 'path must be a folder'
    else:
        error = None
    if error:
        trigger_event('activity', {
            'activity': 'import',
            'progress': [0, 0],
            'status': {'code': 404, 'text': error}
        })
        state.activity = {}
        return
    listname = options.get('list')
    listitems = []
    assert isinstance(prefix, str)

    def activity(count):
        # Progress callback for collect_books(): publish every 100th book and
        # return False to abort the walk once a cancel was requested.
        if count % 100 == 0:
            state.activity = {
                'activity': 'import',
                'path': prefix,
                'progress': [0, count],
            }
            trigger_event('activity', state.activity)
        if state.activity.get('cancel'):
            logger.debug('active import canceled')
            state.activity = {}
            return False
        return True

    books = collect_books(prefix, status=activity)
    if books is None:
        return

    state.activity = {
        'activity': 'import',
        'path': prefix,
        'progress': [0, len(books)],
    }
    trigger_event('activity', state.activity)
    position = 0
    added = 0
    last = 0
    for f in ox.sorted_strings(books):
        position += 1
        if not os.path.exists(f):
            continue
        with db.session():
            id = media.get_id(f)
            file = File.get(id)
            f_import = f
            if not file:
                # Swap only the leading import prefix; a later occurrence of
                # the same string inside the path must stay untouched.
                f = f.replace(prefix, prefix_imported, 1)
                ox.makedirs(os.path.dirname(f))
                if options.get('mode') == 'move':
                    try:
                        shutil.move(f_import, f)
                    except Exception:
                        # Moving failed: fall back to copying the file.
                        shutil.copy2(f_import, f)
                else:
                    shutil.copy2(f_import, f)
                file = add_file(id, f, prefix_books, f_import)
                file.move()
                added += 1
            elif options.get('mode') == 'move':
                # Already in the library: removing the source is best effort.
                try:
                    os.chmod(f_import, stat.S_IWRITE)
                    os.unlink(f_import)
                except Exception:
                    logger.debug('failed to remove %s', f_import)
            if listname:
                listitems.append(file.item.id)
        if state.activity.get('cancel'):
            state.activity = {}
            return
        if state.shutdown:
            return
        # Publish progress at most every five seconds.
        if time.time() - last > 5:
            last = time.time()
            state.activity = {
                'activity': 'import',
                'progress': [position, len(books)],
                'path': prefix,
                'added': added,
            }
            trigger_event('activity', state.activity)

    if listname and listitems:
        with db.session():
            target_list = List.get(settings.USER_ID, listname)
            if target_list:
                target_list.add_items(listitems)
    trigger_event('activity', {
        'activity': 'import',
        'progress': [position, len(books)],
        'path': prefix,
        'status': {'code': 200, 'text': ''},
        'added': added,
    })
    state.activity = {}
    remove_empty_folders(prefix_books)
    if options.get('mode') == 'move':
        remove_empty_folders(prefix, True)
def import_folder():
    """Import everything from the ``importPath`` folder, then reschedule.

    Nothing is imported while an import activity is already recorded in
    ``state.activity``. The folder is created when it is missing but its
    parent exists, and books are imported in ``move`` mode. If ``state.main``
    is set, a ``scanimport`` task is queued again after ten minutes.

    NOTE(review): the listing this block was restored from had lost its
    indentation; the rescheduling is assumed to happen on every call, also
    when the import itself was skipped -- confirm against the repository.
    """
    current = state.activity
    import_running = current and current.get('activity') == 'import'
    if not import_running:
        folder = os.path.expanduser(settings.preferences['importPath'])
        folder = os.path.normpath(folder)
        parent = os.path.dirname(folder)
        if os.path.exists(parent) and not os.path.exists(folder):
            os.makedirs(folder)
        logger.debug('scan importPath %s', folder)
        if os.path.exists(folder):
            run_import({'path': folder, 'mode': 'move'})
            remove_empty_folders(folder, True)
    if state.main:
        def queue_next_scan():
            return state.tasks.queue('scanimport')

        state.main.call_later(10 * 60, queue_next_scan)