diff --git a/ctl b/ctl index da1dcd63..1b93c214 100755 --- a/ctl +++ b/ctl @@ -9,33 +9,37 @@ fi if [ "$action" = "init" ]; then cd "`dirname "$0"`" BASE=`pwd` - python3 -m venv --system-site-packages . - + SUDO="" + PANDORA_USER=`ls -l update.py | cut -f3 -d" "` + if [ `whoami` != $PANDORA_USER ]; then + SUDO="sudo -H -u $PANDORA_USER" + fi + $SUDO python3 -m venv --system-site-packages . branch=`cat .git/HEAD | sed 's@/@\n@g' | tail -n1` # Work around broken venv module in Ubuntu 16.04 / Debian 9 if [ ! -e bin/pip ]; then - bin/python3 -m pip install -U --ignore-installed "pip<9" + $SUDO bin/python3 -m pip install -U --ignore-installed "pip<9" fi if [ ! -d static/oxjs ]; then - git clone --depth 1 -b $branch https://git.0x2620.org/oxjs.git static/oxjs + $SUDO git clone --depth 1 -b $branch https://git.0x2620.org/oxjs.git static/oxjs fi - mkdir -p src + $SUDO mkdir -p src if [ ! -d src/oxtimelines ]; then - git clone --depth 1 -b $branch https://git.0x2620.org/oxtimelines.git src/oxtimelines + $SUDO git clone --depth 1 -b $branch https://git.0x2620.org/oxtimelines.git src/oxtimelines fi for package in oxtimelines python-ox; do cd ${BASE} if [ ! -d src/${package} ]; then - git clone --depth 1 -b $branch https://git.0x2620.org/${package}.git src/${package} + $SUDO git clone --depth 1 -b $branch https://git.0x2620.org/${package}.git src/${package} fi cd ${BASE}/src/${package} - ${BASE}/bin/python setup.py develop + $SUDO ${BASE}/bin/python setup.py develop done cd ${BASE} - ./bin/pip install -r requirements.txt + $SUDO ./bin/pip install -r requirements.txt if [ ! 
-e pandora/gunicorn_config.py ]; then - cp pandora/gunicorn_config.py.in pandora/gunicorn_config.py + $SUDO cp pandora/gunicorn_config.py.in pandora/gunicorn_config.py fi exit 0 fi diff --git a/etc/nginx/pandora b/etc/nginx/pandora index 24125480..419120aa 100644 --- a/etc/nginx/pandora +++ b/etc/nginx/pandora @@ -42,7 +42,7 @@ server { proxy_set_header Proxy ""; proxy_redirect off; proxy_buffering off; - proxy_read_timeout 999999999; + proxy_read_timeout 99999; proxy_pass http://127.0.0.1:2622/; } @@ -55,11 +55,11 @@ server { proxy_buffering off; proxy_read_timeout 90; #should be in sync with gunicorn timeout proxy_connect_timeout 90; #should be in sync with gunicorn timeout + client_max_body_size 32m; if (!-f $request_filename) { proxy_pass http://127.0.0.1:2620; break; } - client_max_body_size 32m; } error_page 400 /; diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index 470af358..0326e37d 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -378,6 +378,8 @@ class Annotation(models.Model): streams = self.item.streams() if streams: j['videoRatio'] = streams[0].aspect_ratio + if 'clip' in keys: + j['clip'] = self.clip.public_id for key in keys: if key not in j: if key in self._clip_keys: diff --git a/pandora/app/config.py b/pandora/app/config.py index 3a9dd36d..36dfdfe3 100644 --- a/pandora/app/config.py +++ b/pandora/app/config.py @@ -1,12 +1,13 @@ # -*- coding: utf-8 -*- from __future__ import division, print_function, absolute_import +import codecs import os -import sys +import re import shutil import subprocess +import sys import time -import codecs from os.path import dirname, exists, join from glob import glob @@ -71,7 +72,7 @@ def load_config(init=False): if getattr(settings, 'SITEURL', False): config['site']['url'] = settings.SITEURL settings.URL = config['site']['url'] - settings.EMAIL_SUBJECT_PREFIX = '[%s]'%settings.SITENAME + settings.EMAIL_SUBJECT_PREFIX = '[%s]' % settings.SITENAME 
settings.DEFAULT_FROM_EMAIL = config['site']['email']['system'] settings.SERVER_EMAIL = config['site']['email']['system'] config['site']['videoprefix'] = settings.VIDEO_PREFIX @@ -79,18 +80,32 @@ def load_config(init=False): config['site']['googleapikey'] = getattr(settings, 'GOOGLE_API_KEY') config['site']['version'] = get_version() config['site']['dontValidateUser'] = not settings.AUTH_CHECK_USERNAME - if not 'folderdepth' in config['site']: + if 'folderdepth' not in config['site']: config['site']['folderdepth'] = settings.USE_IMDB and 4 or 3 - if 'sendReferrer' in config and not 'sendReferrer' in config['site']: + if 'sendReferrer' in config and 'sendReferrer' not in config['site']: config['site']['sendReferrer'] = config.pop('sendReferrer') # enable default filters if needed default_filters = [f['id'] for f in config['user']['ui']['filters']] + available_filters = [key['id'] for key in config['itemKeys'] if key.get('filter')] + unknown_ids = set(default_filters) - set(available_filters) + if unknown_ids: + sys.stderr.write('WARNING: unknown item keys in default filters: %s.\n' % list(unknown_ids)) + unused_filters = [key for key in available_filters if key not in default_filters] + if len(unused_filters) < len(unknown_ids): + sys.stderr.write('you need at least 5 item filters') + else: + auto_filters = unused_filters[:len(unknown_ids)] + default_filters += auto_filters + for key in auto_filters: + config['user']['ui']['filters'].append({ + "id": key, "sort": [{"key": "items", "operator": "-"}] + }) + sys.stderr.write(' using the following document filters instead: %s.\n' % auto_filters) for key in config['itemKeys']: if key['id'] in default_filters and not key.get('filter'): key['filter'] = True sys.stderr.write('enabled filter for "%s" since its used as default filter.\n' % (key['id'])) - config['keys'] = {} for key in config['itemKeys']: config['keys'][key['id']] = key @@ -148,6 +163,17 @@ def load_config(init=False): if level not in config[key]: config[key] 
= default.get(key, 0) + config['user']['ui']['documentsSort'] = [ + s for s in config['user']['ui']['documentsSort'] + if get_by_id(config['documentKeys'], s['key']) + ] + if not config['user']['ui']['documentsSort']: + sort_key = [k for k in config['documentKeys'] if k['id'] != '*'][0] + config['user']['ui']['documentsSort'] = [{ + "key": sort_key['id'], + "operator": sort_key.get('operator', '+') + }] + for key in ('language', 'importMetadata'): if key not in config: sys.stderr.write("adding default value:\n\t\"%s\": %s,\n\n" % (key, json.dumps(default[key]))) @@ -161,6 +187,32 @@ def load_config(init=False): if 'downloadFormat' not in config['video']: config['video']['downloadFormat'] = default['video']['downloadFormat'] + + # enable default document filters if needed + default_filters = [f['id'] for f in config['user']['ui']['documentFilters']] + available_filters = [key['id'] for key in config['documentKeys'] if key.get('filter')] + unknown_ids = set(default_filters) - set(available_filters) + if unknown_ids: + sys.stderr.write('WARNING: unknown document keys in default filters: %s.\n' % list(unknown_ids)) + unused_filters = [key for key in available_filters if key not in default_filters] + if len(unused_filters) < len(unknown_ids): + sys.stderr.write('you need at least 5 item filters') + else: + auto_filters = unused_filters[:len(unknown_ids)] + default_filters += auto_filters + for key in auto_filters: + config['user']['ui']['documentFilters'].append({ + "id": key, "sort": [{"key": "items", "operator": "-"}] + }) + sys.stderr.write(' using the following document filters instead: %s.\n' % auto_filters) + + for key in config['documentKeys']: + if key['id'] in default_filters and not key.get('filter'): + key['filter'] = True + sys.stderr.write('enabled filter for document key "%s" since its used as default filter.\n' % (key['id'])) + + + old_formats = getattr(settings, 'CONFIG', {}).get('video', {}).get('formats', []) formats = config.get('video', 
{}).get('formats') if set(old_formats) != set(formats): @@ -348,11 +400,17 @@ def update_geoip(force=False): path = os.path.join(settings.GEOIP_PATH, 'GeoLite2-City.mmdb') if not os.path.exists(path) or force: url = 'http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz' - print('download', url) - ox.net.save_url(url, "%s.gz"%path) - if os.path.exists(path): - os.unlink(path) - os.system('gunzip "%s.gz"' % path) + index = ox.net.read_url('https://db-ip.com/db/download/ip-to-country-lite').decode() + match = re.compile('href=[\'"](http.*.mmdb.gz)').findall(index) + if match: + url = match[0] + print('download', url) + ox.net.save_url(url, "%s.gz" % path) + if os.path.exists(path): + os.unlink(path) + os.system('gunzip "%s.gz"' % path) + else: + print('failed to download dbip-country-lite-2020-03.mmdb.gz') def init(): if not settings.RELOADER_RUNNING: diff --git a/pandora/archive/external.py b/pandora/archive/external.py index 4555f53f..69179e4f 100644 --- a/pandora/archive/external.py +++ b/pandora/archive/external.py @@ -97,6 +97,17 @@ def download(item_id, url): tmp = tmp.decode('utf-8') os.chdir(tmp) cmd = ['youtube-dl', '-q', media['url']] + if settings.CONFIG['video'].get('reuseUpload', False): + max_resolution = max(settings.CONFIG['video']['resolutions']) + format = settings.CONFIG['video']['formats'][0] + if format == 'mp4': + cmd += [ + '-f', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio', + '--merge-output-format', 'mp4' + ] + elif format == 'webm': + cmd += ['--merge-output-format', 'webm'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) diff --git a/pandora/archive/extract.py b/pandora/archive/extract.py index f038d474..90422d47 100644 --- a/pandora/archive/extract.py +++ b/pandora/archive/extract.py @@ -191,7 +191,7 @@ def stream(video, target, profile, info, audio_track=0, flags={}): w = info['video'][0]['width'] - flags['crop']['left'] - flags['crop']['right'] x = 
flags['crop']['left'] y = flags['crop']['top'] - crop = ',crop=w=%s:h=%s:x=%s:y=%s' (w, h, x, y) + crop = ',crop=w=%s:h=%s:x=%s:y=%s' % (w, h, x, y) aspect = dar * (info['video'][0]['width'] / info['video'][0]['height']) * (w/h) if abs(w/h - aspect) < 0.02: aspect = '%s:%s' % (w, h) @@ -216,6 +216,7 @@ def stream(video, target, profile, info, audio_track=0, flags={}): '-vb', '%dk' % bitrate, '-aspect', aspect, # '-vf', 'yadif', + '-max_muxing_queue_size', '512', '-vf', 'hqdn3d%s,scale=%s:%s' % (crop, width, height), '-g', '%d' % int(fps*5), ] @@ -238,6 +239,7 @@ def stream(video, target, profile, info, audio_track=0, flags={}): '-preset:v', 'medium', '-profile:v', 'high', '-level', '4.0', + '-pix_fmt', 'yuv420p', ] video_settings += ['-map', '0:%s,0:0' % info['video'][0]['id']] audio_only = False @@ -609,11 +611,14 @@ def timeline_strip(item, cuts, info, prefix): timeline_image.save(timeline_file) -def chop(video, start, end, subtitles=None): +def chop(video, start, end, subtitles=None, dest=None, encode=False): t = end - start - tmp = tempfile.mkdtemp() ext = os.path.splitext(video)[1] - choped_video = '%s/tmp%s' % (tmp, ext) + if dest is None: + tmp = tempfile.mkdtemp() + choped_video = '%s/tmp%s' % (tmp, ext) + else: + choped_video = dest if subtitles and ext == '.mp4': subtitles_f = choped_video + '.full.srt' with open(subtitles_f, 'wb') as fd: @@ -625,25 +630,167 @@ def chop(video, start, end, subtitles=None): if subtitles_f: os.unlink(subtitles_f) else: + if encode: + bpp = 0.17 + if ext == '.mp4': + vcodec = [ + '-c:v', 'libx264', + '-preset:v', 'medium', + '-profile:v', 'high', + '-level', '4.0', + ] + acodec = [ + '-c:a', 'aac', + '-aq', '6', + '-strict', '-2' + ] + else: + vcodec = [ + '-c:v', 'libvpx', + '-deadline', 'good', + '-cpu-used', '0', + '-lag-in-frames', '25', + '-auto-alt-ref', '1', + ] + acodec = [ + '-c:a', 'libvorbis', + '-aq', '6', + ] + info = ox.avinfo(video) + if not info['audio']: + acodec = [] + if not info['video']: + vcodec = [] + 
else: + height = info['video'][0]['height'] + width = info['video'][0]['width'] + fps = 30 + bitrate = height*width*fps*bpp/1000 + vcodec += ['-vb', '%dk' % bitrate] + encoding = vcodec + acodec + else: + encoding = [ + '-c:v', 'copy', + '-c:a', 'copy', + ] cmd = [ settings.FFMPEG, '-y', '-i', video, '-ss', '%.3f' % start, '-t', '%.3f' % t, - '-c:v', 'copy', - '-c:a', 'copy', + ] + encoding + [ '-f', ext[1:], choped_video ] + print(cmd) p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=open('/dev/null', 'w'), stderr=open('/dev/null', 'w'), close_fds=True) p.wait() - f = open(choped_video, 'rb') - os.unlink(choped_video) if subtitles_f and os.path.exists(subtitles_f): os.unlink(subtitles_f) - os.rmdir(tmp) - return f + if dest is None: + f = open(choped_video, 'rb') + os.unlink(choped_video) + os.rmdir(tmp) + return f + else: + return None + +def has_faststart(path): + cmd = [settings.FFPROBE, '-v', 'trace', '-i', path] + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + close_fds=True) + stdout, stderr = p.communicate() + moov = "type:'moov'" + mdat = "type:'mdat'" + blocks = [b for b in stdout.decode().split('\n') if moov in b or mdat in b] + if blocks and moov in blocks[0]: + return True + return False + +def remux_stream(src, dst): + info = ox.avinfo(src) + if info.get('audio'): + audio = ['-c:a', 'copy'] + else: + audio = [] + if info.get('video'): + video = ['-c:v', 'copy'] + else: + video = [] + cmd = [ + settings.FFMPEG, + '-nostats', '-loglevel', 'error', + '-map_metadata', '-1', '-sn', + '-i', src, + ] + video + [ + ] + audio + [ + '-movflags', '+faststart', + dst + ] + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, + stdout=open('/dev/null', 'w'), + stderr=open('/dev/null', 'w'), + close_fds=True) + p.wait() + return True, None + + +def ffprobe(path, *args): + cmd = [settings.FFPROBE, '-loglevel', 'error', '-print_format', 'json', '-i', path] + list(args) + p = subprocess.Popen(cmd, 
stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) + stdout, stderr = p.communicate() + return json.loads(stdout.decode()) + + +def get_chapters(path): + info = ffprobe(path, '-show_chapters') + chapters = [] + n = 0 + for chapter in info.get('chapters', []): + n += 1 + chapters.append({ + 'in': chapter['start_time'], + 'out': chapter['end_time'], + 'value': chapter.get('tags', {}).get('title', 'Chapter %s' % n) + }) + return chapters + +def get_text_subtitles(path): + subtitles = [] + for stream in ffprobe(path, '-show_streams')['streams']: + if stream.get('codec_name') in ('subrip', 'ass', 'text'): + subtitles.append({ + 'index': stream['index'], + 'language': stream['tags']['language'], + }) + return subtitles + +def has_img_subtitles(path): + subtitles = [] + for stream in ffprobe(path, '-show_streams')['streams']: + if stream.get('codec_type') == 'subtitle' and stream.get('codec_name') in ('dvbsub', 'pgssub'): + subtitles.append({ + 'index': stream['index'], + 'language': stream['tags']['language'], + }) + return subtitles + +def extract_subtitles(path, language=None): + extra = [] + if language: + tracks = get_text_subtitles(path) + track = [t for t in tracks if t['language'] == language] + if track: + extra = ['-map', '0:%s' % track[0]['index']] + else: + raise Exception("unknown language: %s" % language) + cmd = ['ffmpeg', '-loglevel', 'error', '-i', path] + extra + ['-f', 'srt', '-'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) + stdout, stderr = p.communicate() + return ox.srt.loads(stdout.decode()) diff --git a/pandora/archive/models.py b/pandora/archive/models.py index 306590b2..75d25de7 100644 --- a/pandora/archive/models.py +++ b/pandora/archive/models.py @@ -376,6 +376,50 @@ class File(models.Model): return save_chunk(stream, stream.media, chunk, offset, name, done_cb) return False, 0 + def extract_text_data(self): + if self.data: + for sub in extract.get_text_subtitles(self.data.path): + 
srt = extract.extract_subtitles(self.data.path, sub['language']) + # fixme add subtitles, possibly with language! + chapters = extract.get_chapters(self.data.path) + if chapters: + # fixme add chapters as notes + pass + + def get_codec(self, type): + track = self.info.get(type) + if track: + return track[0].get('codec') + + MP4_VCODECS = ['h264'] + MP4_ACODECS = ['aac', None] + WEBM_VCODECS = ['vp8', 'vp9'] + WEBM_ACODECS = ['vorbis', 'opus', None] + + def can_remux(self): + config = settings.CONFIG['video'] + height = self.info['video'][0]['height'] if self.info.get('video') else None + max_resolution = max(config['resolutions']) + if height <= max_resolution and self.extension in ('mov', 'mkv', 'mp4', 'm4v'): + vcodec = self.get_codec('video') + acodec = self.get_codec('audio') + if vcodec in self.MP4_VCODECS and acodec in self.MP4_ACODECS: + return True + return False + + def can_stream(self): + config = settings.CONFIG['video'] + height = self.info['video'][0]['height'] if self.info.get('video') else None + max_resolution = max(config['resolutions']) + if height <= max_resolution and config['formats'][0] == self.extension: + vcodec = self.get_codec('video') + acodec = self.get_codec('audio') + if self.extension in ['mp4', 'm4v'] and vcodec in self.MP4_VCODECS and acodec in self.MP4_ACODECS: + return extract.has_faststart(self.data.path) + elif self.extension == 'webm' and vcodec in self.WEBM_VCODECS and acodec in self.WEBM_ACODECS: + return True + return False + def stream_resolution(self): config = settings.CONFIG['video'] height = self.info['video'][0]['height'] if self.info.get('video') else None @@ -523,7 +567,7 @@ class File(models.Model): n += 1 profile = '%sp.%s' % (resolution, config['formats'][0]) target = os.path.join(tmp, language + '_' + profile) - ok, error = extract.stream(media, target, profile, info, audio_track=i+1, flags=self.flags) + ok, error = extract.stream(media, target, profile, info, audio_track=i+1, flags={}) if ok: tinfo = 
ox.avinfo(target) del tinfo['path'] @@ -747,18 +791,31 @@ class Stream(models.Model): derivative.encode() def encode(self): + reuse = settings.CONFIG['video'].get('reuseUpload', False) media = self.source.media.path if self.source else self.file.data.path - if not self.media: self.media.name = self.path(self.name()) target = self.media.path info = ox.avinfo(media) - ok, error = extract.stream(media, target, self.name(), info, flags=self.flags) + + done = False + if reuse and not self.source: + if self.file.can_stream(): + ok, error = True, None + ox.makedirs(os.path.dirname(target)) + shutil.move(self.file.data.path, target) + self.file.data.name = '' + self.file.save() + elif self.file.can_remux(): + ok, error = extract.remux_stream(media, target) + done = True + if not done: + ok, error = extract.stream(media, target, self.name(), info, flags=self.flags) + # file could have been moved while encoding # get current version from db and update - _self = Stream.objects.get(id=self.id) - _self.update_status(ok, error) - return _self + self.refresh_from_db() + self.update_status(ok, error) def get_index(self): index = 1 diff --git a/pandora/archive/tasks.py b/pandora/archive/tasks.py index e9cc8712..92446d58 100644 --- a/pandora/archive/tasks.py +++ b/pandora/archive/tasks.py @@ -128,7 +128,7 @@ def process_stream(fileId): stream = streams[0] stream.make_timeline() stream.extract_derivatives() - file = models.File.objects.get(id=fileId) + file.refresh_from_db() file.encoding = False file.save() file.item.update_selected() @@ -158,13 +158,12 @@ def extract_stream(fileId): if created: file.extract_frames() stream.media.name = stream.path(stream.name()) - stream = stream.encode() + stream.encode() if stream.available: stream.make_timeline() stream.extract_derivatives() file.extract_tracks() - # get current version from db - file = models.File.objects.get(id=fileId) + file.refresh_from_db() if not file.item.rendered \ and not 
file.item.files.exclude(id=fileId).filter(Q(queued=True) | Q(encoding=True)).count(): file.item.update_timeline() @@ -209,7 +208,8 @@ def download_media(item_id, url): @task(queue='default') def move_media(data, user): from changelog.models import add_changelog - from item.models import get_item, Item + from item.models import get_item, Item, ItemSort + from item.utils import is_imdb_id from annotation.models import Annotation user = models.User.objects.get(username=user) @@ -218,7 +218,7 @@ def move_media(data, user): i = Item.objects.get(public_id=data['item']) else: data['public_id'] = data.pop('item').strip() - if len(data['public_id']) != 7: + if not is_imdb_id(data['public_id']): del data['public_id'] if 'director' in data and isinstance(data['director'], string_types): if data['director'] == '': @@ -228,6 +228,11 @@ def move_media(data, user): i = get_item(data, user=user) else: i = get_item({'imdbId': data['public_id']}, user=user) + try: + i.sort + except ItemSort.DoesNotExist: + i.update_sort() + changed = [i.public_id] old_item = None for f in models.File.objects.filter(oshash__in=data['ids']): diff --git a/pandora/archive/views.py b/pandora/archive/views.py index 62731fc8..0b8abddd 100644 --- a/pandora/archive/views.py +++ b/pandora/archive/views.py @@ -368,7 +368,7 @@ def direct_upload(request): return render_to_json_response(response) -@login_required_json +#@login_required_json def getTaskStatus(request, data): ''' Gets the status for a given task diff --git a/pandora/clip/models.py b/pandora/clip/models.py index f4f55651..37641c15 100644 --- a/pandora/clip/models.py +++ b/pandora/clip/models.py @@ -121,7 +121,9 @@ class MetaClip(object): annotations = annotations.filter(q) entity_cache = {} j['annotations'] = [ - a.json(keys=['value', 'id', 'layer'], entity_cache=entity_cache) for a in annotations + a.json(keys=['value', 'id', 'layer'], entity_cache=entity_cache) + for a in annotations + if a.value ] if 'layers' in keys: j['layers'] = 
self.get_layers() diff --git a/pandora/clip/views.py b/pandora/clip/views.py index df2c9103..7c2fcd23 100644 --- a/pandora/clip/views.py +++ b/pandora/clip/views.py @@ -78,7 +78,7 @@ def findClips(request, data): takes { query: object, // find clips, query object, see `find` itemsQuery: object, // limit to matching items, query object, see `find` - keys: [string], // list of properties to return + keys: [string], // list of properties to return, include 'annotations' to get all annotations for a clip positions: [int], // list of positions range: [int, int], // range of results to return sort: [object] // list of sort objects, see `find` @@ -102,8 +102,6 @@ def findClips(request, data): subtitles = utils.get_by_key(layers, 'isSubtitles', True) layer_ids = [k['id'] for k in layers] keys = list(filter(lambda k: k not in layer_ids + ['annotations'], data['keys'])) - if list(filter(lambda k: k not in models.Clip.clip_keys, keys)): - qs = qs.select_related('item__sort') clips = {} response['data']['items'] = clip_jsons = [] diff --git a/pandora/config.0xdb.jsonc b/pandora/config.0xdb.jsonc index 7b300891..8440de9a 100644 --- a/pandora/config.0xdb.jsonc +++ b/pandora/config.0xdb.jsonc @@ -38,6 +38,7 @@ "canAddItems": {"staff": true, "admin": true}, "canAddDocuments": {"staff": true, "admin": true}, "canDownloadVideo": {"guest": -1, "member": -1, "friend": -1, "staff": -1, "admin": -1}, + "canDownloadSource": {"guest": -1, "member": -1, "friend": -1, "staff": -1, "admin": -1}, "canEditAnnotations": {"staff": true, "admin": true}, "canEditEntities": {"staff": true, "admin": true}, "canEditDocuments": {"staff": true, "admin": true}, @@ -97,7 +98,7 @@ text of clips (in grid view, below the icon). Excluding a layer from this list means it will not be included in find annotations. 
*/ - "clipLayers": ["subtitles"], + "clipLayers": ["subtitles", "keywords"], "documentKeys": [ { "id": "*", @@ -709,6 +710,14 @@ "advanced": true, "find": true }, + { + "id": "tags", + "title": "Tags", + "type": "layer", + "autocomplete": true, + "filter": true, + "find": true + }, { "id": "subtitles", "title": "Subtitles", @@ -997,6 +1006,15 @@ tooltip that appears on mouseover. */ "layers": [ + { + "id": "tags", + "title": "Tags", + "canAddAnnotations": {"member": true, "staff": true, "admin": true}, + "item": "Tag", + "autocomplete": true, + "overlap": true, + "type": "string" + }, { "id": "privatenotes", "title": "Private Notes", diff --git a/pandora/config.indiancinema.jsonc b/pandora/config.indiancinema.jsonc index be2a79db..786a53d4 100644 --- a/pandora/config.indiancinema.jsonc +++ b/pandora/config.indiancinema.jsonc @@ -39,6 +39,7 @@ "canAddItems": {"researcher": true, "staff": true, "admin": true}, "canAddDocuments": {"researcher": true, "staff": true, "admin": true}, "canDownloadVideo": {"guest": -1, "member": -1, "researcher": 3, "staff": 3, "admin": 3}, + "canDownloadSource": {"guest": -1, "member": -1, "researcher": -1, "staff": -1, "admin": -1}, "canEditAnnotations": {"staff": true, "admin": true}, "canEditDocuments": {"researcher": true, "staff": true, "admin": true}, "canEditEntities": {"staff": true, "admin": true}, @@ -75,7 +76,7 @@ "canSeeExtraItemViews": {"researcher": true, "staff": true, "admin": true}, "canSeeMedia": {"researcher": true, "staff": true, "admin": true}, "canSeeDocument": {"guest": 1, "member": 1, "researcher": 2, "staff": 3, "admin": 3}, - "canSeeItem": {"guest": 3, "member": 3, "researcher": 3, "staff": 3, "admin": 3}, + "canSeeItem": {"guest": 2, "member": 2, "researcher": 2, "staff": 3, "admin": 3}, "canSeeSize": {"researcher": true, "staff": true, "admin": true}, "canSeeSoftwareVersion": {"researcher": true, "staff": true, "admin": true}, "canSendMail": {"staff": true, "admin": true} @@ -1695,7 +1696,7 @@ 
"annotationsCalendarSize": 128, "annotationsHighlight": "none", "annotationsMapSize": 128, - "annotationsRange": "all", + "annotationsRange": "selection", "annotationsSize": 256, "annotationsSort": "position", "calendarFind": "", @@ -1852,7 +1853,7 @@ "videoSize": "small", "videoSubtitles": true, "videoSubtitlesOffset": 0, - "videoTimeline": "slitscan", + "videoTimeline": "keyframes", "videoView": "player", "videoVolume": 1 }, diff --git a/pandora/config.padma.jsonc b/pandora/config.padma.jsonc index e797cd15..c78740a9 100644 --- a/pandora/config.padma.jsonc +++ b/pandora/config.padma.jsonc @@ -38,6 +38,7 @@ "canAddItems": {"member": true, "staff": true, "admin": true}, "canAddDocuments": {"member": true, "staff": true, "admin": true}, "canDownloadVideo": {"guest": 0, "member": 0, "staff": 4, "admin": 4}, + "canDownloadSource": {"guest": -1, "member": -1, "staff": 4, "admin": 4}, "canEditAnnotations": {"staff": true, "admin": true}, "canEditEntities": {"staff": true, "admin": true}, "canEditDocuments": {"staff": true, "admin": true}, @@ -1160,7 +1161,7 @@ "annotationsHighlight": "none", "annotationsHighlight": false, "annotationsMapSize": 128, - "annotationsRange": "position", + "annotationsRange": "selection", "annotationsSize": 256, "annotationsSort": "position", "calendarFind": "", @@ -1310,7 +1311,7 @@ "videoSize": "large", "videoSubtitles": false, "videoSubtitlesOffset": 0, - "videoTimeline": "antialias", + "videoTimeline": "keyframes", "videoView": "player", "videoVolume": 1 }, diff --git a/pandora/config.pandora.jsonc b/pandora/config.pandora.jsonc index 9658b265..bd76faa2 100644 --- a/pandora/config.pandora.jsonc +++ b/pandora/config.pandora.jsonc @@ -45,6 +45,7 @@ examples (config.SITENAME.jsonc) that are part of this pan.do/ra distribution. 
"canAddItems": {"member": true, "staff": true, "admin": true}, "canAddDocuments": {"member": true, "staff": true, "admin": true}, "canDownloadVideo": {"guest": 1, "member": 1, "staff": 4, "admin": 4}, + "canDownloadSource": {"member": 1, "staff": 4, "admin": 4}, "canEditAnnotations": {"staff": true, "admin": true}, "canEditDocuments": {"staff": true, "admin": true}, "canEditEntities": {"staff": true, "admin": true}, @@ -550,7 +551,7 @@ examples (config.SITENAME.jsonc) that are part of this pan.do/ra distribution. { "id": "country", "title": "Country", - "type": "string", + "type": ["string"], "autocomplete": true, "columnWidth": 180, "filter": true, @@ -990,11 +991,6 @@ examples (config.SITENAME.jsonc) that are part of this pan.do/ra distribution. {"name": "Private", "color": [255, 128, 128]} ], /* - "sendReferrer", if set to false, will cause all outgoing links to originate - from one single URL - */ - "sendReferrer": false, - /* "site" contains various settings for this instance. In "email", "contact" if the address in the contact form (to), "system" is the address used by the system (from). @@ -1278,6 +1274,6 @@ examples (config.SITENAME.jsonc) that are part of this pan.do/ra distribution. 
"formats": ["webm", "mp4"], "previewRatio": 1.3333333333, "resolutions": [240, 480], - "torrent": true + "torrent": false } } diff --git a/pandora/document/fulltext.py b/pandora/document/fulltext.py new file mode 100644 index 00000000..040658ce --- /dev/null +++ b/pandora/document/fulltext.py @@ -0,0 +1,97 @@ +import subprocess + +from django.conf import settings + + +def extract_text(pdf): + cmd = ['pdftotext', pdf, '-'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + stdout = stdout.decode() + return stdout.strip() + +def ocr_image(path): + cmd = ['tesseract', path, '-', 'txt'] + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + stdout = stdout.decode() + return stdout.strip() + +class FulltextMixin: + _ES_INDEX = "document-index" + + @classmethod + def elasticsearch(cls): + from elasticsearch import Elasticsearch + es = Elasticsearch(settings.ELASTICSEARCH_HOST) + return es + + def extract_fulltext(self): + if self.file: + if self.extension == 'pdf': + return extract_text(self.file.path) + elif self.extension in ('png', 'jpg'): + return ocr_image(self.file.path) + elif self.extension == 'html': + return self.data.get('text', '') + return '' + + def has_fulltext_key(self): + return bool([k for k in settings.CONFIG['documentKeys'] if k.get('fulltext')]) + + def delete_fulltext(self): + if self.has_fulltext_key(): + from elasticsearch.exceptions import NotFoundError + try: + res = self.elasticsearch().delete(index=self._ES_INDEX, doc_type='document', id=self.id) + except NotFoundError: + pass + + def update_fulltext(self): + if self.has_fulltext_key(): + text = self.extract_fulltext() + if text: + doc = { + 'text': text.lower() + } + res = self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc) + + @classmethod + def find_fulltext(cls, query): + ids = cls.find_fulltext_ids(query) + return 
cls.objects.filter(id__in=ids) + + @classmethod + def find_fulltext_ids(cls, query): + if not query: + return [] + elif query[0] == '"' and query[-1] == '"': + query = { + "match_phrase": { + "text": query.lower()[1:-1] + }, + } + else: + query = { + "match": { + "text": { + "query": query.lower(), + "operator": "and" + } + } + } + ids = [] + res = None + from_ = 0 + es = cls.elasticsearch() + while not res or len(ids) < res['hits']['total']['value']: + res = es.search(index=cls._ES_INDEX, body={ + "from": from_, + "_source": False, + "query": query + }) + if not res['hits']['hits']: + break + ids += [int(r['_id']) for r in res['hits']['hits']] + from_ += len(res['hits']['hits']) + return ids diff --git a/pandora/document/managers.py b/pandora/document/managers.py index 7c97b0f1..fa0cd64a 100644 --- a/pandora/document/managers.py +++ b/pandora/document/managers.py @@ -36,6 +36,8 @@ def get_key_type(k): }.get(key_type, key_type) return key_type + + def parseCondition(condition, user, item=None, owner=None): ''' ''' @@ -68,6 +70,9 @@ def buildCondition(k, op, v, user, exclude=False, owner=None): k = 'collection' key_type = get_key_type(k) + + key_config = (utils.get_by_id(settings.CONFIG['documentKeys'], k) or {'type': 'string'}) + facet_keys = models.Document.facet_keys if k == 'id': if op == '&' and isinstance(v, list): @@ -128,6 +133,12 @@ def buildCondition(k, op, v, user, exclude=False, owner=None): else: q = Q(id=0) return q + elif key_config.get('fulltext'): + qs = models.Document.find_fulltext_ids(v) + q = Q(id__in=qs) + if exclude: + q = ~Q(id__in=qs) + return q elif key_type == 'boolean': q = Q(**{'find__key': k, 'find__value': v}) if exclude: diff --git a/pandora/document/models.py b/pandora/document/models.py index 81fd4dd1..4d85f785 100644 --- a/pandora/document/models.py +++ b/pandora/document/models.py @@ -30,6 +30,8 @@ from user.models import Group from . import managers from . import utils +from . 
import tasks +from .fulltext import FulltextMixin User = get_user_model() @@ -40,7 +42,7 @@ def get_path(f, x): return f.path(x) @python_2_unicode_compatible -class Document(models.Model): +class Document(models.Model, FulltextMixin): created = models.DateTimeField(auto_now_add=True) modified = models.DateTimeField(auto_now=True) @@ -153,6 +155,8 @@ class Document(models.Model): i = key['id'] if i == 'rightslevel': save(i, self.rightslevel) + if key.get('fulltext'): + continue elif i not in ('*', 'dimensions') and i not in self.facet_keys: value = data.get(i) if isinstance(value, list): @@ -409,6 +413,8 @@ class Document(models.Model): and document_key['value'].get('type') == 'map' \ and self.get_value(document_key['value']['key']): value = re.compile(document_key['value']['map']).findall(self.get_value(document_key['value']['key'])) + if value and document_key['value'].get('format'): + value = [document_key['value']['format'].format(value[0])] return value[0] if value else default elif key == 'user': return self.user.username @@ -502,6 +508,7 @@ class Document(models.Model): self.oshash = ox.oshash(self.file.path) self.save() self.delete_cache() + tasks.extract_fulltext.delay(self.id) return True, self.file.size return save_chunk(self, self.file, chunk, offset, name, done_cb) @@ -518,8 +525,14 @@ class Document(models.Model): else: path = src if self.extension == 'pdf': + crop = [] if page: - page = int(page) + if ',' in page: + crop = list(map(int, page.split(','))) + page = crop[0] + crop = crop[1:] + else: + page = int(page) if page and page > 1 and page <= self.pages: src = os.path.join(folder, '1024p%d.jpg' % page) else: @@ -529,6 +542,18 @@ class Document(models.Model): self.extract_page(page) if size: path = os.path.join(folder, '%dp%d.jpg' % (size, page)) + if len(crop) == 4: + path = os.path.join(folder, '%dp%d,%s.jpg' % (1024, page, ','.join(map(str, crop)))) + if not os.path.exists(path): + img = Image.open(src).crop(crop) + img.save(path) + else: + img 
= Image.open(path) + src = path + if size < max(img.size): + path = os.path.join(folder, '%dp%d,%s.jpg' % (size, page, ','.join(map(str, crop)))) + if not os.path.exists(path): + resize_image(src, path, size=size) elif self.extension in ('jpg', 'png', 'gif'): if os.path.exists(src): if size and page: @@ -649,6 +674,7 @@ def delete_document(sender, **kwargs): if t.file: t.delete_cache() t.file.delete(save=False) + t.delete_fulltext() pre_delete.connect(delete_document, sender=Document) class ItemProperties(models.Model): diff --git a/pandora/document/tasks.py b/pandora/document/tasks.py new file mode 100644 index 00000000..f08983bb --- /dev/null +++ b/pandora/document/tasks.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +from celery.task import task + +@task(queue="encoding") +def extract_fulltext(id): + from . import models + d = models.Document.objects.get(id=id) + d.update_fulltext() diff --git a/pandora/document/utils.py b/pandora/document/utils.py index c579f0dc..233276b5 100644 --- a/pandora/document/utils.py +++ b/pandora/document/utils.py @@ -15,7 +15,7 @@ def pdfinfo(pdf): p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) stdout, stderr = p.communicate() data = {} - for line in stdout.decode('utf-8').strip().split('\n'): + for line in stdout.decode('utf-8', 'replace').strip().split('\n'): parts = line.split(':') key = parts[0].lower().strip() if key: diff --git a/pandora/document/views.py b/pandora/document/views.py index a95ca06e..2cc080e6 100644 --- a/pandora/document/views.py +++ b/pandora/document/views.py @@ -210,7 +210,7 @@ def parse_query(data, user): for key in ('keys', 'group', 'file', 'range', 'position', 'positions', 'sort'): if key in data: query[key] = data[key] - print(query.get('sort'), data.get('sort')) + #print(query.get('sort'), data.get('sort')) query['qs'] = models.Document.objects.find(data, user) query['item'] = get_item(data.get('query', {})) return query @@ -439,7 +439,7 @@ def upload(request): 
def autocompleteDocuments(request, data): ''' - Returns autocomplete strings for a given documeny key and search string + Returns autocomplete strings for a given document key and search string takes { key: string, // document key value: string, // search string diff --git a/pandora/edit/models.py b/pandora/edit/models.py index 2dfe2ee0..4ce6ab29 100644 --- a/pandora/edit/models.py +++ b/pandora/edit/models.py @@ -248,7 +248,7 @@ class Edit(models.Model): clips_query = self.clip_query() if clips_query['conditions']: clips = clip.models.Clip.objects.find({'query': clips_query}, user) - items = [i['id'] for i in self.get_items(user).values('id')] + items = self.get_items(user).values('id') clips = clips.filter(item__in=items) else: clips = clip.models.Clip.objects.filter(id=None) diff --git a/pandora/item/management/commands/sync_itemsort.py b/pandora/item/management/commands/sync_itemsort.py index d70889ac..8af5e14a 100644 --- a/pandora/item/management/commands/sync_itemsort.py +++ b/pandora/item/management/commands/sync_itemsort.py @@ -107,6 +107,8 @@ class Command(BaseCommand): print(sql) cursor.execute(sql) transaction.commit() + for i in models.Item.objects.filter(sort=None): + i.save() if rebuild: print("Updating sort values...") ids = [i['id'] for i in models.Item.objects.all().values('id')] @@ -115,3 +117,5 @@ class Command(BaseCommand): if options['debug']: print(i) i.update_sort() + for i in models.Item.objects.filter(sort=None): + i.save() diff --git a/pandora/item/management/commands/update_external.py b/pandora/item/management/commands/update_external.py index aea8c530..4dbd1cbd 100644 --- a/pandora/item/management/commands/update_external.py +++ b/pandora/item/management/commands/update_external.py @@ -17,9 +17,9 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('--all', action='store_true', dest='all', - default=False, help='update all items, otherwise oldes N'), + default=False, help='update all items, otherwise oldes 
N') parser.add_argument('-n', '--items', action='store', dest='items', type=int, - default=30, help='number of items ot update'), + default=30, help='number of items ot update') def handle(self, **options): offset = 0 diff --git a/pandora/item/managers.py b/pandora/item/managers.py index cd185441..be409e5f 100644 --- a/pandora/item/managers.py +++ b/pandora/item/managers.py @@ -165,7 +165,10 @@ def parseCondition(condition, user, owner=None): else: q = Q(id__in=l.items.all()) if exclude: - q = ~q + if isinstance(q, list): + q = [~x for x in q] + else: + q = ~q else: q = Q(id=0) return q diff --git a/pandora/item/models.py b/pandora/item/models.py index 5dae6d91..b9dd78ee 100644 --- a/pandora/item/models.py +++ b/pandora/item/models.py @@ -14,14 +14,15 @@ from glob import glob from six import PY2, string_types from six.moves.urllib.parse import quote -from django.db import models, transaction, connection -from django.db.models import Q, Sum, Max + from django.conf import settings from django.contrib.auth import get_user_model - +from django.core.files.temp import NamedTemporaryFile +from django.db import models, transaction, connection +from django.db.models import Q, Sum, Max from django.db.models.signals import pre_delete -from django.utils import datetime_safe from django.utils.encoding import python_2_unicode_compatible +from django.utils import datetime_safe import ox from oxdjango.fields import JSONField, to_json @@ -214,6 +215,8 @@ class Item(models.Model): and item_key['value'].get('type') == 'map' \ and self.get(item_key['value']['key']): value = re.compile(item_key['value']['map']).findall(self.get(item_key['value']['key'])) + if value and item_key['value'].get('format'): + value = [item_key['value']['format'].format(value[0])] return value[0] if value else default return default @@ -387,8 +390,9 @@ class Item(models.Model): if self.oxdbId != oxdbId: q = Item.objects.filter(oxdbId=oxdbId).exclude(id=self.id) if q.count() != 0: - if len(self.public_id) == 
7: + if utils.is_imdb_id(self.public_id): self.oxdbId = None + self.update_sort() q[0].merge_with(self, save=False) else: n = 1 @@ -401,14 +405,14 @@ class Item(models.Model): q = Item.objects.filter(oxdbId=oxdbId).exclude(id=self.id) self.oxdbId = oxdbId update_poster = True - if len(self.public_id) != 7: + if not utils.is_imdb_id(self.public_id): update_ids = True # id changed, what about existing item with new id? - if settings.USE_IMDB and len(self.public_id) != 7 and self.oxdbId != self.public_id: + if settings.USE_IMDB and not utils.is_imdb_id(self.public_id) and self.oxdbId != self.public_id: self.public_id = self.oxdbId # FIXME: move files to new id here - if settings.USE_IMDB and len(self.public_id) == 7: + if settings.USE_IMDB and utils.is_imdb_id(self.public_id): for key in ('title', 'year', 'director', 'season', 'episode', 'seriesTitle', 'episodeTitle'): if key in self.data: @@ -418,7 +422,7 @@ class Item(models.Model): if settings.USE_IMDB: defaults = list(filter(lambda k: 'default' in k, settings.CONFIG['itemKeys'])) for k in defaults: - if len(self.public_id) == 7: + if utils.is_imdb_id(self.public_id): if k['id'] in self.data and self.data[k['id']] == k['default']: del self.data[k['id']] else: @@ -637,6 +641,9 @@ class Item(models.Model): if self.poster_height: i['posterRatio'] = self.poster_width / self.poster_height + if keys and 'source' in keys: + i['source'] = self.streams().exclude(file__data='').exists() + streams = self.streams() i['durations'] = [s.duration for s in streams] i['duration'] = sum(i['durations']) @@ -938,6 +945,8 @@ class Item(models.Model): s.oxdbId = self.oxdbId if not settings.USE_IMDB and s.public_id.isupper() and s.public_id.isalpha(): s.public_id = ox.sort_string(str(ox.fromAZ(s.public_id))) + else: + s.public_id = ox.sort_string(s.public_id) s.modified = self.modified or datetime.now() s.created = self.created or datetime.now() s.rightslevel = self.level @@ -1041,6 +1050,8 @@ class Item(models.Model): set_value(s, name, 
value) elif sort_type == 'year': value = self.get(source) + if isinstance(value, str): + value = value[:4] set_value(s, name, value) elif sort_type == 'date': value = value_ = self.get(source) @@ -1179,6 +1190,37 @@ class Item(models.Model): return None return path + def extract_clip(self, in_, out, resolution, format, track=None, force=False): + streams = self.streams(track) + stream = streams[0].get(resolution, format) + if streams.count() > 1 and stream.info['duration'] < out: + video = NamedTemporaryFile(suffix='.%s' % format) + r = self.merge_streams(video.name, resolution, format) + if not r: + return False + path = video.name + duration = sum(item.cache['durations']) + else: + path = stream.media.path + duration = stream.info['duration'] + + cache_name = '%s_%sp_%s.%s' % (self.public_id, resolution, '%s,%s' % (in_, out), format) + cache_path = os.path.join(settings.MEDIA_ROOT, self.path('cache/%s' % cache_name)) + if os.path.exists(cache_path) and not force: + return cache_path + if duration >= out: + subtitles = utils.get_by_key(settings.CONFIG['layers'], 'isSubtitles', True) + if subtitles: + srt = self.srt(subtitles['id'], encoder=ox.srt) + if len(srt) < 4: + srt = None + else: + srt = None + ox.makedirs(os.path.dirname(cache_path)) + extract.chop(path, in_, out, subtitles=srt, dest=cache_path, encode=True) + return cache_path + return False + @property def timeline_prefix(self): videos = self.streams() diff --git a/pandora/item/tasks.py b/pandora/item/tasks.py index a0c9cac5..eca80083 100644 --- a/pandora/item/tasks.py +++ b/pandora/item/tasks.py @@ -22,6 +22,7 @@ def cronjob(**kwargs): if limit_rate('item.tasks.cronjob', 8 * 60 * 60): update_random_sort() update_random_clip_sort() + clear_cache.delay() def update_random_sort(): from . import models @@ -125,6 +126,33 @@ def load_subtitles(public_id): item.update_sort() item.update_facets() + +@task(queue="encoding") +def extract_clip(public_id, in_, out, resolution, format, track=None): + from . 
import models + try: + item = models.Item.objects.get(public_id=public_id) + except models.Item.DoesNotExist: + return False + if item.extract_clip(in_, out, resolution, format, track): + return True + return False + + +@task(queue="encoding") +def clear_cache(days=60): + import subprocess + path = os.path.join(settings.MEDIA_ROOT, 'media') + cmd = ['find', path, '-iregex', '.*/frames/.*', '-atime', '+%s' % days, '-type', 'f', '-exec', 'rm', '{}', ';'] + subprocess.check_output(cmd) + path = os.path.join(settings.MEDIA_ROOT, 'items') + cmd = ['find', path, '-iregex', '.*/cache/.*', '-atime', '+%s' % days, '-type', 'f', '-exec', 'rm', '{}', ';'] + subprocess.check_output(cmd) + path = settings.MEDIA_ROOT + cmd = ['find', path, '-type', 'd', '-size', '0', '-prune', '-exec', 'rmdir', '{}', ';'] + subprocess.check_output(cmd) + + @task(ignore_results=True, queue='default') def update_sitemap(base_url): from . import models @@ -133,13 +161,47 @@ def update_sitemap(base_url): def absolute_url(url): return base_url + url - urlset = ET.Element('urlset') - urlset.attrib['xmlns'] = "http://www.sitemaps.org/schemas/sitemap/0.9" - urlset.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" - urlset.attrib['xsi:schemaLocation'] = "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" - urlset.attrib['xmlns:video'] = "http://www.google.com/schemas/sitemap-video/1.1" + state = {} + state['part'] = 1 + state['count'] = 0 - url = ET.SubElement(urlset, "url") + def new_urlset(): + urlset = ET.Element('urlset') + urlset.attrib['xmlns'] = "http://www.sitemaps.org/schemas/sitemap/0.9" + urlset.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" + urlset.attrib['xsi:schemaLocation'] = "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" + urlset.attrib['xmlns:video'] = "http://www.google.com/schemas/sitemap-video/1.1" + return urlset + + def save_urlset(): + s = 
ET.SubElement(sitemap_index, "sitemap") + loc = ET.SubElement(s, "loc") + loc.text = absolute_url("sitemap%06d.xml" % state['part']) + lastmod = ET.SubElement(s, "lastmod") + lastmod.text = datetime.now().strftime("%Y-%m-%d") + data = b'\n' + ET.tostring(state['urlset']) + path = os.path.abspath(os.path.join(settings.MEDIA_ROOT, 'sitemap%06d.xml.gz' % state['part'])) + with open(path[:-3], 'wb') as f: + f.write(data) + with gzip.open(path, 'wb') as f: + f.write(data) + state['part'] += 1 + state['count'] = 0 + state['urlset'] = new_urlset() + + def tick(): + state['count'] += 1 + if state['count'] > 40000: + save_urlset() + + sitemap_index = ET.Element('sitemapindex') + sitemap_index.attrib['xmlns'] = "http://www.sitemaps.org/schemas/sitemap/0.9" + sitemap_index.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" + sitemap_index.attrib['xsi:schemaLocation'] = "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" + + state['urlset'] = new_urlset() + + url = ET.SubElement(state['urlset'], "url") loc = ET.SubElement(url, "loc") loc.text = absolute_url('') # always, hourly, daily, weekly, monthly, yearly, never @@ -151,9 +213,10 @@ def update_sitemap(base_url): # priority of page on site values 0.1 - 1.0 priority = ET.SubElement(url, "priority") priority.text = '1.0' + tick() for page in [s['id'] for s in settings.CONFIG['sitePages']]: - url = ET.SubElement(urlset, "url") + url = ET.SubElement(state['urlset'], "url") loc = ET.SubElement(url, "loc") loc.text = absolute_url(page) # always, hourly, daily, weekly, monthly, yearly, never @@ -162,11 +225,12 @@ def update_sitemap(base_url): # priority of page on site values 0.1 - 1.0 priority = ET.SubElement(url, "priority") priority.text = '1.0' + tick() allowed_level = settings.CONFIG['capabilities']['canSeeItem']['guest'] can_play = settings.CONFIG['capabilities']['canPlayVideo']['guest'] for i in models.Item.objects.filter(level__lte=allowed_level): - url = 
ET.SubElement(urlset, "url") + url = ET.SubElement(state['urlset'], "url") # URL of the page. This URL must begin with the protocol (such as http) loc = ET.SubElement(url, "loc") loc.text = absolute_url("%s/info" % i.public_id) @@ -202,11 +266,12 @@ def update_sitemap(base_url): el.text = "%s" % int(duration) el = ET.SubElement(video, "video:live") el.text = "no" + tick() # Featured Lists from itemlist.models import List for l in List.objects.filter(Q(status='featured') | Q(status='public')): - url = ET.SubElement(urlset, "url") + url = ET.SubElement(state['urlset'], "url") # URL of the page. This URL must begin with the protocol (such as http) loc = ET.SubElement(url, "loc") loc.text = absolute_url("list==%s" % quote(l.get_id())) @@ -220,10 +285,12 @@ def update_sitemap(base_url): # priority of page on site values 0.1 - 1.0 priority = ET.SubElement(url, "priority") priority.text = '1.0' if l.status == 'featured' else '0.75' + tick() + # Featured Edits from edit.models import Edit for l in Edit.objects.filter(Q(status='featured') | Q(status='public')): - url = ET.SubElement(urlset, "url") + url = ET.SubElement(state['urlset'], "url") # URL of the page. This URL must begin with the protocol (such as http) loc = ET.SubElement(url, "loc") loc.text = absolute_url(l.get_absolute_url()[1:]) @@ -237,10 +304,12 @@ def update_sitemap(base_url): # priority of page on site values 0.1 - 1.0 priority = ET.SubElement(url, "priority") priority.text = '1.0' if l.status == 'featured' else '0.75' + tick() + # Featured Collections from documentcollection.models import Collection for l in Collection.objects.filter(Q(status='featured') | Q(status='public')): - url = ET.SubElement(urlset, "url") + url = ET.SubElement(state['urlset'], "url") # URL of the page. 
This URL must begin with the protocol (such as http) loc = ET.SubElement(url, "loc") loc.text = absolute_url("documents/collection==%s" % quote(l.get_id())) @@ -254,10 +323,11 @@ def update_sitemap(base_url): # priority of page on site values 0.1 - 1.0 priority = ET.SubElement(url, "priority") priority.text = '1.0' if l.status == 'featured' else '0.75' + tick() from document.models import Document for d in Document.objects.filter(rightslevel=0).filter(Q(extension='html') | Q(extension='pdf')): - url = ET.SubElement(urlset, "url") + url = ET.SubElement(state['urlset'], "url") # URL of the page. This URL must begin with the protocol (such as http) loc = ET.SubElement(url, "loc") loc.text = absolute_url(d.get_id()) @@ -273,8 +343,10 @@ def update_sitemap(base_url): priority.text = '0.75' if d.collections.filter(Q(status='featured') | Q(status='public')).count(): priority.text = '1.0' - - data = b'\n' + ET.tostring(urlset) + tick() + if state['count']: + save_urlset() + data = b'\n' + ET.tostring(sitemap_index) with open(sitemap[:-3], 'wb') as f: f.write(data) with gzip.open(sitemap, 'wb') as f: diff --git a/pandora/item/urls.py b/pandora/item/urls.py index 948a1878..332da86d 100644 --- a/pandora/item/urls.py +++ b/pandora/item/urls.py @@ -16,6 +16,7 @@ urlpatterns = [ url(r'^(?P[A-Z0-9].*)/download$', views.download), url(r'^(?P[A-Z0-9].*)/download/$', views.download), url(r'^(?P[A-Z0-9].*)/download/source/(?P\d+)?$', views.download_source), + url(r'^(?P[A-Z0-9].*)/download/(?P\d+)p(?P\d+)\.(?Pwebm|ogv|mp4)$', views.download), url(r'^(?P[A-Z0-9].*)/download/(?P\d+)p\.(?Pwebm|ogv|mp4)$', views.download), #video diff --git a/pandora/item/utils.py b/pandora/item/utils.py index 38ca83ff..9d0beb66 100644 --- a/pandora/item/utils.py +++ b/pandora/item/utils.py @@ -103,3 +103,7 @@ def normalize_dict(encoding, data): elif isinstance(data, list): return [normalize_dict(encoding, value) for value in data] return data + + +def is_imdb_id(id): + return bool(len(id) >= 7 and 
str(id).isdigit()) diff --git a/pandora/item/views.py b/pandora/item/views.py index 02d8f924..97f8aa23 100644 --- a/pandora/item/views.py +++ b/pandora/item/views.py @@ -638,6 +638,32 @@ def edit(request, data): return render_to_json_response(response) actions.register(edit, cache=False) + +def extractClip(request, data): + ''' + Extract and cache clip + + takes { + item: string + resolution: int + format: string + in: float + out: float + } + returns { + taskId: string, // taskId + } + ''' + item = get_object_or_404_json(models.Item, public_id=data['item']) + if not item.access(request.user): + return HttpResponseForbidden() + + response = json_response() + t = tasks.extract_clip.delay(data['item'], data['in'], data['out'], data['resolution'], data['format']) + response['data']['taskId'] = t.task_id + return render_to_json_response(response) +actions.register(extractClip, cache=False) + @login_required_json def remove(request, data): ''' @@ -966,6 +992,8 @@ def download_source(request, id, part=None): raise Http404 parts = ['%s - %s ' % (item.get('title'), settings.SITENAME), item.public_id] + if len(streams) > 1: + parts.append('.Part %d' % (part + 1)) parts.append('.') parts.append(f.extension) filename = ''.join(parts) @@ -976,7 +1004,7 @@ def download_source(request, id, part=None): response['Content-Disposition'] = "attachment; filename*=UTF-8''%s" % quote(filename.encode('utf-8')) return response -def download(request, id, resolution=None, format='webm'): +def download(request, id, resolution=None, format='webm', part=None): item = get_object_or_404(models.Item, public_id=id) if not resolution or int(resolution) not in settings.CONFIG['video']['resolutions']: resolution = max(settings.CONFIG['video']['resolutions']) @@ -984,22 +1012,35 @@ def download(request, id, resolution=None, format='webm'): resolution = int(resolution) if not item.access(request.user) or not item.rendered: return HttpResponseForbidden() + if part is not None: + part = int(part) - 1 + 
streams = item.streams() + if part > len(streams): + raise Http404 ext = '.%s' % format parts = ['%s - %s ' % (item.get('title'), settings.SITENAME), item.public_id] if resolution != max(settings.CONFIG['video']['resolutions']): parts.append('.%dp' % resolution) + if part is not None: + parts.append('.Part %d' % (part + 1)) parts.append(ext) filename = ''.join(parts) video = NamedTemporaryFile(suffix=ext) content_type = mimetypes.guess_type(video.name)[0] - r = item.merge_streams(video.name, resolution, format) - if not r: - return HttpResponseForbidden() - elif r is True: - response = HttpResponse(FileWrapper(video), content_type=content_type) - response['Content-Length'] = os.path.getsize(video.name) + if part is None: + r = item.merge_streams(video.name, resolution, format) + if not r: + return HttpResponseForbidden() + elif r is True: + response = HttpResponse(FileWrapper(video), content_type=content_type) + response['Content-Length'] = os.path.getsize(video.name) + else: + response = HttpFileResponse(r, content_type=content_type) else: - response = HttpFileResponse(r, content_type=content_type) + stream = streams[part].get(resolution, format) + path = stream.media.path + content_type = mimetypes.guess_type(path)[0] + response = HttpFileResponse(path, content_type=content_type) response['Content-Disposition'] = "attachment; filename*=UTF-8''%s" % quote(filename.encode('utf-8')) return response @@ -1056,6 +1097,23 @@ def video(request, id, resolution, format, index=None, track=None): ext = '.%s' % format duration = stream.info['duration'] + filename = u"Clip of %s - %s-%s - %s %s%s" % ( + item.get('title'), + ox.format_duration(t[0] * 1000).replace(':', '.')[:-4], + ox.format_duration(t[1] * 1000).replace(':', '.')[:-4], + settings.SITENAME.replace('/', '-'), + item.public_id, + ext + ) + content_type = mimetypes.guess_type(path)[0] + + cache_name = '%s_%sp_%s.%s' % (item.public_id, resolution, '%s,%s' % (t[0], t[1]), format) + cache_path = 
os.path.join(settings.MEDIA_ROOT, item.path('cache/%s' % cache_name)) + if os.path.exists(cache_path): + response = HttpFileResponse(cache_path, content_type=content_type) + response['Content-Disposition'] = "attachment; filename*=UTF-8''%s" % quote(filename.encode('utf-8')) + return response + # multipart request beyond first part, merge parts and chop that if not index and streams.count() > 1 and stream.info['duration'] < t[1]: video = NamedTemporaryFile(suffix=ext) @@ -1065,7 +1123,6 @@ def video(request, id, resolution, format, index=None, track=None): path = video.name duration = sum(item.cache['durations']) - content_type = mimetypes.guess_type(path)[0] if len(t) == 2 and t[1] > t[0] and duration >= t[1]: # FIXME: could be multilingual here subtitles = utils.get_by_key(settings.CONFIG['layers'], 'isSubtitles', True) @@ -1076,20 +1133,12 @@ def video(request, id, resolution, format, index=None, track=None): else: srt = None response = HttpResponse(extract.chop(path, t[0], t[1], subtitles=srt), content_type=content_type) - filename = u"Clip of %s - %s-%s - %s %s%s" % ( - item.get('title'), - ox.format_duration(t[0] * 1000).replace(':', '.')[:-4], - ox.format_duration(t[1] * 1000).replace(':', '.')[:-4], - settings.SITENAME, - item.public_id, - ext - ) response['Content-Disposition'] = "attachment; filename*=UTF-8''%s" % quote(filename.encode('utf-8')) return response else: filename = "%s - %s %s%s" % ( item.get('title'), - settings.SITENAME, + settings.SITENAME.replace('/', '-'), item.public_id, ext ) @@ -1326,6 +1375,15 @@ def sitemap_xml(request): response['Content-Type'] = 'application/xml' return response +def sitemap_part_xml(request, part): + part = int(part) + sitemap = os.path.abspath(os.path.join(settings.MEDIA_ROOT, 'sitemap%06d.xml' % part)) + if not os.path.exists(sitemap): + raise Http404 + response = HttpFileResponse(sitemap) + response['Content-Type'] = 'application/xml' + return response + def item_json(request, id): level = 
settings.CONFIG['capabilities']['canSeeItem']['guest'] if not request.user.is_anonymous(): diff --git a/pandora/itemlist/models.py b/pandora/itemlist/models.py index e9e877bb..7d696381 100644 --- a/pandora/itemlist/models.py +++ b/pandora/itemlist/models.py @@ -271,12 +271,13 @@ class List(models.Model): self.save() for i in self.poster_frames: from item.models import Item - qs = Item.objects.filter(public_id=i['item']) - if qs.count() > 0: - if i.get('position'): - frame = qs[0].frame(i['position']) - if frame: - frames.append(frame) + if 'item' in i: + qs = Item.objects.filter(public_id=i['item']) + if qs.count() > 0: + if i.get('position'): + frame = qs[0].frame(i['position']) + if frame: + frames.append(frame) self.icon.name = self.path('icon.jpg') icon = self.icon.path if frames: diff --git a/pandora/manage.py b/pandora/manage.py index d33da6f9..700a1802 100755 --- a/pandora/manage.py +++ b/pandora/manage.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import signal import sys diff --git a/pandora/person/migrations/0002_auto_20190723_1446.py b/pandora/person/migrations/0002_auto_20190723_1446.py new file mode 100644 index 00000000..acb168e1 --- /dev/null +++ b/pandora/person/migrations/0002_auto_20190723_1446.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.22 on 2019-07-23 14:46 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('person', '0001_initial'), + ] + + operations = [ + migrations.AlterField( + model_name='person', + name='imdbId', + field=models.CharField(blank=True, max_length=16), + ), + ] diff --git a/pandora/person/models.py b/pandora/person/models.py index 20c637c4..84fe86d6 100644 --- a/pandora/person/models.py +++ b/pandora/person/models.py @@ -38,7 +38,7 @@ class Person(models.Model): #FIXME: how to deal with aliases aliases = fields.TupleField(default=[]) - imdbId = 
models.CharField(max_length=7, blank=True) + imdbId = models.CharField(max_length=16, blank=True) wikipediaId = models.CharField(max_length=1000, blank=True) objects = managers.PersonManager() diff --git a/pandora/place/managers.py b/pandora/place/managers.py index ad824591..75d59b1d 100644 --- a/pandora/place/managers.py +++ b/pandora/place/managers.py @@ -141,4 +141,5 @@ class PlaceManager(Manager): user) if conditions: qs = qs.filter(conditions) + qs = qs.distinct() return qs diff --git a/pandora/place/views.py b/pandora/place/views.py index 9b9ba72f..01533b52 100644 --- a/pandora/place/views.py +++ b/pandora/place/views.py @@ -239,8 +239,8 @@ def findPlaces(request, data): qs = order_query(query['qs'], query['sort']) qs = qs.distinct() if 'keys' in data: - qs = qs.select_related('user__profile') qs = qs[query['range'][0]:query['range'][1]] + qs = qs.select_related() response['data']['items'] = [p.json(data['keys'], request.user) for p in qs] elif 'position' in query: ids = [i.get_id() for i in qs] diff --git a/pandora/settings.py b/pandora/settings.py index 1e8f1d56..83dfb602 100644 --- a/pandora/settings.py +++ b/pandora/settings.py @@ -178,6 +178,7 @@ CACHES = { AUTH_PROFILE_MODULE = 'user.UserProfile' AUTH_CHECK_USERNAME = True FFMPEG = 'ffmpeg' +FFPROBE = 'ffprobe' FFMPEG_SUPPORTS_VP9 = True FFMPEG_DEBUG = False @@ -204,6 +205,9 @@ CELERY_BROKER_URL = 'amqp://pandora:box@localhost:5672//pandora' SEND_CELERY_ERROR_EMAILS = False +# Elasticsearch +ELASTICSEARCH_HOST = None + #with apache x-sendfile or lighttpd set this to True XSENDFILE = False diff --git a/pandora/taskqueue/models.py b/pandora/taskqueue/models.py index 6a36e3bf..9dda9da1 100644 --- a/pandora/taskqueue/models.py +++ b/pandora/taskqueue/models.py @@ -183,13 +183,17 @@ class Task(models.Model): def json(self): if self.status != 'canceled': self.update() - return { + data = { 'started': self.started, 'ended': self.ended, 'status': self.status, - 'title': self.item.get('title'), - 'item': 
self.item.public_id, - 'user': self.user and self.user.username or '', 'id': self.public_id, + 'user': self.user and self.user.username or '', } + try: + data['title'] = self.item.get('title') + data['item'] = self.item.public_id + except: + pass + return data diff --git a/pandora/text/templates/pdf/viewer.html b/pandora/text/templates/pdf/viewer.html index 1cdd5c19..d5bcc6f5 100644 --- a/pandora/text/templates/pdf/viewer.html +++ b/pandora/text/templates/pdf/viewer.html @@ -1,4 +1,4 @@ - +