diff --git a/pandora/archive/external.py b/pandora/archive/external.py index 69179e4f..4555f53f 100644 --- a/pandora/archive/external.py +++ b/pandora/archive/external.py @@ -97,17 +97,6 @@ def download(item_id, url): tmp = tmp.decode('utf-8') os.chdir(tmp) cmd = ['youtube-dl', '-q', media['url']] - if settings.CONFIG['video'].get('reuseUload', False): - max_resolution = max(settings.CONFIG['video']['resolutions']) - format = settings.CONFIG['video']['formats'][0] - if format == 'mp4': - cmd += [ - '-f', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/bestvideo+bestaudio', - '--merge-output-format', 'mp4' - ] - elif format == 'webm': - cmd += ['--merge-output-format', 'webm'] - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) diff --git a/pandora/archive/extract.py b/pandora/archive/extract.py index 872ee7ea..f85c464e 100644 --- a/pandora/archive/extract.py +++ b/pandora/archive/extract.py @@ -698,99 +698,3 @@ def chop(video, start, end, subtitles=None, dest=None, encode=False): return f else: return None - -def has_faststart(path): - cmd = [settings.FFPROBE, '-v', 'trace', '-i', path] - p = subprocess.Popen(cmd, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - close_fds=True) - stdout, stderr = p.communicate() - moov = "type:'moov'" - mdat = "type:'mdat'" - blocks = [b for b in stdout.decode().split('\n') if moov in b or mdat in b] - if blocks and moov in blocks[0]: - return True - return False - -def remux_stream(src, dst): - info = ox.avinfo(src) - if info.get('audio'): - audio = ['-c:a', 'copy'] - else: - audio = [] - if info.get('video'): - video = ['-c:v', 'copy'] - else: - video = [] - cmd = [ - settings.FFMPEG, - '-nostats', '-loglevel', 'error', - '-map_metadata', '-1', '-sn', - '-i', src, - ] + video + [ - ] + audio + [ - '-movflags', '+faststart', - dst - ] - p = subprocess.Popen(cmd, stdin=subprocess.PIPE, - stdout=open('/dev/null', 'w'), - stderr=open('/dev/null', 'w'), - close_fds=True) - p.wait() - return True, None - - -def ffprobe(path, *args): - cmd = [settings.FFPROBE, '-loglevel', 'error', '-print_format', 'json', '-i', path] + list(args) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) - stdout, stderr = p.communicate() - return json.loads(stdout.decode()) - - -def get_chapters(path): - info = ffprobe(path, '-show_chapters') - chapters = [] - n = 0 - for chapter in info.get('chapters', []): - n += 1 - chapters.append({ - 'in': chapter['start_time'], - 'out': chapter['end_time'], - 'value': chapter.get('tags', {}).get('title', 'Chapter %s' % n) - }) - return chapters - -def get_text_subtitles(path): - subtitles = [] - for stream in ffprobe(path, '-show_streams')['streams']: - if stream.get('codec_name') in ('subrip', 'aas', 'text'): - subtitles.append({ - 'index': stream['index'], - 'language': stream['tags']['language'], - }) - return subtitles - -def has_img_subtitles(path): - subtitles = [] - for stream in ffprobe(path, '-show_streams')['streams']: - if stream.get('codec_type') == 'subtitle' and stream.get('codec_name') in ('dvbsub', 'pgssub'): - subtitles.append({ - 'index': stream['index'], - 'language': stream['tags']['language'], - }) - return subtitles - -def extract_subtitles(path, language=None): - extra = [] - if language: - tracks = get_text_subtitles(path) - track = [t for t in tracks if t['language'] == language] - if track: - extra = ['-map', '0:%s' % track[0]['index']] - else: - raise Exception("unknown language: %s" % language) - cmd = ['ffmpeg', '-loglevel', 'error', '-i', path] + extra + ['-f', 'srt', '-'] - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) - stdout, stderr = p.communicate() - return ox.srt.loads(stdout.decode()) diff --git a/pandora/archive/models.py b/pandora/archive/models.py index 75d25de7..199c90b1 100644 --- a/pandora/archive/models.py +++ b/pandora/archive/models.py @@ -376,50 +376,6 @@ class File(models.Model): return save_chunk(stream, stream.media, chunk, offset, name, done_cb) return False, 0 - def extract_text_data(self): - if self.data: - for sub in extract.get_text_subtitles(self.data.path): - srt = extract.extract_subtitles(self.data.path, sub['language']) - # fixme add subtitles, possibly with language! - chapters = extract.get_chapters(self.data.path) - if chapters: - # fixme add chapters as notes - pass - - def get_codec(self, type): - track = self.info.get(type) - if track: - return track[0].get('codec') - - MP4_VCODECS = ['h264'] - MP4_ACODECS = ['aac', None] - WEBM_VCODECS = ['vp8', 'vp9'] - WEBM_ACODECS = ['vorbis', 'opus', None] - - def can_remux(self): - config = settings.CONFIG['video'] - height = self.info['video'][0]['height'] if self.info.get('video') else None - max_resolution = max(config['resolutions']) - if height <= max_resolution and self.extension in ('mov', 'mkv', 'mp4', 'm4v'): - vcodec = self.get_codec('video') - acodec = self.get_codec('audio') - if vcodec in self.MP4_VCODECS and acodec in self.MP4_ACODECS: - return True - return False - - def can_stream(self): - config = settings.CONFIG['video'] - height = self.info['video'][0]['height'] if self.info.get('video') else None - max_resolution = max(config['resolutions']) - if height <= max_resolution and config['formats'][0] == self.extension: - vcodec = self.get_codec('video') - acodec = self.get_codec('audio') - if self.extension in ['mp4', 'm4v'] and vcodec in self.MP4_VCODECS and acodec in self.MP4_ACODECS: - return extract.has_faststart(self.data.path) - elif self.extension == 'webm' and vcodec in self.WEBM_VCODECS and acodec in self.WEBM_ACODECS: - return True - return False - def stream_resolution(self): config = settings.CONFIG['video'] height = self.info['video'][0]['height'] if self.info.get('video') else None @@ -791,27 +747,13 @@ class Stream(models.Model): derivative.encode() def encode(self): - reuse = settings.CONFIG['video'].get('reuseUpload', False) media = self.source.media.path if self.source else self.file.data.path + if not self.media: self.media.name = self.path(self.name()) target = self.media.path info = ox.avinfo(media) - - done = False - if reuse and not self.source: - if self.file.can_stream(): - ok, error = True, None - ox.makedirs(os.path.dirname(target)) - shutil.move(self.file.data.path, target) - self.file.data.name = '' - self.file.save() - elif self.file.can_remux(): - ok, error = extract.remux_stream(media, target) - done = True - if not done: - ok, error = extract.stream(media, target, self.name(), info, flags=self.flags) - + ok, error = extract.stream(media, target, self.name(), info, flags=self.flags) # file could have been moved while encoding # get current version from db and update self.refresh_from_db() diff --git a/pandora/document/fulltext.py b/pandora/document/fulltext.py index d64cd258..3990b63d 100644 --- a/pandora/document/fulltext.py +++ b/pandora/document/fulltext.py @@ -27,11 +27,10 @@ class FulltextMixin: return es def extract_fulltext(self): - if self.file: - if self.extension == 'pdf': - return extract_text(self.file.path) - elif self.extension in ('png', 'jpg'): - return ocr_image(self.file.path) + if self.extension == 'pdf': + return extract_text(self.file.path) + elif self.extension in ('png', 'jpg'): + return ocr_image(self.file.path) elif self.extension == 'html': return self.data.get('text', '') return '' @@ -39,17 +38,13 @@ class FulltextMixin: def delete_fulltext(self): res = self.elasticsearch().delete(index=self._ES_INDEX, doc_type='document', id=self.id) - def has_fulltext_key(): - return bool([k for k in settings.CONFIG['documentKeys'] if k.get('fulltext')]) - def update_fulltext(self): - if self.has_fulltext_key(): - text = self.extract_fulltext() - if text: - doc = { - 'text': text.lower() - } - res = self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc) + text = self.extract_fulltext() + if text: + doc = { + 'text': text.lower() + } + res = self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc) @classmethod def find_fulltext(cls, query): diff --git a/pandora/settings.py b/pandora/settings.py index 83dfb602..e4f46698 100644 --- a/pandora/settings.py +++ b/pandora/settings.py @@ -178,7 +178,6 @@ CACHES = { AUTH_PROFILE_MODULE = 'user.UserProfile' AUTH_CHECK_USERNAME = True FFMPEG = 'ffmpeg' -FFPROBE = 'ffprobe' FFMPEG_SUPPORTS_VP9 = True FFMPEG_DEBUG = False