# -*- coding: utf-8 -*- # -*- Mode: Python; -*- # vi:si:et:sw=2:sts=2:ts=2 from sqlobject import * from turbogears.database import PackageHub import turbogears import re from urllib import quote, quote_plus import os from os.path import abspath, join, dirname from datetime import datetime import time import math from glob import glob import shutil import oxdb_cache import cache import oxdb_import from oxdb_utils import oxdb_title, oxdb_director, oxdb_id from subtitles import * import midentify hub = PackageHub('oxdbarchive') __connection__ = hub class Archive(SQLObject): name = UnicodeCol(length=255, alternateID=True) basePath = UnicodeCol() def _get_basePath(self): basePath = self._SO_get_basePath() if not basePath.endswith('/'): basePath = basePath + "/" self.basePath = basePath return basePath def _get_files(self): q = ArchiveFile.select(ArchiveFile.q.archiveID == self.id) return [f for f in q] def _get_file_list(self): files = {} for f in self.files: try: d = dict(md5sum = f.md5sum, size = f.size) files[f.path] = d except SQLObjectNotFound: f.destroySelf() return files def addLocalFile(self, fname, movie = None): params = oxdb_import.oxdb_file_stats(fname) params = oxdb_import.oxdb_file_metadata(params) params['date'] = datetime.fromtimestamp(params['date']) return self.addFile(params, movie) def addFile(self, params, movie = None): ''' updates or adds new file to database, params is a dict with at least md5sum, path, date but also needs audio, video, length, size, bpp for new files ''' params['path'] = params['path'].replace(self.basePath, '') q = ArchiveFile.select(AND( ArchiveFile.q.archiveID == self.id, ArchiveFile.q.md5sum == params['md5sum'], )) if q.count() > 0: '''update existing entry''' f = q[0] #FIXME: deal with double files here. right now they are changed if f.path != params['path']: ret = "this file is already in the database, first time at:\n\t%s\n\t" % f.path else: ret = "updated entry" for field in ('path', 'date'): setattr(f, field, params[field]) else: #just a new md5? happens for srt files quite often qq = ArchiveFile.select(AND( ArchiveFile.q.archiveID == self.id, ArchiveFile.q.path == params['path'], )) f = None if qq.count() == 1: f = qq[0] ret = "updated entry" else: ''' add new file to database ''' title = oxdb_title(params['path']) director = oxdb_director(params['path']) oxdb = oxdb_id(title, director) f = ArchiveFile( archive = self, path = params['path'], date = params['date'], oxdb = oxdb, md5sum = params['md5sum'], audio = params['audio'], video = params['video'], length = params['length'], size = params['size'], bpp = params['bpp'], date_added = datetime.now(), subtitle = params['path'].endswith('.srt'), ) ret = "added entry" f.updateMeta() return ret def removeFile(self, md5sum): ''' remove file based on md5sum from archive ''' q = ArchiveFile.select(AND( ArchiveFile.q.archiveID == self.id, ArchiveFile.q.md5sum == md5sum, )) if q.count() == 1: for i in q: ArchiveFile.delete(i.id) return dict(result="file removed") return dict(result="not in archive") def importFiles(self): stats = {'skipped': 0, 'added': 0, 'remove':0} print self.basePath files = oxdb_import.oxdb_spider(self.basePath) oxdb_files = self.file_list() md5sum_on_disk = [] for f in files: meta = oxdb_import.oxdb_file_stats(f) f = f.replace(base, '') if oxdb_files.has_key(f) and oxdb_files[f]['size'] == meta['size']: stats['skipped'] += 1 md5sum_on_disk.append(oxdb_files[f]['md5sum']) else: meta = oxdb_import.oxdb_file_metadata(meta) #remove base meta['path'] = f.encode('utf-8') #ignore files changed in the last 5 minutes print self.addFile(meta), f stats['added'] += 1 md5sum_on_disk.append(meta['md5sum']) for f in oxdb_files: if oxdb_files[f]['md5sum'] not in md5sum_on_disk: print "remove", f self.removeFile({'md5sum':oxdb_files[f]['md5sum']}) stats['remove'] += 1 print stats return stats class ArchiveFile(SQLObject): ''' ALTER TABLE file_meta CHANGE size size bigint; ALTER TABLE file_meta CHANGE pixels pixels bigint; ALTER TABLE file_meta CHANGE srt srt LONGTEXT; ''' md5sum = UnicodeCol(length=128, alternateID=True) oxdb = UnicodeCol(length=128) path = UnicodeCol() date = DateTimeCol() archive = ForeignKey('Archive') audio = UnicodeCol() video = UnicodeCol() length = IntCol() size = IntCol() bpp = IntCol(default = 0) pixels = IntCol(default = 0) date_added = DateTimeCol(default = datetime.now) pubDate = DateTimeCol(default = datetime.now) modDate = DateTimeCol(default = datetime.now) height = IntCol(default = -1) width = IntCol(default = -1) frameAspect = UnicodeCol(default = "1.6", length = 100) bitrate = IntCol(default = -1) fps = IntCol(default = -1) srt = UnicodeCol(default = '') subtitle_meta_id = IntCol(default = -1) subtitle = BoolCol(default = False) part = IntCol(default = 1) broken = BoolCol(default = False) extracted = BoolCol(default = False) filename = UnicodeCol(default = '') def _get_part(self): part = 1 parts = re.compile('Part (\d)').findall(self.path) if not parts: parts = re.compile('CD (\d)').findall(self.path) if parts: part = int(parts[-1]) self.part = part return part def _get_offset(self): if not self.part: self.part = 1 if self.part == 1: return 0 length = 0 q = ArchiveFile.select(AND( ArchiveFile.q.oxdb == self.oxdb, ArchiveFile.q.part < self.part, ArchiveFile.q.subtitle == False, )) for f in q: length += f.length return length def _get_ext(self): return self.path.split('.')[-1] def _get_preferredVersion(self): e = self.nameExtra.lower() for pref in ('directors cut', 'long version'): if pref in e: return True return False def _get_nameExtra(self): path = os.path.basename(self.path) parts = path.replace(self.title, '').split('.')[:-1] parts = filter(lambda x: not x.startswith('Part'), parts) parts = filter(lambda x: not x.startswith('Season'), parts) parts = filter(lambda x: not x.startswith('Episode'), parts) parts = filter(lambda x: not x.startswith('vo'), parts) extra = '.'.join(parts) if extra.startswith('.'): extra = extra[1:] return extra def _get_title(self): return oxdb_title(self.path) def _get_director(self): return oxdb_director(self.path) def _get_absolutePath(self): return join(self.archive.basePath, self.path) def updateMeta(self): self.findSubtitleLink() if os.path.exists(self.absolutePath): info = midentify.identify(self.absolutePath) self.length = info['length'] self.width = info['width'] self.frameAspect = "%0.6f" % info['aspect'] self.height = info['height'] self.bitrate = info['video_bitrate'] self.fps = info['fps'] self.audio = info['audio_codec'] self.video = info['video_codec'] self.updatePixels() self.updateBpp() self.loadSubtitleFromFile() self.oxdb = oxdb_id(self.title, self.director) def _get_frameAspect(self): aspect = float(self._SO_get_frameAspect()) if self.subtitle: return 1 if aspect == -1: if self.height: aspect = float(self.width) / self.height else: aspect = 16.0 / 10 self.frameAspect = "%0.6f" % aspect return aspect def _get_sceneHeight(self): default = 80 if not self.subtitle: h = int(128 / self.frameAspect) h = h + h % 2 return h return default def _get_movieFile(self): if self.subtitle and self.subtitle_meta_id>0: try: m = ArchiveFile.get(self.subtitle_meta_id) except: m = None self.subtitle_meta_id = -1 self.srt = '' return m return None def _get_subtitleFile(self): if not self.subtitle and self.subtitle_meta_id>0: try: s = ArchiveFile.get(self.subtitle_meta_id) except: s = None self.subtitle_meta_id = -1 self.srt = '' return s return None def findSubtitleLink(self): subtitle = not self.subtitle q = ArchiveFile.select(AND( ArchiveFile.q.oxdb == self.oxdb, ArchiveFile.q.part == self.part, ArchiveFile.q.subtitle == subtitle, )) self.subtitle_meta_id = -1 if q.count(): for f in q: if not f.path.endswith('.sub'): if f.nameExtra == self.nameExtra or f.nameExtra == 'en': self.subtitle_meta_id = f.id def _get_mini_movie_file(self): return join(oxdb_cache.mini_movie_folder, self.md5sum[:4], "%s.avi" % self.md5sum) def removeMiniMovie(self): if os.path.exists(self.mini_movie_file): os.remove(self.mini_movie_file) def _findSubtitleByStart(self, start): if self.srt: d = srt2dict(self.srt) for s in d.values(): if s['start'] == start: return s return None def extractAll(self, force = False): self.updateMeta() self.extractClipMovie() self.extractTimeline() def extractClip(self, inpoint, outpoint=-1, flash_folder=oxdb_cache.frame_cache_root): movie_file = self.mini_movie_file flash_folder = join(flash_folder, self.oxdb) flash_movie = join(flash_folder, "%s.flv" % inpoint.replace(':', '.')) if not os.path.exists(flash_folder): os.makedirs(flash_folder) width = 128 height = int(width / (self.width / self.height)) height = height - height % 2 if outpoint == -1: s = self._findSubtitleByStart(inpoint) if s: outpoint = s['stop'] else: outpoint = shift_time(2000, inpoint) if self.part > 1: offset = self.offset extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0) #extract_flash_ng(self.absolutePath, flash_movie, inpoint, outpoint, width, height, offset) def extractFrame(self, position, img_folder=oxdb_cache.frame_cache_root): if self.movieFile: return self.movieFile.extractFrame(position, img_folder) movie_file = self.mini_movie_file img_folder = join(img_folder, self.oxdb) if not os.path.exists(img_folder): os.makedirs(img_folder) extract_frame(movie_file, position, img_folder, offset = 0, redo = False) def extractFrames(self, img_folder=oxdb_cache.frame_cache_root): if self.movieFile: return self.movieFile.extractFrames(img_folder) movie_file = self.absolutePath img_folder = join(img_folder, self.oxdb) if not os.path.exists(img_folder): os.makedirs(img_folder) extract_subtitles(movie_file, self.srt.encode('utf-8'), img_folder, width=100, offset=self.offset) def extractClipMovie(self, force = False): if self.broken: return mini_movie_file = self.mini_movie_file movie_file = self.absolutePath if not movie_file or not os.path.exists(movie_file): return if os.path.exists(mini_movie_file): print "clip exists, skipping extraction", mini_movie_file return if not os.path.exists(dirname(mini_movie_file)): os.makedirs(dirname(mini_movie_file)) options = '' options += " -ovc lavc -lavcopts vcodec=mjpeg" options += " -af volnorm=1 -oac mp3lame -lameopts br=64:mode=3 -af resample=44100" options += " -vf scale -zoom -xy 128" options += ' "%s"' % movie_file options += ' -o "%s"' % mini_movie_file cmd = "mencoder %s >/dev/null 2>&1" % options print cmd.encode('utf-8') os.system(cmd.encode('utf-8')) def _get_timelineFile(self): return join(oxdb_cache.cache_root, 'timeline', self.md5sum[:4], "%s.png" % self.md5sum) def removeTimeline(self): if os.path.exists(self.timelineFile): os.unlink(self.timelineFile) def extractTimeline(self, force = False): if self.broken: return if force: self.removeTimeline() #return if its not a video if self.height <= 0: return t = self.timelineFile if os.path.exists(self.mini_movie_file): if not os.path.exists(t): os.makedirs(os.path.dirname(t)) #lets only extract the timeline if it does not exist yet if os.path.exists(t): print "skipping, ", self.path return extractTimelineScript = abspath(join(dirname(__file__), "tools/extract_timeline.py")) cmd = "python %s %s %s" %(extractTimelineScript, t, self.mini_movie_file) os.system(cmd) def loadSubtitleFromFile(self): if self.movieFile: movieFile = self.movieFile subtitle = self else: movieFile = self subtitle = self.subtitleFile if movieFile: movieFile.srt = '' if subtitle and movieFile: if not subtitle.absolutePath or not os.path.exists(subtitle.absolutePath): return if not subtitle.absolutePath.endswith('.srt'): print "this is not a subtitle", subtitle.absolutePath return movieFile.srt = loadSrt(subtitle.absolutePath) def _set_fps(self, fps): fps = int(fps * 10000) self._SO_set_fps(fps) def _get_fps(self): fps = self._SO_get_fps() if fps: return float(fps) / 10000 return 0.0 def _get_resolution(self): if self.subtitle or (not self.width or not self.height): return u'' return "%sx%s" % (self.width, self.height) def updateBpp(self): if self.height and self.width and self.fps and self.bitrate: bpp = int(self.height * self.width * self.fps / self.bitrate) self.bpp = bpp else: bpp = 0 return bpp def updatePixels(self): if self.length and self.fps and self.width and self.height: pixels = int((self.length / 1000) * self.fps * self.width * self.height) self.pixels = pixels else: pixels = 0 return pixels def _get_pixels(self): pixels = self._SO_get_pixels() if not pixels: pixels = self.updatePixels() return pixels def clip(self, position): return cache.loadClip(self, position) def frame(self, position): return cache.loadFrame(self, position) def timeline(self): return cache.loadTimeline(self)