oxdbarchive/oxdbarchive/model.py

500 lines
14 KiB
Python

# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
from sqlobject import *
from turbogears.database import PackageHub
import turbogears
import re
from urllib import quote, quote_plus
import os
from os.path import abspath, join, dirname
from datetime import datetime
import time
import math
from glob import glob
import shutil
import oxdb_cache
import cache
import oxdb_import
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id
from subtitles import *
import midentify
# Database hub of the 'oxdbarchive' package, published as the module-level
# __connection__ (presumably so the SQLObject classes below use it as their
# default connection -- confirm against the SQLObject/TurboGears docs).
hub = PackageHub('oxdbarchive')
__connection__ = hub
class Archive(SQLObject):
  '''
    A named directory tree whose files are tracked as ArchiveFile rows.

    name is the unique label of the archive, basePath the directory that
    the relative ArchiveFile.path values are joined to.
  '''
  name = UnicodeCol(length=255, alternateID=True)
  basePath = UnicodeCol()

  def _get_basePath(self):
    '''
      basePath, always ending in a slash. A stored value without the
      trailing slash is normalized and written back.
    '''
    basePath = self._SO_get_basePath()
    if not basePath.endswith('/'):
      basePath = basePath + "/"
      self.basePath = basePath
    return basePath

  def _get_files(self):
    '''
      list of all ArchiveFile rows that belong to this archive
    '''
    return list(ArchiveFile.select(ArchiveFile.q.archiveID == self.id))

  def _get_file_list(self):
    '''
      dict mapping the path of every file in this archive to a dict with
      its md5sum and size. Rows that raise SQLObjectNotFound while being
      read are destroyed and left out.
    '''
    files = {}
    for f in self.files:
      try:
        files[f.path] = dict(md5sum = f.md5sum, size = f.size)
      except SQLObjectNotFound:
        f.destroySelf()
    return files

  def addLocalFile(self, fname, movie = None):
    '''
      collect stats and metadata of the local file fname via oxdb_import
      and hand them to addFile. The 'date' value is converted with
      datetime.fromtimestamp first. Returns the status message of addFile.
    '''
    params = oxdb_import.oxdb_file_stats(fname)
    params = oxdb_import.oxdb_file_metadata(params)
    params['date'] = datetime.fromtimestamp(params['date'])
    return self.addFile(params, movie)

  def addFile(self, params, movie = None):
    '''
      updates or adds new file to database,
      params is a dict with at least md5sum, path, date but also needs
      audio, video, length, size, bpp for new files

      params['path'] is rewritten in place with basePath removed.
      movie is accepted but not used.
      returns a short status message.
    '''
    params['path'] = params['path'].replace(self.basePath, '')
    q = ArchiveFile.select(AND(
      ArchiveFile.q.archiveID == self.id,
      ArchiveFile.q.md5sum == params['md5sum'],
    ))
    if q.count() > 0:
      #known content: update the existing entry
      f = q[0]
      #FIXME: deal with double files here. right now they are changed
      if f.path != params['path']:
        ret = "this file is already in the database, first time at:\n\t%s\n\t" % f.path
      else:
        ret = "updated entry"
      for field in ('path', 'date'):
        setattr(f, field, params[field])
    else:
      #just a new md5? happens for srt files quite often
      qq = ArchiveFile.select(AND(
        ArchiveFile.q.archiveID == self.id,
        ArchiveFile.q.path == params['path'],
      ))
      if qq.count() == 1:
        f = qq[0]
        #the content behind a known path changed: take over the new
        #checksum, otherwise the entry keeps describing the old content
        #and importFiles can never match it against the disk again
        f.md5sum = params['md5sum']
        f.date = params['date']
        if 'size' in params:
          f.size = params['size']
        ret = "updated entry"
      else:
        #add new file to database
        title = oxdb_title(params['path'])
        director = oxdb_director(params['path'])
        oxdb = oxdb_id(title, director)
        f = ArchiveFile(
          archive = self,
          path = params['path'],
          date = params['date'],
          oxdb = oxdb,
          md5sum = params['md5sum'],
          audio = params['audio'],
          video = params['video'],
          length = params['length'],
          size = params['size'],
          bpp = params['bpp'],
          date_added = datetime.now(),
          subtitle = params['path'].endswith('.srt'),
        )
        ret = "added entry"
    #NOTE(review): assumed to run for updated and new entries alike, confirm
    f.updateMeta()
    return ret

  def removeFile(self, md5sum):
    '''
      remove file based on md5sum from archive

      returns dict(result="file removed") if exactly one entry of this
      archive matched, dict(result="not in archive") otherwise.
    '''
    q = ArchiveFile.select(AND(
      ArchiveFile.q.archiveID == self.id,
      ArchiveFile.q.md5sum == md5sum,
    ))
    if q.count() == 1:
      for i in q:
        ArchiveFile.delete(i.id)
      return dict(result="file removed")
    return dict(result="not in archive")

  def importFiles(self):
    '''
      bring the database in line with the files found below basePath.

      Files whose path and size are already known are skipped, all
      others go through addFile. Entries whose checksum and path were
      both not seen on disk are removed. Returns (and prints) a dict
      with the counters 'skipped', 'added' and 'remove'.
    '''
    stats = {'skipped': 0, 'added': 0, 'remove': 0}
    base = self.basePath
    print(base)
    files = oxdb_import.oxdb_spider(base)
    #file_list is an attribute (see _get_file_list), not a method
    oxdb_files = self.file_list
    md5sum_on_disk = set()
    paths_on_disk = set()
    for f in files:
      meta = oxdb_import.oxdb_file_stats(f)
      #remove base
      f = f.replace(base, '')
      paths_on_disk.add(f)
      known = oxdb_files.get(f)
      if known is not None and known['size'] == meta['size']:
        stats['skipped'] += 1
        md5sum_on_disk.add(known['md5sum'])
      else:
        meta = oxdb_import.oxdb_file_metadata(meta)
        #same conversion addLocalFile applies before calling addFile
        if not isinstance(meta['date'], datetime):
          meta['date'] = datetime.fromtimestamp(meta['date'])
        #NOTE(review): utf-8 encoded path kept as before, confirm that
        #addFile and the path column cope with byte strings
        meta['path'] = f.encode('utf-8')
        #TODO: ignore files changed in the last 5 minutes (not implemented)
        print("%s %s" % (self.addFile(meta), f))
        stats['added'] += 1
        md5sum_on_disk.add(meta['md5sum'])
    for f in oxdb_files:
      md5sum = oxdb_files[f]['md5sum']
      #an entry whose path still exists only changed its content and was
      #updated by addFile above, it must not be removed
      if md5sum not in md5sum_on_disk and f not in paths_on_disk:
        print("remove %s" % f)
        self.removeFile(md5sum)
        stats['remove'] += 1
    print(stats)
    return stats
class ArchiveFile(SQLObject):
  '''
    One file (video or subtitle) of an Archive.

    Schema adjustments noted by the original author:
    ALTER TABLE file_meta CHANGE size size bigint;
    ALTER TABLE file_meta CHANGE pixels pixels bigint;
    ALTER TABLE file_meta CHANGE srt srt LONGTEXT;
  '''
  md5sum = UnicodeCol(length=128, alternateID=True)
  oxdb = UnicodeCol(length=128)
  path = UnicodeCol()
  date = DateTimeCol()
  archive = ForeignKey('Archive')
  audio = UnicodeCol()
  video = UnicodeCol()
  length = IntCol()
  size = IntCol()
  bpp = IntCol(default = 0)
  pixels = IntCol(default = 0)
  date_added = DateTimeCol(default = datetime.now)
  pubDate = DateTimeCol(default = datetime.now)
  modDate = DateTimeCol(default = datetime.now)
  height = IntCol(default = -1)
  width = IntCol(default = -1)
  frameAspect = UnicodeCol(default = "1.6", length = 100)
  bitrate = IntCol(default = -1)
  fps = IntCol(default = -1)
  srt = UnicodeCol(default = '')
  subtitle_meta_id = IntCol(default = -1)
  subtitle = BoolCol(default = False)
  part = IntCol(default = 1)
  broken = BoolCol(default = False)
  extracted = BoolCol(default = False)
  filename = UnicodeCol(default = '')

  def _get_part(self):
    '''
      part number taken from the last "Part N" (or, if there is none,
      "CD N") marker in path, 1 without any marker. The stored column is
      brought in line with the computed value.
    '''
    part = 1
    parts = re.compile(r'Part (\d)').findall(self.path)
    if not parts:
      parts = re.compile(r'CD (\d)').findall(self.path)
    if parts:
      part = int(parts[-1])
    #only write if the stored value is stale, a read should not cost an UPDATE
    if self._SO_get_part() != part:
      self.part = part
    return part

  def _get_offset(self):
    '''
      summed length of all non subtitle files with the same oxdb id and
      a lower part number, 0 for the first part
    '''
    part = self.part
    if part <= 1:
      return 0
    length = 0
    q = ArchiveFile.select(AND(
      ArchiveFile.q.oxdb == self.oxdb,
      ArchiveFile.q.part < part,
      ArchiveFile.q.subtitle == False,
    ))
    for f in q:
      length += f.length
    return length

  def _get_ext(self):
    '''
      text after the last dot of path
    '''
    return self.path.split('.')[-1]

  def _get_preferredVersion(self):
    '''
      True if nameExtra mentions 'directors cut' or 'long version'
      (case insensitive)
    '''
    e = self.nameExtra.lower()
    for pref in ('directors cut', 'long version'):
      if pref in e:
        return True
    return False

  def _get_nameExtra(self):
    '''
      dot separated parts of the file name that remain once the title,
      the extension and all parts starting with Part, Season, Episode or
      vo are taken away
    '''
    def keep(part):
      for prefix in ('Part', 'Season', 'Episode', 'vo'):
        if part.startswith(prefix):
          return False
      return True
    name = os.path.basename(self.path)
    parts = name.replace(self.title, '').split('.')[:-1]
    extra = '.'.join([p for p in parts if keep(p)])
    if extra.startswith('.'):
      extra = extra[1:]
    return extra

  def _get_title(self):
    '''
      title derived from path by oxdb_title
    '''
    return oxdb_title(self.path)

  def _get_director(self):
    '''
      director derived from path by oxdb_director
    '''
    return oxdb_director(self.path)

  def _get_absolutePath(self):
    '''
      path joined to the basePath of the archive
    '''
    return join(self.archive.basePath, self.path)

  def updateMeta(self):
    '''
      refresh the subtitle link, the technical metadata reported by
      midentify (only if the file exists on disk), pixels, bpp, the
      subtitle text and the oxdb id.
    '''
    self.findSubtitleLink()
    absolutePath = self.absolutePath
    #NOTE(review): everything but the oxdb id is assumed to depend on the
    #file being present, confirm
    if os.path.exists(absolutePath):
      info = midentify.identify(absolutePath)
      self.length = info['length']
      self.width = info['width']
      self.frameAspect = "%0.6f" % info['aspect']
      self.height = info['height']
      self.bitrate = info['video_bitrate']
      self.fps = info['fps']
      self.audio = info['audio_codec']
      self.video = info['video_codec']
      self.updatePixels()
      self.updateBpp()
      self.loadSubtitleFromFile()
    self.oxdb = oxdb_id(self.title, self.director)

  def _get_frameAspect(self):
    '''
      aspect ratio as number: 1 for subtitles, otherwise the stored
      value. A stored -1 is replaced by width / height, or by 16:10 if
      the dimensions are unknown, and the result is written back.
    '''
    if self.subtitle:
      return 1
    aspect = float(self._SO_get_frameAspect())
    if aspect == -1:
      #width and height default to -1, only positive values are real
      if self.width > 0 and self.height > 0:
        aspect = float(self.width) / self.height
      else:
        aspect = 16.0 / 10
      self.frameAspect = "%0.6f" % aspect
    return aspect

  def _get_sceneHeight(self):
    '''
      height of a 128 wide scene image rounded up to an even number,
      80 for subtitles and for files without a usable aspect ratio
    '''
    default = 80
    if self.subtitle:
      return default
    aspect = self.frameAspect
    if aspect <= 0:
      return default
    h = int(128 / aspect)
    return h + h % 2

  def _linkedFile(self):
    '''
      ArchiveFile that subtitle_meta_id points to, or None. If that row
      is gone the link and the stored srt are reset.
    '''
    try:
      return ArchiveFile.get(self.subtitle_meta_id)
    except SQLObjectNotFound:
      #only a missing row invalidates the link, other errors propagate
      #instead of silently wiping the subtitle data
      self.subtitle_meta_id = -1
      self.srt = ''
      return None

  def _get_movieFile(self):
    '''
      for a subtitle: the linked movie file, otherwise None
    '''
    if self.subtitle and self.subtitle_meta_id > 0:
      return self._linkedFile()
    return None

  def _get_subtitleFile(self):
    '''
      for a movie: the linked subtitle file, otherwise None
    '''
    if not self.subtitle and self.subtitle_meta_id > 0:
      return self._linkedFile()
    return None

  def findSubtitleLink(self):
    '''
      link this file to its counterpart (subtitle for a movie, movie for
      a subtitle) with the same oxdb id and part. Files ending in .sub
      are ignored, the counterpart must have the same nameExtra or the
      nameExtra 'en'; the last match wins, -1 is stored without a match.
    '''
    wanted = not self.subtitle
    nameExtra = self.nameExtra
    link = -1
    q = ArchiveFile.select(AND(
      ArchiveFile.q.oxdb == self.oxdb,
      ArchiveFile.q.part == self.part,
      ArchiveFile.q.subtitle == wanted,
    ))
    for f in q:
      if f.path.endswith('.sub'):
        continue
      extra = f.nameExtra
      if extra == nameExtra or extra == 'en':
        link = f.id
    self.subtitle_meta_id = link

  def _get_mini_movie_file(self):
    '''
      location of the preview movie: <mini_movie_folder>/<first 4
      characters of md5sum>/<md5sum>.avi
    '''
    return join(oxdb_cache.mini_movie_folder, self.md5sum[:4], "%s.avi" % self.md5sum)

  def removeMiniMovie(self):
    '''
      delete the preview movie if it exists
    '''
    if os.path.exists(self.mini_movie_file):
      os.remove(self.mini_movie_file)

  def _findSubtitleByStart(self, start):
    '''
      subtitle entry of the stored srt whose 'start' equals start, or None
    '''
    if self.srt:
      d = srt2dict(self.srt)
      for s in d.values():
        if s['start'] == start:
          return s
    return None

  def extractAll(self, force = False):
    '''
      update the metadata, then extract preview movie and timeline.
      NOTE(review): force is not passed on to the extract methods, so it
      has no effect here; confirm whether it should be forwarded.
    '''
    self.updateMeta()
    self.extractClipMovie()
    self.extractTimeline()

  def extractClip(self, inpoint, outpoint=-1, flash_folder=oxdb_cache.frame_cache_root):
    '''
      cut a 128 wide flash clip from the preview movie into
      <flash_folder>/<oxdb>/<inpoint with ':' replaced by '.'>.flv

      Without outpoint the clip ends at the stop of the subtitle that
      starts at inpoint, or at shift_time(2000, inpoint) if there is no
      such subtitle.
    '''
    movie_file = self.mini_movie_file
    flash_folder = join(flash_folder, self.oxdb)
    flash_movie = join(flash_folder, "%s.flv" % inpoint.replace(':', '.'))
    if not os.path.exists(flash_folder):
      os.makedirs(flash_folder)
    width = 128
    #float division: with integers 640/480 is 1 and every clip came out
    #square, portrait sizes even divided by zero
    if self.width > 0 and self.height > 0:
      aspect = float(self.width) / self.height
    else:
      aspect = self.frameAspect
    height = int(width / aspect)
    height = height - height % 2
    if outpoint == -1:
      s = self._findSubtitleByStart(inpoint)
      if s:
        outpoint = s['stop']
      else:
        outpoint = shift_time(2000, inpoint)
    #the clip is cut from the preview of this very file, so no part offset
    extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
    #extract_flash_ng(self.absolutePath, flash_movie, inpoint, outpoint, width, height, offset)

  def extractFrame(self, position, img_folder=oxdb_cache.frame_cache_root):
    '''
      extract the frame at position from the preview movie into
      <img_folder>/<oxdb>. Subtitles delegate to their movie file.
    '''
    movie = self.movieFile
    if movie:
      return movie.extractFrame(position, img_folder)
    movie_file = self.mini_movie_file
    img_folder = join(img_folder, self.oxdb)
    if not os.path.exists(img_folder):
      os.makedirs(img_folder)
    extract_frame(movie_file, position, img_folder, offset = 0, redo = False)

  def extractFrames(self, img_folder=oxdb_cache.frame_cache_root):
    '''
      run extract_subtitles on the original file with the stored srt,
      writing into <img_folder>/<oxdb>. Subtitles delegate to their
      movie file.
    '''
    movie = self.movieFile
    if movie:
      return movie.extractFrames(img_folder)
    movie_file = self.absolutePath
    img_folder = join(img_folder, self.oxdb)
    if not os.path.exists(img_folder):
      os.makedirs(img_folder)
    extract_subtitles(movie_file, self.srt.encode('utf-8'), img_folder, width=100, offset=self.offset)

  def extractClipMovie(self, force = False):
    '''
      encode the preview movie (mjpeg video scaled with -xy 128, mp3
      audio) with mencoder unless it exists already. Nothing happens for
      files flagged broken or missing on disk.
      NOTE(review): force is accepted but not evaluated.
    '''
    import subprocess
    if self.broken:
      return
    mini_movie_file = self.mini_movie_file
    movie_file = self.absolutePath
    if not movie_file or not os.path.exists(movie_file):
      return
    if os.path.exists(mini_movie_file):
      print("clip exists, skipping extraction %s" % mini_movie_file)
      return
    target_folder = dirname(mini_movie_file)
    if not os.path.exists(target_folder):
      os.makedirs(target_folder)
    #argument list instead of a shell string: quotes, $ or backticks in
    #a file name must not be interpreted by a shell
    cmd = [
      'mencoder',
      '-ovc', 'lavc', '-lavcopts', 'vcodec=mjpeg',
      '-af', 'volnorm=1', '-oac', 'mp3lame', '-lameopts', 'br=64:mode=3',
      '-af', 'resample=44100',
      '-vf', 'scale', '-zoom', '-xy', '128',
      movie_file,
      '-o', mini_movie_file,
    ]
    print(' '.join(cmd).encode('utf-8'))
    #output of mencoder is discarded as before
    devnull = open(os.devnull, 'w')
    try:
      subprocess.call([arg.encode('utf-8') for arg in cmd],
                      stdout=devnull, stderr=subprocess.STDOUT)
    finally:
      devnull.close()

  def _get_timelineFile(self):
    '''
      location of the timeline image: <cache_root>/timeline/<first 4
      characters of md5sum>/<md5sum>.png
    '''
    return join(oxdb_cache.cache_root, 'timeline', self.md5sum[:4], "%s.png" % self.md5sum)

  def removeTimeline(self):
    '''
      delete the timeline image if it exists
    '''
    if os.path.exists(self.timelineFile):
      os.unlink(self.timelineFile)

  def extractTimeline(self, force = False):
    '''
      render the timeline image from the preview movie with
      tools/extract_timeline.py. Skipped for broken files, files without
      a positive height, a missing preview movie or an existing
      timeline; force removes an existing timeline first.
    '''
    if self.broken:
      return
    if force:
      self.removeTimeline()
    #return if its not a video
    if self.height <= 0:
      return
    mini_movie_file = self.mini_movie_file
    if not os.path.exists(mini_movie_file):
      return
    t = self.timelineFile
    #lets only extract the timeline if it does not exist yet
    if os.path.exists(t):
      print("skipping,  %s" % self.path)
      return
    #the folder is shared by all files with the same md5 prefix, only
    #create it if it is not there yet (makedirs fails on existing folders)
    timeline_folder = dirname(t)
    if not os.path.exists(timeline_folder):
      os.makedirs(timeline_folder)
    extractTimelineScript = abspath(join(dirname(__file__), "tools/extract_timeline.py"))
    cmd = "python %s %s %s" % (extractTimelineScript, t, mini_movie_file)
    os.system(cmd)

  def loadSubtitleFromFile(self):
    '''
      store the content of the subtitle file in srt of the movie file.
      Called on a subtitle the linked movie is the target, called on a
      movie its linked subtitle is the source. srt is emptied if there
      is no subtitle, the file is missing or does not end in .srt.
    '''
    movieFile = self.movieFile
    if movieFile:
      subtitle = self
    else:
      movieFile = self
      subtitle = self.subtitleFile
    srt = ''
    if subtitle:
      path = subtitle.absolutePath
      if path and os.path.exists(path):
        if path.endswith('.srt'):
          srt = loadSrt(path)
        else:
          print("this is not a subtitle %s" % path)
    movieFile.srt = srt

  def _set_fps(self, fps):
    '''
      fps is stored as integer, multiplied by 10000
    '''
    #round, plain int() cuts values like 0.29 * 10000 = 2899.99.. down
    self._SO_set_fps(int(round(fps * 10000)))

  def _get_fps(self):
    '''
      stored fps value divided by 10000 as float, 0.0 if nothing is stored
    '''
    fps = self._SO_get_fps()
    if fps:
      return float(fps) / 10000
    return 0.0

  def _get_resolution(self):
    '''
      "<width>x<height>", empty for subtitles or without width or height
    '''
    if self.subtitle or (not self.width or not self.height):
      return u''
    return "%sx%s" % (self.width, self.height)

  def updateBpp(self):
    '''
      compute height * width * fps / bitrate, store it in bpp and return
      it. Returns 0 and stores nothing if one of the values is not set.
    '''
    if self.height and self.width and self.fps and self.bitrate:
      bpp = int(self.height * self.width * self.fps / self.bitrate)
      self.bpp = bpp
    else:
      bpp = 0
    return bpp

  def updatePixels(self):
    '''
      compute (length / 1000, whole number) * fps * width * height,
      store it in pixels and return it. Returns 0 and stores nothing if
      one of the values is not set.
    '''
    if self.length and self.fps and self.width and self.height:
      #floor division as before, written out explicitly
      pixels = int((self.length // 1000) * self.fps * self.width * self.height)
      self.pixels = pixels
    else:
      pixels = 0
    return pixels

  def _get_pixels(self):
    '''
      stored pixels, recomputed with updatePixels if nothing is stored
    '''
    pixels = self._SO_get_pixels()
    if not pixels:
      pixels = self.updatePixels()
    return pixels

  def clip(self, position):
    '''
      result of cache.loadClip for this file at position
    '''
    return cache.loadClip(self, position)

  def frame(self, position):
    '''
      result of cache.loadFrame for this file at position
    '''
    return cache.loadFrame(self, position)

  def timeline(self):
    '''
      result of cache.loadTimeline for this file
    '''
    return cache.loadTimeline(self)