oxdbarchive/oxdbarchive/model.py
2008-10-13 14:21:13 +02:00

792 lines
24 KiB
Python

# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
from datetime import datetime
from glob import glob
from os.path import abspath, join, dirname, exists
from urllib import quote, quote_plus
import gzip
import math
import os
import re
import shutil
import socket
import StringIO
import time
import urllib2
from sqlobject import *
from turbogears.database import PackageHub
import turbogears
import simplejson
from oxlib.net import getUrl
import oxlib
import subtitles
import cache
import oxdb_import
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
from extract import *
import midentify
def debug(string):
  """Debug-output hook; printing is disabled, so the message is discarded."""
  #print string
  return None
# Database hub for the 'oxdbarchive' package. Binding it to the module-level
# name __connection__ is how the SQLObject classes in this module are given
# their connection (SQLObject/TurboGears convention).
hub = PackageHub('oxdbarchive')
__connection__ = hub
# An archive: a named directory tree rooted at basePath whose files are tracked
# as ArchiveFile rows and announced to an optional frontend over HTTP.
class Archive(SQLObject):
# unique archive name (alternateID)
name = UnicodeCol(length=255, alternateID=True)
# root directory of the archive on disk; the getter appends a trailing '/'
basePath = UnicodeCol()
# base URL of the frontend to notify; notifyFrontend does nothing when empty
baseUrlFrontend = UnicodeCol(default = '')
# compared against ArchiveFile.published in updateFrontend, which then sets it to now
published = DateTimeCol(default=datetime.now)
# defaults to creation time; no method visible in this file writes it
modified = DateTimeCol(default=datetime.now)
# Getter override for the basePath column: returns the stored path, with a
# trailing '/' appended when it is missing.
def _get_basePath(self):
basePath = self._SO_get_basePath()
if not basePath.endswith('/'):
basePath = basePath + "/"
# write the normalized value back to the column
# NOTE(review): presumably only done when the slash had to be added - confirm
self.basePath = basePath
return basePath
def findNew(self):
#only update archive if not modified for more than 2 hours
if self.isOnline():
if self.sinceLastModification() > 60*60*2:
self.importFiles()
else:
print "ignoring %s, was changed less than 2 hours ago" % self.basePath
else:
print "WARNING %s, is not online, check power and disk" % self.basePath
def isOnline(self):
  """Return True when basePath can be traversed and contains entries.

  Runs `find` over basePath (output discarded); a non-zero exit status
  means offline. Otherwise the archive counts as online only if the
  glob basePath* matches at least one entry.
  """
  status = os.system('find "%s" >/dev/null 2>&1' % self.basePath)
  if status != 0:
    return False
  return bool(glob("%s*" % self.basePath))
def sinceLastModification(self):
  """Return the seconds elapsed since the newest mtime under basePath/*/*.

  Entries that disappear between the directory listing and the stat call
  are skipped. When nothing matches, the archive is treated as never
  modified (mtime 0), so the result is large instead of max() raising
  ValueError on an empty list.
  """
  mtimes = [0]
  for f in glob('%s*/*' % self.basePath):
    try:
      mtimes.append(os.stat(f).st_mtime)
    except OSError:
      # vanished or unreadable since the glob; ignore it
      continue
  return time.time() - max(mtimes)
def notifyFrontend(self, action, sha1sum):
if self.baseUrlFrontend:
dto = socket.getdefaulttimeout()
socket.setdefaulttimeout(256)
url = "%s/%s?sha1sum=%s" % (self.baseUrlFrontend, action, sha1sum)
#print url
try:
result = getUrl(url)
except urllib2.HTTPError, e:
print "ERROR:", e.code
data = e.read()
if e.headers.get('content-encoding', None) == 'gzip':
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
print data
raise
except:
import traceback
traceback.print_exc()
raise
#print "Frontend:", result['result']
socket.setdefaulttimeout(dto)
def _get_files(self):
  """Return a list of all ArchiveFile rows that belong to this archive."""
  return list(ArchiveFile.select(ArchiveFile.q.archiveID == self.id))
def _get_file_list(self):
files = {}
for f in self.files:
try:
d = dict(sha1sum = f.sha1sum, size = f.size)
files[f.path] = d
except SQLObjectNotFound:
f.destroySelf()
return files
def addLocalFile(self, fname, movie = None):
  """Collect stats and metadata for the local file `fname` and add it.

  The numeric 'date' produced by oxdb_import is converted to a datetime
  before the parameters are handed to addFile, whose result is returned.
  """
  meta = oxdb_import.oxdb_file_metadata(oxdb_import.oxdb_file_stats(fname))
  meta['date'] = datetime.fromtimestamp(meta['date'])
  return self.addFile(meta, movie)
# Add a file to this archive or refresh its existing row; returns a short
# utf-8 encoded status message.
# NOTE(review): the `movie` argument is not used anywhere in this method.
def addFile(self, params, movie = None):
'''
updates or adds new file to database,
params is a dict with at least sha1sum, path, date but also needs
audio, video, length, size, bpp for new files
'''
# store paths relative to the archive root
params['path'] = params['path'].replace(self.basePath, u'')
# first look the file up by content hash within this archive
q = ArchiveFile.select(AND(
ArchiveFile.q.archiveID == self.id,
ArchiveFile.q.sha1sum == params['sha1sum'],
))
if q.count() > 0:
'''update existing entry'''
f = q[0]
#FIXME: deal with double files here. right now they are changed
if f.path != params['path']:
ret = "this file is already in the database, first time at:\n\t%s\n\t" % f.path
else:
ret = "updated entry"
# the known row takes over the new path and date
for field in ('path', 'date'):
setattr(f, field, params[field])
else:
#just a new md5? happens for srt files quite often
# no row with this hash: fall back to a lookup by relative path
qq = ArchiveFile.select(AND(
ArchiveFile.q.archiveID == self.id,
ArchiveFile.q.path == params['path'].encode('utf-8'),
))
f = None
if qq.count() == 1:
# NOTE(review): the row is reused but its stored sha1sum is not replaced
# with params['sha1sum'] in the visible code - confirm this is intended
f = qq[0]
ret = "updated entry"
else:
''' add new file to database '''
# the oxdb id is derived from title and director, both parsed from the path
title = oxdb_title(params['path'])
director = oxdb_director(params['path'])
oxdb = oxdb_id(title, director)
f = ArchiveFile(
archive = self,
path = params['path'],
date = params['date'],
oxdb = oxdb,
sha1sum = params['sha1sum'],
audio = params['audio'],
video = params['video'],
length = params['length'],
size = params['size'],
bpp = params['bpp'],
date_added = datetime.now(),
subtitle = params['path'].endswith('.srt'),
)
ret = "added entry"
# refresh technical metadata and derived media, then stamp the modification time
f.updateMeta()
f.extractAll()
f.modified = datetime.now()
return ret.encode('utf-8')
def removeFile(self, sha1sum):
  '''
    remove file based on sha1sum from archive;
    returns dict(result=...) saying whether a row was removed
  '''
  matches = ArchiveFile.select(AND(
    ArchiveFile.q.archiveID == self.id,
    ArchiveFile.q.sha1sum == sha1sum,
  ))
  if matches.count() != 1:
    return dict(result="not in archive")
  for row in matches:
    ArchiveFile.delete(row.id)
  self.notifyFrontend('remove', sha1sum)
  return dict(result="file removed")
# Scan basePath on disk and reconcile the database with it: unchanged files are
# skipped, new or changed files are added, and rows whose sha1sum was not seen
# on disk are removed. Returns the counters dict
# {'skipped': n, 'added': n, 'remove': n}.
def importFiles(self):
stats = {'skipped': 0, 'added': 0, 'remove':0}
print self.basePath
files = oxdb_import.oxdb_spider(self.basePath)
# snapshot of the database state before the scan: {relative path: {sha1sum, size}}
oxdb_files = self.file_list
sha1sum_on_disk = []
for f in files:
meta = oxdb_import.oxdb_file_stats(f)
# compare by path relative to the archive root
f = f.replace(self.basePath, '')
# a file counts as unchanged when path and size both match the database
if f in oxdb_files and oxdb_files[f]['size'] == meta['size']:
stats['skipped'] += 1
sha1sum_on_disk.append(oxdb_files[f]['sha1sum'])
else:
meta = oxdb_import.oxdb_file_metadata(meta)
#FIXME: check input
# normalize numeric fields to int before storing
for key in ('bpp', 'size', 'length', 'date'):
meta[key] = int(float(meta[key]))
meta['date'] = datetime.fromtimestamp(meta['date'])
print self.addFile(meta), f.encode('utf-8')
stats['added'] += 1
sha1sum_on_disk.append(meta['sha1sum'])
self.notifyFrontend('add', meta['sha1sum'])
# anything in the pre-scan snapshot whose hash was not seen on disk is gone
for f in oxdb_files:
if oxdb_files[f]['sha1sum'] not in sha1sum_on_disk:
print "remove", f.encode('utf-8')
self.removeFile(oxdb_files[f]['sha1sum'])
stats['remove'] += 1
print stats
print "updating information on frontend"
self.updateFrontend()
return stats
'''
Interaction with the frontend:
- update: send the files modified since the last sync
- bootstrap: send all files
- sync: get the list from the frontend and remove/add those that are out of sync
'''
def updateFrontend(self):
  """Send 'add' for files published since the last update, then reset the marker.

  Selects every ArchiveFile whose published time is >= this archive's
  published time and afterwards sets the archive's published time to now.
  """
  pending = ArchiveFile.select(ArchiveFile.q.published >= self.published)
  for entry in pending:
    self.notifyFrontend('add', entry.sha1sum)
  self.published = datetime.now()
def bootstrapFrontend(self):
  """Send an 'add' notification to the frontend for every file in the archive."""
  for entry in self.files:
    digest = entry.sha1sum
    self.notifyFrontend('add', digest)
def syncFrontend(self, addOnly=False):
  """Reconcile the frontend's file list with the database.

  Fetches <baseUrlFrontend>/list (JSON with a 'files' list of sha1sums).
  For each listed sha1sum unknown to the database a 'remove' is sent;
  unless addOnly is set, an 'add' is sent for each database file the
  frontend did not list.

  The global socket timeout is raised to 256 seconds for the fetch and
  restored afterwards even when the fetch or JSON decoding fails (the
  original left the long timeout in place on error).

  NOTE(review): as written, addOnly=True suppresses the 'add' step and
  keeps the 'remove' step, which reads inverted relative to the
  parameter name; behavior is kept unchanged - confirm intent.
  """
  dto = socket.getdefaulttimeout()
  socket.setdefaulttimeout(256)
  try:
    data = getUrl("%s/list" % self.baseUrlFrontend)
    sha1sums = simplejson.loads(data)['files']
  finally:
    socket.setdefaulttimeout(dto)
  for sha1sum in sha1sums:
    try:
      ArchiveFile.bySha1sum(sha1sum)
    except SQLObjectNotFound:
      self.notifyFrontend('remove', sha1sum)
  if not addOnly:
    for f in ArchiveFile.select(NOT(IN(ArchiveFile.q.sha1sum, sha1sums))):
      self.notifyFrontend('add', f.sha1sum)
# One file inside an Archive (movie, subtitle, ...), together with its technical
# metadata and the state of the derived cache files.
class ArchiveFile(SQLObject):
'''
ALTER TABLE archive_file CHANGE size size bigint;
ALTER TABLE archive_file CHANGE pixels pixels bigint;
ALTER TABLE archive_file CHANGE srt srt LONGTEXT;
'''
# content hashes; both unique (alternateID). md5sum is only used by the *_old cache paths
md5sum = UnicodeCol(length=128, alternateID=True)
sha1sum = UnicodeCol(length=40, alternateID=True)
# id computed by oxdb_id(title, director); shared by all files of one movie
oxdb = UnicodeCol(length=128)
# path relative to archive.basePath
path = UnicodeCol()
date = DateTimeCol()
archive = ForeignKey('Archive')
# codec names as reported by midentify (see updateMeta)
audio = UnicodeCol()
video = UnicodeCol()
# duration; treated as milliseconds by _startPoints and updatePixels
length = IntCol()
size = IntCol()
# value computed by updateBpp; -1 when unknown
bpp = FloatCol(default = -1)
# value computed by updatePixels; 0 when unknown
pixels = IntCol(default = 0)
date_added = DateTimeCol(default=datetime.now)
published = DateTimeCol(default=datetime.now)
modified = DateTimeCol(default=datetime.now)
height = IntCol(default = -1)
width = IntCol(default = -1)
# aspect ratio stored as text; parsed with float() in _get_frameAspect
frameAspect = UnicodeCol(default = "1.6", length = 100)
bitrate = IntCol(default = -1)
# stored as fps * 10000 (see _set_fps / _get_fps)
fps = IntCol(default = -1)
# subtitle text in srt format, filled by loadSubtitleFromFile
srt = UnicodeCol(default = '')
# id of the linked ArchiveFile (movie <-> subtitle); -1 when there is none
subtitle_meta_id = IntCol(default = -1)
# True when this file is itself a subtitle
subtitle = BoolCol(default = False)
part = IntCol(default = 1)
# broken files are skipped by updateMeta and by all extraction methods
broken = BoolCol(default = False)
# True once the mini movie was encoded (or the file needs none)
extracted = BoolCol(default = False)
def genSHA1(self):
  """Recompute the sha1sum column from the file at absolutePath."""
  digest = oxdb_import.oxdb_sha1sum(self.absolutePath)
  self.sha1sum = digest
# Getter override for the part column: derives the part number from the path.
# The last 'Part <digit>' match wins; 'CD <digit>' is only tried when no
# 'Part' match exists; the default is 1. Only a single digit is captured.
def _get_part(self):
part = 1
parts = re.compile('Part (\d)').findall(self.path)
if not parts:
parts = re.compile('CD (\d)').findall(self.path)
if parts:
part = int(parts[-1])
# the derived value is written back to the column
# NOTE(review): unclear whether this write only happens on a match - confirm
self.part = part
return part
def _get_offset(self):
if not self.part:
self.part = 1
if self.part == 1:
return 0
length = 0
q = ArchiveFile.select(AND(
ArchiveFile.q.oxdb == self.oxdb,
ArchiveFile.q.part < self.part,
ArchiveFile.q.subtitle == False,
))
for f in q:
length += f.length
return length
def _get_ext(self):
return self.path.split('.')[-1]
def _get_preferredVersion(self):
e = self.nameExtra.lower()
for pref in ('directors cut', 'long version'):
if pref in e:
return True
return False
def _get_nameExtra(self):
path = os.path.basename(self.path)
parts = path.replace(self.title, '').split('.')[:-1]
parts = filter(lambda x: not x.startswith('Part'), parts)
parts = filter(lambda x: not x.startswith('Season'), parts)
parts = filter(lambda x: not x.startswith('Episode'), parts)
parts = filter(lambda x: not x.startswith('vo'), parts)
extra = '.'.join(parts)
if extra.startswith('.'):
extra = extra[1:]
return extra
def _get_title(self):
  """Return the title that oxdb_title derives from this file's path."""
  title = oxdb_title(self.path)
  return title
def _get_director(self):
  """Return the director that oxdb_director derives from this file's path."""
  director = oxdb_director(self.path)
  return director
def _get_absolutePath(self):
return join(self.archive.basePath, self.path)
# Refresh this row's derived metadata: re-link movie and subtitle, re-read the
# technical data via midentify when the file exists and is not marked broken,
# reload the subtitle text and recompute the oxdb id from title and director.
def updateMeta(self):
self.findSubtitleLink()
if exists(self.absolutePath):
if not self.broken:
info = midentify.identify(self.absolutePath)
self.length = info['length']
self.width = info['width']
# the aspect ratio is stored as text with six decimals
self.frameAspect = "%0.6f" % info['aspect']
self.height = info['height']
self.bitrate = info['video_bitrate']
# goes through _set_fps, which stores fps * 10000
self.fps = info['fps']
self.audio = info['audio_codec']
self.video = info['video_codec']
# derived values depend on the fields assigned above
self.updatePixels()
self.updateBpp()
self.loadSubtitleFromFile()
# title and director are parsed from the path
self.oxdb = oxdb_id(self.title, self.director)
# Getter override for the frameAspect column: returns the aspect ratio as a
# number. Subtitle rows always report 1. A stored value of -1 is replaced by
# width/height, or by 16:10 when height is falsy.
def _get_frameAspect(self):
aspect = float(self._SO_get_frameAspect())
if self.subtitle:
return 1
# -1 is handled as "not set yet"
if aspect == -1:
if self.height:
aspect = float(self.width) / self.height
else:
aspect = 16.0 / 10
# the value is written back to the column as text with six decimals
# NOTE(review): presumably only when it was recomputed above - confirm
self.frameAspect = "%0.6f" % aspect
return aspect
def _get_sceneWidth(self):
return 128
def _get_sceneHeight(self):
default = 80
if not self.subtitle:
h = int(self.sceneWidth / self.frameAspect)
h = h + h % 2
return h
return default
def _get_movieFile(self):
if self.subtitle and self.subtitle_meta_id>0:
try:
m = ArchiveFile.get(self.subtitle_meta_id)
except:
m = None
self.subtitle_meta_id = -1
self.srt = ''
return m
return None
def _get_subtitleFile(self):
if not self.subtitle and self.subtitle_meta_id>0:
try:
s = ArchiveFile.get(self.subtitle_meta_id)
except:
s = None
self.subtitle_meta_id = -1
self.srt = ''
return s
return None
def findSubtitleLink(self):
  """Link this row to its counterpart (movie <-> subtitle) via subtitle_meta_id.

  Candidates share oxdb id and part and have the opposite subtitle flag.
  '.sub' files are ignored; a candidate matches when its nameExtra equals
  this row's nameExtra or is 'en'. The last match wins; the link is reset
  to -1 when nothing matches.
  """
  wanted_flag = not self.subtitle
  candidates = ArchiveFile.select(AND(
    ArchiveFile.q.oxdb == self.oxdb,
    ArchiveFile.q.part == self.part,
    ArchiveFile.q.subtitle == wanted_flag,
  ))
  self.subtitle_meta_id = -1
  if not candidates.count():
    return
  for candidate in candidates:
    if candidate.path.endswith('.sub'):
      continue
    if candidate.nameExtra == self.nameExtra or candidate.nameExtra == 'en':
      self.subtitle_meta_id = candidate.id
def _get_mini_movie_file_old(self):
  """Return the md5-based path of the mini movie in the old cache layout."""
  digest = self.md5sum
  return join(cache.cache_root, 'mini', digest[:4], "%s.avi" % digest)
def _get_frameFolder_old(self):
  """Return the md5-based frame folder of the old cache layout, creating it."""
  digest = self.md5sum
  folder = join(cache.cache_root, 'frame', digest[:4], digest)
  oxdb_makedir(folder)
  return folder
def _get_stillFolder_old(self):
  """Return the md5-based still folder of the old cache layout, creating it."""
  digest = self.md5sum
  folder = join(cache.cache_root, 'still', digest[:4], digest)
  oxdb_makedir(folder)
  return folder
def _get_timelineFile_old(self):
  """Return the md5-based path of the timeline image in the old cache layout."""
  digest = self.md5sum
  return join(cache.cache_root, 'timeline', digest[:4], "%s.png" % digest)
def _get_posterStillFile_old(self):
  """Return the md5-based path of the poster still in the old cache layout."""
  digest = self.md5sum
  return join(cache.cache_root, 'posterStill', digest[:4], "%s.png" % digest)
def _get_cache_root(self):
  """Return this file's cache directory: <cache_root>/<sha1[:2]>/<sha1>."""
  digest = self.sha1sum
  return join(cache.cache_root, digest[:2], digest)
def _get_mini_movie_file(self):
return join(self.cache_root, "mini.avi")
def _get_timelineFile(self):
return join(self.cache_root, "timeline.png")
def _get_posterStillFile(self):
return join(self.cache_root, "posterStill.png")
def _get_frameFolder(self):
  """Return the 'frames' folder inside this file's cache directory, creating it."""
  folder = join(self.cache_root, "frames")
  oxdb_makedir(folder)
  return folder
def _get_stillFolder(self):
  """Return the 'still' folder inside this file's cache directory, creating it."""
  folder = join(self.cache_root, "still")
  oxdb_makedir(folder)
  return folder
# Move this file's cached artifacts from the old md5-based layout (the *_old
# paths) to the sha1-based layout: frames, stills, mini movie, timeline image
# and poster still.
def moveCache(self):
# NOTE(review): this guard presumably wraps the whole migration - confirm
if os.path.exists(self.mini_movie_file_old):
for folder, folders, files in os.walk(self.frameFolder_old):
for f in files:
# NOTE(review): the source is joined onto the top folder, not onto `folder`,
# so files in sub-directories would not be found - confirm the folder is flat
os.rename(join(self.frameFolder_old, f), join(self.frameFolder, f))
for folder, folders, files in os.walk(self.stillFolder_old):
for f in files:
os.rename(join(self.stillFolder_old, f), join(self.stillFolder, f))
if os.path.exists(self.mini_movie_file_old):
os.rename(self.mini_movie_file_old, self.mini_movie_file)
if os.path.exists(self.timelineFile_old):
os.rename(self.timelineFile_old, self.timelineFile)
if os.path.exists(self.posterStillFile_old):
os.rename(self.posterStillFile_old, self.posterStillFile)
def removeMiniMovie(self):
  """Delete the cached mini movie if it exists."""
  target = self.mini_movie_file
  if not exists(target):
    return
  os.remove(target)
def _startPoints(self):
inpoints = []
if self.srt:
d = subtitles.Subtitle(srt=self.srt)
for s in d.values():
inpoints.append(s['in'])
elif self.length:
minutes = int((float(self.length) / 1000) / 60)
minute = 1000 * 60
inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
return inpoints
def _findSubtitleByInPoint(self, inpoint):
if self.srt:
d = subtitles.Subtitle(srt=self.srt)
for s in d.values():
if s['in'] == inpoint:
return s
return None
# Refresh the metadata and build the derived media (mini movie, timeline) for
# this file, when it was not extracted yet or `force` is set.
# NOTE(review): presumably all three calls below sit under the condition - confirm
def extractAll(self, force = False):
if not self.extracted or force:
self.updateMeta()
self.extractClipMovie(force)
self.extractTimeline(force)
#enable this later
#self.extractFrames()
#self.extractClips()
def extractFlvClip(self, inpoint, outpoint=-1, flash_folder=-1):
  """Cut a flash (.flv) clip out of the mini movie.

  inpoint      start time; '.' and ':' separators are both accepted
  outpoint     end time; -1 means the end of the subtitle starting at
               inpoint, or inpoint + 5000 ms when there is none
  flash_folder target folder; -1 means this file's frame folder

  The clip is written to <flash_folder>/<inpoint with dots>.flv. The
  original computed flash_folder but always wrote to the frame folder;
  the argument is now honored (the default is unchanged).
  """
  if flash_folder == -1:
    flash_folder = self.frameFolder
  else:
    oxdb_makedir(flash_folder)
  movie_file = self.mini_movie_file
  position = inpoint.replace(':', '.')
  flash_movie = join(flash_folder, '%s.%s' % (position, 'flv'))
  width = self.sceneWidth
  height = self.sceneHeight
  inpoint = inpoint.replace('.', ':')
  if outpoint == -1:
    s = self._findSubtitleByInPoint(inpoint)
    if s:
      outpoint = s['out']
    else:
      outpoint = subtitles.shiftTime(5000, inpoint)
  else:
    outpoint = outpoint.replace('.', ':')
  extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
  #extract_flash_ng(self.absolutePath, flash_movie, inpoint, outpoint, width, height, offset)
def extractOggClip(self, inpoint, outpoint=-1, clip_folder=-1):
  """Cut an Ogg (.ogv) clip out of the mini movie.

  inpoint      start time; '.' and ':' separators are both accepted
  outpoint     end time; -1 means the end of the subtitle starting at
               inpoint, or inpoint + 5000 ms when there is none
  clip_folder  target folder; -1 means this file's frame folder

  The clip is written to <clip_folder>/<inpoint with dots>.ogv. The
  original computed clip_folder but always wrote to the frame folder;
  the argument is now honored (the default is unchanged).
  """
  if clip_folder == -1:
    clip_folder = self.frameFolder
  else:
    oxdb_makedir(clip_folder)
  movie_file = self.mini_movie_file
  position = inpoint.replace(':', '.')
  clip_movie = join(clip_folder, '%s.%s' % (position, 'ogv'))
  width = self.sceneWidth
  height = self.sceneHeight
  inpoint = inpoint.replace('.', ':')
  if outpoint == -1:
    s = self._findSubtitleByInPoint(inpoint)
    if s:
      outpoint = s['out']
    else:
      outpoint = subtitles.shiftTime(5000, inpoint)
  else:
    outpoint = outpoint.replace('.', ':')
  extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
def extractClips(self, img_folder=cache.frame_cache_root):
  """Extract a flash clip for every start point (img_folder is not used)."""
  for inpoint in self._startPoints():
    self.extractFlvClip(inpoint)
def extractFrame(self, position, img_folder=-1):
  """Extract the frame at `position` from the mini movie into img_folder.

  img_folder defaults to this file's frame folder. Rows linked to a movie
  (subtitle rows) delegate to that movie and return its result.
  """
  if img_folder == -1:
    img_folder = self.frameFolder
  if self.movieFile:
    return self.movieFile.extractFrame(position, img_folder)
  source = self.mini_movie_file
  oxdb_makedir(img_folder)
  timecode = position.replace('.', ':')
  extract_frame(source, timecode, img_folder, offset = 0, redo = False)
def extractFrames(self, img_folder=cache.frame_cache_root):
  """Request the frame for every start point (img_folder is not used)."""
  for inpoint in self._startPoints():
    self.frame(inpoint)
def extractPosterStill(self, position):
  """Extract the poster still at `position` into posterStillFile.

  The original file is the source, except for .mov/.mpg/.mpeg files,
  where the mini movie is used instead.
  """
  oxdb_makedir(dirname(self.posterStillFile))
  source = self.absolutePath
  suffix = os.path.splitext(source)[-1]
  if suffix in ('.mov', '.mpg', '.mpeg'):
    source = self.mini_movie_file
  extract_poster_still(source, self.posterStillFile, position)
def extractClipMovie(self, force = False):
  """Encode the small preview movie (MJPEG video, MP3 audio) with mencoder.

  Skipped for broken files. Files without a height and files with a
  known non-movie extension are marked extracted without encoding. An
  existing clip is kept unless `force` is set. `extracted` is True only
  after mencoder exited with status 0.

  mencoder is started with an argument list instead of a shell command
  line, so file names containing quotes, '$', backticks or backslashes
  can no longer break or alter the command.
  """
  import subprocess
  if self.broken:
    return
  #do not try to extract non movie files, or files that could not be identified
  if not self.height or self.path.split('.')[-1] in ('mp3', 'wav', 'srt', 'sub', 'idx', 'rar','jpg', 'png'):
    self.extracted = True
    return
  mini_movie_file = self.mini_movie_file
  movie_file = self.absolutePath
  if not movie_file or not exists(movie_file):
    return
  if not exists(mini_movie_file):
    self.extracted = False
  if self.extracted and not force:
    debug("clip exists, skipping extraction %s" % mini_movie_file)
    return
  self.extracted = False
  oxdb_makedir(dirname(mini_movie_file))
  argv = [
    'mencoder',
    '-ovc', 'lavc', '-lavcopts', 'vcodec=mjpeg',
    '-af', 'volnorm=1', '-oac', 'mp3lame', '-lameopts', 'br=64:mode=3',
    '-af', 'resample=44100',
    '-vf', 'scale', '-zoom', '-xy', '%s' % self.sceneWidth,
    movie_file,
    '-o', mini_movie_file,
  ]
  # unicode arguments are passed as utf-8, as the original command line was
  encoded = []
  for arg in argv:
    if not isinstance(arg, str):
      arg = arg.encode('utf-8')
    encoded.append(arg)
  devnull = open(os.devnull, 'w')
  try:
    try:
      r = subprocess.call(encoded, stdout=devnull, stderr=devnull)
    except OSError:
      # mencoder missing or not executable: treat like a failed run
      r = -1
  finally:
    devnull.close()
  if r == 0:
    self.extracted = True
def extractOggMovie(self, force = False):
  """Encode an Ogg Theora version of the file with ffmpeg2theora.

  Skipped for broken files, files without a height, known non-movie
  extensions and missing source files. An existing .ogg is kept unless
  `force` is set. `extractedOgg` is True only after the encoder exited
  with status 0.

  The encoder is started with an argument list instead of a shell
  command line, so special characters in file names cannot break or
  alter the command.
  """
  import subprocess
  if self.broken:
    return
  if not self.height:
    #only if midentify works we should try to extract the clip
    return
  if self.path.split('.')[-1] in ('mp3', 'wav', 'srt', 'sub', 'idx', 'rar','jpg', 'png'):
    #ignore files known to not be movies
    return
  oggFile = self.mini_movie_file.replace('.avi', '.ogg')
  movieFile = self.absolutePath
  if not movieFile or not exists(movieFile):
    return
  if exists(oggFile) and not force:
    debug("clip exists, skipping extraction %s" % oggFile)
    return
  self.extractedOgg = False
  oxdb_makedir(dirname(oggFile))
  argv = ['ffmpeg2theora']
  argv += "--no-skeleton -K 16 -V 180 -a -1 -H 44100 -S 1 --speedlevel 0 -c 2".split()
  argv += ['-x', '%s' % self.sceneWidth, '-y', '%s' % self.sceneHeight]
  argv += [movieFile, '-o', oggFile]
  # unicode arguments are passed as utf-8, as the original command line was
  encoded = []
  for arg in argv:
    if not isinstance(arg, str):
      arg = arg.encode('utf-8')
    encoded.append(arg)
  devnull = open(os.devnull, 'w')
  try:
    try:
      r = subprocess.call(encoded, stdout=devnull, stderr=devnull)
    except OSError:
      # encoder missing or not executable: treat like a failed run
      r = -1
  finally:
    devnull.close()
  if r == 0:
    self.extractedOgg = True
def extractH264Movie(self, force = False):
  """Encode an H.264 .mp4 version of the file (two-pass ffmpeg + qt-faststart).

  Skipped for broken files, files without a height, known non-movie
  extensions and missing source files. An existing .mp4 is kept unless
  `force` is set. `extractedH264` is True only when both ffmpeg passes
  and qt-faststart succeeded and the temporary file was removed.

  Changes against the original: the tools are started with argument
  lists instead of shell command lines (paths were interpolated
  unescaped), the stray `print cmd` is gone, and a failing step aborts
  the run instead of being ignored.
  """
  import subprocess
  if self.broken:
    return
  if not self.height:
    #only if midentify works we should try to extract the clip
    return
  if self.path.split('.')[-1] in ('mp3', 'wav', 'srt', 'sub', 'idx', 'rar','jpg', 'png'):
    #ignore files known to not be movies
    return
  h264File = self.mini_movie_file.replace('.avi', '.mp4')
  h264FileTmp = h264File + ".tmp.mp4"
  movieFile = self.absolutePath
  if not movieFile or not exists(movieFile):
    return
  if exists(h264File) and not force:
    debug("clip exists, skipping extraction %s" % h264File)
    return
  self.extractedH264 = False
  height = self.sceneHeight
  oxdb_makedir(dirname(h264File))
  options = []
  options += "-vcodec libx264 -b 112k -bf 3 -subq 6 -cmp 256 -refs 5 -qmin 10".split()
  options += "-qmax 51 -qdiff 4 -coder 1 -loop 1 -me hex -me_range 16 -trellis 1".split()
  options += "-flags +mv4 -flags2 +bpyramid+wpred+mixed_refs+brdo+8x8dct".split()
  options += "-partitions parti4x4+parti8x8+partp4x4+partp8x8+partb8x8 -g 250".split()
  options += "-keyint_min 16 -sc_threshold 40 -i_qfactor 0.71".split()
  options += ['-s', '%sx%s' % (self.sceneWidth, height)]
  first_pass = ['ffmpeg', '-y', '-i', movieFile, '-an', '-pass', '1', '-threads', '2'] + options + [h264FileTmp]
  second_pass = ['ffmpeg', '-y', '-i', movieFile, '-acodec', 'libfaac', '-ac', '2', '-ar', '44100',
    '-ab', '48k', '-pass', '2', '-threads', '2'] + options + [h264FileTmp]
  faststart = ['qt-faststart', h264FileTmp, h264File]
  devnull = open(os.devnull, 'w')
  try:
    for argv in (first_pass, second_pass, faststart):
      # unicode arguments are passed as utf-8, as the original command lines were
      encoded = []
      for arg in argv:
        if not isinstance(arg, str):
          arg = arg.encode('utf-8')
        encoded.append(arg)
      try:
        r = subprocess.call(encoded, stdout=devnull, stderr=devnull)
      except OSError:
        # tool missing or not executable: treat like a failed run
        r = -1
      if r != 0:
        debug("h264 extraction failed for %s" % h264File)
        return
  finally:
    devnull.close()
  # counterpart of the original `&& rm`: a failing removal means not extracted
  try:
    os.remove(h264FileTmp)
  except OSError:
    return
  self.extractedH264 = True
def removeTimeline(self):
  """Delete the cached timeline image if it exists."""
  target = self.timelineFile
  if not exists(target):
    return
  os.unlink(target)
def extractTimeline(self, force = False):
  """Render the timeline image from the mini movie via tools/extract_timeline.py.

  Nothing happens for broken files, for rows with height <= 0, when the
  mini movie is missing, or when the timeline already exists. With
  `force` an existing timeline is removed first.
  """
  if self.broken:
    return
  if force:
    self.removeTimeline()
  #return if its not a video
  if self.height <= 0:
    return
  if not self.extracted or not exists(self.mini_movie_file):
    debug("mini movie missing, skipping %s" % self.path)
    return
  target = self.timelineFile
  if exists(target):
    debug("timeline exists , %s" % self.path)
    return
  oxdb_makedir(dirname(target))
  #this fails in tg-admin shell
  tools_parent = dirname(cache.cache_root)
  extractTimelineScript = abspath(join(tools_parent, "tools/extract_timeline.py"))
  os.system("python %s %s %s" %(extractTimelineScript, target, self.mini_movie_file))
def loadSubtitleFromFile(self):
  """Reload the movie row's srt text from the linked .srt file.

  Works from either side of the link: called on a subtitle row it updates
  the linked movie, called on a movie row it reads the linked subtitle.
  The movie's srt is cleared first and stays empty when there is no
  subtitle, the subtitle file is missing, or it is not an .srt file.
  """
  if self.movieFile:
    movieFile, subtitle = self.movieFile, self
  else:
    movieFile, subtitle = self, self.subtitleFile
  if movieFile:
    movieFile.srt = ''
  if not (subtitle and movieFile):
    return
  if not subtitle.absolutePath or not exists(subtitle.absolutePath):
    return
  if not subtitle.absolutePath.endswith('.srt'):
    debug("this is not a subtitle %s" % subtitle.absolutePath)
    return
  parsed = subtitles.Subtitle(subtitle.absolutePath)
  movieFile.srt = parsed.toSrt()
def _set_fps(self, fps):
fps = int(fps * 10000)
self._SO_set_fps(fps)
def _get_fps(self):
fps = self._SO_get_fps()
if fps:
return float(fps) / 10000
return 0.0
def _get_resolution(self):
if self.subtitle or (not self.width or not self.height):
return u''
return "%sx%s" % (self.width, self.height)
def updateBpp(self):
  """Compute height * width * fps / bitrate, store it in bpp and return it.

  Returns -1 (leaving the stored value untouched) when any of the four
  inputs is missing or zero.
  """
  if not (self.height and self.width and self.fps and self.bitrate):
    return -1
  value = float(self.height * self.width * self.fps / self.bitrate)
  self.bpp = value
  return value
def updatePixels(self):
  """Compute (length / 1000) * fps * width * height, store and return it.

  Returns 0 (leaving the stored value untouched) when any of the four
  inputs is missing or zero.
  """
  if not (self.length and self.fps and self.width and self.height):
    return 0
  total = int((self.length / 1000) * self.fps * self.width * self.height)
  self.pixels = total
  return total
def _get_pixels(self):
pixels = self._SO_get_pixels()
if not pixels:
pixels = self.updatePixels()
return pixels
def flvClip(self, position):
  """Return what cache.loadFlvClip yields for this file at `position`."""
  loader = cache.loadFlvClip
  return loader(self, position)
def oggClip(self, position):
  """Return what cache.loadOggClip yields for this file at `position`."""
  loader = cache.loadOggClip
  return loader(self, position)
def frame(self, position):
  """Return what cache.loadFrame yields for this file at `position`."""
  loader = cache.loadFrame
  return loader(self, position)
def timeline(self):
  """Return what cache.loadTimeline yields for this file."""
  loader = cache.loadTimeline
  return loader(self)
def posterStill(self, position):
  """Return what cache.loadPosterStill yields for this file at `position`."""
  loader = cache.loadPosterStill
  return loader(self, position)
def still(self, position):
  """Return the still image at `position`, extracting it on first use.

  The image lives at <stillFolder>/<position with dots>.jpg and is cut
  from the original file when missing. Returns '' when the image still
  does not exist afterwards.
  """
  filename = "%s.%s" % (position.replace(':', '.'), 'jpg')
  still_path = os.path.join(self.stillFolder, filename)
  if not exists(still_path):
    oxdb_makedir(dirname(still_path))
    source = self.absolutePath
    extract_frame(source, position, self.stillFolder, -1)
  if not exists(still_path):
    return ''
  return cache.loadFile(still_path)