oxdbarchive/oxdbarchive/model.py

754 lines
23 KiB
Python

# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import gzip
import StringIO
from glob import glob
import os
from sqlobject import *
from turbogears.database import PackageHub
import turbogears
import re
from urllib import quote, quote_plus
import urllib2
import os
from os.path import abspath, join, dirname, exists
from datetime import datetime
import time
import math
from glob import glob
import shutil
import socket
import simplejson
from oxlib.net import getUrl
import oxlib
import subtitles
import cache
import oxdb_import
from oxdb_utils import oxdb_title, oxdb_director, oxdb_id, oxdb_makedir
from extract import *
import midentify
def debug(string):
  '''Debug output hook; printing is switched off, so the message is dropped.'''
  # print string
  return None
# Connection hub for the 'oxdbarchive' package, created via TurboGears' PackageHub.
hub = PackageHub('oxdbarchive')
# NOTE(review): presumably SQLObject picks up this module-level name as the
# default connection for the classes defined below - confirm against the
# SQLObject documentation.
__connection__ = hub
# A collection of media files rooted at basePath; changes can be announced
# to a frontend reachable at baseUrlFrontend.
class Archive(SQLObject):
# unique name of the archive (alternateID)
name = UnicodeCol(length=255, alternateID=True)
# root directory of the archive; the getter below appends a trailing "/"
basePath = UnicodeCol()
# base URL used by notifyFrontend; an empty value disables notifications
baseUrlFrontend = UnicodeCol(default = '')
# compared against file timestamps in updateFrontend, which resets it to now
published = DateTimeCol(default=datetime.now)
# defaults to the creation time of the row
modified = DateTimeCol(default=datetime.now)
def _get_basePath(self):
basePath = self._SO_get_basePath()
if not basePath.endswith('/'):
basePath = basePath + "/"
self.basePath = basePath
return basePath
def findNew(self):
  '''
  Import files into this archive unless it is offline or was changed recently.

  The import only runs when the archive is online and
  sinceLastModification() reports more than two hours; otherwise a notice
  is printed and nothing else happens.
  '''
  # All calls go through this instance (the body previously used a name
  # that is not defined in the method).
  if not self.isOnline():
    print("WARNING %s, is not online, check power and disk" % self.basePath)
    return
  #only update archive if not modified for more than 2 hours
  if self.sinceLastModification() > 60*60*2:
    self.importFiles()
  else:
    print("ignoring %s, was changed withing last 2 hours" % self.basePath)
def isOnline(self):
  '''
  Report whether the archive's base path is reachable and not empty.

  Runs `find` on basePath through the shell (output discarded); a non-zero
  status means offline. Otherwise the archive counts as online when a glob
  of "<basePath>*" matches at least one entry.
  '''
  status = os.system('find "%s" >/dev/null 2>&1' % self.basePath)
  if status != 0:
    return False
  return bool(glob("%s*" % self.basePath))
def sinceLastModification(self):
  '''
  Return the number of seconds since the newest mtime among the entries
  matched by "<basePath>/*".
  '''
  now = time.time()
  mtimes = [os.stat(entry).st_mtime for entry in glob('%s/*'% self.basePath)]
  return now - max(mtimes)
def notifyFrontend(self, action, md5sum):
if self.baseUrlFrontend:
dto = socket.getdefaulttimeout()
socket.setdefaulttimeout(256)
url = "%s/%s?md5sum=%s" % (self.baseUrlFrontend, action, md5sum)
#print url
try:
result = getUrl(url)
except urllib2.HTTPError, e:
print "ERROR:", e.code
data = e.read()
if e.headers.get('content-encoding', None) == 'gzip':
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
print data
raise
except:
import traceback
traceback.print_exc()
raise
#print "Frontend:", result['result']
socket.setdefaulttimeout(dto)
def _get_files(self):
  '''Return all ArchiveFile rows whose archiveID is this archive's id, as a list.'''
  owned = ArchiveFile.select(ArchiveFile.q.archiveID == self.id)
  return list(owned)
def _get_file_list(self):
files = {}
for f in self.files:
try:
d = dict(md5sum = f.md5sum, size = f.size)
files[f.path] = d
except SQLObjectNotFound:
f.destroySelf()
return files
def addLocalFile(self, fname, movie = None):
  '''
  Collect stats and metadata for the local file fname via oxdb_import,
  turn its 'date' timestamp into a datetime and pass it on to addFile.

  Returns whatever addFile returns.
  '''
  info = oxdb_import.oxdb_file_metadata(oxdb_import.oxdb_file_stats(fname))
  info['date'] = datetime.fromtimestamp(info['date'])
  return self.addFile(info, movie)
def addFile(self, params, movie = None):
'''
Update an existing file entry or add a new one to the database.

params is a dict with at least md5sum, path and date; for new files it
also needs audio, video, length, size and bpp. params['path'] is
rewritten in place with basePath removed. movie is not used here.

Returns a UTF-8 encoded status message.
'''
# store paths relative to the archive root
params['path'] = params['path'].replace(self.basePath, u'')
# look for an entry of this archive with the same md5sum
q = ArchiveFile.select(AND(
ArchiveFile.q.archiveID == self.id,
ArchiveFile.q.md5sum == params['md5sum'],
))
if q.count() > 0:
'''update existing entry'''
f = q[0]
#FIXME: deal with double files here. right now they are changed
if f.path != params['path']:
ret = "this file is already in the database, first time at:\n\t%s\n\t" % f.path
else:
ret = "updated entry"
# the existing entry takes over the new path and date
for field in ('path', 'date'):
setattr(f, field, params[field])
else:
#just a new md5? happens for srt files quite often
# look for an entry of this archive with the same path instead
qq = ArchiveFile.select(AND(
ArchiveFile.q.archiveID == self.id,
ArchiveFile.q.path == params['path'].encode('utf-8'),
))
f = None
if qq.count() == 1:
# NOTE(review): the entry found by path keeps its old md5sum here -
# confirm whether it should be updated from params
f = qq[0]
ret = "updated entry"
else:
''' add new file to database '''
# the oxdb id is derived from title and director parsed from the path
title = oxdb_title(params['path'])
director = oxdb_director(params['path'])
oxdb = oxdb_id(title, director)
f = ArchiveFile(
archive = self,
path = params['path'],
date = params['date'],
oxdb = oxdb,
md5sum = params['md5sum'],
audio = params['audio'],
video = params['video'],
length = params['length'],
size = params['size'],
bpp = params['bpp'],
date_added = datetime.now(),
subtitle = params['path'].endswith('.srt'),
)
ret = "added entry"
# NOTE(review): the original indentation of the next two calls is not
# recoverable from this copy of the file - confirm which branches run them
f.updateMeta()
f.extractAll()
f.modified = datetime.now()
return ret.encode('utf-8')
def removeFile(self, md5sum):
  '''
  Remove the file with the given md5sum from this archive.

  Only acts when exactly one entry matches: the entry is deleted, the
  frontend gets a 'remove' notification and dict(result="file removed")
  is returned. Otherwise returns dict(result="not in archive").
  '''
  matches = ArchiveFile.select(AND(
    ArchiveFile.q.archiveID == self.id,
    ArchiveFile.q.md5sum == md5sum,
  ))
  if matches.count() != 1:
    return dict(result="not in archive")
  for row in matches:
    ArchiveFile.delete(row.id)
  self.notifyFrontend('remove', md5sum)
  return dict(result="file removed")
def importFiles(self):
  '''
  Bring the database in line with the files found below basePath.

  Files already known under the same relative path and size are skipped;
  all others are passed to addFile and announced to the frontend. Known
  entries whose md5sum was not seen on disk are removed. Finally
  updateFrontend is called.

  Returns a dict with the counters 'skipped', 'added' and 'remove'.
  '''
  stats = {'skipped': 0, 'added': 0, 'remove':0}
  print(self.basePath)
  found = oxdb_import.oxdb_spider(self.basePath)
  oxdb_files = self.file_list
  md5sum_on_disk = []
  for fname in found:
    meta = oxdb_import.oxdb_file_stats(fname)
    rel = fname.replace(self.basePath, '')
    if rel in oxdb_files and oxdb_files[rel]['size'] == meta['size']:
      stats['skipped'] += 1
      md5sum_on_disk.append(oxdb_files[rel]['md5sum'])
      continue
    meta = oxdb_import.oxdb_file_metadata(meta)
    #FIXME: check input
    for key in ('bpp', 'size', 'length', 'date'):
      meta[key] = int(float(meta[key]))
    meta['date'] = datetime.fromtimestamp(meta['date'])
    print("%s %s" % (self.addFile(meta), rel.encode('utf-8')))
    stats['added'] += 1
    md5sum_on_disk.append(meta['md5sum'])
    self.notifyFrontend('add', meta['md5sum'])
  for known in oxdb_files:
    known_md5 = oxdb_files[known]['md5sum']
    if known_md5 in md5sum_on_disk:
      continue
    print("remove %s" % known.encode('utf-8'))
    self.removeFile(oxdb_files[known]['md5sum'])
    stats['remove'] += 1
  print(stats)
  print("updating information on frontend")
  self.updateFrontend()
  return stats
'''
Interaction with the frontend:
- update: send the files modified since the last sync.
- bootstrap: send all files.
- sync: get the list from the frontend and remove/add the entries that are out of sync.
'''
def updateFrontend(self):
  '''
  Send an 'add' notification for every ArchiveFile whose published stamp
  is not older than this archive's, then set the archive's published
  stamp to now.
  '''
  # NOTE(review): the query is not restricted to this archive's files -
  # confirm that is intended
  recent = ArchiveFile.select(ArchiveFile.q.published >= self.published)
  for entry in recent:
    self.notifyFrontend('add', entry.md5sum)
  self.published = datetime.now()
def bootstrapFrontend(self):
  '''Send an 'add' notification to the frontend for every file of this archive.'''
  for entry in self.files:
    md5sum = entry.md5sum
    self.notifyFrontend('add', md5sum)
def syncFrontend(self, addOnly=False):
  '''
  Compare the frontend's file list with the database and notify it.

  Fetches "<baseUrlFrontend>/list" (256 second socket timeout) and reads
  the 'files' list of md5sums from the JSON response. Every md5sum the
  database does not know triggers a 'remove' notification. Unless addOnly
  is set, every ArchiveFile whose md5sum is missing from that list then
  triggers an 'add' notification.
  '''
  # NOTE(review): with addOnly=True only the 'remove' pass runs, which
  # reads inverted for a flag of that name - confirm the intent
  dto = socket.getdefaulttimeout()
  socket.setdefaulttimeout(256)
  try:
    data = getUrl("%s/list" % self.baseUrlFrontend)
  finally:
    # the default timeout is process-wide; restore it even if the request fails
    socket.setdefaulttimeout(dto)
  md5sums = simplejson.loads(data)['files']
  for md5sum in md5sums:
    try:
      ArchiveFile.byMd5sum(md5sum)
    except SQLObjectNotFound:
      self.notifyFrontend('remove', md5sum)
  if not addOnly:
    for f in ArchiveFile.select(NOT(IN(ArchiveFile.q.md5sum, md5sums))):
      self.notifyFrontend('add', f.md5sum)
class ArchiveFile(SQLObject):
'''
A single file (movie, subtitle, ...) inside an Archive.

SQL statements kept here for reference (presumably manual schema
adjustments - confirm before running them):

ALTER TABLE archive_file CHANGE size size bigint;
ALTER TABLE archive_file CHANGE pixels pixels bigint;
ALTER TABLE archive_file CHANGE srt srt LONGTEXT;
'''
# unique checksum of the file; also names its cache files
md5sum = UnicodeCol(length=128, alternateID=True)
# id computed by oxdb_id from the title and director parsed from the path
oxdb = UnicodeCol(length=128)
# path relative to the archive's basePath (see absolutePath)
path = UnicodeCol()
date = DateTimeCol()
archive = ForeignKey('Archive')
# audio / video codec names as reported by midentify
audio = UnicodeCol()
video = UnicodeCol()
# duration; _startPoints divides it by 1000 and 60 to get minutes
length = IntCol()
size = IntCol()
# computed by updateBpp; -1 when it cannot be computed
bpp = FloatCol(default = -1)
# computed by updatePixels; 0 when it cannot be computed
pixels = IntCol(default = 0)
date_added = DateTimeCol(default=datetime.now)
published = DateTimeCol(default=datetime.now)
modified = DateTimeCol(default=datetime.now)
height = IntCol(default = -1)
width = IntCol(default = -1)
# aspect ratio stored as a formatted string; the getter returns a number
frameAspect = UnicodeCol(default = "1.6", length = 100)
bitrate = IntCol(default = -1)
# stored multiplied by 10000 (see _set_fps / _get_fps)
fps = IntCol(default = -1)
# subtitle text in srt format, filled by loadSubtitleFromFile
srt = UnicodeCol(default = '')
# id of the linked movie or subtitle ArchiveFile; -1 when there is none
subtitle_meta_id = IntCol(default = -1)
# True when this file is itself a subtitle
subtitle = BoolCol(default = False)
# part number parsed from the path by _get_part
part = IntCol(default = 1)
# broken files are skipped by the extract* methods
broken = BoolCol(default = False)
# set by extractClipMovie once the mini movie has been handled
extracted = BoolCol(default = False)
def _get_part(self):
part = 1
parts = re.compile('Part (\d)').findall(self.path)
if not parts:
parts = re.compile('CD (\d)').findall(self.path)
if parts:
part = int(parts[-1])
self.part = part
return part
def _get_offset(self):
if not self.part:
self.part = 1
if self.part == 1:
return 0
length = 0
q = ArchiveFile.select(AND(
ArchiveFile.q.oxdb == self.oxdb,
ArchiveFile.q.part < self.part,
ArchiveFile.q.subtitle == False,
))
for f in q:
length += f.length
return length
def _get_ext(self):
return self.path.split('.')[-1]
def _get_preferredVersion(self):
e = self.nameExtra.lower()
for pref in ('directors cut', 'long version'):
if pref in e:
return True
return False
def _get_nameExtra(self):
path = os.path.basename(self.path)
parts = path.replace(self.title, '').split('.')[:-1]
parts = filter(lambda x: not x.startswith('Part'), parts)
parts = filter(lambda x: not x.startswith('Season'), parts)
parts = filter(lambda x: not x.startswith('Episode'), parts)
parts = filter(lambda x: not x.startswith('vo'), parts)
extra = '.'.join(parts)
if extra.startswith('.'):
extra = extra[1:]
return extra
def _get_title(self):
  '''Return the title that oxdb_title extracts from this file's path.'''
  title = oxdb_title(self.path)
  return title
def _get_director(self):
  '''Return the director that oxdb_director extracts from this file's path.'''
  director = oxdb_director(self.path)
  return director
def _get_absolutePath(self):
return join(self.archive.basePath, self.path)
def updateMeta(self):
# Refresh the subtitle link, the technical metadata read by midentify and
# the oxdb id of this file.
# NOTE(review): the original indentation is not recoverable from this copy
# of the file, so it is unclear which of the trailing calls sit inside the
# two conditions below - confirm against the upstream source.
self.findSubtitleLink()
# metadata is only read for files that exist on disk ...
if exists(self.absolutePath):
# ... and are not flagged as broken
if not self.broken:
info = midentify.identify(self.absolutePath)
self.length = info['length']
self.width = info['width']
# the aspect ratio is stored as a string with six decimals
self.frameAspect = "%0.6f" % info['aspect']
self.height = info['height']
self.bitrate = info['video_bitrate']
self.fps = info['fps']
self.audio = info['audio_codec']
self.video = info['video_codec']
# derived values, computed from the attributes set above
self.updatePixels()
self.updateBpp()
self.loadSubtitleFromFile()
# recompute the id from the title and director parsed from the path
self.oxdb = oxdb_id(self.title, self.director)
def _get_frameAspect(self):
aspect = float(self._SO_get_frameAspect())
if self.subtitle:
return 1
if aspect == -1:
if self.height:
aspect = float(self.width) / self.height
else:
aspect = 16.0 / 10
self.frameAspect = "%0.6f" % aspect
return aspect
def _get_sceneWidth(self):
return 128
def _get_sceneHeight(self):
default = 80
if not self.subtitle:
h = int(self.sceneWidth / self.frameAspect)
h = h + h % 2
return h
return default
def _get_movieFile(self):
if self.subtitle and self.subtitle_meta_id>0:
try:
m = ArchiveFile.get(self.subtitle_meta_id)
except:
m = None
self.subtitle_meta_id = -1
self.srt = ''
return m
return None
def _get_subtitleFile(self):
if not self.subtitle and self.subtitle_meta_id>0:
try:
s = ArchiveFile.get(self.subtitle_meta_id)
except:
s = None
self.subtitle_meta_id = -1
self.srt = ''
return s
return None
def findSubtitleLink(self):
  '''
  Link this file to its counterpart (a movie for a subtitle, a subtitle
  for a movie) with the same oxdb id and part.

  subtitle_meta_id is reset to -1 and then set to the id of each
  candidate that is not a ".sub" file and whose nameExtra equals this
  file's nameExtra or is 'en'; the last such candidate wins.
  '''
  wanted = not self.subtitle
  candidates = ArchiveFile.select(AND(
    ArchiveFile.q.oxdb == self.oxdb,
    ArchiveFile.q.part == self.part,
    ArchiveFile.q.subtitle == wanted,
  ))
  self.subtitle_meta_id = -1
  if not candidates.count():
    return
  for other in candidates:
    if other.path.endswith('.sub'):
      continue
    if other.nameExtra == self.nameExtra or other.nameExtra == 'en':
      self.subtitle_meta_id = other.id
def _get_mini_movie_file(self):
  '''Return the cache path of the mini movie: <cache_root>/mini/<first 4 md5 chars>/<md5sum>.avi'''
  name = "%s.avi" % self.md5sum
  return join(cache.cache_root, 'mini', self.md5sum[:4], name)
def _get_frameFolder(self):
  '''
  Return the frame folder <cache_root>/frame/<first 4 md5 chars>/<md5sum>,
  after passing it to oxdb_makedir.
  '''
  folder = join(cache.cache_root, 'frame', self.md5sum[:4], self.md5sum)
  oxdb_makedir(folder)
  return folder
def _get_timelineFile(self):
  '''Return the cache path of the timeline: <cache_root>/timeline/<first 4 md5 chars>/<md5sum>.png'''
  name = "%s.png" % self.md5sum
  return join(cache.cache_root, 'timeline', self.md5sum[:4], name)
def _get_posterStillFile(self):
  '''Return the cache path of the poster still: <cache_root>/posterStill/<first 4 md5 chars>/<md5sum>.png'''
  name = "%s.png" % self.md5sum
  return join(cache.cache_root, 'posterStill', self.md5sum[:4], name)
def stillFile(self, position):
  '''
  Return the cache path of the still at position:
  <cache_root>/still/<first 4 md5 chars>/<md5sum>_<position>.png, with the
  position passed through oxlib.getValidFilename first.
  '''
  safe_position = oxlib.getValidFilename(position)
  name = "%s_%s.png" % (self.md5sum, safe_position)
  return join(cache.cache_root, 'still', self.md5sum[:4], name)
def removeMiniMovie(self):
  '''Delete the mini movie file if it exists.'''
  if not exists(self.mini_movie_file):
    return
  os.remove(self.mini_movie_file)
def _startPoints(self):
inpoints = []
if self.srt:
d = subtitles.Subtitle(self.srt)
for s in d:
inpoints.append(s['in'])
elif self.length:
minutes = int((float(self.length) / 1000) / 60)
minute = 1000 * 60
inpoints = [msec2time_str(m*minute) for m in range(0, minutes)]
return inpoints
def _findSubtitleByInPoint(self, inpoint):
if self.srt:
d = subtitles.Subtitle(self.srt)
for s in d:
if s['in'] == inpoint:
return s
return None
def extractAll(self, force = False):
  '''
  Update the metadata and extract the clip movie and the timeline.

  Nothing happens for files already marked as extracted unless force is set.
  '''
  if self.extracted and not force:
    return
  self.updateMeta()
  self.extractClipMovie(force)
  self.extractTimeline(force)
  #enable this later
  #self.extractFrames()
  #self.extractClips()
def extractFlvClip(self, inpoint, outpoint=-1, flash_folder=-1):
  '''
  Extract a flash clip from the mini movie into flash_folder.

  inpoint / outpoint may use "." or ":" as separators. Without an outpoint
  the 'out' of the subtitle starting at inpoint is used, or inpoint
  shifted by 5000 via subtitles.shiftTime when there is no such subtitle.
  flash_folder defaults to this file's frameFolder. The clip is named
  "<inpoint with dots>.flv".
  '''
  if flash_folder == -1:
    flash_folder = self.frameFolder
  movie_file = self.mini_movie_file
  position = inpoint.replace(':', '.')
  # write into the requested folder, not unconditionally into frameFolder
  flash_movie = join(flash_folder, '%s.%s' % (position, 'flv'))
  width = self.sceneWidth
  height = self.sceneHeight
  inpoint = inpoint.replace('.', ':')
  if outpoint == -1:
    s = self._findSubtitleByInPoint(inpoint)
    if s:
      outpoint = s['out']
    else:
      outpoint = subtitles.shiftTime(5000, inpoint)
  else:
    outpoint = outpoint.replace('.', ':')
  extract_flash(movie_file, flash_movie, inpoint, outpoint, width, height, offset = 0)
  #extract_flash_ng(self.absolutePath, flash_movie, inpoint, outpoint, width, height, offset)
def extractOggClip(self, inpoint, outpoint=-1, clip_folder=-1):
  '''
  Extract an ogg clip from the mini movie into clip_folder.

  inpoint / outpoint may use "." or ":" as separators. Without an outpoint
  the 'out' of the subtitle starting at inpoint is used, or inpoint
  shifted by 5000 via subtitles.shiftTime when there is no such subtitle.
  clip_folder defaults to this file's frameFolder. The clip is named
  "<inpoint with dots>.ogv".
  '''
  if clip_folder == -1:
    clip_folder = self.frameFolder
  movie_file = self.mini_movie_file
  position = inpoint.replace(':', '.')
  # write into the requested folder, not unconditionally into frameFolder
  clip_movie = join(clip_folder, '%s.%s' % (position, 'ogv'))
  width = self.sceneWidth
  height = self.sceneHeight
  inpoint = inpoint.replace('.', ':')
  if outpoint == -1:
    s = self._findSubtitleByInPoint(inpoint)
    if s:
      outpoint = s['out']
    else:
      outpoint = subtitles.shiftTime(5000, inpoint)
  else:
    outpoint = outpoint.replace('.', ':')
  extract_ogg(movie_file, clip_movie, inpoint, outpoint, width, height, offset = 0)
def extractClips(self, img_folder=cache.frame_cache_root):
  '''Extract a flash clip for every start point; img_folder is not used.'''
  starts = self._startPoints()
  for start in starts:
    self.extractFlvClip(start)
def extractFrame(self, position, img_folder=-1):
  '''
  Extract the frame at position from the mini movie into img_folder
  (default: this file's frameFolder).

  A subtitle file with a linked movie delegates to that movie and returns
  its result.
  '''
  target_folder = img_folder
  if target_folder == -1:
    target_folder = self.frameFolder
  if self.movieFile:
    return self.movieFile.extractFrame(position, target_folder)
  source = self.mini_movie_file
  oxdb_makedir(target_folder)
  extract_frame(source, position.replace('.', ':'), target_folder, offset = 0, redo = False)
def extractFrames(self, img_folder=cache.frame_cache_root):
  '''Request the frame for every start point via self.frame; img_folder is not used.'''
  starts = self._startPoints()
  for start in starts:
    self.frame(start)
def extractPosterStill(self, position):
  '''
  Extract the poster still at position into posterStillFile.

  The original file is the source, except for .mov/.mpg/.mpeg files,
  where the mini movie is used instead.
  '''
  oxdb_makedir(dirname(self.posterStillFile))
  source = self.absolutePath
  suffix = os.path.splitext(source)[-1]
  if suffix in ('.mov', '.mpg', '.mpeg'):
    source = self.mini_movie_file
  extract_poster_still(source, self.posterStillFile, position)
def extractClipMovie(self, force = False):
  '''
  Encode the mini movie (mjpeg video, mp3 audio, sceneWidth wide) with mencoder.

  Broken files are ignored. Files without a height or with a known
  non-movie extension are just marked as extracted. An existing mini movie
  is kept unless force is set. The extracted flag is set when mencoder
  exits with status 0.
  '''
  if self.broken:
    return
  #do not try to extract non movie files, or files that could not be identified
  non_movie = ('mp3', 'wav', 'srt', 'sub', 'idx', 'rar','jpg', 'png')
  if not self.height or self.path.split('.')[-1] in non_movie:
    self.extracted = True
    return
  target = self.mini_movie_file
  source = self.absolutePath
  if not source or not exists(source):
    return
  if not exists(target):
    # the flag is stale when the cached file has disappeared
    self.extracted = False
  if self.extracted and not force:
    debug("clip exists, skipping extraction %s" % target)
    return
  self.extracted = False
  oxdb_makedir(dirname(target))
  options = ''.join([
    " -ovc lavc -lavcopts vcodec=mjpeg",
    " -af volnorm=1 -oac mp3lame -lameopts br=64:mode=3 -af resample=44100",
    " -vf scale -zoom -xy %s" % self.sceneWidth,
    ' "%s"' % source.replace('"', '\\"'),
    ' -o "%s"' % target,
  ])
  cmd = "mencoder %s >/dev/null 2>&1" % options
  debug(cmd.encode('utf-8'))
  status = os.system(cmd.encode('utf-8'))
  if status == 0:
    self.extracted = True
def extractOggMovie(self, force = False):
  '''
  Encode an ogg version of the movie with ffmpeg2theora, next to the mini
  movie (same path with ".avi" replaced by ".ogg").

  Skips broken files, files without a height, known non-movie extensions
  and missing source files. An existing ogg file is kept unless force is
  set. The extractedOgg attribute is set to True when the encoder exits
  with status 0.
  '''
  if self.broken:
    return
  if not self.height:
    #only if midentify works we should try to extract the clip
    return
  non_movie = ('mp3', 'wav', 'srt', 'sub', 'idx', 'rar','jpg', 'png')
  if self.path.split('.')[-1] in non_movie:
    #ignore files known to not be
    return
  target = self.mini_movie_file.replace('.avi', '.ogg')
  source = self.absolutePath
  if not source or not exists(source):
    return
  if exists(target) and not force:
    debug("clip exists, skipping extraction %s" % target)
    return
  self.extractedOgg = False
  oxdb_makedir(dirname(target))
  options = ''.join([
    " --no-skeleton -K 16 -V 180 -a -1 -H 44100 -S 1 --speedlevel 0 -c 2 ",
    " -x %s -y %s" % (self.sceneWidth, self.sceneHeight),
    ' "%s"' % source.replace('"', '\\"'),
    ' -o "%s"' % target,
  ])
  cmd = "ffmpeg2theora %s >/dev/null 2>&1" % options
  status = os.system(cmd.encode('utf-8'))
  if status == 0:
    self.extractedOgg = True
def extractH264Movie(self, force = False):
  '''
  Encode an H.264 version of the movie with a two-pass ffmpeg run followed
  by qt-faststart, next to the mini movie (same path with ".avi" replaced
  by ".mp4").

  Skips broken files, files without a height, known non-movie extensions
  and missing source files. An existing mp4 file is kept unless force is
  set. The extractedH264 attribute is set to True only when all three
  commands exit with status 0.
  '''
  if self.broken:
    return
  if not self.height:
    #only if midentify works we should try to extract the clip
    return
  if self.path.split('.')[-1] in ('mp3', 'wav', 'srt', 'sub', 'idx', 'rar','jpg', 'png'):
    #ignore files known to not be
    return
  h264File = self.mini_movie_file.replace('.avi', '.mp4')
  h264FileTmp = h264File + ".tmp.mp4"
  movieFile = self.absolutePath
  if not movieFile or not exists(movieFile):
    return
  if exists(h264File) and not force:
    debug("clip exists, skipping extraction %s" % h264File)
    return
  self.extractedH264 = False
  height = self.sceneHeight
  oxdb_makedir(dirname(h264File))
  # escape double quotes in the source path, as the other extractors do,
  # so that it cannot terminate the quoted shell argument
  source = movieFile.replace('"', '\\"')
  options = ''
  options += " -vcodec libx264 -b 112k -bf 3 -subq 6 -cmp 256 -refs 5 -qmin 10 "
  options += " -qmax 51 -qdiff 4 -coder 1 -loop 1 -me hex -me_range 16 -trellis 1 "
  options += " -flags +mv4 -flags2 +bpyramid+wpred+mixed_refs+brdo+8x8dct "
  options += " -partitions parti4x4+parti8x8+partp4x4+partp8x8+partb8x8 -g 250 "
  options += " -keyint_min 16 -sc_threshold 40 -i_qfactor 0.71"
  options += " -s %sx%s" % (self.sceneWidth, height)
  cmd = '''ffmpeg -y -i "%s" -an -pass 1 -threads 2 %s "%s" >/dev/null 2>&1''' % (source, options, h264FileTmp)
  print(cmd)
  r = os.system(cmd.encode('utf-8'))
  if r != 0:
    # no point in running the second pass without first-pass statistics
    return
  cmd = '''ffmpeg -y -i "%s" -acodec libfaac -ac 2 -ar 44100 -ab 48k -pass 2 -threads 2 %s "%s" >/dev/null 2>&1''' % (source, options, h264FileTmp)
  r = os.system(cmd.encode('utf-8'))
  if r != 0:
    return
  cmd = '''qt-faststart "%s" "%s" >/dev/null 2>&1 && rm "%s"''' % (h264FileTmp, h264File, h264FileTmp)
  r = os.system(cmd.encode('utf-8'))
  if r == 0:
    self.extractedH264 = True
def removeTimeline(self):
  '''Delete the timeline file if it exists.'''
  if not exists(self.timelineFile):
    return
  os.unlink(self.timelineFile)
def extractTimeline(self, force = False):
  '''
  Render the timeline image from the mini movie by running
  tools/extract_timeline.py (located relative to the cache root's parent).

  Broken files, files with a height of 0 or less and files without an
  extracted mini movie are skipped. With force an existing timeline is
  removed first; otherwise an existing timeline is kept.
  '''
  if self.broken:
    return
  if force:
    self.removeTimeline()
  #return if its not a video
  if self.height <= 0:
    return
  if not self.extracted or not exists(self.mini_movie_file):
    debug("mini movie missing, skipping %s" % self.path)
    return
  target = self.timelineFile
  if exists(target):
    debug("timeline exists , %s" % self.path)
    return
  oxdb_makedir(dirname(target))
  #this fails in tg-admin shell
  script = abspath(join(dirname(cache.cache_root), "tools/extract_timeline.py"))
  os.system("python %s %s %s" %(script, target, self.mini_movie_file))
def loadSubtitleFromFile(self):
  '''
  Load the subtitle text of the linked .srt file into the movie's srt attribute.

  Works from either side of the link: called on a subtitle it fills the
  linked movie, called on a movie it reads the linked subtitle. The
  movie's srt is cleared first and stays empty when there is no subtitle,
  the subtitle file is missing, or it is not an .srt file.
  '''
  if self.movieFile:
    movieFile, subtitle = self.movieFile, self
  else:
    movieFile, subtitle = self, self.subtitleFile
  if movieFile:
    movieFile.srt = ''
  if not (subtitle and movieFile):
    return
  if not subtitle.absolutePath or not exists(subtitle.absolutePath):
    return
  if not subtitle.absolutePath.endswith('.srt'):
    debug("this is not a subtitle %s" % subtitle.absolutePath)
    return
  movieFile.srt = subtitles.Subtitle(subtitle.absolutePath).toSrt()
def _set_fps(self, fps):
fps = int(fps * 10000)
self._SO_set_fps(fps)
def _get_fps(self):
fps = self._SO_get_fps()
if fps:
return float(fps) / 10000
return 0.0
def _get_resolution(self):
if self.subtitle or (not self.width or not self.height):
return u''
return "%sx%s" % (self.width, self.height)
def updateBpp(self):
  '''
  Compute height * width * fps / bitrate, store it in bpp and return it.

  Returns -1 without storing anything when one of the four values is unset.
  '''
  # NOTE(review): for "bits per pixel" one would expect
  # bitrate / (height * width * fps) - confirm the formula is intended
  if not (self.height and self.width and self.fps and self.bitrate):
    return -1
  value = float(self.height * self.width * self.fps / self.bitrate)
  self.bpp = value
  return value
def updatePixels(self):
  '''
  Compute (length / 1000) * fps * width * height as an int, store it in
  pixels and return it.

  Returns 0 without storing anything when one of the four values is unset.
  '''
  if not (self.length and self.fps and self.width and self.height):
    return 0
  total = int((self.length / 1000) * self.fps * self.width * self.height)
  self.pixels = total
  return total
def _get_pixels(self):
pixels = self._SO_get_pixels()
if not pixels:
pixels = self.updatePixels()
return pixels
def flvClip(self, position):
  '''Return the result of cache.loadFlvClip for this file at position.'''
  clip = cache.loadFlvClip(self, position)
  return clip
def oggClip(self, position):
  '''Return the result of cache.loadOggClip for this file at position.'''
  clip = cache.loadOggClip(self, position)
  return clip
def frame(self, position):
  '''Return the result of cache.loadFrame for this file at position.'''
  loaded = cache.loadFrame(self, position)
  return loaded
def timeline(self):
  '''Return the result of cache.loadTimeline for this file.'''
  loaded = cache.loadTimeline(self)
  return loaded
def posterStill(self, position):
  '''Return the result of cache.loadPosterStill for this file at position.'''
  loaded = cache.loadPosterStill(self, position)
  return loaded
def still(self, position):
  '''
  Return the still image at position, extracting it first when it is not
  yet in the cache.

  The original file is the source, except for .mov/.mpg/.mpeg files,
  where the mini movie is used. Returns the result of cache.loadFile, or
  an empty string when the still could not be produced.
  '''
  target = self.stillFile(position)
  if not exists(target):
    oxdb_makedir(dirname(target))
    source = self.absolutePath
    suffix = os.path.splitext(source)[-1]
    if suffix in ('.mov', '.mpg', '.mpeg'):
      source = self.mini_movie_file
    extract_still(source, target, position)
  if not exists(target):
    return ''
  return cache.loadFile(target)