oxdbarchive/oxdbarchive/oxdb_import.py

134 lines
3.2 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
# OXDb Import client, crawls the filesystem and gathers information about
# movies
#
import md5
import os
import sys
import re
import urllib
import time
import simplejson
from scrapeit.utils import read_url
class OXDb:
  '''
  Client-side handle for one archive on the OXDb backend.

  Every method delegates to self._remote_call(action[, params]) and picks a
  single key out of the mapping it returns.
  NOTE(review): _remote_call is not defined in the visible class body --
  confirm where it is provided.
  '''

  def __init__(self, archive):
    '''Remember which archive this client refers to.'''
    self.archive = archive

  def getBase(self):
    '''
    Fetch the archive base path from the backend.

    The value of the 'base' key is normalized to end with '/', stored on
    self.basePath and returned.
    '''
    base = self._remote_call('base')['base']
    if not base.endswith('/'):
      base = base + '/'
    self.basePath = base
    return self.basePath

  def files(self):
    '''Return the 'files' entry of the backend's 'list' response.'''
    return self._remote_call('list')['files']

  def addFile(self, params):
    '''Send params with the 'add' action and return the 'result' entry.'''
    return self._remote_call('add', params)['result']

  def removeFile(self, params):
    '''Send params with the 'remove' action and return the 'result' entry.'''
    return self._remote_call('remove', params)['result']
def oxdb_md5sum(fname):
  '''
  Return the hexadecimal MD5 digest of the file at fname.

  Returns None when fname does not exist. The file is opened in binary mode
  and read in fixed-size chunks, so it is never held in memory as a whole.
  Errors raised while opening or reading the file propagate to the caller.
  '''
  if not os.path.exists(fname):
    return None
  try:
    # hashlib supersedes the stand-alone md5 module; fall back to the old
    # module only where hashlib is unavailable.
    from hashlib import md5 as new_md5
  except ImportError:
    from md5 import new as new_md5
  digest = new_md5()
  f = open(fname, 'rb')
  try:
    while True:
      chunk = f.read(8096)
      if not chunk:
        break
      digest.update(chunk)
  finally:
    # Release the handle even when a read fails part-way through.
    f.close()
  return digest.hexdigest()
# File name extensions (with leading dot, compared case-sensitively) that
# oxdb_spider accepts when collecting files.
_oxdb_extensions = (
  '.avi',
  '.mov',
  '.ogg',
  '.ogm',
  '.mkv',
  '.mpg',
  '.wmv',
  '.mp4v',
  '.mp4',
  '.rm',
  '.mpeg',
  '.rmvb',
  '.mp3',
  '.wav',
  '.srt',
  '.sub',
  '.idx',
  '.rar',
  '.jpg',
  '.png',
)
def _oxdb_file_blacklist(f):
  '''
  Tell whether the file name f must be skipped.

  A name is blacklisted exactly when it starts with a dot.
  '''
  return f.startswith('.')
def oxdb_spider(archive_base):
  '''
  Collect the paths of all importable files below archive_base.

  A file is included when its name is not rejected by _oxdb_file_blacklist
  and its extension is listed in _oxdb_extensions. Each matching file is
  returned exactly once, as os.path.join(directory, name).
  '''
  oxdb_files = []
  # os.walk already descends into every subdirectory, so no explicit
  # recursion is needed; recursing per directory as well would report files
  # in nested folders more than once.
  for root, dirs, files in os.walk(archive_base):
    for f in files:
      if _oxdb_file_blacklist(f):
        continue
      if os.path.splitext(f)[1] in _oxdb_extensions:
        oxdb_files.append(os.path.join(root, f))
  return oxdb_files
# Terms that are dropped from a title when they match a name part exactly.
_known_oxdb_extensions = ['Interview']
# Regular expressions for name parts that are not part of the title; they are
# searched case-insensitively by _in_known_oxdb_extensions.
_known_oxdb_extensions_reg = [
  r"\d\d\dx\d\d\d",
  r"S\d\dE\d\d",
  r"S\d\dE\d\d-E\d\d",
  r"Season .*",
  r"Episode .*",
  r"khz$",
]
def _in_known_oxdb_extensions(term):
  '''
  Tell whether term is a name part known not to belong to the title.

  Returns True when term is listed verbatim in _known_oxdb_extensions, or
  when any pattern from _known_oxdb_extensions_reg matches somewhere inside
  it (case is ignored for the patterns only). Returns False otherwise.
  '''
  if term in _known_oxdb_extensions:
    return True
  return any(
    re.search(pattern, term, re.IGNORECASE) is not None
    for pattern in _known_oxdb_extensions_reg
  )
def oxdb_title(title):
  '''
  Derive a movie title from a file name.

  The name is split on '.', the last piece (the extension) is discarded, and
  trailing pieces are then removed while more than one piece is left and the
  last one is a known non-title term, starts with 'Part ', or is exactly 2
  or 4 characters long. The remaining pieces are joined with '.' again.
  '''
  parts = title.split('.')
  parts.pop()
  while len(parts) > 1:
    last = parts[-1]
    if not (_in_known_oxdb_extensions(last)
            or last.startswith('Part ')
            or len(last) in (2, 4)):
      break
    parts.pop()
  return ".".join(parts)
def oxdb_file_stats(fname):
  '''
  Return basic filesystem facts about fname as a dict.

  Keys: 'path' (fname as given), 'size' (st_size) and 'date' (st_mtime),
  both taken from a single os.stat call. Errors from os.stat propagate.
  '''
  info = os.stat(fname)
  return dict(path=fname, size=info.st_size, date=info.st_mtime)
def oxdb_file_metadata(meta):
  '''
  Fill in the metadata fields of meta in place and return the same dict.

  meta must contain 'path'. 'video' and 'audio' are set to '', 'length' and
  'bpp' to 0, and 'md5sum' to oxdb_md5sum(meta['path']). No database lookup
  is performed here.
  '''
  meta['video'] = ''
  meta['audio'] = ''
  meta['length'] = 0
  meta['bpp'] = 0
  meta['md5sum'] = oxdb_md5sum(meta['path'])
  # FIXME: use midentify or other to get more info about file
  # TODO: derive the title with oxdb_title() from the file name and the
  # director from the parent directory name.
  return meta