oxdbarchive/oxdbarchive/oxdb_import.py

175 lines
4.5 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- Mode: Python; -*-
# vi:si:et:sw=2:sts=2:ts=2
# OXDb Import client, crawls the filesystem and gathers information about
# movies
#
import md5
import os
import sys
import re
import urllib
import time
import simplejson
from scrapeit.utils import read_url
class OXDb:
  '''
    Thin client for the remote OXDb backend.

    Every method delegates to self._remote_call(action[, params]) and
    returns one key of the mapping that call hands back.
    NOTE(review): _remote_call is not defined in the visible part of this
    class, it has to be provided elsewhere before any method below works.
  '''
  def __init__(self, archive):
    # only stored here; nothing in the visible code reads it back
    self.archive = archive

  def getBase(self):
    '''
      fetch the base path from the backend, remember it in self.basePath
      and return it, always terminated by a '/'
    '''
    self.basePath = self._remote_call('base')['base']
    if not self.basePath.endswith('/'):
      self.basePath = self.basePath + '/'
    return self.basePath

  def files(self):
    '''
      return the 'files' entry of the backend's 'list' response
    '''
    return self._remote_call('list')['files']

  def addFile(self, params):
    '''
      send params with the 'add' action and return the backend's 'result'
    '''
    return self._remote_call('add', params)['result']

  def removeFile(self, params):
    '''
      send params with the 'remove' action and return the backend's 'result'
    '''
    return self._remote_call('remove', params)['result']
def oxdb_md5sum(fname):
  '''
    Returns the hex md5 hash of the file at fname,
    or None if fname does not exist.
  '''
  if not os.path.exists(fname):
    return None
  # hashlib supersedes the deprecated md5 module; fall back to md5 on
  # interpreters that predate hashlib
  try:
    from hashlib import md5 as new_md5
  except ImportError:
    from md5 import new as new_md5
  m = new_md5()
  f = open(fname, 'rb')
  try:
    # read in chunks so large movie files are never held in memory at once
    while True:
      d = f.read(8096)
      if not d:
        break
      m.update(d)
  finally:
    # close the handle even if reading fails
    f.close()
  return m.hexdigest()
# extensions of the files oxdb_spider collects; the comparison is exact,
# so only lower case extensions match
_oxdb_extensions = (
  '.avi', '.mov', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb',
  '.mp3', '.wav',
  '.srt', '.sub', '.idx', '.rar',
  '.jpg', '.png',
)

def _oxdb_file_blacklist(f):
  '''
    True for file names that must never be collected (names starting with '.')
  '''
  return f.startswith('.')

def oxdb_spider(archive_base):
  '''
    walk archive_base and return the paths of all files below it that
    have one of the extensions in _oxdb_extensions and are not blacklisted;
    every file is listed exactly once
  '''
  oxdb_files = []
  # os.walk already descends into every subdirectory; recursing into
  # dirs on top of that would list nested files more than once
  for root, dirs, files in os.walk(archive_base):
    for f in files:
      if _oxdb_file_blacklist(f):
        continue
      if os.path.splitext(f)[1] in _oxdb_extensions:
        oxdb_files.append(os.path.join(root, f))
  return oxdb_files
# filename parts that are known to not belong to the title (exact match)
_known_oxdb_extensions = ['Interview']
# same, as regular expressions; they are searched case insensitively
# anywhere inside the part. One pattern per line so that a missing comma
# can not silently glue two patterns together.
_known_oxdb_extensions_reg = [
  r"\d\d\dx\d\d\d",
  r"S\d\dE\d\d",
  r"S\d\dE\d\d-E\d\d",
  r"Season .*",
  r"Episode .*",
  r"khz$",
]

def _in_known_oxdb_extensions(term):
  '''
    used to remove parts that are known to not be part of the title

    True if term equals an entry of _known_oxdb_extensions or matches
    one of the patterns in _known_oxdb_extensions_reg
  '''
  if term in _known_oxdb_extensions:
    return True
  for reg in _known_oxdb_extensions_reg:
    if re.search(reg, term, re.IGNORECASE):
      return True
  return False

def oxdb_title(title):
  '''
    normalize filename to get movie title

    the part after the last '.' is dropped, then trailing '.'-separated
    parts are removed as long as they are a known non-title part, start
    with 'Part ' or are exactly 2 or 4 characters long.
    The first part is never removed.
  '''
  parts = title.split('.')[:-1]
  while len(parts) > 1:
    last = parts[-1]
    removable = (
      _in_known_oxdb_extensions(last)
      or last.startswith('Part ')
      or len(last) in (2, 4)
    )
    if not removable:
      break
    parts.pop()
  return ".".join(parts)
def oxdb_import_files(archive):
  '''
    bring the backend's file list in line with the files on disk

    archive is handed to the OXDb client. All files oxdb_spider finds
    below the backend's base path are compared with the backend's list:
    files already known with the same size are skipped, all others are
    added unless they changed within the last 5 minutes. Backend entries
    whose md5sum was not seen on disk are removed afterwards.

    Progress is printed to stdout. Returns a dict with the counters
    'skipped', 'added' and 'remove'.
  '''
  stats = {'skipped': 0, 'added': 0, 'remove':0}
  oxdb_backend = OXDb(archive)
  base = oxdb_backend.getBase()
  print(base)
  files = oxdb_spider(base)
  oxdb_files = oxdb_backend.files()
  # md5sums of everything found on disk, only used for membership tests
  md5sum_on_disk = set()
  for f in files:
    meta = oxdb_file_stats(f)
    # make the path relative to base; strip the leading base only, so a
    # path that happens to contain base a second time stays intact
    if f.startswith(base):
      f = f[len(base):]
    if f in oxdb_files and oxdb_files[f]['size'] == meta['size']:
      stats['skipped'] += 1
      md5sum_on_disk.add(oxdb_files[f]['md5sum'])
    else:
      # meta['path'] is still the absolute path here, it is needed to hash the file
      meta = oxdb_file_metadata(meta)
      #remove base
      meta['path'] = f.encode('utf-8')
      #ignore files changed in the last 5 minutes
      if time.time() - meta['date'] > 300:
        print("%s %s" % (oxdb_backend.addFile(meta), f))
        stats['added'] += 1
      else:
        print("%s %s" % ("to hot, skipping for now", f))
      # hot files count as present too, so they are not removed below
      md5sum_on_disk.add(meta['md5sum'])
  for f in oxdb_files:
    if oxdb_files[f]['md5sum'] not in md5sum_on_disk:
      print("%s %s" % ("remove", f))
      oxdb_backend.removeFile({'md5sum':oxdb_files[f]['md5sum']})
      stats['remove'] += 1
  print(stats)
  return stats
def oxdb_file_stats(fname):
  '''
    stat fname and return a dict with its 'path' (fname as given),
    'size' (st_size) and 'date' (st_mtime)
  '''
  info = os.stat(fname)
  return dict(path=fname, size=info.st_size, date=info.st_mtime)
def oxdb_file_metadata(meta):
  '''
    complete the metadata of a file

    meta is a dict with at least a 'path' entry pointing to the file.
    It is updated in place with empty 'video' and 'audio' info, a
    'length' and 'bpp' of 0 and the 'md5sum' of the file, and returned.
  '''
  meta['video'] = ''
  meta['audio'] = ''
  meta['length'] = 0
  meta['bpp'] = 0
  meta['md5sum'] = oxdb_md5sum(meta['path'])
  #FIXME: use midentify or other to get more info about file
  #FIXME: title (oxdb_title of the file name) and director (name of the
  # parent directory) were only derived in code that could never run
  # after the return; derive them here once they are actually needed
  return meta
# if invoked on command line, import the archive given as first argument.
if __name__ == '__main__':
  # oxdb_import_files needs its archive argument, refuse to run without it
  if len(sys.argv) < 2:
    sys.stderr.write('usage: %s <archive>\n' % sys.argv[0])
    sys.exit(1)
  oxdb_import_files(sys.argv[1])