175 lines
4.5 KiB
Python
175 lines
4.5 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# -*- Mode: Python; -*-
|
|
# vi:si:et:sw=2:sts=2:ts=2
|
|
# OXDb Import client, crawls the filesystem and gathers information about
|
|
# movies
|
|
#
|
|
|
|
import md5
|
|
import os
|
|
import sys
|
|
import re
|
|
import urllib
|
|
import time
|
|
|
|
import simplejson
|
|
|
|
from scrapeit.utils import read_url
|
|
|
|
|
|
class OXDb:
  '''
  Client-side handle for one OXDb archive.

  Every method forwards to self._remote_call(action[, params]) and returns a
  single key of the mapping that call yields.

  NOTE(review): _remote_call is not defined in this class body; it has to be
  provided elsewhere (subclass, mixin or monkey patch) before any of the
  methods below can work -- confirm where it is supposed to come from.
  '''

  def __init__(self, archive):
    '''Remember which archive this client talks about.'''
    self.archive = archive

  def getBase(self):
    '''
    Ask the backend for the archive base path, cache it in self.basePath
    and return it. The result always ends with a slash.
    '''
    # The original first assigned self.ar (an attribute that is never set)
    # to self.basePath, which raised AttributeError before the backend was
    # even asked; that dead assignment is gone.
    base = self._remote_call('base')['base']
    if not base.endswith('/'):
      base = base + '/'
    self.basePath = base
    return self.basePath

  def files(self):
    '''Return the 'files' entry of the backend's 'list' response.'''
    return self._remote_call('list')['files']

  def addFile(self, params):
    '''Send params with the 'add' action and return the 'result' entry.'''
    return self._remote_call('add', params)['result']

  def removeFile(self, params):
    '''Send params with the 'remove' action and return the 'result' entry.'''
    return self._remote_call('remove', params)['result']
|
|
|
|
def oxdb_md5sum(fname):
  '''
  Return the hexadecimal MD5 digest of the file at fname.

  Returns None when fname does not exist (the original raised a NameError
  in that case because it initialised one variable and returned another).
  '''
  # Function-scope import: hashlib replaces the deprecated md5 module without
  # touching the file-level import block.
  import hashlib

  if not os.path.exists(fname):
    return None

  digest = hashlib.md5()
  # The with-block closes the handle even if a read fails half way through.
  with open(fname, 'rb') as f:
    while True:
      chunk = f.read(8096)
      if not chunk:
        break
      digest.update(chunk)
  return digest.hexdigest()
|
|
|
|
# File name extensions (leading dot included, compared case-sensitively)
# that oxdb_spider collects.
_oxdb_extensions = (
  '.avi', '.mov', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb',
  '.mp3', '.wav',
  '.srt', '.sub', '.idx', '.rar',
  '.jpg', '.png',
)


def _oxdb_file_blacklist(f):
  '''Return True if file name f must be ignored, i.e. starts with a dot.'''
  return f.startswith('.')


def oxdb_spider(archive_base):
  '''
  Walk archive_base and return the paths of all files below it whose
  extension is listed in _oxdb_extensions and whose name is not blacklisted.

  Each file is returned exactly once. os.walk already descends into every
  subdirectory, so the extra recursive call per directory that the original
  made (and that listed nested files several times) has been removed.
  '''
  oxdb_files = []
  for root, dirs, files in os.walk(archive_base):
    for f in files:
      if _oxdb_file_blacklist(f):
        continue
      if os.path.splitext(f)[1] in _oxdb_extensions:
        oxdb_files.append(os.path.join(root, f))
  return oxdb_files
|
|
|
|
# Dot-separated file name parts that are never part of the title (exact match).
_known_oxdb_extensions = ['Interview']

# Regular expressions (matched case-insensitively, anywhere in the part) for
# file name parts that are never part of the title. The original list lacked
# a comma after the third entry, which silently glued it to "Season .*" and
# kept "Season N" parts from ever being recognised.
_known_oxdb_extensions_reg = [
  r"\d\d\dx\d\d\d",
  r"S\d\dE\d\d",
  r"S\d\dE\d\d-E\d\d",
  r"Season .*",
  r"Episode .*",
  r"khz$",
]

# Compiled once at import time instead of on every lookup.
_known_oxdb_extensions_compiled = [
  re.compile(reg, re.IGNORECASE) for reg in _known_oxdb_extensions_reg
]


def _in_known_oxdb_extensions(term):
  '''
  used to remove parts that are known to not be part of the title

  Returns True if term is listed in _known_oxdb_extensions or matches one of
  the patterns in _known_oxdb_extensions_reg, False otherwise.
  '''
  if term in _known_oxdb_extensions:
    return True
  return any(pattern.search(term) for pattern in _known_oxdb_extensions_compiled)


def oxdb_title(title):
  '''
  normalize filename to get movie title

  The part after the last dot (the extension) is dropped, then trailing
  dot-separated parts are removed as long as more than one part is left and
  the last one is a known non-title part, starts with 'Part ', or is exactly
  2 or 4 characters long. A name without any dot yields an empty string.
  '''
  parts = title.split('.')[:-1]
  while len(parts) > 1 and (
      _in_known_oxdb_extensions(parts[-1]) or
      parts[-1].startswith('Part ') or
      len(parts[-1]) in (2, 4)):
    parts = parts[:-1]
  return ".".join(parts)
|
|
|
|
def oxdb_import_files(archive):
  '''
  Synchronise the backend's file list for archive with the files on disk.

  Files found below the backend's base path are compared with the backend
  list by relative path and size: unchanged ones are skipped, the others are
  added unless they were modified within the last 5 minutes. Backend entries
  whose md5sum no longer exists on disk are removed.

  Prints progress to stdout and returns the counters as a dict with the keys
  'skipped', 'added' and 'remove'.
  '''
  stats = {'skipped': 0, 'added': 0, 'remove': 0}
  # OXDb requires the archive; the original called OXDb() without it, which
  # raised TypeError before anything else happened.
  oxdb_backend = OXDb(archive)
  base = oxdb_backend.getBase()
  print(base)
  files = oxdb_spider(base)

  oxdb_files = oxdb_backend.files()
  # A set keeps the membership test in the removal loop cheap.
  md5sum_on_disk = set()
  for f in files:
    meta = oxdb_file_stats(f)
    # Strip base only where it is the leading prefix; str.replace would also
    # eat later occurrences of the same text inside the path.
    if f.startswith(base):
      f = f[len(base):]
    if f in oxdb_files and oxdb_files[f]['size'] == meta['size']:
      stats['skipped'] += 1
      md5sum_on_disk.add(oxdb_files[f]['md5sum'])
    else:
      # meta['path'] still holds the full path here, which is what the md5
      # computation needs; it is replaced by the relative path afterwards.
      meta = oxdb_file_metadata(meta)
      #remove base
      meta['path'] = f.encode('utf-8')
      #ignore files changed in the last 5 minutes
      if time.time() - meta['date'] > 300:
        print("%s %s" % (oxdb_backend.addFile(meta), f))
        stats['added'] += 1
      else:
        print("to hot, skipping for now %s" % f)
      # Recorded for added and too-recent files alike, so that neither gets
      # removed below. NOTE(review): the nesting of this statement was lost
      # in the mangled source; this placement is the assumed original.
      md5sum_on_disk.add(meta['md5sum'])
  for f in oxdb_files:
    if oxdb_files[f]['md5sum'] not in md5sum_on_disk:
      print("remove %s" % f)
      oxdb_backend.removeFile({'md5sum': oxdb_files[f]['md5sum']})
      stats['remove'] += 1
  print(stats)
  return stats
|
|
|
|
def oxdb_file_stats(fname):
  '''
  Return a dict describing fname: 'path' (fname as given), 'size'
  (st_size from os.stat) and 'date' (st_mtime from os.stat).
  '''
  info = os.stat(fname)
  return dict(path=fname, size=info.st_size, date=info.st_mtime)
|
|
|
|
def oxdb_file_metadata(meta):
  '''
  Add the default media fields and the md5sum to meta and return it.

  meta must contain 'path', the location of the file on disk. 'video',
  'audio', 'length' and 'bpp' are set to empty placeholder values and
  'md5sum' to the result of oxdb_md5sum for that path. The dict is modified
  in place; the same object is returned.
  '''
  meta['video'] = ''
  meta['audio'] = ''
  meta['length'] = 0
  meta['bpp'] = 0
  meta['md5sum'] = oxdb_md5sum(meta['path'])
  #FIXME: use midentify or other to get more info about file
  #TODO: the original carried unreachable statements after the return that
  # derived a title (oxdb_title of the file's base name) and a director (name
  # of the parent directory) and printed both; wire that in here if wanted.
  return meta
|
|
|
|
# if invoked on command line, import the files of the archive given as the
# only argument.
# NOTE(review): oxdb_import_files requires an archive, but the original call
# passed none and always failed with TypeError; taking it from the command
# line is an assumption -- confirm what value the backend expects.
if __name__ == '__main__':
  if len(sys.argv) != 2:
    sys.stderr.write('usage: %s <archive>\n' % sys.argv[0])
    sys.exit(1)
  oxdb_import_files(sys.argv[1])
|
|
|