211 lines
6.2 KiB
Python
211 lines
6.2 KiB
Python
|
#!/usr/bin/env python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
# -*- Mode: Python; -*-
|
||
|
# vi:si:et:sw=2:sts=2:ts=2
|
||
|
# OXDb Import client, crawls the filesystem and gathers information about
|
||
|
# movies
|
||
|
#
|
||
|
|
||
|
import md5
|
||
|
import os
|
||
|
import sys
|
||
|
import re
|
||
|
import urllib
|
||
|
|
||
|
from scrapeit.utils import read_url
|
||
|
|
||
|
# File names listed as blacklisted.
# NOTE(review): no use of this list is visible in this part of the file --
# confirm against the crawling code.
_oxdb_file_blacklist = ['.DS_Store']

# File extensions listed for the importer (video containers and subtitle
# files).  NOTE(review): consumer not visible here -- confirm.
_oxdb_extensions = [
  '.avi', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4',
  '.srt', '.sub', '.idx'
]

# Exact terms that _in_known_oxdb_extensions() reports as not being part of
# a title.
_known_oxdb_extensions = ['Interview']

# Patterns that _in_known_oxdb_extensions() searches for, case-insensitively,
# anywhere in a term.  One pattern per line with a trailing comma: the
# previous one-line form was missing a comma, which silently merged the
# "S01E01-E02" and "Season ..." patterns into a single useless one.
_known_oxdb_extensions_reg = [
  r"\d\d\dx\d\d\d",
  r"S\d\dE\d\d",
  r"S\d\dE\d\d-E\d\d",
  r"Season .*",
  r"Episode .*",
  r"khz$",
]
|
||
|
|
||
|
def _in_known_oxdb_extensions(term):
  '''
    Tell whether term is a part known to not belong to the title.

    term - string to check.

    Returns True when term is listed verbatim in _known_oxdb_extensions or
    when any pattern from _known_oxdb_extensions_reg matches somewhere in
    it (ignoring case); returns False otherwise.
  '''
  if term in _known_oxdb_extensions:
    return True
  return any(re.search(pattern, term, re.IGNORECASE)
             for pattern in _known_oxdb_extensions_reg)
|
||
|
|
||
|
def oxdb_filenameUmlaute(string):
  '''
    Return string as unicode with two-letter umlaut transcriptions
    (ae, oe, ue, Ae, Oe, Ue) replaced by the corresponding umlaut character.

    Every occurrence is replaced, whether or not it was meant as an umlaut.
  '''
  converted = u"%s" % string
  # applied one after another, in this order
  transcriptions = (
    (u'ae', u'ä'),
    (u'oe', u'ö'),
    (u'ue', u'ü'),
    (u'Ae', u'Ä'),
    (u'Oe', u'Ö'),
    (u'Ue', u'Ü'),
  )
  for plain, umlaut in transcriptions:
    converted = converted.replace(plain, umlaut)
  return converted
|
||
|
|
||
|
def oxdb_director(director):
  '''
    Derive the director name(s) from the path of a movie file.

    director - path of a file; the name of the directory that contains it
               is taken as the director.

    Returns that directory name with every ' & ' replaced by ', '
    (an empty string if the path has no directory part).
  '''
  director = os.path.basename(os.path.dirname(director))
  # str.replace() returns a new string; the result used to be thrown away,
  # so the ' & ' separator was never actually rewritten.
  return director.replace(' & ', ', ')
|
||
|
|
||
|
def oxdb_title(title):
  '''
    Normalize a file name to get the movie title.

    title - path or file name.

    Takes the base name and cuts it at the first dot, except that dots
    directly followed or preceded by a space are kept and do not end the
    title.
  '''
  name = os.path.basename(title)
  # Dots next to a space are hidden behind placeholders so that the cut at
  # the first remaining dot does not stop at them; they are restored after.
  shields = (('. ', '_dot__space_'), (' .', '_space__dot_'))
  for literal, placeholder in shields:
    name = name.replace(literal, placeholder)
  name = name.partition('.')[0]
  for literal, placeholder in shields:
    name = name.replace(placeholder, literal)
  return name
|
||
|
|
||
|
|
||
|
def oxdb_id(title, director):
  '''
    Build the OXDb id for a movie.

    title, director - values combined into the key "director/title".

    Returns '0x' followed by the hexadecimal MD5 digest of the UTF-8
    encoded key.
  '''
  # hashlib supersedes the deprecated md5 module (which no longer exists in
  # Python 3) and yields the same digest; imported here so the function
  # does not depend on the module-level import list.
  import hashlib
  key = u"%s/%s" % (director, title)
  return '0x%s' % hashlib.md5(key.encode('utf-8')).hexdigest()
|
||
|
|
||
|
def formatNumber(n, sep=','):
  '''
    Separate the thousands of a number with a comma.

    n   - number to format (anything str() can render).
    sep - separator placed between groups of three digits.

    Only the integer digits are grouped; a leading minus sign and a
    fractional part are carried over unchanged, e.g. 1234567 -> '1,234,567',
    -100 -> '-100', 1234.5 -> '1,234.5'.
  '''
  text = str(n)
  sign = ''
  if text.startswith('-'):
    # keep the sign out of the grouping, otherwise -100 becomes '-,100'
    sign, text = '-', text[1:]
  # keep the fraction out of the grouping as well
  integer, dot, fraction = text.partition('.')
  groups = []
  while len(integer) > 3:
    groups.append(integer[-3:])
    integer = integer[:-3]
  groups.append(integer)
  groups.reverse()
  return sign + sep.join(groups) + dot + fraction
|
||
|
|
||
|
def oxdb_runtimeformat(runtime):
  '''
    Format a runtime for the stats.

    runtime - duration in seconds.

    Returns '' for 0, "N sec" below one minute, "N min" below 900 minutes,
    and above that a spelled-out breakdown into hours, then days, then
    years (365 days each) as the duration grows.
  '''
  if runtime == 0:
    return ''
  if runtime < 60:
    return "%s sec" % runtime

  total_minutes = int(runtime / 60)
  secs = runtime % 60
  if total_minutes < 900:
    return "%s min" % total_minutes

  total_hours = int(total_minutes / 60)
  mins = total_minutes % 60
  if total_hours < 24:
    return "%s hours %s minutes %s seconds" % (total_hours, mins, secs)

  total_days = int(total_hours / 24)
  hrs = total_hours % 24
  if total_days < 365:
    return "%s days %s hours %s minutes %s seconds" % (total_days, hrs, mins, secs)

  years = int(total_days / 365)
  rest_days = total_days % 365
  return "%s years %s days %s hours %s minutes %s seconds" % (years, rest_days, hrs, mins, secs)
|
||
|
|
||
|
def oxdb_lengthformat(mseconds):
  """
    Format mseconds in a nice way.

    mseconds - duration in milliseconds; fractions of a second are dropped.

    Returns "HH:MM:SS", or "D:HH:MM:SS" once the duration reaches 24 hours.
  """
  # whole seconds, computed with integer arithmetic so the result does not
  # depend on how the interpreter's "/" operator divides integers
  seconds = int(mseconds // 1000)
  minutes, seconds = divmod(seconds, 60)
  hours, minutes = divmod(minutes, 60)
  # ">=": exactly 24 hours has to roll over into one day as well; with ">"
  # it was printed as "24:00:00" while 25 hours gave "1:01:00:00"
  if hours >= 24:
    days, hours = divmod(hours, 24)
    return "%d:%02d:%02d:%02d" % (days, hours, minutes, seconds)
  return "%02d:%02d:%02d" % (hours, minutes, seconds)
|
||
|
|
||
|
def oxdb_format(number, long_name, short):
  """
    Format the value like a 'human-readable' file size (i.e. 13 KB, 4.1 MB,
    102 bytes, etc).

    number    - number to format.
    long_name - long unit name, i.e. byte; used (pluralized with 's')
                below 1024.
    short     - short unit name, i.e. B; appended to the K/M/G/T prefix.
  """
  if not number:
    return "0 %ss" % long_name

  value = float(number)
  step = 1024
  if value < step:
    plural = ''
    if value != 1:
      plural = 's'
    return "%d %s%s" % (value, long_name, plural)

  # one template per power of 1024; the precision grows with the unit
  unit = step
  for template in ("%d K%s", "%.1f M%s", "%.2f G%s"):
    if value < unit * step:
      return template % (value / unit, short)
    unit *= step
  return "%.3f T%s" % (value / unit, short)
|
||
|
|
||
|
def oxdb_filesizeformat(number):
  '''
    Format number with oxdb_format using the units 'byte' / 'B'.
  '''
  return oxdb_format(number, long_name='byte', short='B')
|
||
|
|
||
|
def oxdb_bitformat(number):
  '''
    Format number with oxdb_format using the units 'bit' / 'b'.
  '''
  return oxdb_format(number, long_name='bit', short='b')
|
||
|
|
||
|
def oxdb_pixelformat(number):
  '''
    Format number with oxdb_format using the units 'pixel' / 'px'.
  '''
  return oxdb_format(number, long_name='pixel', short='px')
|
||
|
|
||
|
|
||
|
from htmlentitydefs import name2codepoint
|
||
|
|
||
|
# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference).
# The trailing semicolon is optional.
charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')

def htmldecode(text):
  """
    Decode HTML entities in the given text.

    text - byte string or unicode string.

    Numeric references and names found in name2codepoint are replaced by
    the character they stand for; for byte strings, code points up to 255
    are inserted with chr() and higher ones with unichr().  Unknown names
    and numeric references that do not denote a valid code point are left
    in the text unchanged.
  """
  if isinstance(text, unicode):
    uchr = unichr
  else:
    def uchr(value):
      if value > 255:
        return unichr(value)
      return chr(value)

  def entitydecode(match):
    entity = match.group(1)
    try:
      if entity.startswith('#x'):
        return uchr(int(entity[2:], 16))
      if entity.startswith('#'):
        return uchr(int(entity[1:]))
      if entity in name2codepoint:
        return uchr(name2codepoint[entity])
    except (ValueError, OverflowError):
      # unichr() rejects numbers outside its range (e.g. "&#99999999;");
      # such a reference is kept as-is instead of aborting the decode
      pass
    return match.group(0)

  return charrefpat.sub(entitydecode, text)
|
||
|
|
||
|
def highlight(text, query):
  '''
    Wrap every occurrence of query in text in a highlighting span.

    text  - string to search.
    query - literal search term (not a regular expression), matched
            ignoring case; an empty or None query leaves text unchanged.

    Returns text with each match enclosed in
    <span class="textHighlight">...</span>, keeping the spelling found in
    the text.
    NOTE(review): text and query are inserted without HTML escaping --
    confirm that callers pass already-escaped text.
  '''
  if not query:
    return text
  # A single case-insensitive substitution wraps each occurrence exactly
  # once.  Substituting once per findall() hit re-wrapped earlier matches,
  # nesting the spans whenever the query occurred more than once.
  pattern = re.compile("(%s)" % re.escape(query), re.IGNORECASE)
  return pattern.sub('<span class="textHighlight">\\1</span>', text)
|
||
|
|
||
|
def imdb2oxdb(imdb):
  '''
    Convert a decimal IMDb id string to the '0x' form.

    imdb - id as a string.  Values that already start with '0x' or 'ox' are
           returned unchanged.

    Returns '0x' followed by the number in upper-case hexadecimal, padded
    to at least six digits.
  '''
  already_prefixed = imdb.startswith(('0x', 'ox'))
  if not already_prefixed:
    return "0x%06X" % int(imdb)
  return imdb
|
||
|
|
||
|
def oxdb2imdb(oxdb):
  '''
    Convert an eight-character id back to a seven-digit decimal id string.

    oxdb - id as a string.  Only eight-character values are converted, which
           is the length of the '0x' + six hex digits form produced by
           imdb2oxdb; anything else is returned unchanged.
  '''
  if len(oxdb) == 8:
    if oxdb[:2].lower() == '0x':
      # float() does not parse hexadecimal strings and raised ValueError
      # here; int() with base 16 accepts the '0x' prefix
      return "%07d" % int(oxdb, 16)
    # eight-character values without the prefix keep the old numeric path
    return "%07d" % float(oxdb)
  return oxdb
|
||
|
|
||
|
def trimString(string, length):
  '''
    Shorten string to at most length characters.

    string - text to shorten.
    length - maximum length of the result.

    Strings that already fit are returned unchanged.  Longer ones keep
    their beginning and their last 10 characters, joined by '...'.  When
    length is below 13 there is no room for that layout and the string is
    simply cut after length characters.
  '''
  if len(string) <= length:
    return string
  if length < 13:
    # "length - 13" would be negative and slice from the end, producing a
    # result longer than requested with duplicated text
    return string[:max(length, 0)]
  return string[:length - 13] + '...' + string[-10:]
|
||
|
|
||
|
# Two-letter language codes (presumably ISO 639-1, including some retired
# codes such as 'iw' and 'in' -- TODO confirm the source of this list).
languages = (
  'be', 'bg', 'ba', 'wo', 'bn', 'bo', 'bh', 'bi', 'ji', 'br',
  'ja', 'ru', 'rw', 'tl', 'rm', 'rn', 'ro', 'gu', 'jw', 'gd',
  'ga', 'sv', 'gn', 'gl', 'om', 'tn', 'fa', 'oc', 'ss', 'or',
  'hy', 'hr', 'sw', 'hu', 'hi', 'su', 'ha', 'ps', 'pt', 'sk',
  'pa', 'pl', 'el', 'eo', 'en', 'zh', 'sm', 'eu', 'et', 'sa',
  'es', 'mg', 'uz', 'ml', 'mo', 'mn', 'mi', 'as', 'mk', 'ur',
  'mt', 'ms', 'mr', 'my', 'fr', 'fy', 'ia', 'zu', 'fi', 'fj',
  'fo', 'nl', 'no', 'na', 'ne', 'xh', 'co', 'ca', 'cy', 'cs',
  'ka', 'kk', 'sr', 'sq', 'ko', 'kn', 'km', 'kl', 'ks', 'si',
  'sh', 'so', 'sn', 'ku', 'sl', 'ky', 'sg', 'sd', 'yo', 'de',
  'da', 'dz', 'la', 'ln', 'lo', 'tt', 'tr', 'ts', 'lv', 'to',
  'lt', 'tk', 'th', 'ti', 'tg', 'te', 'ta', 'aa', 'ab', 'uk',
  'af', 'vi', 'is', 'am', 'it', 'iw', 'vo', 'ik', 'ar', 'in',
  'ay', 'az', 'ie', 'qu', 'st', 'tw',
)
|