From 36a5b32df283ccf39ac098b4dbb8dfe1456c93ef Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Fri, 15 Aug 2008 17:13:11 +0200 Subject: [PATCH] remove scrapeit, sync oxdb_utils(update oxdb_director) --- oxdbarchive/cache.py | 2 - oxdbarchive/oxdb_import.py | 4 +- oxdbarchive/oxdb_utils.py | 230 +++++++++++-------------------------- 3 files changed, 69 insertions(+), 167 deletions(-) diff --git a/oxdbarchive/cache.py b/oxdbarchive/cache.py index 79c5eec..31c25f8 100644 --- a/oxdbarchive/cache.py +++ b/oxdbarchive/cache.py @@ -8,8 +8,6 @@ from glob import glob import Image from StringIO import StringIO -from scrapeit.utils import read_url - cache_root = join(dirname(abspath(__file__)), 'cache') img_extension = "jpg" diff --git a/oxdbarchive/oxdb_import.py b/oxdbarchive/oxdb_import.py index 46f4f65..4a2dd0a 100644 --- a/oxdbarchive/oxdb_import.py +++ b/oxdbarchive/oxdb_import.py @@ -13,8 +13,6 @@ import time import simplejson -from scrapeit.utils import read_url - class OXDb: def __init__(self, archive): @@ -53,7 +51,7 @@ def oxdb_md5sum(fname): return md5sum _oxdb_extensions = ( - '.avi', '.mov', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb', + '.avi', '.mov', '.ogg', '.ogv', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb', '.flv', '.f4v', '.mp3', '.wav', '.srt', '.sub', '.idx', '.rar', '.jpg', '.png', diff --git a/oxdbarchive/oxdb_utils.py b/oxdbarchive/oxdb_utils.py index 98a36fd..16bffae 100644 --- a/oxdbarchive/oxdb_utils.py +++ b/oxdbarchive/oxdb_utils.py @@ -11,27 +11,6 @@ import re import urllib import errno -from scrapeit.utils import read_url - -_oxdb_file_blacklist = ['.DS_Store'] -_oxdb_extensions = [ - '.avi', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', - '.srt', '.sub', '.idx' -] - -_known_oxdb_extensions = ['Interview'] -_known_oxdb_extensions_reg = ["\d\d\dx\d\d\d", "S\d\dE\d\d", "S\d\dE\d\d-E\d\d" "Season .*", "Episode .*", 'khz$'] - -def _in_known_oxdb_extensions(term): - ''' - used to remove parts that are known to not be part of the title - ''' - if term in _known_oxdb_extensions: - return True - for reg in _known_oxdb_extensions_reg: - if re.compile(reg, re.IGNORECASE).findall(term): - return True - return False def oxdb_filenameUmlaute(string): string = u"%s" % string @@ -44,143 +23,82 @@ def oxdb_filenameUmlaute(string): return string def oxdb_director(director): - director = os.path.basename(os.path.dirname(director)) - director = director.replace('&', ', ').replace(' , ', ', ') - return director + director = os.path.basename(os.path.dirname(director)) + director = ", ".join([normalizeName(d) for d in director.split('; ')]) + director = director.replace('Unknown Director', '') + director = director.replace('Series', '') + director = director.replace('Compilations', '') + return director -def oxdb_title(title): - ''' - normalize filename to get movie title - ''' - title = os.path.basename(title).replace('. ', '_dot__space_') - title = title.replace(' .', '_space__dot_') - title = title.split('.')[0] - title = title.replace('_dot__space_', '. ') - title = title.replace('_space__dot_', ' .') - return title +def oxdb_title(_title, searchTitle = False): + ''' + normalize filename to get movie title + ''' + _title = os.path.basename(_title).replace('. ', '_dot__space_') + _title = _title.replace(' .', '_space__dot_') + title = _title.split('.')[0] + se = re.compile('Season (\d+).Episode (\d+)').findall(_title) + if se: + se = "S%02dE%02d" % (int(se[0][0]), int(se[0][1])) + if 'Part' in _title.split('.')[-2] and 'Episode' not in _title.split('.')[-3]: + stitle = _title.split('.')[-3] + else: + stitle = _title.split('.')[-2] + if stitle.startswith('Episode '): + stitle = '' + if searchTitle: + title = '"%s" %s' % (title, stitle) + else: + title = '%s (%s) %s' % (title, se, stitle) + title = title.strip() + title = title.replace('_dot__space_', '. ') + title = title.replace('_space__dot_', ' .') + return title +def oxdb_year(data): + return oxlib.findRe(data, '\.(\d{4})\.') + +def oxdb_series_title(path): + seriesTitle = u'' + if path.startswith('Series'): + seriesTitle = os.path.basename(os.path.dirname(path)) + else: + t = oxdb_title(path) + if " (S" in t: + seriesTitle = t.split(" (S")[0] + return seriesTitle + +def oxdb_episode_title(path): + episodeTitle = u'' + ep = re.compile('.Episode \d+?\.(.*?)\.[a-zA-Z]').findall(path) + if ep: + episodeTitle = ep[0][0] + return episodeTitle + +def oxdb_season_episode(path): + season = 0 + episode = 0 + path = os.path.basename(path) + se = re.compile('Season (\d+).Episode (\d+)').findall(path) + if se: + season = int(se[0][0]) + episode = int(se[0][1]) + else: + ep = re.compile('.Episode (\d+?)').findall(path) + if ep: + episode = int(ep[0][0]) + if season == 0 and episode == 0: + se = re.compile('S(\d\d)E(\d\d)').findall(path) + if se: + season = int(se[0][0]) + episode = int(se[0][1]) + return (season, episode) def oxdb_id(title, director): key = u"%s/%s" % (director,title) oxdb_id = '0x%s' % md5.new(key.encode('utf-8')).hexdigest() return oxdb_id -''' -seperate number with thousand comma -''' -def formatNumber(n, sep=','): - ln = list(str(n)) - ln.reverse() - newn = [] - while len(ln) > 3: - newn.extend(ln[:3]) - newn.append(sep) - ln = ln[3:] - newn.extend(ln) - newn.reverse() - return "".join(newn) - -''' -format runtime for stats -''' -def oxdb_runtimeformat(runtime): - if runtime == 0: - return '' - if runtime < 60: - return "%s sec" % runtime - minutes = int(runtime / 60) - seconds = runtime % 60 - if minutes < 900: - return "%s min" % minutes - hours = int(minutes / 60) - minutes = minutes % 60 - if hours < 24: - return "%s hours %s minutes %s seconds" % (hours, minutes, seconds) - days = int(hours / 24) - hours = hours % 24 - if days < 365: - return "%s days %s hours %s minutes %s seconds" % (days, hours, minutes, seconds) - years = int(days / 365) - days = days % 365 - return "%s years %s days %s hours %s minutes %s seconds" % (years, days, hours, minutes, seconds) - -def oxdb_lengthformat(mseconds): - """ - Format mseconds in a nice way - """ - seconds = mseconds/1000 - minutes = int(seconds / 60) - seconds = seconds % 60 - hours = int(minutes / 60) - minutes = minutes % 60 - if hours > 24: - days = int(hours / 24) - hours = hours % 24 - return "%d:%02d:%02d:%02d" % (days, hours, minutes, seconds) - return "%02d:%02d:%02d" % (hours, minutes, seconds) - -""" -Format the value like a 'human-readable' file size (i.e. 13 KB, 4.1 MB, 102 -bytes, etc). - number - number to format. - long_name - long name. i.e. byte - short - short name, i.e. B -""" -def oxdb_format(number, long_name, short): - if not number: - return "0 %ss" % long_name - number = float(number) - if number < 1024: - return "%d %s%s" % (number, long_name, number != 1 and 's' or '') - if number < 1024 * 1024: - return "%d K%s" % ((number / 1024), short) - if number < 1024 * 1024 * 1024: - return "%.1f M%s" % (number / (1024 * 1024), short) - if number < 1024 * 1024 * 1024 * 1024: - return "%.2f G%s" % (number / (1024 * 1024 * 1024), short) - return "%.3f T%s" % (number / (1024 * 1024 * 1024 * 1024), short) - -def oxdb_filesizeformat(number): - return oxdb_format(number, 'byte', 'B') - -def oxdb_bitformat(number): - return oxdb_format(number, 'bit', 'b') - -def oxdb_pixelformat(number): - return oxdb_format(number, 'pixel', 'px') - - -from htmlentitydefs import name2codepoint - -# This pattern matches a character entity reference (a decimal numeric -# references, a hexadecimal numeric reference, or a named reference). -charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?') - -def htmldecode(text): - """Decode HTML entities in the given text.""" - if type(text) is unicode: - uchr = unichr - else: - uchr = lambda value: value > 255 and unichr(value) or chr(value) - def entitydecode(match, uchr=uchr): - entity = match.group(1) - if entity.startswith('#x'): - return uchr(int(entity[2:], 16)) - elif entity.startswith('#'): - return uchr(int(entity[1:])) - elif entity in name2codepoint: - return uchr(name2codepoint[entity]) - else: - return match.group(0) - return charrefpat.sub(entitydecode, text) - -def highlight(text, query): - if query: - m = re.compile("(%s)" % re.escape(query), re.IGNORECASE).findall(text) - for i in m: - text = re.sub("(%s)" % re.escape(i), '\\1', text) - return text - def imdb2oxdb(imdb): if imdb.startswith('0x') or imdb.startswith('ox') : return imdb @@ -196,18 +114,6 @@ def trimString(string, length): string = string[:length - 13] + '...' + string[-10:] return string -languages = ('be', 'bg', 'ba', 'wo', 'bn', 'bo', 'bh', 'bi', 'ji', 'br', 'ja', - 'ru', 'rw', 'tl', 'rm', 'rn', 'ro', 'gu', 'jw', 'gd', 'ga', 'sv', 'gn', 'gl', - 'om', 'tn', 'fa', 'oc', 'ss', 'or', 'hy', 'hr', 'sw', 'hu', 'hi', 'su', 'ha', - 'ps', 'pt', 'sk', 'pa', 'pl', 'el', 'eo', 'en', 'zh', 'sm', 'eu', 'et', 'sa', - 'es', 'mg', 'uz', 'ml', 'mo', 'mn', 'mi', 'as', 'mk', 'ur', 'mt', 'ms', 'mr', - 'my', 'fr', 'fy', 'ia', 'zu', 'fi', 'fj', 'fo', 'nl', 'no', 'na', 'ne', 'xh', - 'co', 'ca', 'cy', 'cs', 'ka', 'kk', 'sr', 'sq', 'ko', 'kn', 'km', 'kl', 'ks', - 'si', 'sh', 'so', 'sn', 'ku', 'sl', 'ky', 'sg', 'sd', 'yo', 'de', 'da', 'dz', - 'la', 'ln', 'lo', 'tt', 'tr', 'ts', 'lv', 'to', 'lt', 'tk', 'th', 'ti', 'tg', - 'te', 'ta', 'aa', 'ab', 'uk', 'af', 'vi', 'is', 'am', 'it', 'iw', 'vo', 'ik', - 'ar', 'in', 'ay', 'az', 'ie', 'qu', 'st', 'tw') - def oxdb_makedir(folder): if not os.path.exists(folder): try: