From 36a5b32df283ccf39ac098b4dbb8dfe1456c93ef Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Fri, 15 Aug 2008 17:13:11 +0200
Subject: [PATCH] remove scrapeit, sync oxdb_utils(update oxdb_director)

---
 oxdbarchive/cache.py       |   2 -
 oxdbarchive/oxdb_import.py |   4 +-
 oxdbarchive/oxdb_utils.py  | 230 +++++++++++--------------------------
 3 files changed, 69 insertions(+), 167 deletions(-)

diff --git a/oxdbarchive/cache.py b/oxdbarchive/cache.py
index 79c5eec..31c25f8 100644
--- a/oxdbarchive/cache.py
+++ b/oxdbarchive/cache.py
@@ -8,8 +8,6 @@ from glob import glob
 import Image
 from StringIO import StringIO
 
-from scrapeit.utils import read_url
-
 cache_root = join(dirname(abspath(__file__)), 'cache')
 img_extension = "jpg"
 
diff --git a/oxdbarchive/oxdb_import.py b/oxdbarchive/oxdb_import.py
index 46f4f65..4a2dd0a 100644
--- a/oxdbarchive/oxdb_import.py
+++ b/oxdbarchive/oxdb_import.py
@@ -13,8 +13,6 @@ import time
 
 import simplejson
 
-from scrapeit.utils import read_url
-
 
 class OXDb:
   def __init__(self, archive):
@@ -53,7 +51,7 @@ def oxdb_md5sum(fname):
     return md5sum
 
 _oxdb_extensions = (
-  '.avi', '.mov', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb',
+  '.avi', '.mov', '.ogg', '.ogv', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4', '.rm', '.mpeg', '.rmvb', '.flv', '.f4v',
   '.mp3', '.wav',
   '.srt', '.sub', '.idx', '.rar',
   '.jpg', '.png',
diff --git a/oxdbarchive/oxdb_utils.py b/oxdbarchive/oxdb_utils.py
index 98a36fd..16bffae 100644
--- a/oxdbarchive/oxdb_utils.py
+++ b/oxdbarchive/oxdb_utils.py
@@ -11,27 +11,6 @@ import re
 import urllib
 import errno
 
-from scrapeit.utils import read_url
-
-_oxdb_file_blacklist = ['.DS_Store']
-_oxdb_extensions = [
-  '.avi', '.ogg', '.ogm', '.mkv', '.mpg', '.wmv', '.mp4v', '.mp4',
-  '.srt', '.sub', '.idx'
-]
-
-_known_oxdb_extensions = ['Interview']
-_known_oxdb_extensions_reg = ["\d\d\dx\d\d\d", "S\d\dE\d\d", "S\d\dE\d\d-E\d\d" "Season .*", "Episode .*", 'khz$']
-
-def _in_known_oxdb_extensions(term):
-  '''
-    used to remove parts that are known to not be part of the title
-  '''
-  if term in _known_oxdb_extensions:
-    return True
-  for reg in _known_oxdb_extensions_reg:
-    if re.compile(reg, re.IGNORECASE).findall(term):
-      return True
-  return False
 
 def oxdb_filenameUmlaute(string):
   string = u"%s" % string
@@ -44,143 +23,82 @@ def oxdb_filenameUmlaute(string):
   return string
 
 def oxdb_director(director):
-  director = os.path.basename(os.path.dirname(director))
-  director = director.replace('&', ', ').replace(' ,  ', ', ')
-  return director
+    director = os.path.basename(os.path.dirname(director))
+    director = ", ".join([normalizeName(d) for d in director.split('; ')])
+    director = director.replace('Unknown Director', '')
+    director = director.replace('Series', '')
+    director = director.replace('Compilations', '')
+    return director
 
-def oxdb_title(title):
-  '''
-    normalize filename to get movie title
-  '''
-  title = os.path.basename(title).replace('. ', '_dot__space_')
-  title = title.replace(' .', '_space__dot_')
-  title = title.split('.')[0]
-  title = title.replace('_dot__space_', '. ')
-  title = title.replace('_space__dot_', ' .')
-  return title
+def oxdb_title(_title, searchTitle = False):
+    '''
+      normalize filename to get movie title
+    '''
+    _title = os.path.basename(_title).replace('. ', '_dot__space_')
+    _title = _title.replace(' .', '_space__dot_')
+    title = _title.split('.')[0]
+    se = re.compile('Season (\d+).Episode (\d+)').findall(_title)
+    if se:
+        se = "S%02dE%02d" % (int(se[0][0]), int(se[0][1]))
+        if 'Part' in _title.split('.')[-2] and 'Episode' not in _title.split('.')[-3]:
+            stitle = _title.split('.')[-3]
+        else:
+            stitle = _title.split('.')[-2]
+        if stitle.startswith('Episode '):
+            stitle = '' 
+        if searchTitle:
+            title = '"%s" %s' % (title, stitle)
+        else:
+            title = '%s (%s) %s' % (title, se, stitle)
+            title = title.strip()
+    title = title.replace('_dot__space_', '. ')
+    title = title.replace('_space__dot_', ' .')
+    return title
 
+def oxdb_year(data):
+    return oxlib.findRe(data, '\.(\d{4})\.')
+
+def oxdb_series_title(path):
+    seriesTitle = u''
+    if path.startswith('Series'):
+        seriesTitle = os.path.basename(os.path.dirname(path))
+    else:
+        t = oxdb_title(path)
+        if " (S" in t:
+            seriesTitle = t.split(" (S")[0]
+    return seriesTitle
+
+def oxdb_episode_title(path):
+    episodeTitle = u''
+    ep = re.compile('.Episode \d+?\.(.*?)\.[a-zA-Z]').findall(path)
+    if ep:
+        episodeTitle = ep[0]
+    return episodeTitle
+
+def oxdb_season_episode(path):
+    season = 0
+    episode = 0
+    path = os.path.basename(path)
+    se = re.compile('Season (\d+).Episode (\d+)').findall(path)
+    if se:
+        season = int(se[0][0])
+        episode = int(se[0][1])
+    else:
+        ep = re.compile('.Episode (\d+)').findall(path)
+        if ep:
+            episode = int(ep[0])
+    if season == 0 and episode == 0:
+        se = re.compile('S(\d\d)E(\d\d)').findall(path)
+        if se:
+            season = int(se[0][0])
+            episode = int(se[0][1])
+    return (season, episode)
 
 def oxdb_id(title, director):
   key = u"%s/%s" % (director,title)
   oxdb_id = '0x%s' % md5.new(key.encode('utf-8')).hexdigest()
   return oxdb_id
 
-'''
-seperate number with thousand comma
-'''
-def formatNumber(n, sep=','):
-  ln = list(str(n))
-  ln.reverse()
-  newn = []
-  while len(ln) > 3:
-    newn.extend(ln[:3])
-    newn.append(sep)
-    ln = ln[3:]
-    newn.extend(ln)
-    newn.reverse()
-  return "".join(newn)
-
-'''
-format runtime for stats
-'''
-def oxdb_runtimeformat(runtime):
-  if runtime == 0:
-    return ''
-  if runtime < 60:
-    return "%s sec" % runtime
-  minutes = int(runtime / 60)
-  seconds = runtime % 60
-  if minutes < 900:
-    return "%s min" % minutes
-  hours = int(minutes / 60)
-  minutes = minutes % 60
-  if hours < 24:
-    return "%s hours %s minutes %s seconds" % (hours, minutes, seconds)
-  days = int(hours / 24)
-  hours = hours % 24
-  if days < 365:
-    return "%s days %s hours %s minutes %s seconds" % (days, hours, minutes, seconds)
-  years = int(days / 365)
-  days = days % 365
-  return "%s years %s days %s hours %s minutes %s seconds" % (years, days, hours, minutes, seconds)
-
-def oxdb_lengthformat(mseconds):
-  """
-    Format mseconds in a nice way
-  """
-  seconds = mseconds/1000
-  minutes = int(seconds / 60)
-  seconds = seconds % 60
-  hours = int(minutes / 60)
-  minutes = minutes % 60
-  if hours > 24:
-    days = int(hours / 24)
-    hours = hours % 24
-    return "%d:%02d:%02d:%02d" % (days, hours, minutes, seconds)
-  return "%02d:%02d:%02d" % (hours, minutes, seconds)
-
-"""
-Format the value like a 'human-readable' file size (i.e. 13 KB, 4.1 MB, 102
-bytes, etc).
-  number - number to format.
-  long_name - long name. i.e. byte
-  short - short name, i.e. B
-"""
-def oxdb_format(number, long_name, short):
-  if not number:
-    return "0 %ss" % long_name
-  number = float(number)
-  if number < 1024:
-    return "%d %s%s" % (number, long_name, number != 1 and 's' or '')
-  if number < 1024 * 1024:
-    return "%d K%s" % ((number / 1024), short)
-  if number < 1024 * 1024 * 1024:
-    return "%.1f M%s" % (number / (1024 * 1024), short)
-  if number < 1024 * 1024 * 1024 * 1024:
-    return "%.2f G%s" % (number / (1024 * 1024 * 1024), short)
-  return "%.3f T%s" % (number / (1024 * 1024 * 1024 * 1024), short)
-
-def oxdb_filesizeformat(number):
-  return oxdb_format(number, 'byte', 'B')
-
-def oxdb_bitformat(number):
-  return oxdb_format(number, 'bit', 'b')
-
-def oxdb_pixelformat(number):
-  return oxdb_format(number, 'pixel', 'px')
-
-
-from htmlentitydefs import name2codepoint
-
-# This pattern matches a character entity reference (a decimal numeric
-# references, a hexadecimal numeric reference, or a named reference).
-charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
-
-def htmldecode(text):
-  """Decode HTML entities in the given text."""
-  if type(text) is unicode:
-    uchr = unichr
-  else:
-    uchr = lambda value: value > 255 and unichr(value) or chr(value)
-  def entitydecode(match, uchr=uchr):
-    entity = match.group(1)
-    if entity.startswith('#x'):
-      return uchr(int(entity[2:], 16))
-    elif entity.startswith('#'):
-      return uchr(int(entity[1:]))
-    elif entity in name2codepoint:
-      return uchr(name2codepoint[entity])
-    else:
-      return match.group(0)
-  return charrefpat.sub(entitydecode, text)
-
-def highlight(text, query):
-  if query:
-    m = re.compile("(%s)" % re.escape(query), re.IGNORECASE).findall(text)
-    for i in m:
-      text = re.sub("(%s)" % re.escape(i), '<span class="textHighlight">\\1</span>', text)
-  return text
-
 def imdb2oxdb(imdb):
   if imdb.startswith('0x') or imdb.startswith('ox') :
     return imdb
@@ -196,18 +114,6 @@ def trimString(string, length):
     string = string[:length - 13] + '...' + string[-10:]
   return string
 
-languages = ('be', 'bg', 'ba', 'wo', 'bn', 'bo', 'bh', 'bi', 'ji', 'br', 'ja',
- 'ru', 'rw', 'tl', 'rm', 'rn', 'ro', 'gu', 'jw', 'gd', 'ga', 'sv', 'gn', 'gl',
- 'om', 'tn', 'fa', 'oc', 'ss', 'or', 'hy', 'hr', 'sw', 'hu', 'hi', 'su', 'ha',
- 'ps', 'pt', 'sk', 'pa', 'pl', 'el', 'eo', 'en', 'zh', 'sm', 'eu', 'et', 'sa',
- 'es', 'mg', 'uz', 'ml', 'mo', 'mn', 'mi', 'as', 'mk', 'ur', 'mt', 'ms', 'mr',
- 'my', 'fr', 'fy', 'ia', 'zu', 'fi', 'fj', 'fo', 'nl', 'no', 'na', 'ne', 'xh',
- 'co', 'ca', 'cy', 'cs', 'ka', 'kk', 'sr', 'sq', 'ko', 'kn', 'km', 'kl', 'ks',
- 'si', 'sh', 'so', 'sn', 'ku', 'sl', 'ky', 'sg', 'sd', 'yo', 'de', 'da', 'dz',
- 'la', 'ln', 'lo', 'tt', 'tr', 'ts', 'lv', 'to', 'lt', 'tk', 'th', 'ti', 'tg',
- 'te', 'ta', 'aa', 'ab', 'uk', 'af', 'vi', 'is', 'am', 'it', 'iw', 'vo', 'ik',
- 'ar', 'in', 'ay', 'az', 'ie', 'qu', 'st', 'tw')
-
 def oxdb_makedir(folder):
   if not os.path.exists(folder):
     try: