diff --git a/ox/js.py b/ox/js.py index 2f419bd..43b6fb0 100644 --- a/ox/js.py +++ b/ox/js.py @@ -95,7 +95,7 @@ def tokenize(source): '.', ',', ';' ] REGEXP = 'abcdefghijklmnopqrstuvwxyz' - STRING = '\'"`' + STRING = '\'"' WHITESPACE = ' \t' def is_regexp(): # checks if a forward slash is the beginning of a regexp, diff --git a/ox/web/imdb.py b/ox/web/imdb.py index ac12c83..fb109be 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -36,7 +36,7 @@ def delete_url(url, data=None, headers=cache.DEFAULT_HEADERS): cache.store.delete(url, data, headers) def get_url(id): - return "http://www.imdb.com/title/tt%s/" % id + return "http://akas.imdb.com/title/tt%s/" % id def reference_section(id): @@ -334,6 +334,7 @@ class Imdb(SiteParser): return self._cache[url] def __init__(self, id, timeout=-1): + # use akas.imdb.com to always get original title: # http://www.imdb.com/help/show_leaf?titlelanguagedisplay self.baseUrl = "http://www.imdb.com/title/tt%s/" % id super(Imdb, self).__init__(timeout) @@ -627,7 +628,7 @@ def get_movie_by_title(title, timeout=-1): except: params['q'] = params['q'].encode('utf-8') params = urlencode(params) - url = "http://www.imdb.com/find?" + params + url = "http://akas.imdb.com/find?" + params data = read_url(url, timeout=timeout, unicode=True) #if search results in redirect, get id of current page r = '' @@ -704,7 +705,7 @@ def get_movie_id(title, director='', year='', timeout=-1): except: params['q'] = params['q'].encode('utf-8') params = urlencode(params) - url = "http://www.imdb.com/find?" + params + url = "http://akas.imdb.com/find?" + params #print url data = read_url(url, timeout=timeout, unicode=True)