diff --git a/ox/cache.py b/ox/cache.py
index c359cbd..ba41574 100644
--- a/ox/cache.py
+++ b/ox/cache.py
@@ -104,7 +104,10 @@ def read_url(url, data=None, headers=None, timeout=cache_timeout, valid=None, un
         if USE_REQUESTS:
             if headers is None:
                 headers = DEFAULT_HEADERS.copy()
-            r = requests_session.get(url, headers=headers)
+            if data:
+                r = requests_session.post(url, data=data, headers=headers)
+            else:
+                r = requests_session.get(url, headers=headers)
             for key in r.headers:
                 url_headers[key.lower()] = r.headers[key]
             result = r.content
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index fd5cba4..affbf99 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -122,6 +122,7 @@ def tech_spec(metadata):
 
 
 def movie_connections(metadata):
+    connections = {}
     if 'props' not in metadata:
         return connections
 
@@ -428,6 +429,7 @@ class Imdb(SiteParser):
     def __init__(self, id, timeout=-1):
         # http://www.imdb.com/help/show_leaf?titlelanguagedisplay
         self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
+        self._id = id
         if timeout != 0:
             self._cache = {}
             url = self.baseUrl + 'releaseinfo'
@@ -576,7 +578,9 @@ class Imdb(SiteParser):
             except:
                 pass
 
-        self['connections'] = movie_connections(self.get_page_data('movieconnections'))
+        #self['connections'] = movie_connections(self.get_page_data('movieconnections'))
+        self['connections'] = self._get_connections()
+
         spec = tech_spec(self.get_page_data('technical'))
         for key in spec:
             if not self.get(key):
@@ -682,6 +686,73 @@ class Imdb(SiteParser):
             self['episodeDirector'] = self['director']
             self['director'] = self['creator']
 
+    def _get_connections(self):
+        '''
+        Fetch this title's connections from the IMDb GraphQL endpoint.
+
+        Returns a dict mapping each connection category text to a list of
+        {'id', 'title', 'description'} dicts; 'id' is the associated title
+        id with its leading two characters (the "tt" prefix) dropped.
+        Edges lacking a category or an associated title id are skipped, and
+        an empty dict is returned when the response carries no title data.
+        '''
+        query = '''query {
+  title(id: "tt%s") {
+    id
+    titleText {
+      text
+    }
+    connections(first: 5000) {
+      edges {
+        node {
+          associatedTitle {
+            id
+            titleText {
+              text
+            }
+          }
+          category {
+            text
+          }
+          text
+        }
+      }
+    }
+  }
+}
+''' % self._id
+        url = 'https://caching.graphql.imdb.com/'
+        headers = cache.DEFAULT_HEADERS.copy()
+        headers.update({
+            'Accept': 'application/graphql+json, application/json',
+            'Origin': 'https://www.imdb.com',
+            'Referer': 'https://www.imdb.com',
+            'x-imdb-user-country': 'US',
+            'x-imdb-user-language': 'en-US',
+            'content-type': 'application/json',
+            'Accept-Language': 'en,en-US;q=0.5'
+        })
+        response = json.loads(read_url(url, data=json.dumps({
+            "query": query
+        }), headers=headers))
+        # GraphQL reports failures in-band, so "data", "title" or any nested
+        # object may be null; treat null like missing instead of raising.
+        title = (response.get('data') or {}).get('title') or {}
+        edges = (title.get('connections') or {}).get('edges') or []
+        connections = {}
+        for edge in edges:
+            node = edge.get('node') or {}
+            category = (node.get('category') or {}).get('text')
+            associated = node.get('associatedTitle') or {}
+            if not category or not associated.get('id'):
+                continue
+            connections.setdefault(category, []).append({
+                'id': associated['id'][2:],
+                'title': (associated.get('titleText') or {}).get('text'),
+                'description': node.get('text')
+            })
+        return connections
+
 class ImdbCombined(Imdb):
 
     def __init__(self, id, timeout=-1):