get all connections
This commit is contained in:
parent
773d288f55
commit
677b61877e
2 changed files with 62 additions and 2 deletions
|
@ -104,7 +104,10 @@ def read_url(url, data=None, headers=None, timeout=cache_timeout, valid=None, un
|
|||
if USE_REQUESTS:
|
||||
if headers is None:
|
||||
headers = DEFAULT_HEADERS.copy()
|
||||
r = requests_session.get(url, headers=headers)
|
||||
if data:
|
||||
r = requests_session.post(url, data=data, headers=headers)
|
||||
else:
|
||||
r = requests_session.get(url, headers=headers)
|
||||
for key in r.headers:
|
||||
url_headers[key.lower()] = r.headers[key]
|
||||
result = r.content
|
||||
|
|
|
@ -122,6 +122,7 @@ def tech_spec(metadata):
|
|||
|
||||
|
||||
def movie_connections(metadata):
|
||||
|
||||
connections = {}
|
||||
if 'props' not in metadata:
|
||||
return connections
|
||||
|
@ -428,6 +429,7 @@ class Imdb(SiteParser):
|
|||
def __init__(self, id, timeout=-1):
|
||||
# http://www.imdb.com/help/show_leaf?titlelanguagedisplay
|
||||
self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
|
||||
self._id = id
|
||||
if timeout != 0:
|
||||
self._cache = {}
|
||||
url = self.baseUrl + 'releaseinfo'
|
||||
|
@ -576,7 +578,9 @@ class Imdb(SiteParser):
|
|||
except:
|
||||
pass
|
||||
|
||||
self['connections'] = movie_connections(self.get_page_data('movieconnections'))
|
||||
#self['connections'] = movie_connections(self.get_page_data('movieconnections'))
|
||||
self['connections'] = self._get_connections()
|
||||
|
||||
spec = tech_spec(self.get_page_data('technical'))
|
||||
for key in spec:
|
||||
if not self.get(key):
|
||||
|
@ -682,6 +686,59 @@ class Imdb(SiteParser):
|
|||
self['episodeDirector'] = self['director']
|
||||
self['director'] = self['creator']
|
||||
|
||||
def _get_connections(self):
    """Fetch this title's connections from IMDb's GraphQL endpoint.

    Posts one query asking for up to 5000 connection edges of the title
    ``tt<self._id>`` and groups the returned edges by their category text.

    Returns:
        dict mapping each category text to a list of dicts with the keys
        ``id`` (the associated title id with its two-character ``tt``
        prefix removed), ``title`` and ``description``. An empty dict is
        returned when the response holds no title or no connection edges.
    """
    query = '''query {
  title(id: "tt%s") {
    id
    titleText {
      text
    }
    connections(first: 5000) {
      edges {
        node {
          associatedTitle {
            id
            titleText {
              text
            }
          }
          category {
            text
          }
          text
        }
      }
    }
  }
}
''' % self._id
    url = 'https://caching.graphql.imdb.com/'
    headers = cache.DEFAULT_HEADERS.copy()
    headers.update({
        'Accept': 'application/graphql+json, application/json',
        'Origin': 'https://www.imdb.com',
        'Referer': 'https://www.imdb.com',
        'x-imdb-user-country': 'US',
        'x-imdb-user-language': 'en-US',
        'content-type': 'application/json',
        'Accept-Language': 'en,en-US;q=0.5'
    })
    payload = json.dumps({
        "query": query
    })
    response = json.loads(read_url(url, data=payload, headers=headers))

    # A GraphQL response may carry null for "data", "title" or any nested
    # object (e.g. unknown title id or a partial error), so every level is
    # read defensively instead of being indexed directly.
    data = response.get('data') or {}
    title = data.get('title') or {}
    edges = (title.get('connections') or {}).get('edges') or []

    connections = {}
    for edge in edges:
        node = edge.get('node') or {}
        category = (node.get('category') or {}).get('text')
        associated = node.get('associatedTitle') or {}
        # An edge without a category or a target title id cannot be filed
        # under any key, so it is skipped rather than raising.
        if not category or not associated.get('id'):
            continue
        connections.setdefault(category, []).append({
            # Drop the leading "tt" so the id matches the bare form used
            # when building the query above.
            'id': associated['id'][2:],
            'title': (associated.get('titleText') or {}).get('text'),
            'description': node.get('text')
        })
    return connections
|
||||
|
||||
|
||||
class ImdbCombined(Imdb):
|
||||
def __init__(self, id, timeout=-1):
|
||||
|
|
Loading…
Reference in a new issue