get all connections
This commit is contained in:
parent
773d288f55
commit
677b61877e
2 changed files with 62 additions and 2 deletions
|
@ -104,7 +104,10 @@ def read_url(url, data=None, headers=None, timeout=cache_timeout, valid=None, un
|
||||||
if USE_REQUESTS:
|
if USE_REQUESTS:
|
||||||
if headers is None:
|
if headers is None:
|
||||||
headers = DEFAULT_HEADERS.copy()
|
headers = DEFAULT_HEADERS.copy()
|
||||||
r = requests_session.get(url, headers=headers)
|
if data:
|
||||||
|
r = requests_session.post(url, data=data, headers=headers)
|
||||||
|
else:
|
||||||
|
r = requests_session.get(url, headers=headers)
|
||||||
for key in r.headers:
|
for key in r.headers:
|
||||||
url_headers[key.lower()] = r.headers[key]
|
url_headers[key.lower()] = r.headers[key]
|
||||||
result = r.content
|
result = r.content
|
||||||
|
|
|
@ -122,6 +122,7 @@ def tech_spec(metadata):
|
||||||
|
|
||||||
|
|
||||||
def movie_connections(metadata):
|
def movie_connections(metadata):
|
||||||
|
|
||||||
connections = {}
|
connections = {}
|
||||||
if 'props' not in metadata:
|
if 'props' not in metadata:
|
||||||
return connections
|
return connections
|
||||||
|
@ -428,6 +429,7 @@ class Imdb(SiteParser):
|
||||||
def __init__(self, id, timeout=-1):
|
def __init__(self, id, timeout=-1):
|
||||||
# http://www.imdb.com/help/show_leaf?titlelanguagedisplay
|
# http://www.imdb.com/help/show_leaf?titlelanguagedisplay
|
||||||
self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
|
self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
|
||||||
|
self._id = id
|
||||||
if timeout != 0:
|
if timeout != 0:
|
||||||
self._cache = {}
|
self._cache = {}
|
||||||
url = self.baseUrl + 'releaseinfo'
|
url = self.baseUrl + 'releaseinfo'
|
||||||
|
@ -576,7 +578,9 @@ class Imdb(SiteParser):
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self['connections'] = movie_connections(self.get_page_data('movieconnections'))
|
#self['connections'] = movie_connections(self.get_page_data('movieconnections'))
|
||||||
|
self['connections'] = self._get_connections()
|
||||||
|
|
||||||
spec = tech_spec(self.get_page_data('technical'))
|
spec = tech_spec(self.get_page_data('technical'))
|
||||||
for key in spec:
|
for key in spec:
|
||||||
if not self.get(key):
|
if not self.get(key):
|
||||||
|
@ -682,6 +686,59 @@ class Imdb(SiteParser):
|
||||||
self['episodeDirector'] = self['director']
|
self['episodeDirector'] = self['director']
|
||||||
self['director'] = self['creator']
|
self['director'] = self['creator']
|
||||||
|
|
||||||
|
def _get_connections(self):
    """Fetch this title's connections from the IMDb GraphQL endpoint.

    Posts a single GraphQL query for the title ``tt<self._id>`` and groups
    the returned connection edges by their category text.

    Returns:
        dict mapping category text to a list of dicts with the keys
        ``'id'`` (associated title id with its first two characters
        removed -- presumably the ``tt`` prefix), ``'title'`` and
        ``'description'``. An empty dict is returned when the response
        carries no title/connection data, mirroring how
        ``movie_connections`` returns an empty dict for missing input.
    """
    # NOTE(review): only one page of up to 5000 edges is requested and no
    # pagination is performed -- confirm this is enough for all titles.
    query = '''query {
  title(id: "tt%s") {
    id
    titleText {
      text
    }
    connections(first: 5000) {
      edges {
        node {
          associatedTitle {
            id
            titleText {
              text
            }
          }
          category {
            text
          }
          text
        }
      }
    }
  }
}
''' % self._id
    url = 'https://caching.graphql.imdb.com/'
    headers = cache.DEFAULT_HEADERS.copy()
    headers.update({
        'Accept': 'application/graphql+json, application/json',
        'Origin': 'https://www.imdb.com',
        'Referer': 'https://www.imdb.com',
        'x-imdb-user-country': 'US',
        'x-imdb-user-language': 'en-US',
        'content-type': 'application/json',
        'Accept-Language': 'en,en-US;q=0.5'
    })
    response = json.loads(read_url(url, data=json.dumps({
        "query": query
    }), headers=headers))

    # GraphQL fields may come back as null (e.g. unknown title id, or an
    # error response), so every level is unwrapped defensively instead of
    # being indexed blindly.
    title = (response.get('data') or {}).get('title') or {}
    edges = (title.get('connections') or {}).get('edges') or []

    connections = {}
    for edge in edges:
        node = edge.get('node') or {}
        category = (node.get('category') or {}).get('text')
        associated = node.get('associatedTitle') or {}
        associated_id = associated.get('id')
        if not category or not associated_id:
            # Without a category or a target title the edge cannot be filed.
            continue
        connections.setdefault(category, []).append({
            'id': associated_id[2:],
            'title': (associated.get('titleText') or {}).get('text'),
            'description': node.get('text')
        })
    return connections
|
||||||
|
|
||||||
|
|
||||||
class ImdbCombined(Imdb):
|
class ImdbCombined(Imdb):
|
||||||
def __init__(self, id, timeout=-1):
|
def __init__(self, id, timeout=-1):
|
||||||
|
|
Loading…
Reference in a new issue