get all connections

2023-07-07 14:50:14 +05:30 · 2023-07-07 14:50:14 +05:30 · 677b61877e
commit 677b61877e
parent 773d288f55
2 changed files with 62 additions and 2 deletions
--- a/ox/cache.py
+++ b/ox/cache.py
@@ -104,7 +104,10 @@ def read_url(url, data=None, headers=None, timeout=cache_timeout, valid=None, un
        if USE_REQUESTS:
            if headers is None:
                headers = DEFAULT_HEADERS.copy()
-            r = requests_session.get(url, headers=headers)
+            if data:
+                r = requests_session.post(url, data=data, headers=headers)
+            else:
+                r = requests_session.get(url, headers=headers)
            for key in r.headers:
                url_headers[key.lower()] = r.headers[key]
            result = r.content
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -122,6 +122,7 @@ def tech_spec(metadata):


 def movie_connections(metadata):
+    
    connections = {}
    if 'props' not in metadata:
        return connections
@@ -428,6 +429,7 @@ class Imdb(SiteParser):
    def __init__(self, id, timeout=-1):
        # http://www.imdb.com/help/show_leaf?titlelanguagedisplay
        self.baseUrl = "http://www.imdb.com/title/tt%s/" % id
+        self._id = id
        if timeout != 0:
            self._cache = {}
            url = self.baseUrl + 'releaseinfo'
@@ -576,7 +578,9 @@ class Imdb(SiteParser):
            except:
                pass

-        self['connections'] = movie_connections(self.get_page_data('movieconnections'))
+        #self['connections'] = movie_connections(self.get_page_data('movieconnections'))
+        self['connections'] = self._get_connections()
+
        spec = tech_spec(self.get_page_data('technical'))
        for key in spec:
            if not self.get(key):
@@ -682,6 +686,59 @@ class Imdb(SiteParser):
                self['episodeDirector'] = self['director']
            self['director'] = self['creator']

+    def _get_connections(self):
+        """Return this title's connections, grouped by category text.
+
+        Posts one GraphQL query for title ``tt<self._id>`` (asking for up
+        to 5000 connection edges) through ``read_url`` and decodes the
+        JSON reply.
+
+        Returns a dict mapping each category text to a list of dicts with
+        the keys ``id`` (the associated title id with its first two
+        characters, the ``tt`` prefix, dropped), ``title`` and
+        ``description`` (the edge's ``text``, or None if absent).
+        Returns an empty dict when the reply carries no title or
+        connection data; edges without an associated title are skipped.
+        """
+        query = '''query {
+    title(id: "tt%s") {
+        id
+        titleText {
+           text
+        }
+        connections(first: 5000) {
+            edges {
+                node {
+                    associatedTitle {
+                        id
+                        titleText {
+                            text
+                        }
+                    }
+                    category {
+                        text
+                    }
+                    text
+                }
+            }
+        }
+    }
+}
+''' % self._id
+        url = 'https://caching.graphql.imdb.com/'
+        headers = cache.DEFAULT_HEADERS.copy()
+        headers.update({
+            'Accept': 'application/graphql+json, application/json',
+            'Origin': 'https://www.imdb.com',
+            'Referer': 'https://www.imdb.com',
+            'x-imdb-user-country': 'US',
+            'x-imdb-user-language': 'en-US',
+            'content-type': 'application/json',
+            'Accept-Language': 'en,en-US;q=0.5'
+        })
+        payload = json.dumps({"query": query})
+        response = json.loads(read_url(url, data=payload, headers=headers))
+        # GraphQL allows "data" (and nullable fields inside it) to be null,
+        # e.g. on errors; treat that as "no connections" instead of raising.
+        title = (response.get('data') or {}).get('title') or {}
+        edges = (title.get('connections') or {}).get('edges') or []
+        connections = {}
+        for edge in edges:
+            node = edge['node']
+            associated = node.get('associatedTitle')
+            if not associated:
+                continue
+            connections.setdefault(node['category']['text'], []).append({
+                'id': associated['id'][2:],
+                'title': associated['titleText']['text'],
+                'description': node.get('text')
+            })
+        return connections
+

 class ImdbCombined(Imdb):
    def __init__(self, id, timeout=-1):