resolved conflict with master branch, added orig_in element for playlist clips, added update_user_playlists call in get_videos and get_next.

2018-11-16 23:47:02 +01:00 · 2018-11-16 23:47:02 +01:00 · 834ba630f1
commit 834ba630f1
parent acb9b499be
1 changed files with 72 additions and 44 deletions
--- a/recommendation_engine.py
+++ b/recommendation_engine.py
@ -43,9 +43,8 @@ class Engine:
        else:
            self.state = {
                'channels': {
-                    'globalKeywords': {'locked': False, 'value': 7},
-                    'userKeywords': {'locked': False, 'value': 7},
-                    'screenings': {'locked': True, 'value': 2}
+                    'globalKeywords': {'locked': False, 'value': 8},
+                    'userKeywords': {'locked': False, 'value': 8}
                },
                'globalKeywords': {},
            }
@ -55,6 +54,12 @@ class Engine:
                'nextPlaylist': {'locked': False, 'value': 4},
                'staySame': {'locked': False, 'value': 8}
            }
+        if 'userKeywordsWeights' not in self.state:
+            self.state['userKeywordsWeights'] = {
+                'themeTags': {'locked': False, 'value': 0.3},
+                'characterTags': {'locked': False, 'value': 0.7}
+
+            }
        self.update_keywords()

    @property
@ -87,8 +92,16 @@ class Engine:


    def get_videos(self, user):
-        if user.get('events', [{}])[0].get("event")=="login": 
-            return self.get_recommendations(user) 
+        # Update self_playlists first to reflect changes
+        update_user_playlists(self.playlists, user)
+
+        if user.get('events', [{}])[0].get("event")=="login":
+            return {
+                'user': {
+                    'keywords': user.get('keywords', {})
+                },
+                'videos': self.get_recommendations(user)
+            }

        channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
        sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
@ -97,29 +110,28 @@ class Engine:
        # check if there were grid events for all indexes.
        grid_events = {}
        (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame"))
-        video_num = nc + np + ns 
+        video_num = nc + np + ns

-        # for event in user.get('events', []):
-        #     if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
-        #         grid_events[event.get('data').get('index')] = event.get('data')
-        #     if len(grid_events) == video_num:
-        #         break
-
-        # # The version where the loop also extract play_index (requires "index" in play event data):
+        # # The version where the loop also extract play_index; requires "index" in play event data (previously unavailable):
        play_index = None
        for event in user.get('events', []):
            if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
                grid_events[event.get('data').get('index')] = event.get('data')
            if event.get('event') == "play" and event["data"].get("type") == "video" and not play_index:
-                play_index = event.get('data').get('index') 
+                play_index = event.get('data').get('index')
            if len(grid_events) == video_num and play_index:
-                break 
+                break

        prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index'])

        # if there were no grid events for all, initialize all grids.
-        if len(prev_grid_list) < video_num: 
-            return self.get_recommendations(user)   
+        if len(prev_grid_list) < video_num:
+            return {
+                'user': {
+                    'keywords': user.get('keywords', {})
+                },
+                'videos': self.get_recommendations(user)
+            }

        else:
            if play_index is None:
@ -144,7 +156,6 @@ class Engine:
                    next_playlist_index.append(next_clip_index)
                    break
                else:
-                # if "playlist" and "playlistPostion" (if not, default to 0) exists in grid event
                    for playlist in self.playlists:
                        if playlist.get('name')== prev_grid_list[i].get('playlist'):
                            if len(playlist["clips"]) == 1:
@ -176,13 +187,20 @@ class Engine:
            rec_list += [(i,{}) for i in stay_same_index]

            rec_list = sorted(rec_list, key=lambda k:k[0])
-            return [e[1] for e in rec_list]
+            videos_ = [e[1] for e in rec_list]
+            return {
+                'user': {
+                    'keywords': user.get('keywords', {})
+                },
+                'videos': videos_
+            }


    def get_recommendations(self, user, vids_exclude = []):
        channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
        sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
        gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
+        userKeywordsWeights = {k: v.get('value', 1) for k, v in self.state['userKeywordsWeights'].items()}

        # Exclude playlists from the most recent grid 
        playlists = copy.deepcopy(self.playlists)
@ -191,13 +209,26 @@ class Engine:
                if playlist["name"] in vids_exclude:
                    playlists.remove(playlist)

-        # For each playlist, compute user keyword score
-        user_keywords = user.get('keywords', {})
+        # For each playlist, compute user keyword score by theme and character tags
+        user_keywords = copy.deepcopy(user.get('keywords', {}))
+        theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()}
+        character_tags = {k:v for k,v in user_keywords.items() if k.isupper()}
+        # manually modify some of the user keywords to match the playlist tags
+        theme_tags["god"] = theme_tags.get("god - gods",0)
+        theme_tags["visionary"] = theme_tags.get("visionary - enlightenment",0)
+        theme_tags["enlightenment"] = theme_tags.get("visionary - enlightenment",0)
+        character_tags["FEDOR MIKHAILOVICH SOFRONOV"] = character_tags.get("FYODOR MIKHAILOVICH SOFRONOV",0)
+        character_tags["SHKABARNYA OLGA SERGEEVNA"] = character_tags.get("OLGA SERGEEVNA SHKABARNYA",0)
+        character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA",0)
+
        score = {}
        for playlist in playlists:
-            score[playlist['name']] = random.random()
-            for tag in [tag for tag in playlist['tags'] if tag in user_keywords]:
-                score[playlist['name']] += user_keywords[tag]
+            score[playlist['name']] = random.random() * 0.001
+            for tag in playlist['tags']:
+                if tag in theme_tags:
+                    score[playlist['name']] += theme_tags[tag] * userKeywordsWeights["themeTags"]
+                elif tag in character_tags:
+                    score[playlist['name']] += character_tags[tag] * userKeywordsWeights["characterTags"]
        # Select highest scoring playlists
        playlists = sorted(
            playlists,
@ -216,23 +247,7 @@ class Engine:
            playlists,
            key=lambda playlist: -score[playlist['name']]
        )
-        videos += playlists[:channels['globalKeywords']]
-        playlists = playlists[channels['globalKeywords']:]
-        # Count products the user has seen
-        count = defaultdict(lambda: 0)
-        for event in user.get('events', []):
-            if event.get('data', {}).get('product'):
-                count[event['data']['product']] += 1
-        # For each product in playlist tags, increment score by count
-        for playlist in playlists:
-            score[playlist['name']] = random.random()
-            for tag in set(playlist['tags']) & set(count):
-                score[playlist['name']] += count[tag]
-        # Select highest scoring playlists
-        videos += sorted(
-            playlists,
-            key=lambda playlist: -score[playlist['name']]
-        )[:16 - channels['userKeywords'] - channels['globalKeywords']]
+        videos += playlists[:16 - channels['userKeywords']]
        # Shuffle playlists (randomize layout) and shift clips (randomize start)
        random.shuffle(videos)
        return [{
@ -255,7 +270,8 @@ class Engine:
        watched = []
        clip_max_dur = 10800 # = 3 hours; arbitrary max duration allowed for (pause time - play time) to detect outlier/bugs
        # The current max time of a clip duration is 10379.383333377269 from "DDLaunch: Erik Verlinde, Gravity as an emergent force (1956)"
-        for event in user["events"][::-1]:
+        # A user could potentially spend more than 3 hours if they keep watching after the clip enters into the subsequent "scene"
+        for event in user.get('events', [])[::-1]:
            if event["event"] == "play" and event["data"].get("type") == "video":
                play = event
            elif event["event"] == "pause" and play!={} and event["data"].get("type") == "video":
@ -270,9 +286,18 @@ class Engine:
                                if play["data"]["position"] >= max(playlist["clips"][i]["in"] - 15, 0) and event["data"]["position"] <= playlist["clips"][i]["out"] + 15:
                                    # This assumes the (play, pause) fits inside the clip's (in, out) segment with +/- 15secs buffer. There were newer edits of clip positions with 12 seconds difference.
                                    # instances where this might not be the case: clip in/out may be largely edited (before after edit inconsistency); skip may trigger jump to a wrong clip (bug)
-                                    if event["data"]["position"] >= ((playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]):
+                                    if "orig_in" not in playlist["clips"][i]:
+                                        cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]
+                                    else:
+                                        cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["orig_in"])*watch_cutoff + playlist["clips"][i]["orig_in"]
+
+                                    if event["data"]["position"] >= cutoff_pos:
                                        watched.append((playlist["name"],i))
-                                    else: 
+                                    else:
+                                        if "orig_in" not in playlist["clips"][i]:
+                                            # record the original "in" position to calculate cutoff position in the future
+                                            playlist["clips"][i]["orig_in"] = playlist["clips"][i]["in"]
+                                        # update "in" position of the clip in the playlist
                                        playlist["clips"][i]["in"] = event["data"]["position"]
                                break
                play = {}
@ -292,6 +317,9 @@ class Engine:


    def get_next(self, user, position):
+        # Update self_playlists first to reflect changes
+        update_user_playlists(self.playlists, user)
+        
        grid_events = {}
        video_num = 16