resolved conflicts with master branch, added orig_in element for playlist, added update_user_playlists call in get_videos and get_next.

2018-11-16 23:47:02 +01:00 · 2018-11-16 23:47:02 +01:00 · 834ba630f1
commit 834ba630f1
parent acb9b499be
1 changed files with 72 additions and 44 deletions
--- a/recommendation_engine.py
+++ b/recommendation_engine.py
@ -43,9 +43,8 @@ class Engine:
        else:
            self.state = {
                'channels': {
-                    'globalKeywords': {'locked': False, 'value': 7},
+                    'globalKeywords': {'locked': False, 'value': 8},
-                    'userKeywords': {'locked': False, 'value': 7},
+                    'userKeywords': {'locked': False, 'value': 8}
                    'screenings': {'locked': True, 'value': 2}
                },
                'globalKeywords': {},
            }
@ -55,6 +54,12 @@ class Engine:
                'nextPlaylist': {'locked': False, 'value': 4},
                'staySame': {'locked': False, 'value': 8}
            }
        if 'userKeywordsWeights' not in self.state:
            self.state['userKeywordsWeights'] = {
                'themeTags': {'locked': False, 'value': 0.3},
                'characterTags': {'locked': False, 'value': 0.7}
            }
        self.update_keywords()
    @property
@ -87,8 +92,16 @@ class Engine:
    def get_videos(self, user):
-        if user.get('events', [{}])[0].get("event")=="login": 
+        # Update self_playlists first to reflect changes
-            return self.get_recommendations(user) 
+        update_user_playlists(self.playlists, user)
        if user.get('events', [{}])[0].get("event")=="login":
            return {
                'user': {
                    'keywords': user.get('keywords', {})
                },
                'videos': self.get_recommendations(user)
            }
        channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
        sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
@ -97,29 +110,28 @@ class Engine:
        # check if there were grid events for all indexes.
        grid_events = {}
        (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame"))
-        video_num = nc + np + ns 
+        video_num = nc + np + ns
-        # for event in user.get('events', []):
+        # # The version where the loop also extract play_index; requires "index" in play event data (previously unavailable):
        #     if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
        #         grid_events[event.get('data').get('index')] = event.get('data')
        #     if len(grid_events) == video_num:
        #         break
        # # The version where the loop also extract play_index (requires "index" in play event data):
        play_index = None
        for event in user.get('events', []):
            if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
                grid_events[event.get('data').get('index')] = event.get('data')
            if event.get('event') == "play" and event["data"].get("type") == "video" and not play_index:
-                play_index = event.get('data').get('index') 
+                play_index = event.get('data').get('index')
            if len(grid_events) == video_num and play_index:
-                break 
+                break
        prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index'])
        # if there were no grid events for all, initialize all grids.
-        if len(prev_grid_list) < video_num: 
+        if len(prev_grid_list) < video_num:
-            return self.get_recommendations(user)   
+            return {
                'user': {
                    'keywords': user.get('keywords', {})
                },
                'videos': self.get_recommendations(user)
            }
        else:
            if play_index is None:
@ -144,7 +156,6 @@ class Engine:
                    next_playlist_index.append(next_clip_index)
                    break
                else:
                # if "playlist" and "playlistPostion" (if not, default to 0) exists in grid event
                    for playlist in self.playlists:
                        if playlist.get('name')== prev_grid_list[i].get('playlist'):
                            if len(playlist["clips"]) == 1:
@ -176,13 +187,20 @@ class Engine:
            rec_list += [(i,{}) for i in stay_same_index]
            rec_list = sorted(rec_list, key=lambda k:k[0])
-            return [e[1] for e in rec_list]
+            videos_ = [e[1] for e in rec_list]
            return {
                'user': {
                    'keywords': user.get('keywords', {})
                },
                'videos': videos_
            }
    def get_recommendations(self, user, vids_exclude = []):
        channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
        sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
        gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
        userKeywordsWeights = {k: v.get('value', 1) for k, v in self.state['userKeywordsWeights'].items()}
        # Exclude playlists from the most recent grid 
        playlists = copy.deepcopy(self.playlists)
@ -191,13 +209,26 @@ class Engine:
                if playlist["name"] in vids_exclude:
                    playlists.remove(playlist)
-        # For each playlist, compute user keyword score
+        # For each playlist, compute user keyword score by theme and character tags
-        user_keywords = user.get('keywords', {})
+        user_keywords = copy.deepcopy(user.get('keywords', {}))
        theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()}
        character_tags = {k:v for k,v in user_keywords.items() if k.isupper()}
        # manually modify some of the user keywords to match the playlist tags
        theme_tags["god"] = theme_tags.get("god - gods",0)
        theme_tags["visionary"] = theme_tags.get("visionary - enlightenment",0)
        theme_tags["enlightenment"] = theme_tags.get("visionary - enlightenment",0)
        character_tags["FEDOR MIKHAILOVICH SOFRONOV"] = character_tags.get("FYODOR MIKHAILOVICH SOFRONOV",0)
        character_tags["SHKABARNYA OLGA SERGEEVNA"] = character_tags.get("OLGA SERGEEVNA SHKABARNYA",0)
        character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA",0)
        score = {}
        for playlist in playlists:
-            score[playlist['name']] = random.random()
+            score[playlist['name']] = random.random() * 0.001
-            for tag in [tag for tag in playlist['tags'] if tag in user_keywords]:
+            for tag in playlist['tags']:
-                score[playlist['name']] += user_keywords[tag]
+                if tag in theme_tags:
                    score[playlist['name']] += theme_tags[tag] * userKeywordsWeights["themeTags"]
                elif tag in character_tags:
                    score[playlist['name']] += character_tags[tag] * userKeywordsWeights["characterTags"]
        # Select highest scoring playlists
        playlists = sorted(
            playlists,
@ -216,23 +247,7 @@ class Engine:
            playlists,
            key=lambda playlist: -score[playlist['name']]
        )
-        videos += playlists[:channels['globalKeywords']]
+        videos += playlists[:16 - channels['userKeywords']]
        playlists = playlists[channels['globalKeywords']:]
        # Count products the user has seen
        count = defaultdict(lambda: 0)
        for event in user.get('events', []):
            if event.get('data', {}).get('product'):
                count[event['data']['product']] += 1
        # For each product in playlist tags, increment score by count
        for playlist in playlists:
            score[playlist['name']] = random.random()
            for tag in set(playlist['tags']) & set(count):
                score[playlist['name']] += count[tag]
        # Select highest scoring playlists
        videos += sorted(
            playlists,
            key=lambda playlist: -score[playlist['name']]
        )[:16 - channels['userKeywords'] - channels['globalKeywords']]
        # Shuffle playlists (randomize layout) and shift clips (randomize start)
        random.shuffle(videos)
        return [{
@ -255,7 +270,8 @@ class Engine:
        watched = []
        clip_max_dur = 10800 # = 3 hours; arbitrary max duration allowed for (pause time - play time) to detect outlier/bugs
        # The current max time of a clip duration is 10379.383333377269 from "DDLaunch: Erik Verlinde, Gravity as an emergent force (1956)"
-        for event in user["events"][::-1]:
+        # A user could potentially spend more than 3 hours if they keep watching after the clip enters into the subsequent "scene"
        for event in user.get('events', [])[::-1]:
            if event["event"] == "play" and event["data"].get("type") == "video":
                play = event
            elif event["event"] == "pause" and play!={} and event["data"].get("type") == "video":
@ -270,9 +286,18 @@ class Engine:
                                if play["data"]["position"] >= max(playlist["clips"][i]["in"] - 15, 0) and event["data"]["position"] <= playlist["clips"][i]["out"] + 15:
                                    # This assumes the (play, pause) fits inside the clip's (in, out) segment with +/- 15secs buffer. There were newer edits of clip positions with 12 seconds difference.
                                    # instances where this might not be the case: clip in/out may be largely edited (before after edit inconsistency); skip may trigger jump to a wrong clip (bug)
-                                    if event["data"]["position"] >= ((playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]):
+                                    if "orig_in" not in playlist["clips"][i]:
                                        cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]
                                    else:
                                        cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["orig_in"])*watch_cutoff + playlist["clips"][i]["orig_in"]
                                    if event["data"]["position"] >= cutoff_pos:
                                        watched.append((playlist["name"],i))
-                                    else: 
+                                    else:
                                        if "orig_in" not in playlist["clips"][i]:
                                            # record the original "in" position to calculate cutoff position in the future
                                            playlist["clips"][i]["orig_in"] = playlist["clips"][i]["in"]
                                        # update "in" position of the clip in the playlist
                                        playlist["clips"][i]["in"] = event["data"]["position"]
                                break
                play = {}
@ -292,6 +317,9 @@ class Engine:
    def get_next(self, user, position):
        # Update self_playlists first to reflect changes
        update_user_playlists(self.playlists, user)
        grid_events = {}
        video_num = 16