Updated comments and cleaned up the nextClip and staySame processing in get_videos, reflecting the recent change to update_user_playlists().

2018-11-29 15:27:21 +01:00 · 2018-11-29 15:27:21 +01:00 · 319e4d384e
commit 319e4d384e
parent 2405b8b7c7
1 changed files with 35 additions and 36 deletions
--- a/recommendation_engine.py
+++ b/recommendation_engine.py
@ -1,6 +1,6 @@
 '''
-Recommendation Engine Example
+Recommendation Engine ver 1
-1 Nov 2017, 0x2620
+30 Nov 2018, 0x2620
 '''
 from collections import defaultdict
@ -95,7 +95,11 @@ class Engine:
    def get_videos(self, user):
-        # Update self_playlists first to reflect changes
+        ## Output is a dictionary of: user keyword scores, list of videos for each grid index (0-15),
        ## and parameters to be displayed on debug view.
        ## It implements the "next clip", "next playlist" and "stay same" grid allocation for the output videos, depending on the user's log history.
        # Update self_playlists to reflect user log history
        playlists = self.update_user_playlists(user)
        # Get the user keyword scores for debug view
        user_keywords = copy.deepcopy(user.get('keywords', {}))
@ -104,6 +108,7 @@ class Engine:
        top_user_keywords = sorted([(k,v) for (k,v) in theme_tags.items()], key=lambda kv: kv[1])[-5:]
        top_user_characters = sorted([(k,v) for (k,v) in character_tags.items()], key=lambda kv: kv[1])[-5:]
        # If the most recent event is "login," initialize grid videos.
        if user.get('events', [{}])[0].get("event")=="login":
            return {
                'user': {
@ -120,12 +125,12 @@ class Engine:
        sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
        grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
        # check if there were grid events for all indexes.
        grid_events = {}
        (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame"))
        video_num = nc + np + ns
-        # # The version where the loop also extract play_index; requires "index" in play event data (previously unavailable):
+        # collect the most recent grid event for each grid index and the grid index of the most recent play event.
        # the following requires "index" in play event data (previously unavailable)
        play_index = None
        for event in user.get('events', []):
            if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
@ -155,7 +160,7 @@ class Engine:
                video_indx = list(range(video_num))
                random.shuffle(video_indx)
            else:
-            # played index is excluded from the random shuffle and deterministically added to staySame pool.
+            # play index is excluded from the random shuffle and deterministically added to staySame pool.
                video_indx = [*range(play_index)]+[*range(play_index+1,video_num)]
                random.shuffle(video_indx)
                video_indx.append(play_index)
@ -166,58 +171,53 @@ class Engine:
            rec_list = []
            # Make sure the playlist exists for staySame pool; otherwise add it to nextPlaylist pool.
            for i in stay_same_index:
                if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]:
                    next_playlist_index.append(stay_same_index)
-            # select next clip for nextClip pool except when the playlist has only one clip. It skips the clip with "pass":True when selecting the next clip.
+            # nextClip pool: select next clip except when the playlist has only one clip. skip the clip with "pass":True when selecting the next clip.
            for i in next_clip_index:
                # add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated due to update_user_playlists().
                if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]:
-                # add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated.
+                    next_playlist_index.append(i)
                    next_playlist_index.append(next_clip_index)
                    break
                else:
                    for playlist in playlists:
                        if playlist.get('name')== prev_grid_list[i].get('playlist'):
-                            unwatched_clips_indx = [j for j in range(len(playlist["clips"])) if playlist["clips"][j].get("pass",False)!=True]
+                            unwatched_clips_indx = [j for j in range(len(playlist["clips"])) if not playlist["clips"][j].get("pass")]
                            if len(playlist["clips"]) == 1:
                                next_playlist_index.append(i)
                            else:
                                next_unwatched_indx = [j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']]
                                if len(next_unwatched_indx) == 0:
                                    if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']:
                                        playlist_pos = unwatched_clips_indx[0]
                                    else:
                                        next_playlist_index.append(i)
                                        break
                            # To discuss: how should this behave? Should it switch to a new playlist when the end of the playlist's clip sequence has already been reached?
                            elif prev_grid_list[i].get('playlistPosition', 0) + 1 == len(playlist['clips']):
                                if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']:
                                    playlist_pos = unwatched_clips_indx[0]
                                else:
-                                    next_playlist_index.append(i)
+                                    playlist_pos = next_unwatched_indx[0]
                            else:
                                if len([j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']]) == 0:
                                    if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']:
                                        playlist_pos = unwatched_clips_indx[0]
                                    else:
                                        next_playlist_index.append(i)
                                else:
                                    playlist_pos = [j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']][0]
                                rec_list.append((i, {
                                    'clips': playlist['clips'],
                                # 'position': random.randrange(len(playlist['clips'])),
                                    'position': playlist_pos,
                                    'name': playlist['name'],
-                                'tags': playlist['tags'],
+                                    'tags': playlist['tags']
                                }))
-            # randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events)
+
-            # for nextPlaylist pool.
+            #staySame pool
            for i in stay_same_index:
                # add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated due to update_user_playlists().
                if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]:
                    next_playlist_index.append(i)
                else:
                    rec_list.append((i,{}))
            # nextPlaylist pool: randomly select playlists (excluding the playlists from the current grid).
            vids_exclude = [e.get("playlist") for e in prev_grid_list]
            while None in vids_exclude:
                vids_exclude.remove(None)
            video = self.get_recommendations(playlists, user, vids_exclude)
            rec_list += [(i, video[i]) for i in next_playlist_index]
            #staySame pool
            rec_list += [(i,{}) for i in stay_same_index]
            rec_list = sorted(rec_list, key=lambda k:k[0])
            videos_ = [e[1] for e in rec_list]
@ -345,7 +345,6 @@ class Engine:
        # If the number of playlists drops below 30, reset it to a fresh copy of the original self.playlists.
        if len(playlists) < 30:
            playlists = copy.deepcopy(self.playlists)
        return(playlists)