diff --git a/recommendation_engine.py b/recommendation_engine.py index bf67911..17972d6 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -1,6 +1,6 @@ ''' -Recommendation Engine Example -1 Nov 2017, 0x2620 +Recommendation Engine ver 1 +30 Nov 2018, 0x2620 ''' from collections import defaultdict @@ -95,7 +95,11 @@ class Engine: def get_videos(self, user): - # Update self_playlists first to reflect changes + ## Output is a dictionary of: user keyword scores, list of videos for each grid index (0-15), + ## and parameters to be displayed on debug view. + ## It implements "next clip" "next playlist" "stay same" grid allocation for the output video, depending on the user log history. + + # Update self_playlists to reflect user log history playlists = self.update_user_playlists(user) # Get the user keyword scores for debug view user_keywords = copy.deepcopy(user.get('keywords', {})) @@ -104,6 +108,7 @@ class Engine: top_user_keywords = sorted([(k,v) for (k,v) in theme_tags.items()], key=lambda kv: kv[1])[-5:] top_user_characters = sorted([(k,v) for (k,v) in character_tags.items()], key=lambda kv: kv[1])[-5:] + # If the most recent event is "login," initialize grid videos. if user.get('events', [{}])[0].get("event")=="login": return { 'user': { @@ -120,12 +125,12 @@ class Engine: sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} - # check if there were grid events for all indexes. grid_events = {} (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame")) video_num = nc + np + ns - # # The version where the loop also extract play_index; requires "index" in play event data (previously unavailable): + # collect the most recent grid event for each grid index and the grid index of the most recent play event. + # the following requires "index" in play event data (previously unavailable) play_index = None for event in user.get('events', []): if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: @@ -155,7 +160,7 @@ class Engine: video_indx = list(range(video_num)) random.shuffle(video_indx) else: - # played index is excluded from the random shuffle and deterministically added to staySame pool. + # play index is excluded from the random shuffle and deterministically added to staySame pool. video_indx = [*range(play_index)]+[*range(play_index+1,video_num)] random.shuffle(video_indx) video_indx.append(play_index) @@ -166,58 +171,53 @@ class Engine: rec_list = [] - # Make sure the playlist exists for staySame pool; otherwise add it to nextPlaylist pool. - for i in stay_same_index: - if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]: - next_playlist_index.append(stay_same_index) - # select next clip for nextClip pool except when the playlist has only one clip. It skips the clip with "pass":True when selecting the next clip. + # nextClip pool: select next clip except when the playlist has only one clip. skip the clip with "pass":True when selecting the next clip. for i in next_clip_index: + # add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated due to update_user_playlists(). if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]: - # add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated. - next_playlist_index.append(next_clip_index) - break + next_playlist_index.append(i) else: for playlist in playlists: if playlist.get('name')== prev_grid_list[i].get('playlist'): - unwatched_clips_indx = [j for j in range(len(playlist["clips"])) if playlist["clips"][j].get("pass",False)!=True] + unwatched_clips_indx = [j for j in range(len(playlist["clips"])) if not playlist["clips"][j].get("pass")] if len(playlist["clips"]) == 1: next_playlist_index.append(i) - break - # Discuss how this behavour should be: should it switch to a new playlist if it is the end of the playlist clip sequence already? - elif prev_grid_list[i].get('playlistPosition', 0) + 1 == len(playlist['clips']): - if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']: - playlist_pos = unwatched_clips_indx[0] - else: - next_playlist_index.append(i) else: - if len([j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']]) == 0: + next_unwatched_indx = [j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']] + if len(next_unwatched_indx) == 0: if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']: playlist_pos = unwatched_clips_indx[0] else: next_playlist_index.append(i) + break else: - playlist_pos = [j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']][0] + playlist_pos = next_unwatched_indx[0] - rec_list.append((i, { - 'clips': playlist['clips'], - # 'position': random.randrange(len(playlist['clips'])), - 'position': playlist_pos, - 'name': playlist['name'], - 'tags': playlist['tags'], - })) + rec_list.append((i, { + 'clips': playlist['clips'], + 'position': playlist_pos, + 'name': playlist['name'], + 'tags': playlist['tags'] + })) - # randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events) - # for nextPlaylist pool. + + #staySame pool + for i in stay_same_index: + # add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated due to update_user_playlists(). + if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]: + next_playlist_index.append(i) + else: + rec_list.append((i,{})) + + # nextPlaylist pool: randomly select playlists (excluding the playlists from the current grid). vids_exclude = [e.get("playlist") for e in prev_grid_list] while None in vids_exclude: vids_exclude.remove(None) video = self.get_recommendations(playlists, user, vids_exclude) rec_list += [(i, video[i]) for i in next_playlist_index] - #staySame pool - rec_list += [(i,{}) for i in stay_same_index] rec_list = sorted(rec_list, key=lambda k:k[0]) videos_ = [e[1] for e in rec_list] @@ -345,7 +345,6 @@ class Engine: # If the number of playlists is reduced to 30, reset it to the original. if len(playlists) < 30: playlists = copy.deepcopy(self.playlists) - return(playlists)