From 2eae1aa2a7b5dcb6a02f768456b571f41ca3dc7b Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Fri, 20 Jul 2018 15:59:42 +0200 Subject: [PATCH 1/5] added gridChange variables and TODO comments --- .gitignore | 4 ++++ recommendation_engine.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/.gitignore b/.gitignore index fe7241d..9f4d1c3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ json/* +venv/* __pycache__/ *.swp +*.ipynb +*.pyc +test.py diff --git a/recommendation_engine.py b/recommendation_engine.py index 98c1b26..39138f0 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -46,6 +46,12 @@ class Engine: 'screenings': {'locked': True, 'value': 2} }, 'globalKeywords': {}, + 'gridChange': { + 'nextClip': {'locked': False, 'value': 4}, + 'nextPlaylist': {'locked': False, 'value': 4}, + 'staySame': {'locked': False, 'value': 8} + + } } self.update_keywords() @@ -77,9 +83,14 @@ class Engine: clips[inpoint['index']]['out'] = self.pandora.get(video_id, ['duration'])['duration'] return clips +### TASK 1 ### +# Change this to (1) check if there's no grid event and if so: initialize, and +# (2) if not: add serendipity to the current grid mix using gridChange variables. + def get_videos(self, user): channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} + gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} # For each playlist, compute user keyword score user_keywords = user.get('keywords', {}) score = {} @@ -131,6 +142,10 @@ class Engine: 'tags': video['tags'], } for video in videos] + +### TASK 2 ### +# Change this to make sure the next playlist is not the current playlist +# AND not the playlists displayed in the current grid. Incorporate the change from Task 1. 
def get_next(self, user, position): video = self.get_videos(user)[position] return video From 29f892cafad8ae2f8f0df2e0406fbfc90c799469 Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Wed, 25 Jul 2018 13:12:36 +0200 Subject: [PATCH 2/5] implemented issue 1 - vanilla version that works without user event data for playlist, clip position, clip index. To be updated as the data is updated --- recommendation_engine.py | 98 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 10 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 39138f0..14fdbc7 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -15,6 +15,7 @@ import ox from utils import run_async logger = logging.getLogger(__name__) +verbose = True class Engine: @@ -83,24 +84,94 @@ class Engine: clips[inpoint['index']]['out'] = self.pandora.get(video_id, ['duration'])['duration'] return clips -### TASK 1 ### -# Change this to (1) check if there's no grid event and if so: initialize, and -# (2) if not: add serendipity to the current grid mix using gridChange variables. - def get_videos(self, user): + channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} + sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} + grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} + + # check if there were grid events for all indexes. + grid_events = {} + (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame")) + # this assumes np + nc + ns = total number of videos in the grid view (16). + # Make sure sanity check exists in front-end (error if it does not add up to 16). 
+ video_num = nc + np + ns + + for event in user.get('events', []): + if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: + grid_events[event.get('data').get('index')] = event.get('data') + if len(grid_events) == video_num: + break + prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) + + # if there were no grid events for all, initialize all grids. + if len(prev_grid_list) < video_num: + return self.get_recommendations(user) + + else: + # Once the "index" is added to play event data, played index will be excluded from the random shuffle + # and deterministically added to staySame pool. + # video_indx = [*range(playIndex)]+[*range(playIndex+1,video_num)] + video_indx = list(range(video_num)) + random.shuffle(video_indx) + rec_list = [] + # For now, randomly choose a clip in the same playlist instead of switching to the next clip. + # This will be changed once the clip position is tracked in the user data. + for i in video_indx[:nc]: + if prev_grid_list[i].get('playlist') is None: + # add this to deal with the absence of "playlist" data in a grid event temporarily. + # If there's no playlist data recorded, add the nextClip pool to nextPlaylist pool for now. 
+ np += nc + nc = 0 + break + else: + # if "playlist" data exists in grid event (it should be in the future) + for playlist in playlists: + if playlist.get('name')== prev_grid_list[i].get('playlist'): + rec_list.append((i, { + 'clips': playlist['clips'], + 'position': random.randrange(len(playlist['clips'])), + 'name': playlist['name'], + 'tags': playlist['tags'], + })) + + # randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events) + # for nextPlaylist pool + vids_exclude = [] + # vids_exclude = [e.get("playlist") for e in prev_grid_list] + video = self.get_recommendations(user, vids_exclude) + rec_list += [(i, video[i]) for i in video_indx[nc:nc+np]] + + #staySame pool + rec_list += [(i,{}) for i in video_indx[nc+np:]] + + rec_list = sorted(rec_list, key=lambda k:k[0]) + return [e[1] for e in rec_list] + + +# NOTE for future improvement: vids_exclude element unit could be clip or in/out time pairs, rather than playlist. +# The same playlist could be played in the grid view as long as these are different clips or separate times. 
+ def get_recommendations(self, user, vids_exclude = []): channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} + + # Exclude playlists from the most recent grid + playlists = self.playlists + if len(vids_exclude) > 0: + for playlist in playlists: + if playlist["name"] in vids_exclude: + playlists.remove(playlist) + # For each playlist, compute user keyword score user_keywords = user.get('keywords', {}) score = {} - for playlist in self.playlists: + for playlist in playlists: score[playlist['name']] = random.random() for tag in [tag for tag in playlist['tags'] if tag in user_keywords]: score[playlist['name']] += user_keywords[tag] # Select highest scoring playlists playlists = sorted( - self.playlists, + playlists, key=lambda playlist: -score[playlist['name']] ) videos = playlists[:channels['userKeywords']] @@ -143,11 +214,18 @@ class Engine: } for video in videos] -### TASK 2 ### -# Change this to make sure the next playlist is not the current playlist -# AND not the playlists displayed in the current grid. Incorporate the change from Task 1. 
def get_next(self, user, position): - video = self.get_videos(user)[position] + grid_events = {} + video_num = 16 + + for event in user.get('events', []): + if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: + grid_events[event.get('data').get('index')] = event.get('data') + if len(grid_events) == video_num: + break + prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) + vids_exclude = [e.get("playlist") for e in prev_grid_list] + video = self.get_recommendations(user, vids_exclude)[position] return video def update_state(self, data): From 83af674af95bc4e1e65c35d66e9dd3015cd52d6c Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Wed, 25 Jul 2018 16:07:31 +0200 Subject: [PATCH 3/5] minor additions to incorporate the future data scheme change --- recommendation_engine.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 14fdbc7..88a241b 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -110,12 +110,13 @@ class Engine: else: # Once the "index" is added to play event data, played index will be excluded from the random shuffle # and deterministically added to staySame pool. + # video_indx = [*range(playIndex)]+[*range(playIndex+1,video_num)] video_indx = list(range(video_num)) random.shuffle(video_indx) rec_list = [] # For now, randomly choose a clip in the same playlist instead of switching to the next clip. - # This will be changed once the clip position is tracked in the user data. + # This will be changed once the clip position is tracked in the user data as "playlistPostion" for i in video_indx[:nc]: if prev_grid_list[i].get('playlist') is None: # add this to deal with the absence of "playlist" data in a grid event temporarily. 
@@ -124,24 +125,32 @@ class Engine: nc = 0 break else: - # if "playlist" data exists in grid event (it should be in the future) + # if "playlist" and "playlistPostion" data exists in grid event (it should be in the future) for playlist in playlists: if playlist.get('name')== prev_grid_list[i].get('playlist'): + # Discuss how this behavour should be: should it switch to a new playlist if it is the end of the playlist clip sequence already? + # if prev_grid_list[i].get('index') == len(playlist['clips']): + # playlist_pos = 0 + # else: + # playlist_pos = prev_grid_list[i].get('index') + 1 + rec_list.append((i, { 'clips': playlist['clips'], 'position': random.randrange(len(playlist['clips'])), + # 'position': playlist_pos, 'name': playlist['name'], 'tags': playlist['tags'], })) # randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events) - # for nextPlaylist pool + # for nextPlaylist pool. vids_exclude = [] # vids_exclude = [e.get("playlist") for e in prev_grid_list] video = self.get_recommendations(user, vids_exclude) rec_list += [(i, video[i]) for i in video_indx[nc:nc+np]] #staySame pool + # video_indx.append(playIndex) rec_list += [(i,{}) for i in video_indx[nc+np:]] rec_list = sorted(rec_list, key=lambda k:k[0]) From 71a9effbba1b917644e88786263ddd78c870e29e Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Wed, 25 Jul 2018 16:13:43 +0200 Subject: [PATCH 4/5] play_index extraction to incorporate the future data scheme change --- recommendation_engine.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/recommendation_engine.py b/recommendation_engine.py index 88a241b..03c6919 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -101,6 +101,17 @@ class Engine: grid_events[event.get('data').get('index')] = event.get('data') if len(grid_events) == video_num: break + + # # The version where the loop also extract play_index (requires "index" in play event data): + # play_index 
= None + # for event in user.get('events', []): + # if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: + # grid_events[event.get('data').get('index')] = event.get('data') + # if event.get('event') == "play" and not play_index: + # play_index = event.get('data').get('index') + # if len(grid_events) == video_num and play_index: + # break + prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) # if there were no grid events for all, initialize all grids. From b7eeb0bd95ce780d95e7590bb699f53905b88c47 Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Fri, 3 Aug 2018 00:06:49 +0200 Subject: [PATCH 5/5] modified for new user data schema and one clip playlist cases --- recommendation_engine.py | 91 +++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 48 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 03c6919..ab0f3cc 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -96,21 +96,21 @@ class Engine: # Make sure sanity check exists in front-end (error if it does not add up to 16). 
video_num = nc + np + ns - for event in user.get('events', []): - if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: - grid_events[event.get('data').get('index')] = event.get('data') - if len(grid_events) == video_num: - break - - # # The version where the loop also extract play_index (requires "index" in play event data): - # play_index = None # for event in user.get('events', []): # if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: # grid_events[event.get('data').get('index')] = event.get('data') - # if event.get('event') == "play" and not play_index: - # play_index = event.get('data').get('index') - # if len(grid_events) == video_num and play_index: - # break + # if len(grid_events) == video_num: + # break + + # # The version where the loop also extract play_index (requires "index" in play event data): + play_index = None + for event in user.get('events', []): + if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: + grid_events[event.get('data').get('index')] = event.get('data') + if event.get('event') == "play" and not play_index: + play_index = event.get('data').get('index') + if len(grid_events) == video_num and play_index: + break prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) @@ -119,50 +119,45 @@ class Engine: return self.get_recommendations(user) else: - # Once the "index" is added to play event data, played index will be excluded from the random shuffle - # and deterministically added to staySame pool. - - # video_indx = [*range(playIndex)]+[*range(playIndex+1,video_num)] - video_indx = list(range(video_num)) + # played index is excluded from the random shuffle and deterministically added to staySame pool. 
+ video_indx = [*range(play_index)]+[*range(play_index+1,video_num)] + # video_indx = list(range(video_num)) random.shuffle(video_indx) + video_indx.append(play_index) + next_clip_index = video_indx[:nc] + next_playlist_index = video_indx[nc:nc+np] + stay_same_index = video_indx[nc+np:] + rec_list = [] - # For now, randomly choose a clip in the same playlist instead of switching to the next clip. - # This will be changed once the clip position is tracked in the user data as "playlistPostion" - for i in video_indx[:nc]: - if prev_grid_list[i].get('playlist') is None: - # add this to deal with the absence of "playlist" data in a grid event temporarily. - # If there's no playlist data recorded, add the nextClip pool to nextPlaylist pool for now. - np += nc - nc = 0 - break - else: - # if "playlist" and "playlistPostion" data exists in grid event (it should be in the future) - for playlist in playlists: - if playlist.get('name')== prev_grid_list[i].get('playlist'): - # Discuss how this behavour should be: should it switch to a new playlist if it is the end of the playlist clip sequence already? - # if prev_grid_list[i].get('index') == len(playlist['clips']): - # playlist_pos = 0 - # else: - # playlist_pos = prev_grid_list[i].get('index') + 1 - - rec_list.append((i, { - 'clips': playlist['clips'], - 'position': random.randrange(len(playlist['clips'])), - # 'position': playlist_pos, - 'name': playlist['name'], - 'tags': playlist['tags'], - })) + # select next clip for nextClip pool except when the playlist has only one clip. + for i in next_clip_index: + for playlist in self.playlists: + if playlist.get('name')== prev_grid_list[i].get('playlist'): + if len(playlist["clips"]) == 1: + next_playlist_index.append(i) + break + # Discuss how this behavour should be: should it switch to a new playlist if it is the end of the playlist clip sequence already? 
+ elif prev_grid_list[i].get('playlistPosition') + 1 == len(playlist['clips']): + playlist_pos = 0 + else: + playlist_pos = prev_grid_list[i].get('playlistPosition') + 1 + + rec_list.append((i, { + 'clips': playlist['clips'], + # 'position': random.randrange(len(playlist['clips'])), + 'position': playlist_pos, + 'name': playlist['name'], + 'tags': playlist['tags'], + })) # randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events) # for nextPlaylist pool. - vids_exclude = [] - # vids_exclude = [e.get("playlist") for e in prev_grid_list] + vids_exclude = [e.get("playlist") for e in prev_grid_list] video = self.get_recommendations(user, vids_exclude) - rec_list += [(i, video[i]) for i in video_indx[nc:nc+np]] + rec_list += [(i, video[i]) for i in next_playlist_index] #staySame pool - # video_indx.append(playIndex) - rec_list += [(i,{}) for i in video_indx[nc+np:]] + rec_list += [(i,{}) for i in stay_same_index] rec_list = sorted(rec_list, key=lambda k:k[0]) return [e[1] for e in rec_list]