From 29f892cafad8ae2f8f0df2e0406fbfc90c799469 Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Wed, 25 Jul 2018 13:12:36 +0200 Subject: [PATCH] implemented issue 1 - vanilla version that works without user event data for playlist, clip position, clip index. To be updated as the data is updated --- recommendation_engine.py | 98 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 10 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 39138f0..14fdbc7 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -15,6 +15,7 @@ import ox from utils import run_async logger = logging.getLogger(__name__) +verbose = True class Engine: @@ -83,24 +84,94 @@ class Engine: clips[inpoint['index']]['out'] = self.pandora.get(video_id, ['duration'])['duration'] return clips -### TASK 1 ### -# Change this to (1) check if there's no grid event and if so: initialize, and -# (2) if not: add serendipity to the current grid mix using gridChange variables. - def get_videos(self, user): + channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} + sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} + grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} + + # check if there were grid events for all indexes. + grid_events = {} + (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame")) + # this assumes np + nc + ns = total number of videos in the grid view (16). + # Make sure sanity check exists in front-end (error if it does not add up to 16). 
+ video_num = nc + np + ns + + for event in user.get('events', []): + if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: + grid_events[event.get('data').get('index')] = event.get('data') + if len(grid_events) == video_num: + break + prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) + + # if there were no grid events for all, initialize all grids. + if len(prev_grid_list) < video_num: + return self.get_recommendations(user) + + else: + # Once the "index" is added to play event data, played index will be excluded from the random shuffle + # and deterministically added to staySame pool. + # video_indx = [*range(playIndex)]+[*range(playIndex+1,video_num)] + video_indx = list(range(video_num)) + random.shuffle(video_indx) + rec_list = [] + # For now, randomly choose a clip in the same playlist instead of switching to the next clip. + # This will be changed once the clip position is tracked in the user data. + for i in video_indx[:nc]: + if prev_grid_list[i].get('playlist') is None: + # add this to deal with the absence of "playlist" data in a grid event temporarily. + # If there's no playlist data recorded, add the nextClip pool to nextPlaylist pool for now. 
+ np += nc + nc = 0 + break + else: + # if "playlist" data exists in grid event (it should be in the future) + for playlist in playlists: + if playlist.get('name')== prev_grid_list[i].get('playlist'): + rec_list.append((i, { + 'clips': playlist['clips'], + 'position': random.randrange(len(playlist['clips'])), + 'name': playlist['name'], + 'tags': playlist['tags'], + })) + + # randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events) + # for nextPlaylist pool + vids_exclude = [] + # vids_exclude = [e.get("playlist") for e in prev_grid_list] + video = self.get_recommendations(user, vids_exclude) + rec_list += [(i, video[i]) for i in video_indx[nc:nc+np]] + + #staySame pool + rec_list += [(i,{}) for i in video_indx[nc+np:]] + + rec_list = sorted(rec_list, key=lambda k:k[0]) + return [e[1] for e in rec_list] + + +# NOTE for future improvement: vids_exclude element unit could be clip or in/out time pairs, rather than playlist. +# The same playlist could be played in the grid view as long as these are different clips or separate times. 
+ def get_recommendations(self, user, vids_exclude = []): channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} + + # Exclude playlists from the most recent grid + playlists = self.playlists + if len(vids_exclude) > 0: + for playlist in playlists: + if playlist["name"] in vids_exclude: + playlists.remove(playlist) + # For each playlist, compute user keyword score user_keywords = user.get('keywords', {}) score = {} - for playlist in self.playlists: + for playlist in playlists: score[playlist['name']] = random.random() for tag in [tag for tag in playlist['tags'] if tag in user_keywords]: score[playlist['name']] += user_keywords[tag] # Select highest scoring playlists playlists = sorted( - self.playlists, + playlists, key=lambda playlist: -score[playlist['name']] ) videos = playlists[:channels['userKeywords']] @@ -143,11 +214,18 @@ class Engine: } for video in videos] -### TASK 2 ### -# Change this to make sure the next playlist is not the current playlist -# AND not the playlists displayed in the current grid. Incorporate the change from Task 1. def get_next(self, user, position): - video = self.get_videos(user)[position] + grid_events = {} + video_num = 16 + + for event in user.get('events', []): + if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: + grid_events[event.get('data').get('index')] = event.get('data') + if len(grid_events) == video_num: + break + prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) + vids_exclude = [e.get("playlist") for e in prev_grid_list] + video = self.get_recommendations(user, vids_exclude)[position] return video def update_state(self, data):