def get_videos(self, user):
    """Return the next grid of videos for *user*, one entry per grid index.

    The grid is split into three pools whose sizes come from
    ``self.state['gridChange']``:

    * ``nextClip``     - advance to the next clip of the playlist shown before,
    * ``nextPlaylist`` - replace the slot with a newly recommended playlist,
    * ``staySame``     - leave the slot untouched (encoded as an empty dict).

    The previous grid is rebuilt from the ``"grid"`` events in
    ``user['events']``; the first event seen for each index wins.  When the
    events do not cover the whole grid, a full set of recommendations is
    returned instead.

    Args:
        self: engine instance; reads ``state``, ``playlists`` and calls
            ``get_recommendations``.
        user: dict with an optional ``'events'`` list of event dicts.

    Returns:
        A list ordered by grid index.  Each item is either a playlist dict
        (``clips``, ``position``, ``name``, ``tags`` for nextClip slots, or
        whatever ``get_recommendations`` yields for nextPlaylist slots) or
        ``{}`` for a slot that stays the same.
    """
    grid_change = {
        k: v.get('value', 0) for k, v in self.state['gridChange'].items()
    }
    next_clip_count = grid_change.get('nextClip', 0)
    next_playlist_count = grid_change.get('nextPlaylist', 0)
    stay_same_count = grid_change.get('staySame', 0)
    # The three pool sizes are assumed to add up to the number of videos in
    # the grid view; the front-end is expected to enforce that.
    video_num = next_clip_count + next_playlist_count + stay_same_count

    # Collect the first grid event per index and the first play event.
    # NOTE(review): this presumes events are ordered newest first - confirm.
    grid_events = {}
    play_index = None
    for event in user.get('events', []):
        kind = event.get('event')
        data = event.get('data') or {}
        index = data.get('index')
        if index is None:
            continue
        if kind == 'grid' and index not in grid_events:
            grid_events[index] = data
        elif kind == 'play' and play_index is None:
            # "is None" rather than truthiness: index 0 is a valid slot.
            play_index = index
        if len(grid_events) >= video_num and play_index is not None:
            break

    # Without a grid event for every slot there is no previous grid to
    # evolve from, so initialise the whole grid.
    if len(grid_events) < video_num:
        return self.get_recommendations(user)

    prev_grid_list = [grid_events[i] for i in sorted(grid_events)]

    # The played slot is kept out of the shuffle and appended last, so it
    # lands in the staySame pool whenever that pool is non-empty.  A missing
    # or out-of-range play index simply means no slot is pinned.
    indices = list(range(video_num))
    pinned = None
    if isinstance(play_index, int) and 0 <= play_index < video_num:
        pinned = play_index
        indices.remove(pinned)
    random.shuffle(indices)
    if pinned is not None:
        indices.append(pinned)

    clip_end = next_clip_count
    playlist_end = next_clip_count + next_playlist_count
    next_clip_index = indices[:clip_end]
    next_playlist_index = indices[clip_end:playlist_end]
    stay_same_index = indices[playlist_end:]

    # First playlist wins if two share a name.
    playlists_by_name = {}
    for playlist in self.playlists:
        playlists_by_name.setdefault(playlist.get('name'), playlist)

    rec_list = []
    for i in next_clip_index:
        previous = prev_grid_list[i]
        playlist = playlists_by_name.get(previous.get('playlist'))
        clips = playlist['clips'] if playlist is not None else []
        if len(clips) <= 1:
            # Nothing to advance to (single-clip, empty or unknown
            # playlist): hand the slot to the nextPlaylist pool so the
            # returned grid keeps one entry per index.
            next_playlist_index.append(i)
            continue
        position = previous.get('playlistPosition')
        # Wrap to the first clip after the last one.  Open question from the
        # original author: should the slot switch playlist at that point?
        next_position = 0 if position is None else (position + 1) % len(clips)
        rec_list.append((i, {
            'clips': clips,
            'position': next_position,
            'name': playlist['name'],
            'tags': playlist['tags'],
        }))

    if next_playlist_index:
        # Exclude every playlist that was on the previous grid.
        # NOTE for future improvement: the exclusion unit could be a clip or
        # an in/out time pair rather than a whole playlist, so the same
        # playlist could appear on the grid as long as the clips or time
        # ranges differ.
        vids_exclude = [e.get('playlist') for e in prev_grid_list]
        recommended = self.get_recommendations(user, vids_exclude)
        for i in next_playlist_index:
            # Fall back to "stay the same" when there are fewer
            # recommendations than grid slots.
            video = recommended[i] if i < len(recommended) else {}
            rec_list.append((i, video))

    # staySame pool
    rec_list.extend((i, {}) for i in stay_same_index)

    rec_list.sort(key=lambda item: item[0])
    return [video for _, video in rec_list]
def get_next(self, user, position):
    """Return the recommendation at *position*, avoiding playlists on the grid.

    The playlists currently shown are taken from the ``"grid"`` events in
    ``user['events']`` (the first event seen for each index wins) and passed
    to ``get_recommendations`` as the exclusion list.

    Args:
        self: engine instance; reads ``state`` and calls
            ``get_recommendations``.
        user: dict with an optional ``'events'`` list of event dicts.
        position: index into the list returned by ``get_recommendations``.

    Returns:
        The recommendation at ``position``.

    Raises:
        IndexError: if ``position`` is outside the recommendation list.
    """
    # Grid size follows the gridChange pool sizes, the same source
    # get_videos uses; 16 is the fallback when that state is absent or empty.
    default_grid_size = 16
    grid_change = self.state.get('gridChange') or {}
    video_num = (
        sum(v.get('value', 0) for v in grid_change.values())
        or default_grid_size
    )

    grid_events = {}
    for event in user.get('events', []):
        if event.get('event') != 'grid':
            continue
        data = event.get('data') or {}
        index = data.get('index')
        # Skip malformed grid events and indexes already seen.
        if index is None or index in grid_events:
            continue
        grid_events[index] = data
        if len(grid_events) >= video_num:
            break

    # Ordered by grid index, matching the order the grid was laid out in.
    vids_exclude = [grid_events[i].get('playlist') for i in sorted(grid_events)]
    return self.get_recommendations(user, vids_exclude)[position]