diff --git a/.gitignore b/.gitignore index 9f4d1c3..fe7241d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,3 @@ json/* -venv/* __pycache__/ *.swp -*.ipynb -*.pyc -test.py diff --git a/recommendation_engine.py b/recommendation_engine.py index ab0f3cc..98c1b26 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -15,7 +15,6 @@ import ox from utils import run_async logger = logging.getLogger(__name__) -verbose = True class Engine: @@ -47,12 +46,6 @@ class Engine: 'screenings': {'locked': True, 'value': 2} }, 'globalKeywords': {}, - 'gridChange': { - 'nextClip': {'locked': False, 'value': 4}, - 'nextPlaylist': {'locked': False, 'value': 4}, - 'staySame': {'locked': False, 'value': 8} - - } } self.update_keywords() @@ -87,106 +80,16 @@ class Engine: def get_videos(self, user): channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} - grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} - - # check if there were grid events for all indexes. - grid_events = {} - (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame")) - # this assumes np + nc + ns = total number of videos in the grid view (16). - # Make sure sanity check exists in front-end (error if it does not add up to 16). - video_num = nc + np + ns - - # for event in user.get('events', []): - # if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: - # grid_events[event.get('data').get('index')] = event.get('data') - # if len(grid_events) == video_num: - # break - - # # The version where the loop also extract play_index (requires "index" in play event data): - play_index = None - for event in user.get('events', []): - if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: - grid_events[event.get('data').get('index')] = event.get('data') - if event.get('event') == "play" and not play_index: - play_index = event.get('data').get('index') - if len(grid_events) == video_num and play_index: - break - - prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) - - # if there were no grid events for all, initialize all grids. - if len(prev_grid_list) < video_num: - return self.get_recommendations(user) - - else: - # played index is excluded from the random shuffle and deterministically added to staySame pool. - video_indx = [*range(play_index)]+[*range(play_index+1,video_num)] - # video_indx = list(range(video_num)) - random.shuffle(video_indx) - video_indx.append(play_index) - next_clip_index = video_indx[:nc] - next_playlist_index = video_indx[nc:nc+np] - stay_same_index = video_indx[nc+np:] - - rec_list = [] - # select next clip for nextClip pool except when the playlist has only one clip. - for i in next_clip_index: - for playlist in self.playlists: - if playlist.get('name')== prev_grid_list[i].get('playlist'): - if len(playlist["clips"]) == 1: - next_playlist_index.append(i) - break - # Discuss how this behavour should be: should it switch to a new playlist if it is the end of the playlist clip sequence already? - elif prev_grid_list[i].get('playlistPosition') + 1 == len(playlist['clips']): - playlist_pos = 0 - else: - playlist_pos = prev_grid_list[i].get('playlistPosition') + 1 - - rec_list.append((i, { - 'clips': playlist['clips'], - # 'position': random.randrange(len(playlist['clips'])), - 'position': playlist_pos, - 'name': playlist['name'], - 'tags': playlist['tags'], - })) - - # randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events) - # for nextPlaylist pool. - vids_exclude = [e.get("playlist") for e in prev_grid_list] - video = self.get_recommendations(user, vids_exclude) - rec_list += [(i, video[i]) for i in next_playlist_index] - - #staySame pool - rec_list += [(i,{}) for i in stay_same_index] - - rec_list = sorted(rec_list, key=lambda k:k[0]) - return [e[1] for e in rec_list] - - -# NOTE for future improvement: vids_exclude element unit could be clip or in/out time pairs, rather than playlist. -# The same playlist could be played in the grid view as long as these are differenct clips or separate times. - def get_recommendations(self, user, vids_exclude = []): - channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} - sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} - gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} - - # Exclude playlists from the most recent grid - playlists = self.playlists - if len(vids_exclude) > 0: - for playlist in playlists: - if playlist["name"] in vids_exclude: - playlists.remove(playlist) - # For each playlist, compute user keyword score user_keywords = user.get('keywords', {}) score = {} - for playlist in playlists: + for playlist in self.playlists: score[playlist['name']] = random.random() for tag in [tag for tag in playlist['tags'] if tag in user_keywords]: score[playlist['name']] += user_keywords[tag] # Select highest scoring playlists playlists = sorted( - playlists, + self.playlists, key=lambda playlist: -score[playlist['name']] ) videos = playlists[:channels['userKeywords']] @@ -228,19 +131,8 @@ class Engine: 'tags': video['tags'], } for video in videos] - def get_next(self, user, position): - grid_events = {} - video_num = 16 - - for event in user.get('events', []): - if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: - grid_events[event.get('data').get('index')] = event.get('data') - if len(grid_events) == video_num: - break - prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) - vids_exclude = [e.get("playlist") for e in prev_grid_list] - video = self.get_recommendations(user, vids_exclude)[position] + video = self.get_videos(user)[position] return video def update_state(self, data):