diff --git a/recommendation_engine.py b/recommendation_engine.py index 3082c4a..b3d9e4f 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -58,10 +58,6 @@ class Engine: self.state['userKeywordsWeights'] = { 'themeTags': {'locked': False, 'value': 0.3}, 'characterTags': {'locked': False, 'value': 0.7} -<<<<<<< HEAD -======= - ->>>>>>> iss2 } self.update_keywords() @@ -93,10 +89,7 @@ class Engine: clips[inpoint['index']]['out'] = self.pandora.get(video_id, ['duration'])['duration'] return clips - def get_videos(self, user): - # Update self_playlists first to reflect changes - update_user_playlists(self.playlists, user) if user.get('events', [{}])[0].get("event")=="login": return { @@ -113,26 +106,30 @@ class Engine: # check if there were grid events for all indexes. grid_events = {} (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame")) - video_num = nc + np + ns + # this assumes np + nc + ns = total number of videos in the grid view (16). + # Make sure sanity check exists in front-end (error if it does not add up to 16). 
+ video_num = nc + np + ns - # # The version where the loop also extract play_index; requires "index" in play event data (previously unavailable): + # for event in user.get('events', []): + # if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: + # grid_events[event.get('data').get('index')] = event.get('data') + # if len(grid_events) == video_num: + # break + + # # The version where the loop also extracts play_index (requires "index" in play event data): play_index = None for event in user.get('events', []): if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: grid_events[event.get('data').get('index')] = event.get('data') - if event.get('event') == "play" and event["data"].get("type") == "video" and not play_index: - play_index = event.get('data').get('index') + if event.get('event') == "play" and not play_index: + play_index = event.get('data').get('index') if len(grid_events) == video_num and play_index: - break + break prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) # if there were no grid events for all, initialize all grids. 
-<<<<<<< HEAD if len(prev_grid_list) < video_num: -======= - if len(prev_grid_list) < video_num: ->>>>>>> iss2 return { 'user': { 'keywords': user.get('keywords', {}) @@ -163,6 +160,7 @@ class Engine: next_playlist_index.append(next_clip_index) break else: + # if "playlist" and "playlistPosition" (if not, default to 0) exist in grid event for playlist in self.playlists: if playlist.get('name')== prev_grid_list[i].get('playlist'): if len(playlist["clips"]) == 1: @@ -201,10 +199,7 @@ class Engine: }, 'videos': videos_ } -<<<<<<< HEAD -======= ->>>>>>> iss2 def get_recommendations(self, user, vids_exclude = []): @@ -231,11 +226,7 @@ class Engine: character_tags["FEDOR MIKHAILOVICH SOFRONOV"] = character_tags.get("FYODOR MIKHAILOVICH SOFRONOV",0) character_tags["SHKABARNYA OLGA SERGEEVNA"] = character_tags.get("OLGA SERGEEVNA SHKABARNYA",0) character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA",0) -<<<<<<< HEAD -======= - ->>>>>>> iss2 score = {} for playlist in playlists: score[playlist['name']] = random.random() * 0.001 @@ -273,68 +264,7 @@ class Engine: } for video in videos] - def update_user_playlists(playlists, user, watch_cutoff = 0.9): - # Output: playlists with updated in/out time of clips that have been watched. - # Watched is defined as a video being played in full screen. - # "watch_cutoff" parameter: the portion of the clip duration to be determined as watched the whole clip. should be [0,1] - # + check (play, pause) pairs and eliminate unusual cases most likely due to a bug. - # + If (play, pause) pairs exceed XX(80-90?) percent of the clip length, eliminate the clip from the playlist. - # + Otherwise, find the last pause position of a clip and record it as "in" position of the clip. - # + If the clips are all eliminated from a playlist, eliminate the playlist. 
- play = {} - watched = [] - clip_max_dur = 10800 # = 3 hours; arbitrary max duration allowed for (pause time - play time) to detect outlier/bugs - # The current max time of a clip duration is 10379.383333377269 from "DDLaunch: Erik Verlinde, Gravity as an emergent force (1956)" - # A user could potentially spend more than 3 hours if they keep watching after the clip enters into the subsequent "scene" - for event in user.get('events', [])[::-1]: - if event["event"] == "play" and event["data"].get("type") == "video": - play = event - elif event["event"] == "pause" and play!={} and event["data"].get("type") == "video": - if "position" not in play["data"]: - play = {} - break - if play["data"].get("playlist") == event["data"].get("playlist"): - if event["data"]["position"] - play["data"]["position"] > 0 and event["data"]["position"] - play["data"]["position"] < clip_max_dur and event["data"].get("playlistPosition") == play["data"].get("playlistPosition") and event["data"].get("playlistPosition") is not None: - i = event["data"]["playlistPosition"] - for playlist in playlists: - if playlist["name"] == event["data"]["playlist"] and i < len(playlist["clips"]): - if play["data"]["position"] >= max(playlist["clips"][i]["in"] - 15, 0) and event["data"]["position"] <= playlist["clips"][i]["out"] + 15: - # This assumes the (play, pause) fits inside the clip's (in, out) segment with +/- 15secs buffer. There were newer edits of clip positions with 12 seconds difference. 
- # instances where this might not be the case: clip in/out may be largely edited (before after edit inconsistency); skip may trigger jump to a wrong clip (bug) - if "orig_in" not in playlist["clips"][i]: - cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"] - else: - cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["orig_in"])*watch_cutoff + playlist["clips"][i]["orig_in"] - - if event["data"]["position"] >= cutoff_pos: - watched.append((playlist["name"],i)) - else: - if "orig_in" not in playlist["clips"][i]: - # record the original "in" position to calculate cutoff position in the future - playlist["clips"][i]["orig_in"] = playlist["clips"][i]["in"] - # update "in" position of the clip in the playlist - playlist["clips"][i]["in"] = event["data"]["position"] - break - play = {} - - d_watched = defaultdict(set) - for k, v in watched: - d_watched[k].add(v) - for k, v in d_watched.items(): - for playlist in playlists: - if playlist["name"] == k: - if len(v) == len(playlist["clips"]): - playlists.remove(playlist) - else: - playlist["clips"] = [playlist["clips"][i] for i in range(len(playlist["clips"])) if i not in v] - break - return(playlists) - - def get_next(self, user, position): - # Update self_playlists first to reflect changes - update_user_playlists(self.playlists, user) - grid_events = {} video_num = 16