diff --git a/recommendation_engine.py b/recommendation_engine.py index b3d9e4f..3082c4a 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -58,6 +58,10 @@ class Engine: self.state['userKeywordsWeights'] = { 'themeTags': {'locked': False, 'value': 0.3}, 'characterTags': {'locked': False, 'value': 0.7} +<<<<<<< HEAD +======= + +>>>>>>> iss2 } self.update_keywords() @@ -89,7 +93,10 @@ class Engine: clips[inpoint['index']]['out'] = self.pandora.get(video_id, ['duration'])['duration'] return clips + def get_videos(self, user): + # Update self_playlists first to reflect changes + update_user_playlists(self.playlists, user) if user.get('events', [{}])[0].get("event")=="login": return { @@ -106,30 +113,26 @@ class Engine: # check if there were grid events for all indexes. grid_events = {} (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame")) - # this assumes np + nc + ns = total number of videos in the grid view (16). - # Make sure sanity check exists in front-end (error if it does not add up to 16). 
- video_num = nc + np + ns + video_num = nc + np + ns - # for event in user.get('events', []): - # if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: - # grid_events[event.get('data').get('index')] = event.get('data') - # if len(grid_events) == video_num: - # break - - # # The version where the loop also extract play_index (requires "index" in play event data): + # # The version where the loop also extract play_index; requires "index" in play event data (previously unavailable): play_index = None for event in user.get('events', []): if event.get('event') == "grid" and event.get('data').get('index') not in grid_events: grid_events[event.get('data').get('index')] = event.get('data') - if event.get('event') == "play" and not play_index: - play_index = event.get('data').get('index') + if event.get('event') == "play" and event["data"].get("type") == "video" and not play_index: + play_index = event.get('data').get('index') if len(grid_events) == video_num and play_index: - break + break prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index']) # if there were no grid events for all, initialize all grids. 
def update_user_playlists(playlists, user, watch_cutoff=0.9):
    """Update playlists in place from the user's video play/pause history.

    The user's events are walked in reverse list order (the list is
    presumably stored newest-first, so this is chronological -- confirm
    against the code that records events). Each "pause" of type "video" is
    paired with the closest preceding "play" of type "video". For every
    plausible (play, pause) pair:

    * if the pause position reaches ``watch_cutoff`` of the clip's length
      (measured from the clip's original "in" point), the clip counts as
      watched and is removed from its playlist;
    * otherwise the clip's "in" point is moved to the pause position so the
      clip resumes from there, and the first "in" value is kept as
      "orig_in" for later cutoff calculations.

    A playlist whose clips have all been watched is removed from
    ``playlists``.

    Args:
        playlists: list of dicts with a "name" and a "clips" list; each clip
            is a dict with numeric "in" and "out" positions. Mutated in
            place.
        user: dict that may hold an "events" list of
            ``{"event": ..., "data": {...}}`` dicts.
        watch_cutoff: portion of the clip length, expected in [0, 1], after
            which a clip counts as fully watched.

    Returns:
        The same ``playlists`` list object, after the in-place update.

    NOTE(review): clip indexes in events refer to positions at the time the
    event was recorded. Once clips are removed here, replaying the same
    events in a later call may match a different clip -- confirm that
    callers only pass events that have not been applied yet.
    """
    # 3 hours: arbitrary upper bound on (pause - play), used to drop
    # outliers/bugs. The longest known clip is about 10379 seconds.
    max_play_span = 10800
    # Clip in/out points have been re-edited by up to ~12 seconds, so a
    # (play, pause) pair may stick out of the clip by this many seconds.
    edit_tolerance = 15

    pending_play = None  # data of the latest unmatched video "play" event
    watched = {}  # playlist name -> set of fully watched clip indexes

    for event in reversed(user.get('events', [])):
        data = event.get("data") or {}
        if data.get("type") != "video":
            continue
        kind = event.get("event")
        if kind == "play":
            pending_play = data
            continue
        if kind != "pause" or pending_play is None:
            continue

        # A pause always consumes the pending play, whether or not the pair
        # turns out to be usable.
        start, pending_play = pending_play, None
        start_pos = start.get("position")
        end_pos = data.get("position")
        if start_pos is None or end_pos is None:
            # Skip only this malformed pair. The previous `break` here
            # discarded every later event as well.
            continue

        name = data.get("playlist")
        index = data.get("playlistPosition")
        if name is None or start.get("playlist") != name:
            continue
        if start.get("playlistPosition") != index:
            continue
        # Reject missing, non-integer and negative indexes; a negative index
        # would otherwise silently address a clip from the end of the list.
        if not isinstance(index, int) or index < 0:
            continue
        if not 0 < end_pos - start_pos < max_play_span:
            continue

        target = next(
            (p for p in playlists
             if p["name"] == name and index < len(p["clips"])),
            None,
        )
        if target is None:
            continue
        clip = target["clips"][index]

        # The pair must fit inside the clip's (in, out) segment, give or
        # take the edit tolerance. It may not when the clip was heavily
        # re-edited or when a skip jumped to the wrong clip.
        if start_pos < max(clip["in"] - edit_tolerance, 0):
            continue
        if end_pos > clip["out"] + edit_tolerance:
            continue

        # Measure the cutoff from the original "in" point, so a clip that
        # was resumed part-way still needs the same absolute position.
        origin = clip.get("orig_in", clip["in"])
        cutoff_pos = (clip["out"] - origin) * watch_cutoff + origin
        if end_pos >= cutoff_pos:
            watched.setdefault(target["name"], set()).add(index)
        else:
            clip.setdefault("orig_in", clip["in"])
            clip["in"] = end_pos

    for name, indexes in watched.items():
        for playlist in playlists:
            if playlist["name"] != name:
                continue
            remaining = [
                clip for pos, clip in enumerate(playlist["clips"])
                if pos not in indexes
            ]
            if remaining:
                playlist["clips"] = remaining
            else:
                # Safe despite the enclosing iteration: we leave the loop
                # immediately after mutating the list.
                playlists.remove(playlist)
            break
    return playlists