Resolved conflict with master branch, added orig_in element for playlist, added update_user_playlists call in get_videos and get_next.

This commit is contained in:
pythagoraswitch 2018-11-16 23:47:02 +01:00
parent acb9b499be
commit 834ba630f1
1 changed file with 72 additions and 44 deletions

View File

@ -43,9 +43,8 @@ class Engine:
else:
self.state = {
'channels': {
'globalKeywords': {'locked': False, 'value': 7},
'userKeywords': {'locked': False, 'value': 7},
'screenings': {'locked': True, 'value': 2}
'globalKeywords': {'locked': False, 'value': 8},
'userKeywords': {'locked': False, 'value': 8}
},
'globalKeywords': {},
}
@ -55,6 +54,12 @@ class Engine:
'nextPlaylist': {'locked': False, 'value': 4},
'staySame': {'locked': False, 'value': 8}
}
if 'userKeywordsWeights' not in self.state:
self.state['userKeywordsWeights'] = {
'themeTags': {'locked': False, 'value': 0.3},
'characterTags': {'locked': False, 'value': 0.7}
}
self.update_keywords()
@property
@ -87,8 +92,16 @@ class Engine:
def get_videos(self, user):
if user.get('events', [{}])[0].get("event")=="login":
return self.get_recommendations(user)
# Update self_playlists first to reflect changes
update_user_playlists(self.playlists, user)
if user.get('events', [{}])[0].get("event")=="login":
return {
'user': {
'keywords': user.get('keywords', {})
},
'videos': self.get_recommendations(user)
}
channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
@ -97,29 +110,28 @@ class Engine:
# check if there were grid events for all indexes.
grid_events = {}
(nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame"))
video_num = nc + np + ns
video_num = nc + np + ns
# for event in user.get('events', []):
# if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
# grid_events[event.get('data').get('index')] = event.get('data')
# if len(grid_events) == video_num:
# break
# # The version where the loop also extract play_index (requires "index" in play event data):
# # The version where the loop also extract play_index; requires "index" in play event data (previously unavailable):
play_index = None
for event in user.get('events', []):
if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
grid_events[event.get('data').get('index')] = event.get('data')
if event.get('event') == "play" and event["data"].get("type") == "video" and not play_index:
play_index = event.get('data').get('index')
play_index = event.get('data').get('index')
if len(grid_events) == video_num and play_index:
break
break
prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index'])
# if there were no grid events for all, initialize all grids.
if len(prev_grid_list) < video_num:
return self.get_recommendations(user)
if len(prev_grid_list) < video_num:
return {
'user': {
'keywords': user.get('keywords', {})
},
'videos': self.get_recommendations(user)
}
else:
if play_index is None:
@ -144,7 +156,6 @@ class Engine:
next_playlist_index.append(next_clip_index)
break
else:
# if "playlist" and "playlistPostion" (if not, default to 0) exists in grid event
for playlist in self.playlists:
if playlist.get('name')== prev_grid_list[i].get('playlist'):
if len(playlist["clips"]) == 1:
@ -176,13 +187,20 @@ class Engine:
rec_list += [(i,{}) for i in stay_same_index]
rec_list = sorted(rec_list, key=lambda k:k[0])
return [e[1] for e in rec_list]
videos_ = [e[1] for e in rec_list]
return {
'user': {
'keywords': user.get('keywords', {})
},
'videos': videos_
}
def get_recommendations(self, user, vids_exclude = []):
channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
userKeywordsWeights = {k: v.get('value', 1) for k, v in self.state['userKeywordsWeights'].items()}
# Exclude playlists from the most recent grid
playlists = copy.deepcopy(self.playlists)
@ -191,13 +209,26 @@ class Engine:
if playlist["name"] in vids_exclude:
playlists.remove(playlist)
# For each playlist, compute user keyword score
user_keywords = user.get('keywords', {})
# For each playlist, compute user keyword score by theme and character tags
user_keywords = copy.deepcopy(user.get('keywords', {}))
theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()}
character_tags = {k:v for k,v in user_keywords.items() if k.isupper()}
# manually modify some of the user keywords to match the playlist tags
theme_tags["god"] = theme_tags.get("god - gods",0)
theme_tags["visionary"] = theme_tags.get("visionary - enlightenment",0)
theme_tags["enlightenment"] = theme_tags.get("visionary - enlightenment",0)
character_tags["FEDOR MIKHAILOVICH SOFRONOV"] = character_tags.get("FYODOR MIKHAILOVICH SOFRONOV",0)
character_tags["SHKABARNYA OLGA SERGEEVNA"] = character_tags.get("OLGA SERGEEVNA SHKABARNYA",0)
character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA",0)
score = {}
for playlist in playlists:
score[playlist['name']] = random.random()
for tag in [tag for tag in playlist['tags'] if tag in user_keywords]:
score[playlist['name']] += user_keywords[tag]
score[playlist['name']] = random.random() * 0.001
for tag in playlist['tags']:
if tag in theme_tags:
score[playlist['name']] += theme_tags[tag] * userKeywordsWeights["themeTags"]
elif tag in character_tags:
score[playlist['name']] += character_tags[tag] * userKeywordsWeights["characterTags"]
# Select highest scoring playlists
playlists = sorted(
playlists,
@ -216,23 +247,7 @@ class Engine:
playlists,
key=lambda playlist: -score[playlist['name']]
)
videos += playlists[:channels['globalKeywords']]
playlists = playlists[channels['globalKeywords']:]
# Count products the user has seen
count = defaultdict(lambda: 0)
for event in user.get('events', []):
if event.get('data', {}).get('product'):
count[event['data']['product']] += 1
# For each product in playlist tags, increment score by count
for playlist in playlists:
score[playlist['name']] = random.random()
for tag in set(playlist['tags']) & set(count):
score[playlist['name']] += count[tag]
# Select highest scoring playlists
videos += sorted(
playlists,
key=lambda playlist: -score[playlist['name']]
)[:16 - channels['userKeywords'] - channels['globalKeywords']]
videos += playlists[:16 - channels['userKeywords']]
# Shuffle playlists (randomize layout) and shift clips (randomize start)
random.shuffle(videos)
return [{
@ -255,7 +270,8 @@ class Engine:
watched = []
clip_max_dur = 10800 # = 3 hours; arbitrary max duration allowed for (pause time - play time) to detect outlier/bugs
# The current max time of a clip duration is 10379.383333377269 from "DDLaunch: Erik Verlinde, Gravity as an emergent force (1956)"
for event in user["events"][::-1]:
# A user could potentially spend more than 3 hours if they keep watching after the clip enters into the subsequent "scene"
for event in user.get('events', [])[::-1]:
if event["event"] == "play" and event["data"].get("type") == "video":
play = event
elif event["event"] == "pause" and play!={} and event["data"].get("type") == "video":
@ -270,9 +286,18 @@ class Engine:
if play["data"]["position"] >= max(playlist["clips"][i]["in"] - 15, 0) and event["data"]["position"] <= playlist["clips"][i]["out"] + 15:
# This assumes the (play, pause) fits inside the clip's (in, out) segment with +/- 15secs buffer. There were newer edits of clip positions with 12 seconds difference.
# instances where this might not be the case: clip in/out may be largely edited (before after edit inconsistency); skip may trigger jump to a wrong clip (bug)
if event["data"]["position"] >= ((playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]):
if "orig_in" not in playlist["clips"][i]:
cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]
else:
cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["orig_in"])*watch_cutoff + playlist["clips"][i]["orig_in"]
if event["data"]["position"] >= cutoff_pos:
watched.append((playlist["name"],i))
else:
else:
if "orig_in" not in playlist["clips"][i]:
# record the original "in" position to calculate cutoff position in the future
playlist["clips"][i]["orig_in"] = playlist["clips"][i]["in"]
# update "in" position of the clip in the playlist
playlist["clips"][i]["in"] = event["data"]["position"]
break
play = {}
@ -292,6 +317,9 @@ class Engine:
def get_next(self, user, position):
# Update self_playlists first to reflect changes
update_user_playlists(self.playlists, user)
grid_events = {}
video_num = 16