Compare commits
23 commits
Author | SHA1 | Date | |
---|---|---|---|
d3f1a95c5f | |||
3c80c721d3 | |||
97a65ad52d | |||
8c618ab988 | |||
d96edf480f | |||
6899cc5d37 | |||
b749d66bac | |||
b41b56941c | |||
e3c61853c3 | |||
319e4d384e | |||
2405b8b7c7 | |||
001e377003 | |||
21781747a2 | |||
4c2cb60d9c | |||
b5172bbc96 | |||
4fadcf7927 | |||
d51748b1d0 | |||
b3d727a9d0 | |||
834ba630f1 | |||
81cfe9c9d8 | |||
024c1008fb | |||
4bd4af703e | |||
0fe5752db3 |
1 changed file with 220 additions and 108 deletions
|
@ -1,6 +1,6 @@
|
||||||
'''
|
'''
|
||||||
Recommendation Engine Example
|
Recommendation Engine ver 1
|
||||||
1 Nov 2017, 0x2620
|
30 Nov 2018, 0x2620
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
@ -33,6 +33,10 @@ class Engine:
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
with open(filename) as f:
|
with open(filename) as f:
|
||||||
self.playlists = json.load(f)
|
self.playlists = json.load(f)
|
||||||
|
# ## The following is for testing purposes.
|
||||||
|
# for playlist in self.playlists:
|
||||||
|
# for clip in playlist["clips"]:
|
||||||
|
# clip["pass"] = bool(random.getrandbits(1))
|
||||||
else:
|
else:
|
||||||
self.playlists = []
|
self.playlists = []
|
||||||
|
|
||||||
|
@ -43,18 +47,25 @@ class Engine:
|
||||||
else:
|
else:
|
||||||
self.state = {
|
self.state = {
|
||||||
'channels': {
|
'channels': {
|
||||||
'globalKeywords': {'locked': False, 'value': 7},
|
'globalKeywords': {'locked': False, 'value': 8},
|
||||||
'userKeywords': {'locked': False, 'value': 7},
|
'userKeywords': {'locked': False, 'value': 8}
|
||||||
'screenings': {'locked': True, 'value': 2}
|
|
||||||
},
|
},
|
||||||
'globalKeywords': {},
|
'globalKeywords': {},
|
||||||
}
|
}
|
||||||
if 'gridChange' not in self.state:
|
if 'gridChange' not in self.state:
|
||||||
self.state['gridChange'] = {
|
self.state['gridChange'] = {
|
||||||
'nextClip': {'locked': True, 'value': 4},
|
'nextClip': {'locked': False, 'value': 5},
|
||||||
'nextPlaylist': {'locked': False, 'value': 4},
|
'nextPlaylist': {'locked': False, 'value': 8},
|
||||||
'staySame': {'locked': False, 'value': 8}
|
'staySame': {'locked': True, 'value': 3}
|
||||||
}
|
}
|
||||||
|
if 'userKeywordsWeights' not in self.state:
|
||||||
|
self.state['userKeywordsWeights'] = {
|
||||||
|
'themeTags': {'locked': False, 'value': 0.3},
|
||||||
|
'characterTags': {'locked': False, 'value': 0.7},
|
||||||
|
'random': {'locked': False, 'value': False}
|
||||||
|
}
|
||||||
|
if 'random' not in self.state['userKeywordsWeights']:
|
||||||
|
self.state['userKeywordsWeights']['random'] = {'locked': False, 'value': False}
|
||||||
self.update_keywords()
|
self.update_keywords()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -87,25 +98,47 @@ class Engine:
|
||||||
|
|
||||||
|
|
||||||
def get_videos(self, user):
|
def get_videos(self, user):
|
||||||
|
## Output is a dictionary of: user keyword scores, list of videos for each grid index (0-15),
|
||||||
|
## and parameters to be displayed on debug view.
|
||||||
|
## It implements "next clip" "next playlist" "stay same" grid allocation for the output video, depending on the user log history.
|
||||||
|
|
||||||
|
# Build a per-user copy of self.playlists that reflects the user's log history
|
||||||
|
playlists = self.update_user_playlists(user)
|
||||||
|
|
||||||
|
# Get the user keyword scores for debug view
|
||||||
|
user_keywords = copy.deepcopy(user.get('keywords', {}))
|
||||||
|
theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()}
|
||||||
|
character_tags = {k:v for k,v in user_keywords.items() if k.isupper()}
|
||||||
|
top_user_keywords = sorted([(k,v) for (k,v) in theme_tags.items()], key=lambda kv: kv[1])[-10:]
|
||||||
|
top_user_characters = sorted([(k,v) for (k,v) in character_tags.items()], key=lambda kv: kv[1])[-10:]
|
||||||
|
debug_index_output = defaultdict(list)
|
||||||
|
|
||||||
|
# If the most recent event is "login," initialize grid videos.
|
||||||
if user.get('events', [{}])[0].get("event")=="login":
|
if user.get('events', [{}])[0].get("event")=="login":
|
||||||
return self.get_recommendations(user)
|
rec = self.get_recommendations(playlists, user)
|
||||||
|
return {
|
||||||
|
'user': {
|
||||||
|
'keywords': user.get('keywords', {})
|
||||||
|
},
|
||||||
|
'videos': rec["videos"],
|
||||||
|
"_debug": {
|
||||||
|
"top_user_keywords": top_user_keywords,
|
||||||
|
"top_user_characters": top_user_characters,
|
||||||
|
"top_user_playlists": rec["top_user_playlists"],
|
||||||
|
"top_global_playlists": rec["top_global_playlists"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
|
channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
|
||||||
sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
|
sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
|
||||||
grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
|
grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
|
||||||
|
|
||||||
# check if there were grid events for all indexes.
|
|
||||||
grid_events = {}
|
grid_events = {}
|
||||||
(nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame"))
|
(nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame"))
|
||||||
video_num = nc + np + ns
|
video_num = nc + np + ns
|
||||||
|
|
||||||
# for event in user.get('events', []):
|
# collect the most recent grid event for each grid index and the grid index of the most recent play event.
|
||||||
# if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
|
# the following requires "index" in play event data (previously unavailable)
|
||||||
# grid_events[event.get('data').get('index')] = event.get('data')
|
|
||||||
# if len(grid_events) == video_num:
|
|
||||||
# break
|
|
||||||
|
|
||||||
# # The version where the loop also extract play_index (requires "index" in play event data):
|
|
||||||
play_index = None
|
play_index = None
|
||||||
for event in user.get('events', []):
|
for event in user.get('events', []):
|
||||||
if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
|
if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
|
||||||
|
@ -119,14 +152,26 @@ class Engine:
|
||||||
|
|
||||||
# if there were no grid events for all, initialize all grids.
|
# if there were no grid events for all, initialize all grids.
|
||||||
if len(prev_grid_list) < video_num:
|
if len(prev_grid_list) < video_num:
|
||||||
return self.get_recommendations(user)
|
rec = self.get_recommendations(playlists, user)
|
||||||
|
return {
|
||||||
|
'user': {
|
||||||
|
'keywords': user.get('keywords', {})
|
||||||
|
},
|
||||||
|
'videos': rec["videos"],
|
||||||
|
"_debug": {
|
||||||
|
"top_user_keywords": top_user_keywords,
|
||||||
|
"top_user_characters": top_user_characters,
|
||||||
|
"top_user_playlists": rec["top_user_playlists"],
|
||||||
|
"top_global_playlists": rec["top_global_playlists"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if play_index is None:
|
if play_index is None:
|
||||||
video_indx = list(range(video_num))
|
video_indx = list(range(video_num))
|
||||||
random.shuffle(video_indx)
|
random.shuffle(video_indx)
|
||||||
else:
|
else:
|
||||||
# played index is excluded from the random shuffle and deterministically added to staySame pool.
|
# play index is excluded from the random shuffle and deterministically added to staySame pool.
|
||||||
video_indx = [*range(play_index)]+[*range(play_index+1,video_num)]
|
video_indx = [*range(play_index)]+[*range(play_index+1,video_num)]
|
||||||
random.shuffle(video_indx)
|
random.shuffle(video_indx)
|
||||||
video_indx.append(play_index)
|
video_indx.append(play_index)
|
||||||
|
@ -136,76 +181,132 @@ class Engine:
|
||||||
stay_same_index = video_indx[nc+np:]
|
stay_same_index = video_indx[nc+np:]
|
||||||
|
|
||||||
rec_list = []
|
rec_list = []
|
||||||
# select next clip for nextClip pool except when the playlist has only one clip.
|
|
||||||
|
|
||||||
|
# nextClip pool: select next clip except when the playlist has only one clip. skip the clip with "pass":True when selecting the next clip.
|
||||||
for i in next_clip_index:
|
for i in next_clip_index:
|
||||||
if prev_grid_list[i].get('playlist') is None:
|
# add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated due to update_user_playlists().
|
||||||
# add this to deal with the absence of "playlist" data in old grid event.
|
if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]:
|
||||||
# If there's no playlist data recorded, add the nextClip pool to nextPlaylist pool.
|
next_playlist_index.append(i)
|
||||||
next_playlist_index.append(next_clip_index)
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
# if "playlist" and "playlistPosition" (defaulting to 0 when absent) exist in the grid event
|
for playlist in playlists:
|
||||||
for playlist in self.playlists:
|
|
||||||
if playlist.get('name')== prev_grid_list[i].get('playlist'):
|
if playlist.get('name')== prev_grid_list[i].get('playlist'):
|
||||||
|
unwatched_clips_indx = [j for j in range(len(playlist["clips"])) if not playlist["clips"][j].get("pass")]
|
||||||
if len(playlist["clips"]) == 1:
|
if len(playlist["clips"]) == 1:
|
||||||
next_playlist_index.append(i)
|
next_playlist_index.append(i)
|
||||||
break
|
|
||||||
# Discuss how this should behave: should it switch to a new playlist if it is already at the end of the playlist's clip sequence?
|
|
||||||
elif prev_grid_list[i].get('playlistPosition', 0) + 1 == len(playlist['clips']):
|
|
||||||
playlist_pos = 0
|
|
||||||
else:
|
else:
|
||||||
playlist_pos = prev_grid_list[i].get('playlistPosition', 0) + 1
|
next_unwatched_indx = [j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']]
|
||||||
|
if len(next_unwatched_indx) == 0:
|
||||||
|
if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']:
|
||||||
|
playlist_pos = unwatched_clips_indx[0]
|
||||||
|
else:
|
||||||
|
next_playlist_index.append(i)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
playlist_pos = next_unwatched_indx[0]
|
||||||
|
|
||||||
rec_list.append((i, {
|
rec_list.append((i, {
|
||||||
'clips': playlist['clips'],
|
'clips': playlist['clips'],
|
||||||
# 'position': random.randrange(len(playlist['clips'])),
|
'position': playlist_pos,
|
||||||
'position': playlist_pos,
|
'name': playlist['name'],
|
||||||
'name': playlist['name'],
|
'tags': playlist['tags']
|
||||||
'tags': playlist['tags'],
|
}))
|
||||||
}))
|
|
||||||
|
|
||||||
# randomly select playlists (excluding the playlists from the current grid once "playlist" is recorded for grid events)
|
debug_index_output["next_clip"].append((i,playlist['name']))
|
||||||
# for nextPlaylist pool.
|
|
||||||
|
|
||||||
|
#staySame pool
|
||||||
|
for i in stay_same_index:
|
||||||
|
# add this to deal with the absence of "playlist" data in old grid event or the case where the playlist has been eliminated due to update_user_playlists().
|
||||||
|
if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]:
|
||||||
|
next_playlist_index.append(i)
|
||||||
|
else:
|
||||||
|
rec_list.append((i,{}))
|
||||||
|
debug_index_output["stay_same"].append(i)
|
||||||
|
|
||||||
|
# nextPlaylist pool: randomly select playlists (excluding the playlists from the current grid).
|
||||||
vids_exclude = [e.get("playlist") for e in prev_grid_list]
|
vids_exclude = [e.get("playlist") for e in prev_grid_list]
|
||||||
while None in vids_exclude:
|
while None in vids_exclude:
|
||||||
vids_exclude.remove(None)
|
vids_exclude.remove(None)
|
||||||
video = self.get_recommendations(user, vids_exclude)
|
rec = self.get_recommendations(playlists, user, vids_exclude)
|
||||||
rec_list += [(i, video[i]) for i in next_playlist_index]
|
rec_list += [(i, rec['videos'][i]) for i in next_playlist_index]
|
||||||
|
debug_index_output["new_playlist"] = [(i, rec['videos'][i]["name"]) for i in next_playlist_index]
|
||||||
|
|
||||||
#staySame pool
|
|
||||||
rec_list += [(i,{}) for i in stay_same_index]
|
|
||||||
|
|
||||||
rec_list = sorted(rec_list, key=lambda k:k[0])
|
rec_list = sorted(rec_list, key=lambda k:k[0])
|
||||||
return [e[1] for e in rec_list]
|
videos_ = [e[1] for e in rec_list]
|
||||||
|
return {
|
||||||
|
'user': {
|
||||||
|
'keywords': user.get('keywords', {})
|
||||||
|
},
|
||||||
|
'videos': videos_,
|
||||||
|
"_debug": {
|
||||||
|
"top_user_keywords": top_user_keywords, # list of (keyword, score)
|
||||||
|
"top_user_characters": top_user_characters, # list of (keyword, score)
|
||||||
|
"top_user_playlists": rec["top_user_playlists"], # list of (playlist name, score)
|
||||||
|
"top_global_playlists": rec["top_global_playlists"], # list of (playlist name, score)
|
||||||
|
"stay_same_index": debug_index_output["stay_same"], # list of integers
|
||||||
|
"next_clip_index": debug_index_output["next_clip"], # list of (integer, playlist name)
|
||||||
|
"new_playlist_index": debug_index_output["new_playlist"] # list of (integer, playlist name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_recommendations(self, user, vids_exclude = []):
|
def get_recommendations(self, playlists, user, vids_exclude = []):
|
||||||
channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
|
channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
|
||||||
sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
|
sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
|
||||||
gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
|
gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
|
||||||
|
userKeywordsWeights = {k: v.get('value', 1) for k, v in self.state['userKeywordsWeights'].items()}
|
||||||
|
|
||||||
# Exclude playlists from the most recent grid
|
# Exclude playlists from the most recent grid
|
||||||
playlists = copy.deepcopy(self.playlists)
|
|
||||||
if len(vids_exclude) > 0:
|
if len(vids_exclude) > 0:
|
||||||
for playlist in playlists:
|
for playlist in playlists:
|
||||||
if playlist["name"] in vids_exclude:
|
if playlist["name"] in vids_exclude:
|
||||||
playlists.remove(playlist)
|
playlists.remove(playlist)
|
||||||
|
|
||||||
# For each playlist, compute user keyword score
|
# Generate random weights if random option is chosen in the dashboard:
|
||||||
user_keywords = user.get('keywords', {})
|
if userKeywordsWeights.get('random'):
|
||||||
|
themeWeights = random.random()
|
||||||
|
charWeights = 1-themeWeights
|
||||||
|
else:
|
||||||
|
themeWeights = userKeywordsWeights['themeTags']
|
||||||
|
charWeights = userKeywordsWeights['characterTags']
|
||||||
|
|
||||||
|
# For each playlist, compute user keyword score by theme and character tags
|
||||||
|
user_keywords = copy.deepcopy(user.get('keywords', {}))
|
||||||
|
theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()}
|
||||||
|
character_tags = {k:v for k,v in user_keywords.items() if k.isupper()}
|
||||||
|
# manually modify some of the user keywords to match the playlist tags
|
||||||
|
theme_tags["god"] = theme_tags.get("god - gods",0)
|
||||||
|
theme_tags["visionary"] = theme_tags.get("visionary - enlightenment",0)
|
||||||
|
theme_tags["enlightenment"] = theme_tags.get("visionary - enlightenment",0)
|
||||||
|
character_tags["FEDOR MIKHAILOVICH SOFRONOV"] = character_tags.get("FYODOR MIKHAILOVICH SOFRONOV",0)
|
||||||
|
character_tags["SHKABARNYA OLGA SERGEEVNA"] = character_tags.get("OLGA SERGEEVNA SHKABARNYA",0)
|
||||||
|
character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA",0)
|
||||||
score = {}
|
score = {}
|
||||||
for playlist in playlists:
|
for playlist in playlists:
|
||||||
score[playlist['name']] = random.random()
|
score[playlist['name']] = random.random() * 0.1
|
||||||
for tag in [tag for tag in playlist['tags'] if tag in user_keywords]:
|
for tag in playlist['tags']:
|
||||||
score[playlist['name']] += user_keywords[tag]
|
if tag in theme_tags:
|
||||||
|
score[playlist['name']] += theme_tags[tag] * themeWeights
|
||||||
|
elif tag in character_tags:
|
||||||
|
score[playlist['name']] += character_tags[tag] * charWeights
|
||||||
# Select highest scoring playlists
|
# Select highest scoring playlists
|
||||||
playlists = sorted(
|
playlists = sorted(
|
||||||
playlists,
|
playlists,
|
||||||
key=lambda playlist: -score[playlist['name']]
|
key=lambda playlist: -score[playlist['name']]
|
||||||
)
|
)
|
||||||
|
# Record the following for debug view input
|
||||||
|
top_user_playlists = [(playlist['name'], score[playlist['name']]) for playlist in playlists[:channels['userKeywords']]]
|
||||||
|
# top_user_playlists = [{
|
||||||
|
# 'name': playlist['name'],
|
||||||
|
# 'tags': playlist['tags'],
|
||||||
|
# 'score': score[playlist['name']],
|
||||||
|
# } for playlist in playlists[:channels['userKeywords']]]
|
||||||
|
|
||||||
videos = playlists[:channels['userKeywords']]
|
videos = playlists[:channels['userKeywords']]
|
||||||
playlists = playlists[channels['userKeywords']:]
|
playlists = playlists[channels['userKeywords']:]
|
||||||
# For each playlist, compute global keyword score
|
# For each playlist, compute global keyword score
|
||||||
score = {}
|
score = {}
|
||||||
for playlist in playlists:
|
for playlist in playlists:
|
||||||
score[playlist['name']] = random.random()
|
score[playlist['name']] = random.random()
|
||||||
|
@ -216,46 +317,43 @@ class Engine:
|
||||||
playlists,
|
playlists,
|
||||||
key=lambda playlist: -score[playlist['name']]
|
key=lambda playlist: -score[playlist['name']]
|
||||||
)
|
)
|
||||||
videos += playlists[:channels['globalKeywords']]
|
# Record the following for debug view input
|
||||||
playlists = playlists[channels['globalKeywords']:]
|
top_global_playlists = [(playlist['name'], score[playlist['name']]) for playlist in playlists[:channels['globalKeywords']]]
|
||||||
# Count products the user has seen
|
# top_global_playlists = [{
|
||||||
count = defaultdict(lambda: 0)
|
# 'name': playlist['name'],
|
||||||
for event in user.get('events', []):
|
# 'tags': playlist['tags'],
|
||||||
if event.get('data', {}).get('product'):
|
# 'score': score[playlist['name']],
|
||||||
count[event['data']['product']] += 1
|
# } for playlist in playlists[:channels['globalKeywords']]]
|
||||||
# For each product in playlist tags, increment score by count
|
|
||||||
for playlist in playlists:
|
videos += playlists[:16 - channels['userKeywords']]
|
||||||
score[playlist['name']] = random.random()
|
|
||||||
for tag in set(playlist['tags']) & set(count):
|
|
||||||
score[playlist['name']] += count[tag]
|
|
||||||
# Select highest scoring playlists
|
|
||||||
videos += sorted(
|
|
||||||
playlists,
|
|
||||||
key=lambda playlist: -score[playlist['name']]
|
|
||||||
)[:16 - channels['userKeywords'] - channels['globalKeywords']]
|
|
||||||
# Shuffle playlists (randomize layout) and shift clips (randomize start)
|
# Shuffle playlists (randomize layout) and shift clips (randomize start)
|
||||||
random.shuffle(videos)
|
random.shuffle(videos)
|
||||||
return [{
|
return {
|
||||||
'clips': video['clips'],
|
'videos': [{
|
||||||
'position': random.randrange(len(video['clips'])),
|
'clips': video['clips'],
|
||||||
'name': video['name'],
|
'position': random.choice([i for i in range(len(video["clips"])) if not video["clips"][i].get("pass")]),
|
||||||
'tags': video['tags'],
|
'name': video['name'],
|
||||||
} for video in videos]
|
'tags': video['tags'],
|
||||||
|
} for video in videos],
|
||||||
|
"top_user_playlists":top_user_playlists,
|
||||||
|
"top_global_playlists": top_global_playlists
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def update_user_playlists(playlists, user, watch_cutoff = 0.9):
|
def update_user_playlists(self, user, watch_cutoff = 0.9):
|
||||||
# Output: playlists with updated in/out time of clips that have been watched.
|
# Output: playlists with updated in/out times for clips that have been watched, as well as "pass" indicators for the clips that have been watched for more than watch_cutoff.
|
||||||
# Watched is defined as a video being played in full screen.
|
# Watched is defined as a video being played in full screen.
|
||||||
# "watch_cutoff" parameter: the portion of the clip duration to be determined as watched the whole clip. should be [0,1]
|
# "watch_cutoff" parameter: the portion of the clip duration to be determined as watched the whole clip. should be [0,1]
|
||||||
# + check (play, pause) pairs and eliminate unusual cases most likely due to a bug.
|
# + check (play, pause) pairs and eliminate unusual cases most likely due to a bug.
|
||||||
# + If (play, pause) pairs exceed XX(80-90?) percent of the clip length, eliminate the clip from the playlist.
|
# + If (play, pause) pairs exceed XX(80-90?) percent of the clip length, add "pass": True to the clip.
|
||||||
# + Otherwise, find the last pause position of a clip and record it as "in" position of the clip.
|
# + Otherwise, find the last pause position of a clip and record it as "in" position of the clip.
|
||||||
# + If the clips are all eliminated from a playlist, eliminate the playlist.
|
# + If all clips in a playlist are marked as "pass", eliminate the playlist from the user playlists.
|
||||||
|
playlists = copy.deepcopy(self.playlists)
|
||||||
play = {}
|
play = {}
|
||||||
watched = []
|
|
||||||
clip_max_dur = 10800 # = 3 hours; arbitrary max duration allowed for (pause time - play time) to detect outlier/bugs
|
clip_max_dur = 10800 # = 3 hours; arbitrary max duration allowed for (pause time - play time) to detect outlier/bugs
|
||||||
# The current max time of a clip duration is 10379.383333377269 from "DDLaunch: Erik Verlinde, Gravity as an emergent force (1956)"
|
# The current max time of a clip duration is 10379.383333377269 from "DDLaunch: Erik Verlinde, Gravity as an emergent force (1956)"
|
||||||
for event in user["events"][::-1]:
|
# A user could potentially spend more than 3 hours if they keep watching after the clip enters into the subsequent "scene"
|
||||||
|
for event in user.get('events', [])[::-1]:
|
||||||
if event["event"] == "play" and event["data"].get("type") == "video":
|
if event["event"] == "play" and event["data"].get("type") == "video":
|
||||||
play = event
|
play = event
|
||||||
elif event["event"] == "pause" and play!={} and event["data"].get("type") == "video":
|
elif event["event"] == "pause" and play!={} and event["data"].get("type") == "video":
|
||||||
|
@ -270,28 +368,37 @@ class Engine:
|
||||||
if play["data"]["position"] >= max(playlist["clips"][i]["in"] - 15, 0) and event["data"]["position"] <= playlist["clips"][i]["out"] + 15:
|
if play["data"]["position"] >= max(playlist["clips"][i]["in"] - 15, 0) and event["data"]["position"] <= playlist["clips"][i]["out"] + 15:
|
||||||
# This assumes the (play, pause) fits inside the clip's (in, out) segment with +/- 15secs buffer. There were newer edits of clip positions with 12 seconds difference.
|
# This assumes the (play, pause) fits inside the clip's (in, out) segment with +/- 15secs buffer. There were newer edits of clip positions with 12 seconds difference.
|
||||||
# instances where this might not be the case: clip in/out may be largely edited (before after edit inconsistency); skip may trigger jump to a wrong clip (bug)
|
# instances where this might not be the case: clip in/out may be largely edited (before after edit inconsistency); skip may trigger jump to a wrong clip (bug)
|
||||||
if event["data"]["position"] >= ((playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]):
|
if "orig_in" not in playlist["clips"][i]:
|
||||||
watched.append((playlist["name"],i))
|
cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["in"])*watch_cutoff + playlist["clips"][i]["in"]
|
||||||
else:
|
else:
|
||||||
|
cutoff_pos = (playlist["clips"][i]["out"]-playlist["clips"][i]["orig_in"])*watch_cutoff + playlist["clips"][i]["orig_in"]
|
||||||
|
|
||||||
|
if event["data"]["position"] >= cutoff_pos:
|
||||||
|
playlist["clips"][i]["pass"] = True
|
||||||
|
|
||||||
|
else:
|
||||||
|
if "orig_in" not in playlist["clips"][i]:
|
||||||
|
# record the original "in" position to calculate cutoff position in the future
|
||||||
|
playlist["clips"][i]["orig_in"] = playlist["clips"][i]["in"]
|
||||||
|
# update "in" position of the clip in the playlist
|
||||||
playlist["clips"][i]["in"] = event["data"]["position"]
|
playlist["clips"][i]["in"] = event["data"]["position"]
|
||||||
break
|
break
|
||||||
play = {}
|
play = {}
|
||||||
|
|
||||||
d_watched = defaultdict(set)
|
for playlist in playlists.copy():
|
||||||
for k, v in watched:
|
unwatched = [clip for clip in playlist["clips"] if not clip.get("pass")]
|
||||||
d_watched[k].add(v)
|
if not unwatched:
|
||||||
for k, v in d_watched.items():
|
playlists.remove(playlist)
|
||||||
for playlist in playlists:
|
# If fewer than 30 playlists remain, reset to a fresh copy of the original playlists.
|
||||||
if playlist["name"] == k:
|
if len(playlists) < 30:
|
||||||
if len(v) == len(playlist["clips"]):
|
playlists = copy.deepcopy(self.playlists)
|
||||||
playlists.remove(playlist)
|
|
||||||
else:
|
|
||||||
playlist["clips"] = [playlist["clips"][i] for i in range(len(playlist["clips"])) if i not in v]
|
|
||||||
break
|
|
||||||
return(playlists)
|
return(playlists)
|
||||||
|
|
||||||
|
|
||||||
def get_next(self, user, position):
|
def get_next(self, user, position):
|
||||||
|
# Build a per-user copy of self.playlists that reflects the user's log history
|
||||||
|
playlists = self.update_user_playlists(user)
|
||||||
|
|
||||||
grid_events = {}
|
grid_events = {}
|
||||||
video_num = 16
|
video_num = 16
|
||||||
|
|
||||||
|
@ -302,8 +409,8 @@ class Engine:
|
||||||
break
|
break
|
||||||
prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index'])
|
prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k:k['index'])
|
||||||
vids_exclude = [e.get("playlist") for e in prev_grid_list]
|
vids_exclude = [e.get("playlist") for e in prev_grid_list]
|
||||||
video = self.get_recommendations(user, vids_exclude)[position]
|
rec = self.get_recommendations(playlists, user, vids_exclude)
|
||||||
return video
|
return rec["videos"][position]
|
||||||
|
|
||||||
def update_state(self, data):
|
def update_state(self, data):
|
||||||
for key in data:
|
for key in data:
|
||||||
|
@ -365,10 +472,15 @@ class Engine:
|
||||||
f.write(json.dumps(videos, indent=4, sort_keys=True))
|
f.write(json.dumps(videos, indent=4, sort_keys=True))
|
||||||
# Get video order
|
# Get video order
|
||||||
order = {video['id']: int(video['order']) for video in videos}
|
order = {video['id']: int(video['order']) for video in videos}
|
||||||
|
code = {video['id']: video['code'] for video in videos}
|
||||||
# Sort clips
|
# Sort clips
|
||||||
clips = sorted(
|
clips = sorted(
|
||||||
clips,
|
clips,
|
||||||
key=lambda clip: (order[clip['id'].split('/')[0]], clip['in'])
|
key=lambda clip: (
|
||||||
|
order[clip['id'].split('/')[0]],
|
||||||
|
ox.sort_string(code[clip['id'].split('/')[0]]),
|
||||||
|
clip['in']
|
||||||
|
)
|
||||||
)
|
)
|
||||||
# Get and cache playlists
|
# Get and cache playlists
|
||||||
self.playlists = [playlist for playlist in [{
|
self.playlists = [playlist for playlist in [{
|
||||||
|
@ -377,13 +489,13 @@ class Engine:
|
||||||
'tags': storyline['tags'],
|
'tags': storyline['tags'],
|
||||||
'clips': [{
|
'clips': [{
|
||||||
'item': clip['id'].split('/')[0],
|
'item': clip['id'].split('/')[0],
|
||||||
'id': clip['id'],
|
'id': '%s_%0.3f-%0.3f' % (clip['id'].split('/')[0], clip['in'], clip['out']),
|
||||||
'in': clip['in'],
|
'in': clip['in'],
|
||||||
'out': clip['out']
|
'out': clip['out']
|
||||||
} for clip in clips if clip['value'] == storyline['name']]
|
} for clip in clips if clip['value'] == storyline['name']]
|
||||||
} for storyline in storylines] if playlist['clips']]
|
} for storyline in storylines] if playlist['clips']]
|
||||||
with open(os.path.join(self.path, 'playlists.json'), 'w') as f:
|
with open(os.path.join(self.path, 'playlists.json'), 'w') as f:
|
||||||
f.write(json.dumps(self.playlists, indent=4, sort_keys=True))
|
f.write(json.dumps(self.playlists, indent=4, sort_keys=True, ensure_ascii=False))
|
||||||
self.update_keywords()
|
self.update_keywords()
|
||||||
|
|
||||||
def update_keywords(self):
|
def update_keywords(self):
|
||||||
|
|
Loading…
Reference in a new issue