From 6899cc5d37f65ce72336a38c3a76b74a98e996d2 Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Sat, 1 Dec 2018 10:11:16 +0100 Subject: [PATCH 1/3] added random option for userKeywordWeights --- recommendation_engine.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index db47155..62827da 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -54,14 +54,15 @@ class Engine: } if 'gridChange' not in self.state: self.state['gridChange'] = { - 'nextClip': {'locked': True, 'value': 4}, - 'nextPlaylist': {'locked': False, 'value': 4}, - 'staySame': {'locked': False, 'value': 8} + 'nextClip': {'locked': False, 'value': 5}, + 'nextPlaylist': {'locked': False, 'value': 8}, + 'staySame': {'locked': True, 'value': 3} } if 'userKeywordsWeights' not in self.state: self.state['userKeywordsWeights'] = { 'themeTags': {'locked': False, 'value': 0.3}, - 'characterTags': {'locked': False, 'value': 0.7} + 'characterTags': {'locked': False, 'value': 0.7}, + 'random' : {'locked': False, 'value': True} } self.update_keywords() @@ -253,6 +254,14 @@ class Engine: if playlist["name"] in vids_exclude: playlists.remove(playlist) + # Generate random weights if random option is chosen in the dashboard: + if userKeywordsWeights['random']: + themeWeights = random.random() + charWeights = 1-themeWeights + else: + themeWeights = userKeywordsWeights['themeTags'] + charWeights = userKeywordsWeights['characterTags'] + # For each playlist, compute user keyword score by theme and character tags user_keywords = copy.deepcopy(user.get('keywords', {})) theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()} @@ -269,9 +278,9 @@ class Engine: score[playlist['name']] = random.random() * 0.001 for tag in playlist['tags']: if tag in theme_tags: - score[playlist['name']] += theme_tags[tag] * userKeywordsWeights["themeTags"] + score[playlist['name']] += theme_tags[tag] * themeWeights elif tag in character_tags: - score[playlist['name']] += character_tags[tag] * userKeywordsWeights["characterTags"] + score[playlist['name']] += character_tags[tag] * charWeights # Select highest scoring playlists playlists = sorted( playlists, From d96edf480fe9aefb6a89497aad3401ee00e7a8bd Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Sat, 1 Dec 2018 11:00:46 +0100 Subject: [PATCH 2/3] changed debug output for dd-re; added stay_same_index etc --- recommendation_engine.py | 44 ++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 62827da..5759b3d 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -102,12 +102,14 @@ class Engine: # Update self_playlists to reflect user log history playlists = self.update_user_playlists(user) + # Get the user keyword scores for debug view user_keywords = copy.deepcopy(user.get('keywords', {})) theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()} character_tags = {k:v for k,v in user_keywords.items() if k.isupper()} - top_user_keywords = sorted([(k,v) for (k,v) in theme_tags.items()], key=lambda kv: kv[1])[-5:] - top_user_characters = sorted([(k,v) for (k,v) in character_tags.items()], key=lambda kv: kv[1])[-5:] + top_user_keywords = sorted([(k,v) for (k,v) in theme_tags.items()], key=lambda kv: kv[1])[-10:] + top_user_characters = sorted([(k,v) for (k,v) in character_tags.items()], key=lambda kv: kv[1])[-10:] + debug_index_output = defaultdict(list) # If the most recent event is "login," initialize grid videos. if user.get('events', [{}])[0].get("event")=="login": @@ -201,7 +203,6 @@ class Engine: else: playlist_pos = next_unwatched_indx[0] - rec_list.append((i, { 'clips': playlist['clips'], 'position': playlist_pos, @@ -209,6 +210,8 @@ class Engine: 'tags': playlist['tags'] })) + debug_index_output["next_clip"].append((i,playlist['name'])) + #staySame pool for i in stay_same_index: @@ -217,6 +220,7 @@ class Engine: next_playlist_index.append(i) else: rec_list.append((i,{})) + debug_index_output["stay_same"].append(i) # nextPlaylist pool: randomly select playlists (excluding the playlists from the current grid). vids_exclude = [e.get("playlist") for e in prev_grid_list] @@ -224,6 +228,7 @@ class Engine: vids_exclude.remove(None) rec = self.get_recommendations(playlists, user, vids_exclude) rec_list += [(i, rec['videos'][i]) for i in next_playlist_index] + debug_index_output["new_playlist"] = [(i, rec['videos'][i]["name"]) for i in next_playlist_index] rec_list = sorted(rec_list, key=lambda k:k[0]) @@ -234,10 +239,13 @@ class Engine: }, 'videos': videos_, "_debug": { - "top_user_keywords": top_user_keywords, - "top_user_characters": top_user_characters, - "top_user_playlists": rec["top_user_playlists"], - "top_global_playlists": rec["top_global_playlists"] + "top_user_keywords": top_user_keywords, # list of (keyword, score) + "top_user_characters": top_user_characters, # list of (keyword, score) + "top_user_playlists": rec["top_user_playlists"], # list of (playlist name, score) + "top_global_playlists": rec["top_global_playlists"], # list of (playlist name, score) + "stay_same_index": debug_index_output["stay_same"], # list of integers + "next_clip_index": debug_index_output["next_clip"], # list of (integer, playlist name) + "new_playlist_index": debug_index_output["new_playlist"] # list of (integer, playlist name) } } @@ -287,11 +295,12 @@ class Engine: key=lambda playlist: -score[playlist['name']] ) # Record the following for debug view input - top_user_playlists = [{ - 'name': playlist['name'], - 'tags': playlist['tags'], - 'score': score[playlist['name']], - } for playlist in playlists[:channels['userKeywords']]] + top_user_playlists = [(playlist['name'], score[playlist['name']]) for playlist in playlists[:channels['userKeywords']]] + # top_user_playlists = [{ + # 'name': playlist['name'], + # 'tags': playlist['tags'], + # 'score': score[playlist['name']], + # } for playlist in playlists[:channels['userKeywords']]] videos = playlists[:channels['userKeywords']] playlists = playlists[channels['userKeywords']:] @@ -307,11 +316,12 @@ class Engine: key=lambda playlist: -score[playlist['name']] ) # Record the following for debug view input - top_global_playlists = [{ - 'name': playlist['name'], - 'tags': playlist['tags'], - 'score': score[playlist['name']], - } for playlist in playlists[:channels['globalKeywords']]] + top_global_playlists = [(playlist['name'], score[playlist['name']]) for playlist in playlists[:channels['globalKeywords']]] + # top_global_playlists = [{ + # 'name': playlist['name'], + # 'tags': playlist['tags'], + # 'score': score[playlist['name']], + # } for playlist in playlists[:channels['globalKeywords']]] videos += playlists[:16 - channels['userKeywords']] # Shuffle playlists (randomize layout) and shift clips (randomize start) From 8c618ab9888354ca287ff609ab4b79f6291de112 Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Sat, 1 Dec 2018 11:02:36 +0100 Subject: [PATCH 3/3] changed the magnitude of random bias in user keyword ranking --- recommendation_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 5759b3d..00cd055 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -283,7 +283,7 @@ class Engine: character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA",0) score = {} for playlist in playlists: - score[playlist['name']] = random.random() * 0.001 + score[playlist['name']] = random.random() * 0.1 for tag in playlist['tags']: if tag in theme_tags: score[playlist['name']] += theme_tags[tag] * themeWeights