From 024c1008fb47aa3e374f84878b643fb5afce7c8b Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Wed, 26 Sep 2018 22:14:58 +0200 Subject: [PATCH 1/2] implemented user keyword score ranking --- recommendation_engine.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 55fbe53..173550f 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -43,9 +43,9 @@ class Engine: else: self.state = { 'channels': { - 'globalKeywords': {'locked': False, 'value': 7}, - 'userKeywords': {'locked': False, 'value': 7}, - 'screenings': {'locked': True, 'value': 2} + 'globalKeywords': {'locked': False, 'value': 8}, + 'userKeywords': {'locked': False, 'value': 8} + # 'screenings': {'locked': True, 'value': 2} }, 'globalKeywords': {}, } @@ -55,6 +55,11 @@ class Engine: 'nextPlaylist': {'locked': False, 'value': 4}, 'staySame': {'locked': False, 'value': 8} } + if 'userKeywordsWeights' not in self.state: + self.state['userKeywordsWeights'] = { + 'themeTags': {'locked': False, 'value': 0.3}, + 'characterTags': {'locked': False, 'value': 0.7} + } self.update_keywords() @property @@ -203,6 +208,7 @@ class Engine: channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()} + userKeywordsWeights = {k: v.get('value', 1) for k, v in self.state['userKeywordsWeights'].items()} # Exclude playlists from the most recent grid playlists = copy.deepcopy(self.playlists) @@ -211,13 +217,26 @@ class Engine: if playlist["name"] in vids_exclude: playlists.remove(playlist) - # For each playlist, compute user keyword score - user_keywords = user.get('keywords', {}) + # For each playlist, compute user keyword score by theme and character tags + user_keywords = copy.deepcopy(user.get('keywords', {})) + theme_tags = {k.lower():v for k,v in user_keywords.items() if not k.isupper()} + character_tags = {k:v for k,v in user_keywords.items() if k.isupper()} + # manually modify some of the user keywords to match the playlist tags + theme_tags["god"] = theme_tags.get("god - gods",0) + theme_tags["visionary"] = theme_tags.get("visionary - enlightenment",0) + theme_tags["enlightenment"] = theme_tags.get("visionary - enlightenment",0) + character_tags["FEDOR MIKHAILOVICH SOFRONOV"] = character_tags.get("FYODOR MIKHAILOVICH SOFRONOV",0) + character_tags["SHKABARNYA OLGA SERGEEVNA"] = character_tags.get("OLGA SERGEEVNA SHKABARNYA",0) + character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA",0) + score = {} for playlist in playlists: - score[playlist['name']] = random.random() - for tag in [tag for tag in playlist['tags'] if tag in user_keywords]: - score[playlist['name']] += user_keywords[tag] + score[playlist['name']] = random.random() * 0.001 + for tag in playlist['tags']: + if tag in theme_tags: + score[playlist['name']] += theme_tags[tag] * userKeywordsWeights["themeTags"] + elif tag in character_tags: + score[playlist['name']] += character_tags[tag] * userKeywordsWeights["characterTags"] # Select highest scoring playlists playlists = sorted( playlists, From 81cfe9c9d82b699a055534d874252149a7c8bd80 Mon Sep 17 00:00:00 2001 From: pythagoraswitch Date: Wed, 26 Sep 2018 23:56:29 +0200 Subject: [PATCH 2/2] removed the outdated portion: screening parameter etc --- recommendation_engine.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/recommendation_engine.py b/recommendation_engine.py index 173550f..b3d9e4f 100644 --- a/recommendation_engine.py +++ b/recommendation_engine.py @@ -45,7 +45,6 @@ class Engine: 'channels': { 'globalKeywords': {'locked': False, 'value': 8}, 'userKeywords': {'locked': False, 'value': 8} - # 'screenings': {'locked': True, 'value': 2} }, 'globalKeywords': {}, } @@ -202,8 +201,7 @@ class Engine: } -# NOTE for future improvement: vids_exclude element unit could be clip or in/out time pairs, rather than playlist. -# The same playlist could be played in the grid view as long as these are differenct clips or separate times. + def get_recommendations(self, user, vids_exclude = []): channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()} sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()} @@ -255,23 +253,7 @@ class Engine: playlists, key=lambda playlist: -score[playlist['name']] ) - videos += playlists[:channels['globalKeywords']] - playlists = playlists[channels['globalKeywords']:] - # Count products the user has seen - count = defaultdict(lambda: 0) - for event in user.get('events', []): - if event.get('data', {}).get('product'): - count[event['data']['product']] += 1 - # For each product in playlist tags, increment score by count - for playlist in playlists: - score[playlist['name']] = random.random() - for tag in set(playlist['tags']) & set(count): - score[playlist['name']] += count[tag] - # Select highest scoring playlists - videos += sorted( - playlists, - key=lambda playlist: -score[playlist['name']] - )[:16 - channels['userKeywords'] - channels['globalKeywords']] + videos += playlists[:16 - channels['userKeywords']] # Shuffle playlists (randomize layout) and shift clips (randomize start) random.shuffle(videos) return [{