'''
Recommendation Engine Example
1 Nov 2017, 0x2620
'''

from collections import Counter, defaultdict
import json
import os
import random

import ox

from utils import run_async


class Engine:
    '''
    Builds playlists from pan.do/ra storyline annotations, caches them as
    JSON files below ``path``, and selects playlists for a given user.
    '''

    def __init__(self, path, **kwargs):
        '''
        Create an engine and load previously cached playlists, if any.

        path: directory holding the cache files ('playlists.json' and
            'videos.json').
        kwargs: optional 'pandora' (API URL), 'username' and 'password';
            each falls back to the defaults below.

        Note: constructing the Pandora wrapper signs in to the API.
        '''
        self.path = path
        self.pandora = Pandora(
            url=kwargs.get('pandora', 'http://pandora.dmp/api/'),
            username=kwargs.get('username', 'dd.re'),
            password=kwargs.get('password', 'dd.re')
        )
        self.playlists = self._load_json('playlists.json', [])

    def _load_json(self, name, default):
        '''Return the parsed cache file ``name``, or ``default`` if missing.'''
        filename = os.path.join(self.path, name)
        if not os.path.exists(filename):
            return default
        with open(filename) as f:
            return json.loads(f.read())

    def _save_json(self, name, data):
        '''Write ``data`` to the cache file ``name`` as indented JSON.'''
        # Create the cache directory on first use, so that a fresh checkout
        # does not fail when the first cache file is written.
        if self.path:
            os.makedirs(self.path, exist_ok=True)
        with open(os.path.join(self.path, name), 'w') as f:
            f.write(json.dumps(data, indent=4, sort_keys=True))

    def _patch_clips(self, clips):
        '''
        Rewrite the 'out' point of every clip, in place, and return ``clips``.

        Within each video (the part of the clip id before the first '/'),
        a clip ends where the next clip, by in-point, begins. The last clip
        of a video ends at the video's duration, which is fetched from
        pandora.
        '''
        inpoints = defaultdict(list)
        for index, clip in enumerate(clips):
            video_id = clip['id'].split('/')[0]
            inpoints[video_id].append({
                'index': index,
                'position': clip['in']
            })
        for video_id, video_inpoints in inpoints.items():
            ordered = sorted(
                video_inpoints, key=lambda inpoint: inpoint['position']
            )
            # The successor must be taken from the sorted list: looking it
            # up in the unsorted list yields wrong out-points whenever the
            # clips of a video are not already ordered by position.
            for inpoint, following in zip(ordered, ordered[1:]):
                clips[inpoint['index']]['out'] = following['position']
            # Every video has at least one in-point, so ordered[-1] exists.
            clips[ordered[-1]['index']]['out'] = self.pandora.get(
                video_id, ['duration']
            )['duration']
        return clips

    def get_videos(self, user):
        '''
        Return a shuffled selection of at most 16 playlists for ``user``.

        The first channels['keywords'] playlists are the ones scoring
        highest on the slider tags; the rest are picked from the remaining
        playlists, scored by how often each of their non-slider tags occurs
        as event['data']['product'] in user['events']. Every score starts
        from a random value in [0, 1).

        Each returned item has the keys 'clips', 'position' (a random index
        into the clips) and 'name'.
        '''
        # Only channels['keywords'] is read below; the other two entries
        # are currently unused.
        channels = {'keywords': 7, 'screenings': 7, 'random': 2}
        sliders = {'dau': -1, 'physics': 0, 'sex': 1}
        total = 16
        # For each playlist, compute keyword score
        score = {}
        for playlist in self.playlists:
            value = random.random()
            for tag in playlist['tags']:
                if tag in sliders:
                    value += sliders[tag]
            score[playlist['name']] = value
        # Select highest scoring playlists
        playlists = sorted(
            self.playlists,
            key=lambda playlist: score[playlist['name']],
            reverse=True
        )
        videos = playlists[:channels['keywords']]
        playlists = playlists[channels['keywords']:]
        # Count tags for the user
        count = Counter(
            event['data']['product'] for event in user.get('events', [])
        )
        # For each tag in playlist, increment score by count
        for playlist in playlists:
            value = random.random()
            for tag in playlist['tags']:
                if tag not in sliders:
                    value += count[tag]
            score[playlist['name']] = value
        # Select highest scoring playlists
        videos += sorted(
            playlists,
            key=lambda playlist: score[playlist['name']],
            reverse=True
        )[:total - channels['keywords']]
        # Shuffle playlists (randomize layout) and shift clips (randomize start)
        random.shuffle(videos)
        return [{
            'clips': video['clips'],
            'position': random.randrange(len(video['clips'])),
            'name': video['name']
        } for video in videos]

    def update(self):
        '''
        Rebuild the playlists from pandora and cache them.

        Fetches all tagged storyline entities and all annotations on the
        'storylines' layer, fetches (and caches in 'videos.json') metadata
        for videos not seen before, and stores one playlist per storyline
        that has at least one clip in self.playlists and 'playlists.json'.
        '''
        # Get all storylines with tags
        storylines = [{
            'id': entity['id'],
            'name': entity['name'],
            'nodename': entity['nodename'],
            'tags': entity['tags']
        } for entity in self.pandora.find_entities({
            'conditions': [
                {'key': 'type', 'operator': '==', 'value': 'storylines'},
            ],
            'operator': '&'
        }, ['id', 'name', 'tags', 'nodename']) if entity.get('tags', [])]
        # Get set of storyline names
        names = {storyline['name'] for storyline in storylines}
        # Get all clips annotated with storyline references
        clips = [clip for clip in self.pandora.find_annotations({
            'conditions': [
                {'key': 'layer', 'operator': '==', 'value': 'storylines'}
            ],
            'operator': '&'
        }, ['id', 'in', 'out', 'value']) if clip['value'] in names]
        # Get set of ids for videos with clips
        video_ids = {clip['id'].split('/')[0] for clip in clips}
        # Get and cache video data (only videos missing from the cache are
        # requested from pandora)
        cached_videos = self._load_json('videos.json', [])
        cached_ids = {video['id'] for video in cached_videos}
        videos = sorted(cached_videos + [
            self.pandora.get(video_id, ['code', 'id', 'order', 'title'])
            for video_id in video_ids if video_id not in cached_ids
        ], key=lambda video: video['order'])
        self._save_json('videos.json', videos)
        # Get video order
        order = {video['id']: video['order'] for video in videos}
        # Sort clips by video order, then by in-point
        clips = sorted(
            clips,
            key=lambda clip: (
                order[clip['id'].split('/')[0]] * 1000000 + clip['in']
            )
        )
        # Group clips by storyline name, keeping the sort order
        clips_by_name = defaultdict(list)
        for clip in clips:
            clips_by_name[clip['value']].append(clip)
        # Get and cache playlists
        playlists = []
        for storyline in storylines:
            # Build fresh clip dicts per storyline: _patch_clips mutates
            # them, and several storylines may share one name.
            storyline_clips = self._patch_clips([{
                'id': clip['id'],
                'in': clip['in'],
                'out': clip['out']
            } for clip in clips_by_name.get(storyline['name'], [])])
            if storyline_clips:
                playlists.append({
                    'id': storyline['id'],
                    'name': storyline['nodename'],
                    'tags': storyline['tags'],
                    'clips': storyline_clips
                })
        self.playlists = playlists
        self._save_json('playlists.json', self.playlists)

    @run_async
    def update_async(self):
        '''
        Run update() via the run_async decorator.

        NOTE(review): run_async comes from utils and is not visible here;
        presumably it runs the call in the background -- confirm.
        '''
        self.update()


class Pandora:
    '''pan.do/ra API wrapper.'''

    def __init__(self, url, username, password):
        '''Connect to the API at ``url`` and sign in.'''
        self.api = ox.API(url)
        self.api.signin(username=username, password=password)

    def find_annotations(self, query, keys):
        '''Return the items of a findAnnotations request (first 1000000).'''
        # print('FIND ANNOTATIONS', query, keys)
        return self.api.findAnnotations({
            'keys': keys,
            'query': query,
            'range': [0, 1000000]
        })['data']['items']

    def find_entities(self, query, keys):
        '''Return the items of a findEntities request (first 1000000).'''
        # print('FIND ENTITIES', query, keys)
        return self.api.findEntities({
            'keys': keys,
            'query': query,
            'range': [0, 1000000]
        })['data']['items']

    def get(self, id, keys):
        '''Return the 'data' of a get request for item ``id``.'''
        # print('GET', id, keys)
        return self.api.get({
            'id': id,
            'keys': keys
        })['data']


if __name__ == '__main__':
    engine = Engine('json')
    engine.update()