dd-re/recommendation_engine.py

'''
Recommendation Engine Example
'''

import json
import os
import random

import ox

class Pandora:
    '''
    Small convenience wrapper around a signed-in ``ox.API`` client.

    Each method forwards one request to the client stored in ``self.api`` and
    unwraps the payload found under the response's ``'data'`` key.
    '''

    def __init__(self, url, username, password):
        '''
        Create the ``ox.API`` client for ``url`` and sign in.

        url:      passed unchanged to ``ox.API``
        username: passed to ``signin`` as ``username``
        password: passed to ``signin`` as ``password``
        '''
        self.api = client = ox.API(url)
        client.signin(username=username, password=password)

    def find_annotations(self, query, keys):
        '''
        Call ``findAnnotations`` and return ``response['data']['items']``.

        query: sent as the request's ``'query'`` value
        keys:  sent as the request's ``'keys'`` value
        '''
        request = {
            'keys': keys,
            'query': query,
            # Fixed range; presumably large enough to return every match
            # in a single request -- TODO confirm against the API.
            'range': [0, 1000000]
        }
        response = self.api.findAnnotations(request)
        return response['data']['items']

    def find_entities(self, query, keys):
        '''
        Call ``findEntities`` and return ``response['data']['items']``.

        query: sent as the request's ``'query'`` value
        keys:  sent as the request's ``'keys'`` value
        '''
        request = {
            'keys': keys,
            'query': query,
            # Same fixed range as find_annotations.
            'range': [0, 1000000]
        }
        response = self.api.findEntities(request)
        return response['data']['items']

    def get(self, id, keys):
        '''
        Call ``get`` for a single id and return ``response['data']``.

        id:   sent as the request's ``'id'`` value
        keys: sent as the request's ``'keys'`` value
        '''
        request = {'id': id, 'keys': keys}
        response = self.api.get(request)
        return response['data']

class Engine:
    '''
    Builds one playlist of clips per storyline and caches the result as JSON.

    State is kept in two files inside ``path``:
    ``videos.json`` (code, id, order and title of every video fetched so far)
    and ``playlists.json`` (the playlists produced by the last ``update``).
    '''

    def __init__(self, path):
        '''
        Sign in to the API and load ``playlists.json`` from ``path``.

        path: directory used for ``playlists.json`` and ``videos.json``.

        ``self.playlists`` is the parsed content of ``playlists.json`` when
        that file exists, otherwise an empty list.
        '''
        self.path = path
        # NOTE(review): endpoint and credentials are hard-coded here; consider
        # moving them to configuration.
        self.pandora = Pandora(
            url='http://pandora.dmp/api/',
            username='dd.re',
            password='dd.re'
        )
        filename = os.path.join(self.path, 'playlists.json')
        if os.path.exists(filename):
            with open(filename) as f:
                self.playlists = json.load(f)
        else:
            self.playlists = []

    @staticmethod
    def _video_id(clip):
        '''Return the part of the clip's ``'id'`` before the first ``'/'``.'''
        return clip['id'].split('/')[0]

    def _shift_clips(self, clips):
        '''
        Return a copy of ``clips`` rotated to start at a random position.

        Every element is kept exactly once and the cyclic order is preserved;
        ``clips`` itself is not modified. An empty sequence yields an empty
        copy instead of raising.
        '''
        if not clips:
            # random.randrange(0) would raise ValueError.
            return clips[:]
        index = random.randrange(len(clips))
        # Head slice must end at ``index`` (not ``index - 1``), otherwise one
        # clip is lost, or for index 0 nearly the whole list is duplicated.
        return clips[index:] + clips[:index]

    def get_videos(self, user):
        '''
        Collect the ``'product'`` value of every event of ``user`` that has one.

        user: mapping with an ``'events'`` list; each event has a ``'data'``
              mapping.

        NOTE(review): this method looks unfinished -- the collected products
        are neither used nor returned, so the call currently returns ``None``.
        '''
        products = [
            event['data']['product']
            for event in user['events']
            if 'product' in event['data']
        ]

    def update(self):
        '''
        Rebuild ``self.playlists`` from the API and write the JSON caches.

        Steps:
        1. Fetch all entities of type ``'storylines'`` that have tags.
        2. Fetch all annotations on the ``'storylines'`` layer whose value is
           the name of one of those storylines.
        3. Fetch ``code``, ``id``, ``order`` and ``title`` for every video not
           yet present in ``videos.json``, then rewrite that file.
        4. Sort the clips by video order, then by in-point.
        5. Build one playlist per storyline that has at least one clip; each
           clip is rendered as ``'<video id>_<in>-<out>'`` with three decimals.
        6. Write the playlists to ``playlists.json``.
        '''
        # Get all storylines with tags
        entities = self.pandora.find_entities({
            'conditions': [
                {'key': 'type', 'operator': '==', 'value': 'storylines'},
            ],
            'operator': '&'
        }, ['id', 'name', 'tags'])
        storylines = [
            {'name': entity['name'], 'tags': entity['tags']}
            for entity in entities if entity.get('tags', [])
        ]
        # Set of storyline names (constant-time membership test per clip)
        names = {storyline['name'] for storyline in storylines}
        # Get all clips annotated with storyline references
        annotations = self.pandora.find_annotations({
            'conditions': [
                {'key': 'layer', 'operator': '==', 'value': 'storylines'}
            ],
            'operator': '&'
        }, ['id', 'in', 'out', 'value'])
        clips = [clip for clip in annotations if clip['value'] in names]
        # Get ids of videos with clips
        video_ids = {self._video_id(clip) for clip in clips}
        # Get (and cache) order (and code + name) for each video
        filename = os.path.join(self.path, 'videos.json')
        if os.path.exists(filename):
            with open(filename) as f:
                cached_videos = json.load(f)
        else:
            cached_videos = []
        cached_ids = {video['id'] for video in cached_videos}
        # Only videos missing from the cache are requested; sorting the ids
        # makes the request order deterministic.
        fetched_videos = [
            self.pandora.get(video_id, ['code', 'id', 'order', 'title'])
            for video_id in sorted(video_ids - cached_ids)
        ]
        videos = sorted(
            cached_videos + fetched_videos,
            key=lambda video: video['order']
        )
        # Serialize first so a serialization error cannot truncate the cache.
        videos_json = json.dumps(videos, indent=4, sort_keys=True)
        with open(filename, 'w') as f:
            f.write(videos_json)
        order = {video['id']: video['order'] for video in videos}
        # Sort clips by video order, then by in-point. The factor assumes
        # in-points stay below 1000000 -- TODO confirm.
        clips = sorted(
            clips,
            key=lambda clip: order[self._video_id(clip)] * 1000000 + clip['in']
        )
        # Group the formatted clips by storyline name in a single pass
        clips_by_name = {}
        for clip in clips:
            clips_by_name.setdefault(clip['value'], []).append(
                '{}_{:.3f}-{:.3f}'.format(
                    self._video_id(clip), clip['in'], clip['out']
                )
            )
        # Get playlists (storylines without clips are left out)
        self.playlists = [{
            'name': storyline['name'],
            'tags': storyline['tags'],
            # Copy, so storylines sharing a name do not share one list object
            'clips': list(clips_by_name[storyline['name']])
        } for storyline in storylines if storyline['name'] in clips_by_name]
        playlists_json = json.dumps(self.playlists, indent=4, sort_keys=True)
        with open(os.path.join(self.path, 'playlists.json'), 'w') as f:
            f.write(playlists_json)

if __name__ == '__main__':
    # Script entry point: use the relative 'json' directory for the cached
    # videos.json / playlists.json files and rebuild the playlists once.
    engine = Engine(path='json')
    engine.update()