dd-re/recommendation_engine.py

'''
Recommendation Engine Example
1 Nov 2017, 0x2620
'''

import json
import os
import random

import ox

from utils import run_async

class Engine:
    '''
    Recommends playlists of clips for a user.

    Playlists are built from pan.do/ra storyline entities and the clips
    annotated with them (see update), cached as JSON files below path, and
    ranked per user by get_videos.
    '''

    def __init__(self, path):
        '''
        Sign in to pan.do/ra and load the cached playlists, if there are any.

        path: directory holding the playlists.json and videos.json caches
        '''
        self.path = path
        self.pandora = Pandora(
            url='http://pandora.dmp/api/',
            username='dd.re',
            password='dd.re'
        )
        filename = os.path.join(self.path, 'playlists.json')
        if os.path.exists(filename):
            with open(filename) as f:
                self.playlists = json.load(f)
        else:
            # No cache yet: stay empty until update() has been run
            self.playlists = []

    def _shift_clips(self, clips):
        '''
        Return a copy of clips, rotated so that it starts at a random clip.

        Every clip appears exactly once in the result and the cyclic order
        is preserved. An empty list results in an empty list.
        '''
        if not clips:
            # random.randrange(0) would raise ValueError
            return []
        index = random.randrange(len(clips))
        # Tail first, then the complete head (up to, but excluding, index)
        return clips[index:] + clips[:index]

    def get_videos(self, user):
        '''
        Return up to 16 playlists for the given user, in random order.

        user: dict with an 'events' list; each event has a 'data' dict, and
              each event whose data has a 'product' key counts once towards
              the tag named by that product

        Returns a list of {'clips': [...], 'name': ...} dicts. The clips of
        each playlist are rotated to a random start.
        '''
        # Count tags for the user
        count = {}
        for event in user['events']:
            data = event['data']
            if 'product' in data:
                count[data['product']] = count.get(data['product'], 0) + 1
        # Score each playlist by the summed counts of its tags. The random
        # fraction in [0, 1) only breaks ties between equal integer sums.
        score = {}
        for playlist in self.playlists:
            score[playlist['name']] = random.random() + sum(
                count.get(tag, 0) for tag in playlist['tags']
            )
        # Select 16 highest scoring playlists
        playlists = sorted(
            self.playlists,
            key=lambda playlist: score[playlist['name']],
            reverse=True
        )[:16]
        # Shuffle playlists (randomize layout) and shift clips (randomize start)
        random.shuffle(playlists)
        return [{
            'clips': self._shift_clips(playlist['clips']),
            'name': playlist['name']
        } for playlist in playlists]

    def update(self):
        '''
        Rebuild the playlists from pan.do/ra and cache them as JSON.

        Reads storyline entities and storyline annotations, fetches data for
        videos that are not in videos.json yet, and then rewrites both
        videos.json and playlists.json below self.path. self.playlists is
        replaced with the new list; storylines without tags or without clips
        are left out.
        '''

        def video_id(clip):
            # The part of a clip id before the first '/' is the video id
            return clip['id'].split('/')[0]

        # Get all storylines with tags
        entities = self.pandora.find_entities({
            'conditions': [
                {'key': 'type', 'operator': '==', 'value': 'storylines'},
            ],
            'operator': '&'
        }, ['id', 'name', 'tags'])
        storylines = [{
            'name': entity['name'],
            'tags': entity['tags']
        } for entity in entities if entity.get('tags', [])]
        # Get set of storyline names
        names = {storyline['name'] for storyline in storylines}
        # Get all clips annotated with storyline references
        annotations = self.pandora.find_annotations({
            'conditions': [
                {'key': 'layer', 'operator': '==', 'value': 'storylines'}
            ],
            'operator': '&'
        }, ['id', 'in', 'out', 'value'])
        clips = [clip for clip in annotations if clip['value'] in names]
        # Get set of ids for videos with clips
        ids = {video_id(clip) for clip in clips}
        # Get and cache video data
        filename = os.path.join(self.path, 'videos.json')
        if os.path.exists(filename):
            with open(filename) as f:
                cached_videos = json.load(f)
        else:
            cached_videos = []
        cached_ids = {video['id'] for video in cached_videos}
        # Only videos missing from the cache are requested from the API
        videos = sorted(cached_videos + [
            self.pandora.get(missing_id, ['code', 'id', 'order', 'title'])
            for missing_id in ids if missing_id not in cached_ids
        ], key=lambda video: video['order'])
        with open(filename, 'w') as f:
            f.write(json.dumps(videos, indent=4, sort_keys=True))
        # Get video order
        order = {video['id']: video['order'] for video in videos}
        # Sort clips by video order, then by in point. The factor keeps clips
        # of one video together as long as in points stay below 1000000
        # (presumably seconds -- TODO confirm the unit).
        clips = sorted(
            clips,
            key=lambda clip: order[video_id(clip)] * 1000000 + clip['in']
        )
        # Group the sorted clips by storyline name in a single pass
        clips_by_name = {}
        for clip in clips:
            clips_by_name.setdefault(clip['value'], []).append({
                'id': clip['id'],
                'in': clip['in'],
                'out': clip['out']
            })
        # Get and cache playlists (storylines without clips are dropped)
        self.playlists = [{
            'name': storyline['name'],
            'tags': storyline['tags'],
            'clips': list(clips_by_name[storyline['name']])
        } for storyline in storylines if storyline['name'] in clips_by_name]
        with open(os.path.join(self.path, 'playlists.json'), 'w') as f:
            f.write(json.dumps(self.playlists, indent=4, sort_keys=True))

    @run_async
    def update_async(self):
        '''
        Call update() through the run_async decorator imported from utils.

        NOTE(review): presumably this runs update() in the background without
        blocking the caller -- confirm against utils.run_async.
        '''
        self.update()


class Pandora:
    '''
    pan.do/ra API wrapper

    Signs in when constructed and offers the three API calls that are used
    in this file: findAnnotations, findEntities and get.
    '''

    def __init__(self, url, username, password):
        '''
        Create the ox.API client for url and sign in with the credentials.
        '''
        self.api = ox.API(url)
        self.api.signin(username=username, password=password)

    @staticmethod
    def _find_request(query, keys):
        # Request body shared by both find calls.
        # NOTE(review): the fixed range presumably asks for all items at
        # once -- confirm against the pan.do/ra API documentation.
        return {
            'keys': keys,
            'query': query,
            'range': [0, 1000000]
        }

    def find_annotations(self, query, keys):
        '''
        Return the 'items' of a findAnnotations call for query and keys.
        '''
        response = self.api.findAnnotations(self._find_request(query, keys))
        return response['data']['items']

    def find_entities(self, query, keys):
        '''
        Return the 'items' of a findEntities call for query and keys.
        '''
        response = self.api.findEntities(self._find_request(query, keys))
        return response['data']['items']

    def get(self, id, keys):
        '''
        Return the 'data' of a get call for the given id and keys.
        '''
        request = {
            'id': id,
            'keys': keys
        }
        response = self.api.get(request)
        return response['data']


if __name__ == '__main__':
    # Run as a script: rebuild the caches in the 'json' directory once
    engine = Engine(path='json')
    engine.update()
rename 2017-11-01 16:38:58 +00:00			`'''`
formatting 2017-11-01 22:56:33 +00:00			`Recommendation Engine Example`
			`1 Nov 2017, 0x2620`
rename 2017-11-01 16:38:58 +00:00			`'''`

			`import json`
			`import os`
			`import random`

			`import ox`

run every 15m 2017-11-02 08:40:02 +00:00			`from utils import run_async`

rename 2017-11-01 16:38:58 +00:00			`class Engine:`

			`def __init__(self, path):`
			`self.path = path`
			`self.pandora = Pandora(`
			`url='http://pandora.dmp/api/',`
			`username='dd.re',`
			`password='dd.re'`
			`)`
			`filename = os.path.join(self.path, 'playlists.json')`
			`if os.path.exists(filename):`
			`with open(filename) as f:`
			`self.playlists = json.loads(f.read())`
			`else:`
			`self.playlists = []`

			`def _shift_clips(self, clips):`
			`index = random.randrange(len(clips))`
			`return clips[index:] + clips[:index - 1]`

			`def get_videos(self, user):`
implement get_videos 2017-11-01 18:14:15 +00:00			`# Count tags for the user`
			`count = {}`
rename 2017-11-01 16:38:58 +00:00			`for event in user['events']:`
			`if 'product' in event['data']:`
implement get_videos 2017-11-01 18:14:15 +00:00			`count[event['data']['product']] = count.get(`
			`event['data']['product'], 0`
			`) + 1`
			`score = {}`
			`# For each tag in playlist, increment score by count`
			`for playlist in self.playlists:`
			`score[playlist['name']] = random.random()`
			`for tag in playlist['tags']:`
			`score[playlist['name']] += count.get(tag, 0)`
update comments 2017-11-02 11:12:56 +00:00			`# Select 16 highest scoring playlists`
implement get_videos 2017-11-01 18:14:15 +00:00			`playlists = sorted(`
			`self.playlists,`
			`key=lambda playlist: -score[playlist['name']]`
			`)[:16]`
			`# Shuffle playlists (randomize layout) and shift clips (randomize start)`
			`random.shuffle(playlists)`
			`return [{`
			`'clips': self._shift_clips(playlist['clips']),`
			`'name': playlist['name']`
			`} for playlist in playlists]`
rename 2017-11-01 16:38:58 +00:00
			`def update(self):`
			`# Get all storylines with tags`
			`storylines = [{`
			`'name': entity['name'],`
			`'tags': entity['tags']`
			`} for entity in self.pandora.find_entities({`
			`'conditions': [`
			`{'key': 'type', 'operator': '==', 'value': 'storylines'},`
			`],`
			`'operator': '&'`
			`}, ['id', 'name', 'tags']) if entity.get('tags', [])]`
			`# Get list of storyline names`
			`names = list(set([storyline['name'] for storyline in storylines]))`
			`# Get all clips annotated with storyline references`
			`clips = [clip for clip in self.pandora.find_annotations({`
			`'conditions': [`
			`{'key': 'layer', 'operator': '==', 'value': 'storylines'}`
			`],`
			`'operator': '&'`
			`}, ['id', 'in', 'out', 'value']) if clip['value'] in names]`
			`# Get list of ids for videos with clips`
			`ids = list(set([clip['id'].split('/')[0] for clip in clips]))`
implement get_videos 2017-11-01 18:14:15 +00:00			`# Get and cache video data`
rename 2017-11-01 16:38:58 +00:00			`filename = os.path.join(self.path, 'videos.json')`
			`if os.path.exists(filename):`
			`with open(filename) as f:`
			`videos_ = json.loads(f.read())`
			`ids_ = [video['id'] for video in videos_]`
			`else:`
			`videos_, ids_ = [], []`
			`videos = sorted(videos_ + [`
			`self.pandora.get(id, ['code', 'id', 'order', 'title'])`
			`for id in ids if not id in ids_`
			`], key=lambda video: video['order'])`
			`with open(filename, 'w') as f:`
			`f.write(json.dumps(videos, indent=4, sort_keys=True))`
implement get_videos 2017-11-01 18:14:15 +00:00			`# Get video order`
rename 2017-11-01 16:38:58 +00:00			`order = {video['id']: video['order'] for video in videos}`
			`# Sort clips`
			`clips = sorted(`
			`clips,`
			`key=lambda clip: order[clip['id'].split('/')[0]] * 1000000 + clip['in']`
			`)`
implement get_videos 2017-11-01 18:14:15 +00:00			`# Get and cache playlists`
rename 2017-11-01 16:38:58 +00:00			`self.playlists = [playlist for playlist in [{`
			`'name': storyline['name'],`
			`'tags': storyline['tags'],`
implement get_videos 2017-11-01 18:14:15 +00:00			`'clips': [{`
			`'id': clip['id'],`
			`'in': clip['in'],`
			`'out': clip['out']`
			`} for clip in clips if clip['value'] == storyline['name']]`
rename 2017-11-01 16:38:58 +00:00			`} for storyline in storylines] if playlist['clips']]`
			`with open(os.path.join(self.path, 'playlists.json'), 'w') as f:`
			`f.write(json.dumps(self.playlists, indent=4, sort_keys=True))`

run every 15m 2017-11-02 08:40:02 +00:00			`@run_async`
			`def update_async(self):`
			`self.update()`

formatting 2017-11-01 22:56:33 +00:00
			`class Pandora:`

update comments 2017-11-02 11:12:56 +00:00			`# pan.do/ra API wrapper`

formatting 2017-11-01 22:56:33 +00:00			`def __init__(self, url, username, password):`
			`self.api = ox.API(url)`
			`self.api.signin(username=username, password=password)`

			`def find_annotations(self, query, keys):`
			`# print('FIND ANNOTATIONS', query, keys)`
			`return self.api.findAnnotations({`
			`'keys': keys,`
			`'query': query,`
			`'range': [0, 1000000]`
			`})['data']['items']`

			`def find_entities(self, query, keys):`
			`# print('FIND ENTITIES', query, keys)`
			`return self.api.findEntities({`
			`'keys': keys,`
			`'query': query,`
			`'range': [0, 1000000]`
			`})['data']['items']`

			`def get(self, id, keys):`
			`# print('GET', id, keys)`
			`return self.api.get({`
			`'id': id,`
			`'keys': keys`
			`})['data']`


rename 2017-11-01 16:38:58 +00:00			`if __name__ == '__main__':`
			`engine = Engine('json')`
			`engine.update()`