'''
Recommendation Engine Example
1 Nov 2017, 0x2620
'''

from collections import defaultdict
import copy
import json
import logging
import os
import random
import time

import ox

from utils import run_async

logger = logging.getLogger(__name__)
verbose = True


class Engine:

    _pandora = None

    def __init__(self, path, **kwargs):
        self.path = path
        self.pandora_args = dict(
            url=kwargs.get('pandora', 'http://pandora.dmp/api/'),
            username=kwargs.get('username', 'dd.re'),
            password=kwargs.get('password', 'dd.re')
        )
        filename = os.path.join(self.path, 'playlists.json')
        if os.path.exists(filename):
            with open(filename) as f:
                self.playlists = json.load(f)
            ### The following is for testing purposes only:
            #for playlist in self.playlists:
            #    for clip in playlist["clips"]:
            #        clip["pass"] = bool(random.getrandbits(1))
        else:
            self.playlists = []
        filename = os.path.join(self.path, 'state.json')
        if os.path.exists(filename):
            with open(filename) as f:
                self.state = json.load(f)
        else:
            self.state = {
                'channels': {
                    'globalKeywords': {'locked': False, 'value': 8},
                    'userKeywords': {'locked': False, 'value': 8}
                },
                'globalKeywords': {},
            }
        if 'gridChange' not in self.state:
            self.state['gridChange'] = {
                'nextClip': {'locked': True, 'value': 4},
                'nextPlaylist': {'locked': False, 'value': 4},
                'staySame': {'locked': False, 'value': 8}
            }
        if 'userKeywordsWeights' not in self.state:
            self.state['userKeywordsWeights'] = {
                'themeTags': {'locked': False, 'value': 0.3},
                'characterTags': {'locked': False, 'value': 0.7}
            }
        self.update_keywords()

    @property
    def pandora(self):
        while not self._pandora:
            try:
                self._pandora = Pandora(**self.pandora_args)
            except:
                logger.error('failed to connect to pandora, retry in 10 seconds')
                time.sleep(10)
        return self._pandora
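
    # _patch_clips sets each clip's "out" point to the "in" point of the next
    # clip from the same video, or to the video's duration for the last clip.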
2018-01-26 10:48:40 +00:00
def _patch_clips ( self , clips ) :
inpoints = { }
for index , clip in enumerate ( clips ) :
video_id = clip [ ' id ' ] . split ( ' / ' ) [ 0 ]
inpoints [ video_id ] = inpoints . get ( video_id , [ ] ) + [ {
' index ' : index ,
' position ' : clip [ ' in ' ]
} ]
for video_id in inpoints :
2018-01-26 11:01:36 +00:00
for i , inpoint in enumerate ( sorted (
2018-01-26 10:48:40 +00:00
inpoints [ video_id ] , key = lambda inpoint : inpoint [ ' position ' ]
2018-01-26 11:01:36 +00:00
) ) :
2018-01-26 10:48:40 +00:00
if i < len ( inpoints [ video_id ] ) - 1 :
2018-01-26 11:01:36 +00:00
clips [ inpoint [ ' index ' ] ] [ ' out ' ] = inpoints [ video_id ] [ i + 1 ] [ ' position ' ]
2018-01-26 10:48:40 +00:00
else :
2018-01-26 11:01:36 +00:00
clips [ inpoint [ ' index ' ] ] [ ' out ' ] = self . pandora . get ( video_id , [ ' duration ' ] ) [ ' duration ' ]
2018-01-26 10:48:40 +00:00
return clips
2018-08-03 14:30:02 +00:00
2017-11-01 16:38:58 +00:00
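
    # get_videos assembles the grid response for a user: on login, or when grid
    # events are incomplete, it returns a fresh set of recommendations; otherwise
    # it splits the grid positions into nextClip / nextPlaylist / staySame pools
    # according to the gridChange state.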
    def get_videos(self, user):
        # Update the user's playlists first to reflect watched clips
        playlists = self.update_user_playlists(user)
        # Get the user keyword scores for the debug view
        user_keywords = copy.deepcopy(user.get('keywords', {}))
        theme_tags = {k.lower(): v for k, v in user_keywords.items() if not k.isupper()}
        character_tags = {k: v for k, v in user_keywords.items() if k.isupper()}
        top_user_keywords = sorted([(k, v) for (k, v) in theme_tags.items()], key=lambda kv: kv[1])[-5:]
        top_user_characters = sorted([(k, v) for (k, v) in character_tags.items()], key=lambda kv: kv[1])[-5:]

        if (user.get('events') or [{}])[0].get("event") == "login":
            return {
                'user': {
                    'keywords': user.get('keywords', {})
                },
                'videos': self.get_recommendations(playlists, user),
                "_debug": {
                    "top_user_keywords": top_user_keywords,
                    "top_user_characters": top_user_characters
                }
            }
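
        # Split the grid positions into three pools according to the gridChange
        # state: nc positions advance to the next clip of their current playlist,
        # np positions get a new playlist, and ns positions stay unchanged.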
        channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
        sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
        grid_change = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
        # check if there were grid events for all indexes.
        grid_events = {}
        (nc, np, ns) = (grid_change.get("nextClip"), grid_change.get("nextPlaylist"), grid_change.get("staySame"))
        video_num = nc + np + ns

        ## This version of the loop also extracts play_index; it requires "index" in the play event data (previously unavailable):
        play_index = None
        for event in user.get('events', []):
            if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
                grid_events[event.get('data').get('index')] = event.get('data')
            if event.get('event') == "play" and event["data"].get("type") == "video" and play_index is None:
                play_index = event.get('data').get('index')
            if len(grid_events) == video_num and play_index is not None:
                break

        prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k: k['index'])
        # if there were no grid events for all, initialize all grids.
        if len(prev_grid_list) < video_num:
            return {
                'user': {
                    'keywords': user.get('keywords', {})
                },
                'videos': self.get_recommendations(playlists, user),
                "_debug": {
                    "top_user_keywords": top_user_keywords,
                    "top_user_characters": top_user_characters
                }
            }
        else:
            if play_index is None:
                video_indx = list(range(video_num))
                random.shuffle(video_indx)
            else:
                # played index is excluded from the random shuffle and deterministically added to staySame pool.
                video_indx = [*range(play_index)] + [*range(play_index + 1, video_num)]
                random.shuffle(video_indx)
                video_indx.append(play_index)
            next_clip_index = video_indx[:nc]
            next_playlist_index = video_indx[nc:nc + np]
            stay_same_index = video_indx[nc + np:]
            rec_list = []
            # Make sure the playlist still exists for the staySame pool; otherwise move that position to the nextPlaylist pool.
            for i in stay_same_index[:]:
                if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]:
                    stay_same_index.remove(i)
                    next_playlist_index.append(i)
            # Select the next clip for the nextClip pool, except when the playlist has only one clip. Clips with "pass": True are skipped when selecting the next clip.
            for i in next_clip_index:
                if prev_grid_list[i].get("playlist") not in [playlist["name"] for playlist in playlists]:
                    # handle the absence of "playlist" data in old grid events, or the case where the playlist has been eliminated.
                    next_playlist_index.append(i)
                    continue
                for playlist in playlists:
                    if playlist.get('name') == prev_grid_list[i].get('playlist'):
                        unwatched_clips_indx = [j for j in range(len(playlist["clips"])) if playlist["clips"][j].get("pass", False) != True]
                        if len(playlist["clips"]) == 1:
                            next_playlist_index.append(i)
                            break
                        # Discuss how this behaviour should be: should it switch to a new playlist if it is already at the end of the playlist's clip sequence?
                        elif prev_grid_list[i].get('playlistPosition', 0) + 1 == len(playlist['clips']):
                            if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']:
                                playlist_pos = unwatched_clips_indx[0]
                            else:
                                next_playlist_index.append(i)
                                break
                        else:
                            if len([j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']]) == 0:
                                if unwatched_clips_indx[0] != prev_grid_list[i]['playlistPosition']:
                                    playlist_pos = unwatched_clips_indx[0]
                                else:
                                    next_playlist_index.append(i)
                                    break
                            else:
                                playlist_pos = [j for j in unwatched_clips_indx if j > prev_grid_list[i]['playlistPosition']][0]
                        rec_list.append((i, {
                            'clips': playlist['clips'],
                            #'position': random.randrange(len(playlist['clips'])),
                            'position': playlist_pos,
                            'name': playlist['name'],
                            'tags': playlist['tags'],
                        }))

            # randomly select playlists for the nextPlaylist pool
            # (excluding the playlists from the current grid once "playlist" is recorded for grid events).
            vids_exclude = [e.get("playlist") for e in prev_grid_list]
            while None in vids_exclude:
                vids_exclude.remove(None)
            video = self.get_recommendations(playlists, user, vids_exclude)
            rec_list += [(i, video[i]) for i in next_playlist_index]
            # staySame pool
            rec_list += [(i, {}) for i in stay_same_index]
            rec_list = sorted(rec_list, key=lambda k: k[0])
            videos_ = [e[1] for e in rec_list]
            return {
                'user': {
                    'keywords': user.get('keywords', {})
                },
                'videos': videos_,
                "_debug": {
                    "top_user_keywords": top_user_keywords,
                    "top_user_characters": top_user_characters
                }
            }
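
    # get_recommendations scores every playlist against the user's keywords
    # (theme and character tags, weighted by userKeywordsWeights) and against the
    # global keyword sliders, then fills the grid with the top playlists from each
    # pass (userKeywords first, then globalKeywords), up to 16 in total.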
    def get_recommendations(self, playlists, user, vids_exclude=[]):
        channels = {k: v.get('value', 0) for k, v in self.state['channels'].items()}
        sliders = {k: v.get('value', 0) for k, v in self.state['globalKeywords'].items()}
        gridChange = {k: v.get('value', 0) for k, v in self.state['gridChange'].items()}
        userKeywordsWeights = {k: v.get('value', 1) for k, v in self.state['userKeywordsWeights'].items()}
        # Exclude playlists from the most recent grid
        if len(vids_exclude) > 0:
            for playlist in playlists[:]:
                if playlist["name"] in vids_exclude:
                    playlists.remove(playlist)
        # For each playlist, compute the user keyword score from theme and character tags
        user_keywords = copy.deepcopy(user.get('keywords', {}))
        theme_tags = {k.lower(): v for k, v in user_keywords.items() if not k.isupper()}
        character_tags = {k: v for k, v in user_keywords.items() if k.isupper()}
        # manually map some of the user keywords onto the matching playlist tags
        theme_tags["god"] = theme_tags.get("god - gods", 0)
        theme_tags["visionary"] = theme_tags.get("visionary - enlightenment", 0)
        theme_tags["enlightenment"] = theme_tags.get("visionary - enlightenment", 0)
        character_tags["FEDOR MIKHAILOVICH SOFRONOV"] = character_tags.get("FYODOR MIKHAILOVICH SOFRONOV", 0)
        character_tags["SHKABARNYA OLGA SERGEEVNA"] = character_tags.get("OLGA SERGEEVNA SHKABARNYA", 0)
        character_tags["VICTORIA OLEGOVNA SKITSKAYA"] = character_tags.get("VIKTORIA OLEGOVNA SKITSKAYA", 0)
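        # Each playlist's score starts with a tiny random jitter, presumably so that
        # ties between equally scored playlists are broken at random.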
        score = {}
        for playlist in playlists:
            score[playlist['name']] = random.random() * 0.001
            for tag in playlist['tags']:
                if tag in theme_tags:
                    score[playlist['name']] += theme_tags[tag] * userKeywordsWeights["themeTags"]
                elif tag in character_tags:
                    score[playlist['name']] += character_tags[tag] * userKeywordsWeights["characterTags"]
        # Select highest scoring playlists
        playlists = sorted(
            playlists,
            key=lambda playlist: -score[playlist['name']]
        )
        videos = playlists[:channels['userKeywords']]
        playlists = playlists[channels['userKeywords']:]
        # For each playlist, compute the global keyword score
        score = {}
        for playlist in playlists:
            score[playlist['name']] = random.random()
            for tag in [tag for tag in playlist['tags'] if tag in sliders]:
                score[playlist['name']] += sliders[tag]
        # Select highest scoring playlists
        playlists = sorted(
            playlists,
            key=lambda playlist: -score[playlist['name']]
        )
        videos += playlists[:16 - channels['userKeywords']]
        # Shuffle playlists (randomize layout) and shift clips (randomize start)
        random.shuffle(videos)
        return [{
            'clips': video['clips'],
            'position': random.choice([i for i in range(len(video["clips"])) if video["clips"][i].get("pass", False) != True]),
            'name': video['name'],
            'tags': video['tags'],
        } for video in videos]

    def update_user_playlists(self, user, watch_cutoff=0.9):
        # Output: playlists with updated in/out times for clips that have been watched,
        # plus "pass" indicators for clips that have been watched for more than watch_cutoff.
        # "Watched" means the video was played in full screen.
        # "watch_cutoff": the portion of the clip duration after which the whole clip counts as watched; should be in [0, 1].
        # + check (play, pause) pairs and eliminate unusual cases, most likely due to a bug.
        # + If a (play, pause) pair exceeds watch_cutoff (80-90%?) of the clip length, add "pass": True to the clip.
        # + Otherwise, find the last pause position of a clip and record it as the "in" position of the clip.
        # + If all clips of a playlist are marked as "pass", eliminate the playlist from the user playlists.
        playlists = copy.deepcopy(self.playlists)
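        # Walk the event list in reverse, pairing each full-screen "play" event with the
        # matching "pause" event on the same playlist position, and use the pause position
        # to decide whether the clip was watched past the cutoff.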
        play = {}
        clip_max_dur = 10800  # = 3 hours; arbitrary max duration allowed for (pause time - play time) to detect outliers/bugs
        # The current max clip duration is 10379.383333377269 from "DDLaunch: Erik Verlinde, Gravity as an emergent force (1956)"
        # A user could potentially spend more than 3 hours if they keep watching after the clip enters the subsequent "scene"
        for event in user.get('events', [])[::-1]:
            if event["event"] == "play" and event["data"].get("type") == "video":
                play = event
            elif event["event"] == "pause" and play != {} and event["data"].get("type") == "video":
                if "position" not in play["data"]:
                    play = {}
                    break
                if play["data"].get("playlist") == event["data"].get("playlist"):
                    if (event["data"]["position"] - play["data"]["position"] > 0
                            and event["data"]["position"] - play["data"]["position"] < clip_max_dur
                            and event["data"].get("playlistPosition") == play["data"].get("playlistPosition")
                            and event["data"].get("playlistPosition") is not None):
                        i = event["data"]["playlistPosition"]
                        for playlist in playlists:
                            if playlist["name"] == event["data"]["playlist"] and i < len(playlist["clips"]):
                                if play["data"]["position"] >= max(playlist["clips"][i]["in"] - 15, 0) and event["data"]["position"] <= playlist["clips"][i]["out"] + 15:
                                    # This assumes the (play, pause) pair fits inside the clip's (in, out) segment with a +/- 15 second buffer. There were newer edits of clip positions with 12 second differences.
                                    # Instances where this might not hold: clip in/out may have been largely edited (before/after edit inconsistency); a skip may trigger a jump to the wrong clip (bug).
                                    if "orig_in" not in playlist["clips"][i]:
                                        cutoff_pos = (playlist["clips"][i]["out"] - playlist["clips"][i]["in"]) * watch_cutoff + playlist["clips"][i]["in"]
                                    else:
                                        cutoff_pos = (playlist["clips"][i]["out"] - playlist["clips"][i]["orig_in"]) * watch_cutoff + playlist["clips"][i]["orig_in"]
                                    if event["data"]["position"] >= cutoff_pos:
                                        playlist["clips"][i]["pass"] = True
                                    else:
                                        if "orig_in" not in playlist["clips"][i]:
                                            # record the original "in" position to calculate the cutoff position in the future
                                            playlist["clips"][i]["orig_in"] = playlist["clips"][i]["in"]
                                        # update the "in" position of the clip in the playlist
                                        playlist["clips"][i]["in"] = event["data"]["position"]
                                break
                play = {}
        # Drop playlists whose clips have all been watched ("pass": True).
        for playlist in playlists.copy():
            unwatched = [clip for clip in playlist["clips"] if not clip.get("pass")]
            if not unwatched:
                playlists.remove(playlist)
        # If the number of playlists drops below 30, reset to the original full set.
        if len(playlists) < 30:
            playlists = copy.deepcopy(self.playlists)
        return playlists
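
    # get_next returns a single replacement video for one grid position,
    # excluding the playlists that are already shown in the current grid.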
    def get_next(self, user, position):
        # Update the user's playlists first to reflect watched clips
        playlists = self.update_user_playlists(user)

        grid_events = {}
        video_num = 16
        for event in user.get('events', []):
            if event.get('event') == "grid" and event.get('data').get('index') not in grid_events:
                grid_events[event.get('data').get('index')] = event.get('data')
            if len(grid_events) == video_num:
                break
        prev_grid_list = sorted([v for v in grid_events.values()], key=lambda k: k['index'])
        vids_exclude = [e.get("playlist") for e in prev_grid_list]
        video = self.get_recommendations(playlists, user, vids_exclude)[position]
        return video

    def update_state(self, data):
        for key in data:
            if key in self.state:
                self.state[key].update(data[key])
            else:
                self.state[key] = data[key]
        self.save_state()
        return self.state

    def save_state(self):
        filename = os.path.join(self.path, 'state.json')
        with open(filename, 'w') as f:
            json.dump(self.state, f, indent=4, ensure_ascii=False, sort_keys=True)
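
    # update() pulls storylines, annotated clips, and video metadata from pan.do/ra,
    # rebuilds the playlists, and caches them in playlists.json / videos.json.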
    def update(self):
        # Get all storylines with tags
        storylines = [{
            'id': entity['id'],
            'name': entity['name'],
            'nodename': entity['nodename'],
            'tags': [t.strip() for t in entity['tags']]
        } for entity in self.pandora.find_entities({
            'conditions': [
                {'key': 'type', 'operator': '==', 'value': 'storylines'},
            ],
            'operator': '&'
        }, ['id', 'name', 'tags', 'nodename']) if entity.get('tags', []) and entity.get('nodename')]
        # Get list of storyline names
        names = list(set([storyline['name'] for storyline in storylines]))
        # Get list of items to use in DD
        items = [item['id'] for item in self.pandora.find({
            'conditions': [
                {'key': 'list', 'operator': '==', 'value': 'dau:DD'}
            ]
        }, ['id'])]
        # Get all clips annotated with storyline references
        clips = [clip for clip in self.pandora.find_annotations({
            'conditions': [
                {'key': 'layer', 'operator': '==', 'value': 'storylines'}
            ],
            'operator': '&'
        }, ['id', 'in', 'out', 'value']) if clip['value'] in names and clip['id'].split('/')[0] in items]
        # Get list of ids for videos with clips
        ids = list(set([clip['id'].split('/')[0] for clip in clips]))
        # Get and cache video data
        filename = os.path.join(self.path, 'videos.json')
        if os.path.exists(filename):
            with open(filename) as f:
                videos_ = json.loads(f.read())
            ids_ = [video['id'] for video in videos_]
        else:
            videos_, ids_ = [], []
        videos = sorted(videos_ + [
            self.pandora.get(id, ['code', 'id', 'order', 'title'])
            for id in ids if not id in ids_
        ], key=lambda video: int(video['order']))
        with open(filename, 'w') as f:
            f.write(json.dumps(videos, indent=4, sort_keys=True))
        # Get video order
        order = {video['id']: int(video['order']) for video in videos}
        # Sort clips
        clips = sorted(
            clips,
            key=lambda clip: (order[clip['id'].split('/')[0]], clip['in'])
        )
        # Get and cache playlists
        self.playlists = [playlist for playlist in [{
            'id': storyline['id'],
            'name': storyline['nodename'].strip(),
            'tags': storyline['tags'],
            'clips': [{
                'item': clip['id'].split('/')[0],
                'id': clip['id'],
                'in': clip['in'],
                'out': clip['out']
            } for clip in clips if clip['value'] == storyline['name']]
        } for storyline in storylines] if playlist['clips']]
        with open(os.path.join(self.path, 'playlists.json'), 'w') as f:
            f.write(json.dumps(self.playlists, indent=4, sort_keys=True))
        self.update_keywords()
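
    # update_keywords keeps the globalKeywords sliders in sync with the
    # non-uppercase tags that actually occur in the current playlists.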
    def update_keywords(self):
        changed = False
        if 'globalKeywords' not in self.state:
            self.state['globalKeywords'] = {}
            changed = True
        existing_tags = set()
        for playlist in self.playlists:
            for tag in playlist.get('tags', []):
                if not tag.isupper() and tag:
                    existing_tags.add(tag)
                if not tag.isupper() and tag not in self.state['globalKeywords']:
                    self.state['globalKeywords'][tag] = {'value': 0}
                    changed = True
        for tag in set(self.state['globalKeywords']) - existing_tags:
            del self.state['globalKeywords'][tag]
            changed = True
        if changed:
            self.save_state()

    @run_async
    def update_async(self):
        self.update()


class Pandora:
    # pan.do/ra API wrapper

    def __init__(self, url, username, password):
        self.api = ox.API(url)
        self.api.signin(username=username, password=password)

    def find(self, query, keys):
        #print('FIND', query, keys)
        return self.api.find({
            'keys': keys,
            'query': query,
            'range': [0, 1000000]
        })['data']['items']

    def find_annotations(self, query, keys):
        #print('FIND ANNOTATIONS', query, keys)
        return self.api.findAnnotations({
            'keys': keys,
            'query': query,
            'range': [0, 1000000]
        })['data']['items']

    def find_entities(self, query, keys):
        #print('FIND ENTITIES', query, keys)
        return self.api.findEntities({
            'keys': keys,
            'query': query,
            'range': [0, 1000000]
        })['data']['items']

    def get(self, id, keys):
        #print('GET', id, keys)
        return self.api.get({
            'id': id,
            'keys': keys
        })['data']


if __name__ == '__main__':
    engine = Engine('json')
    engine.update()
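    # A minimal usage sketch (assumes the 'json' cache directory exists and a
    # user dict shaped like the events handled above; the field values here are
    # hypothetical):
    #
    # user = {
    #     'keywords': {'FYODOR MIKHAILOVICH SOFRONOV': 1, 'god - gods': 2},
    #     'events': [{'event': 'login', 'data': {}}]
    # }
    # print(json.dumps(engine.get_videos(user), indent=4))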