# oxtimelinesutils/hash.py

from __future__ import division
import Image
import os

'''
This shows how to use the data timelines to implement a "Find Similar Clips"
feature, i.e. how to compute sequences and their hashes. There are two modes:
similar shapes and similar colors.
'''

# Lookup table: position of a pixel in an 8x8 frame (counted row by row,
# 0 to 63) -> zone number (0 to 7). A zone is 2 pixels wide and 4 pixels high,
# so there are 4 zones side by side in each of the 2 bands of rows.
ZONE_INDEX = [
    column // 2 + (row // 4) * 4
    for row in range(8)
    for column in range(8)
]

def get_hash(image, mode, debug=False):
    '''
    Return an integer hash of a single 8x8 frame.

    image: image object providing convert() and getdata() the way PIL images
           do; its 64 pixel values are taken in the order getdata() yields
           them (assumed to be row by row, which is what ZONE_INDEX expects)
    mode:  'color' or 'shape', see below
    debug: unused; kept so that existing calls keep working

    'color' splits the frame into 8 zones, reduces the mean color of each
    zone to 8 bits and stores zone n in bits 8n to 8n+7 of the hash.
    'shape' sets bit n of the hash if pixel n is brighter than the average
    brightness of the frame.

    Raises ValueError if mode is neither 'color' nor 'shape'.
    '''
    if mode == 'color':
        # divide the image into 8 zones:
        # 0 0 1 1 2 2 3 3
        # 0 0 1 1 2 2 3 3
        # 0 0 1 1 2 2 3 3
        # 0 0 1 1 2 2 3 3
        # 4 4 5 5 6 6 7 7
        # 4 4 5 5 6 6 7 7
        # 4 4 5 5 6 6 7 7
        # 4 4 5 5 6 6 7 7
        zone_values = [[] for _ in range(8)]
        # convert to RGB so that every pixel is an (r, g, b) tuple, whatever
        # the mode of the source image is
        for pixel_index, pixel_value in enumerate(image.convert('RGB').getdata()):
            zone_values[ZONE_INDEX[pixel_index]].append(pixel_value)
        image_hash = 0
        for zone_index, pixel_values in enumerate(zone_values):
            # mean of each color channel over the 8 pixels of the zone,
            # rounded to the nearest integer (halves round up) using integer
            # arithmetic only
            mean = [(sum(channel) + 4) // 8 for channel in zip(*pixel_values)]
            # store the mean color of each zone as an 8-bit value:
            # RRRGGGBB
            color_index = (
                ((mean[0] // 32) << 5) |
                ((mean[1] // 32) << 2) |
                (mean[2] // 64)
            )
            image_hash |= color_index << (zone_index * 8)
    elif mode == 'shape':
        # list() so that the values can be walked twice (sum, then compare)
        image_data = list(image.convert('L').getdata())
        image_total = sum(image_data)
        image_hash = 0
        for pixel_index, pixel_value in enumerate(image_data):
            # same test as "pixel_value > image_total / 64", i.e. brighter
            # than the mean of the 64 pixels, without the float division
            if pixel_value * 64 > image_total:
                image_hash |= 1 << pixel_index
    else:
        raise ValueError("unknown mode %r, expected 'color' or 'shape'" % (mode,))
    return image_hash

def get_sequences(path, fps=25):
    '''
    Compute the 'color' and 'shape' sequences for a directory of timeline
    images.

    path: directory containing the files named 'timelinedata8p<number>.<ext>';
          they are processed in ascending order of <number>
    fps:  number of frames per second, used to turn frame counts into
          positions (default 25)

    Every 8 pixel wide, 8 pixel high slice of a timeline image is treated as
    one frame. Consecutive frames with the same hash form one sequence.

    Returns a dict with the keys 'color' and 'shape'. Each value is a list of
    sequences in playback order, each one a dict with the keys 'in' and 'out'
    (positions, i.e. frame count divided by fps) and 'hash' (see get_hash).
    Both lists are empty if no frames were found.
    '''
    modes = ['color', 'shape']
    sequences = dict((mode, []) for mode in modes)
    prefix = 'timelinedata8p'
    # the number that defines the order sits between the prefix and the file
    # extension, so the prefix has to be at the start of the name
    file_names = [name for name in os.listdir(path) if name.startswith(prefix)]
    file_names.sort(key=lambda name: int(os.path.splitext(name)[0][len(prefix):]))
    # count frames and derive each position from the count, rather than adding
    # 1 / fps over and over, so that rounding errors do not accumulate
    frame_count = 0
    for file_name in file_names:
        # os.path.join() works with or without a trailing slash on path
        timeline_image = Image.open(os.path.join(path, file_name))
        timeline_width = timeline_image.size[0]
        for x in range(0, timeline_width, 8):
            frame_image = timeline_image.crop((x, 0, x + 8, 8))
            # float() so that the result does not depend on the division
            # semantics in effect for this module
            position = frame_count / float(fps)
            for mode in modes:
                frame_hash = get_hash(frame_image, mode)
                mode_sequences = sequences[mode]
                if not mode_sequences:
                    # very first frame: open the first sequence
                    mode_sequences.append({'in': position, 'hash': frame_hash})
                elif frame_hash != mode_sequences[-1]['hash']:
                    # hash changed: close the current sequence, open a new one
                    mode_sequences[-1]['out'] = position
                    mode_sequences.append({'in': position, 'hash': frame_hash})
            frame_count += 1
    end_position = frame_count / float(fps)
    for mode in modes:
        # nothing to close if there were no frames at all
        if sequences[mode]:
            sequences[mode][-1]['out'] = end_position
    return sequences

if __name__ == '__main__':
    # Demo: time get_sequences() on one directory of timeline tiles, then, for
    # each mode, list the hashes that occur in more than one sequence.
    # print() is always given one pre-formatted string, so the output is the
    # same whether print is a statement or a function.
    from time import time
    start = time()
    sequences = get_sequences('../tiles/0084628/')
    #sequences = get_sequences('../tiles/0097514/')
    print('get_sequences() took %s seconds' % (time() - start))
    for mode in ['color', 'shape']:
        hashes = []  # one entry per distinct hash, in order of first appearance
        index = {}   # hash -> position of its entry in hashes
        for sequence in sequences[mode]:
            frame_hash = sequence['hash']
            # sequences whose hash is 0 are left out
            if frame_hash <= 0:
                continue
            if frame_hash not in index:
                index[frame_hash] = len(hashes)
                hashes.append({
                    'hash': frame_hash,
                    'sequences': []
                })
            hashes[index[frame_hash]]['sequences'].append({
                'in': sequence['in'],
                'out': sequence['out']
            })
        # keep only the hashes that occur in more than one sequence
        hashes = [h for h in hashes if len(h['sequences']) > 1]
        print('-' * 64)
        print(mode)
        print('-' * 64)
        # ordered by the time between the start of the first and the end of
        # the last occurrence, shortest first
        #for h in sorted(hashes, key=lambda x: -len(x['sequences'])):
        for h in sorted(hashes, key=lambda x: x['sequences'][-1]['out'] - x['sequences'][0]['in']):
            ranges = ', '.join(
                '%.2f-%.2f' % (s['in'], s['out']) for s in h['sequences']
            )
            print('%s %s %s' % (h['hash'], len(h['sequences']), ranges))