diff --git a/subtitles.py b/subtitles.py index f96078c..8ce965f 100755 --- a/subtitles.py +++ b/subtitles.py @@ -113,15 +113,64 @@ def render_subtitles(item_json, output_json, output_srt, lang): elif slang == lang: subs[sub_id]['value'].append(value) position += clip['duration'] + + if isinstance(lang, list): + #fixme = [sub for sub in subs.values() if [s for s in list(subs.values())]] + fixme = [sub for sub in subs.values() if set(sub['languages']) != set(lang) and sub['value']] + if fixme: + remove = [] + for key, sub in list(subs.items()): + intersections = [] + for s in list(subs.values()): + intersections += overlaps(sub, s) + if intersections: + points = list(sorted(set([sub['in'], sub['out']] + intersections))) + #print(points, sub['value']) + sub_in = points[0] + for sub_out in points[1:]: + sub_id = '%0.3f-%0.3f' % (sub_in, sub_out) + if sub_id not in subs: + subs[sub_id] = { + 'in': sub_in, + 'out': sub_out, + 'value': [], + 'ids': [], + 'languages': [] + } + if set(subs[sub_id]['languages']) != set(lang): + if not subs[sub_id]['languages']: + subs[sub_id]['value'] += sub['value'] + subs[sub_id]['languages'] += sub['languages'] + subs[sub_id]['ids'] += sub['ids'] + elif subs[sub_id]['languages'] == [lang[0]] \ + and sub['languages'][0] not in subs[sub_id]['languages']: + subs[sub_id]['value'].append(sub['value'][0]) + subs[sub_id]['languages'].append(sub['languages'][0]) + subs[sub_id]['ids'] += sub['ids'] + elif subs[sub_id]['languages'] == [lang[1]] \ + and sub['languages'][0] not in subs[sub_id]['languages']: + subs[sub_id]['value'].insert(0, sub['value'][0]) + subs[sub_id]['languages'].insert(0, sub['languages'][0]) + subs[sub_id]['ids'] += sub['ids'] + #else: + # print('WTF', sub['languages'], subs[sub_id]['languages']) + + sub_in = sub_out + remove.append(key) + #for key, sub in list(subs.items()): + # if len(sub['languages']) == 1: + # del subs[key] + for key in remove: + if len(subs[key]['languages']) == 1: + del subs[key] + for key, sub in 
def overlaps(src, other):
    """Return the endpoints of *other* that lie within *src*.

    Both arguments are mappings with numeric ``'in'`` and ``'out'`` entries.
    All comparisons are made on values rounded to three decimals, but the
    points handed back are the original, unrounded ``other`` values.

    Returns a list holding ``other['in']`` if it falls in
    ``[src in, src out)`` and ``other['out']`` if it falls in
    ``(src in, src out]`` (in that order). The list is empty when the two
    spans are equal after rounding, or when neither endpoint qualifies.
    """
    def _rounded(value):
        # Round-trip through a 3-decimal string, as the comparisons require.
        return float('%0.3f' % value)

    lo, hi = _rounded(src['in']), _rounded(src['out'])
    start, end = _rounded(other['in']), _rounded(other['out'])

    # Spans that coincide after rounding contribute no split points.
    if lo == start and hi == end:
        return []

    cuts = []
    if lo <= start < hi:
        cuts.append(other['in'])
    if lo < end <= hi:
        cuts.append(other['out'])
    return cuts