overlapping subtitles
This commit is contained in:
parent
c99ec59bb7
commit
6e6469f39c
1 changed file with 69 additions and 6 deletions
73
subtitles.py
73
subtitles.py
|
@ -113,16 +113,65 @@ def render_subtitles(item_json, output_json, output_srt, lang):
|
||||||
elif slang == lang:
|
elif slang == lang:
|
||||||
subs[sub_id]['value'].append(value)
|
subs[sub_id]['value'].append(value)
|
||||||
position += clip['duration']
|
position += clip['duration']
|
||||||
|
|
||||||
|
if isinstance(lang, list):
|
||||||
|
#fixme = [sub for sub in subs.values() if [s for s in list(subs.values())]]
|
||||||
|
fixme = [sub for sub in subs.values() if set(sub['languages']) != set(lang) and sub['value']]
|
||||||
|
if fixme:
|
||||||
|
remove = []
|
||||||
|
for key, sub in list(subs.items()):
|
||||||
|
intersections = []
|
||||||
|
for s in list(subs.values()):
|
||||||
|
intersections += overlaps(sub, s)
|
||||||
|
if intersections:
|
||||||
|
points = list(sorted(set([sub['in'], sub['out']] + intersections)))
|
||||||
|
#print(points, sub['value'])
|
||||||
|
sub_in = points[0]
|
||||||
|
for sub_out in points[1:]:
|
||||||
|
sub_id = '%0.3f-%0.3f' % (sub_in, sub_out)
|
||||||
|
if sub_id not in subs:
|
||||||
|
subs[sub_id] = {
|
||||||
|
'in': sub_in,
|
||||||
|
'out': sub_out,
|
||||||
|
'value': [],
|
||||||
|
'ids': [],
|
||||||
|
'languages': []
|
||||||
|
}
|
||||||
|
if set(subs[sub_id]['languages']) != set(lang):
|
||||||
|
if not subs[sub_id]['languages']:
|
||||||
|
subs[sub_id]['value'] += sub['value']
|
||||||
|
subs[sub_id]['languages'] += sub['languages']
|
||||||
|
subs[sub_id]['ids'] += sub['ids']
|
||||||
|
elif subs[sub_id]['languages'] == [lang[0]] \
|
||||||
|
and sub['languages'][0] not in subs[sub_id]['languages']:
|
||||||
|
subs[sub_id]['value'].append(sub['value'][0])
|
||||||
|
subs[sub_id]['languages'].append(sub['languages'][0])
|
||||||
|
subs[sub_id]['ids'] += sub['ids']
|
||||||
|
elif subs[sub_id]['languages'] == [lang[1]] \
|
||||||
|
and sub['languages'][0] not in subs[sub_id]['languages']:
|
||||||
|
subs[sub_id]['value'].insert(0, sub['value'][0])
|
||||||
|
subs[sub_id]['languages'].insert(0, sub['languages'][0])
|
||||||
|
subs[sub_id]['ids'] += sub['ids']
|
||||||
|
#else:
|
||||||
|
# print('WTF', sub['languages'], subs[sub_id]['languages'])
|
||||||
|
|
||||||
|
sub_in = sub_out
|
||||||
|
remove.append(key)
|
||||||
|
#for key, sub in list(subs.items()):
|
||||||
|
# if len(sub['languages']) == 1:
|
||||||
|
# del subs[key]
|
||||||
|
for key in remove:
|
||||||
|
if len(subs[key]['languages']) == 1:
|
||||||
|
del subs[key]
|
||||||
|
for key, sub in list(subs.items()):
|
||||||
|
if abs(sub['out'] - sub['in']) <= 0.040001:
|
||||||
|
del subs[key]
|
||||||
subs = sorted(subs.values(), key=lambda c: (c['in'], c['out']))
|
subs = sorted(subs.values(), key=lambda c: (c['in'], c['out']))
|
||||||
for sub in subs:
|
for sub in subs:
|
||||||
sub['value'] = '\n'.join(sub['value'])
|
sub['value'] = '\n'.join(sub['value'])
|
||||||
|
if sub['value'].strip():
|
||||||
subtitles.append(sub)
|
subtitles.append(sub)
|
||||||
|
|
||||||
if isinstance(lang, list):
|
|
||||||
fixme = [sub for sub in subs if set(sub['languages']) != set(lang)]
|
|
||||||
if fixme:
|
|
||||||
print('split/merge overlaps', output_srt, output_json)
|
|
||||||
|
|
||||||
if output_srt:
|
if output_srt:
|
||||||
with open(output_srt, 'wb') as fd:
|
with open(output_srt, 'wb') as fd:
|
||||||
fd.write(ox.srt.encode(subtitles))
|
fd.write(ox.srt.encode(subtitles))
|
||||||
|
@ -130,6 +179,20 @@ def render_subtitles(item_json, output_json, output_srt, lang):
|
||||||
json.dump(subtitles, fd, indent=4, ensure_ascii=False, sort_keys=True)
|
json.dump(subtitles, fd, indent=4, ensure_ascii=False, sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
|
def overlaps(src, other):
    """Return the boundaries of ``other`` that fall within ``src``.

    ``src`` and ``other`` are mappings with numeric ``'in'`` and ``'out'``
    entries. All comparisons use the values rounded to three decimals,
    while the returned points are the unrounded values from ``other``.

    The result is an empty list when both intervals are equal after
    rounding. Otherwise it contains ``other['in']`` if
    ``src_in <= other_in < src_out`` and ``other['out']`` if
    ``src_in < other_out <= src_out``, in that order.
    """
    def _round3(value):
        # Round-trip through a three-decimal string so that differences
        # below the third decimal do not influence the comparisons.
        return float('%0.3f' % value)

    start = _round3(src['in'])
    end = _round3(src['out'])
    o_start = _round3(other['in'])
    o_end = _round3(other['out'])

    # Identical intervals (after rounding) contribute no split points.
    if start == o_start and end == o_end:
        return []

    found = []
    # Start of ``other`` lies inside ``src`` (start inclusive, end exclusive).
    if start <= o_start < end:
        found.append(other['in'])
    # End of ``other`` lies inside ``src`` (start exclusive, end inclusive).
    if start < o_end <= end:
        found.append(other['out'])
    return found
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if os.path.exists('subtitles.json'):
|
if os.path.exists('subtitles.json'):
|
||||||
items = json.load(open('subtitles.json'))
|
items = json.load(open('subtitles.json'))
|
||||||
|
|
Loading…
Reference in a new issue