"""Export the Prelinger Archives collection from archive.org to JSON.

For every item found by the ``collection:prelinger`` search, pick one MP4
derivative (preferring h.264), copy a fixed set of metadata fields, and
write the resulting list of records to ``prelinger.json``.

Requires the third-party package: pip install internetarchive
"""
import json

# Derivative formats that count as a usable MP4, in order of preference.
VIDEO_FORMATS = ('h.264', 'MPEG4', '512Kb MPEG4', 'HiRes MPEG4')

# Item metadata fields copied verbatim into each exported record.
METADATA_KEYS = (
    'title',
    'description',
    'year',
    'publisher',
    'addeddate',
    'sound',
    'creator',
    'color',
    'credits',
    'sponsor',
    'uploader',
    'licenseurl',
    'subject',
    'language',
    'date',  # needed?
)

DETAILS_URL = 'https://archive.org/details/%s'
DOWNLOAD_URL = 'https://archive.org/download/%s/%s'
OUTPUT_PATH = 'prelinger.json'


def pick_video_file(files):
    """Return the preferred video file entry from *files*, or ``None``.

    *files* is an iterable of file dicts carrying a ``'format'`` key. The
    formats in ``VIDEO_FORMATS`` are tried in order; within one format the
    first matching entry wins.
    """
    files = list(files)
    for wanted in VIDEO_FORMATS:
        for entry in files:
            if entry.get('format') == wanted:
                return entry
    return None


def available_formats(files):
    """Return the sorted, de-duplicated format names present in *files*."""
    return sorted({entry['format'] for entry in files if 'format' in entry})


def collect_metadata(metadata):
    """Return the non-empty ``METADATA_KEYS`` fields of *metadata*.

    String values wrapped in a single pair of square brackets (for example
    ``'[1950]'``) have the brackets removed. Non-string values such as
    lists are copied unchanged.
    """
    collected = {}
    for key in METADATA_KEYS:
        value = metadata.get(key)
        if not value:
            continue
        # Only strings can carry the "[...]" wrapper; indexing a list here
        # would compare its elements instead of characters.
        if isinstance(value, str) and value[0] == '[' and value[-1] == ']':
            value = value[1:-1]
        collected[key] = value
    return collected


def build_record(identifier, metadata, video_file):
    """Build the exported record for one item.

    The record holds the item id, the download URL and size of
    *video_file*, followed by the fields selected by ``collect_metadata``.
    """
    record = {
        'id': identifier,
        'mp4': DOWNLOAD_URL % (identifier, video_file['name']),
        'mp4_size': video_file['size'],
    }
    record.update(collect_metadata(metadata))
    return record


def main():
    """Scrape the collection and write ``prelinger.json``."""
    # Imported here rather than at module level so the pure helpers above
    # can be imported (and tested) without the third-party package.
    import internetarchive

    data = {}
    # NOTE(review): newer releases of the library appear to expose
    # internetarchive.search_items(query) for this; confirm which API the
    # installed version provides.
    for result in internetarchive.search.Search('collection:prelinger'):
        item = internetarchive.get_item(result['identifier'])
        video_file = pick_video_file(item.files)
        if video_file is None:
            # No usable MP4: dump what the item does have, for diagnosis.
            formats = available_formats(item.files)
            if formats:
                print(item.identifier, item.files)
                print(formats)
            continue

        record = build_record(item.identifier, item.metadata, video_file)
        # Fall back to the identifier so an untitled item cannot abort the
        # whole run before anything has been written.
        print(item.metadata.get('title', item.identifier))
        print(DETAILS_URL % item.identifier)
        print(record['mp4'])
        data[item.identifier] = record

    with open(OUTPUT_PATH, 'w') as f:
        json.dump(list(data.values()), f, indent=2)


if __name__ == '__main__':
    main()