44 lines
1.6 KiB
Python
44 lines
1.6 KiB
Python
# Requires the third-party package: pip install internetarchive
|
|
import json
|
|
import internetarchive
|
|
|
|
|
|
# File formats accepted as the playable MP4 for an item, in order of
# preference: the first format with at least one matching file wins.
VIDEO_FORMATS = ('h.264', 'MPEG4', '512Kb MPEG4', 'HiRes MPEG4')

# Metadata fields copied into each output record when present and non-empty.
# Every entry needs its own trailing comma: a missing comma previously fused
# 'language' and 'date' into the single bogus key 'languagedate'.
METADATA_KEYS = (
    'title', 'description', 'year',
    'publisher', 'addeddate', 'sound',
    'creator', 'color', 'credits',
    'sponsor', 'uploader', 'licenseurl', 'subject',
    'language',
    # The original author flagged 'date' with "needed?"; kept until decided.
    'date',
)


def select_video_files(files):
    """Return the files whose format is the most preferred one available.

    ``files`` is an iterable of file dicts carrying a 'format' key.  The
    formats in VIDEO_FORMATS are tried in order and the matches for the
    first format that has any are returned; an empty list means no file
    in an accepted format exists.
    """
    files = list(files)
    for wanted in VIDEO_FORMATS:
        matches = [f for f in files if f['format'] == wanted]
        if matches:
            return matches
    return []


def strip_brackets(value):
    """Drop one pair of enclosing square brackets from a string value.

    Values such as '[1950]' become '1950'.  Non-string values (for example
    multi-valued fields stored as lists) and strings that are not fully
    bracketed are returned unchanged.
    """
    if isinstance(value, str) and value[:1] == '[' and value[-1:] == ']':
        return value[1:-1]
    return value


def build_record(identifier, metadata, video):
    """Build the JSON-serialisable record for one item.

    ``identifier`` is the item identifier, ``metadata`` the item's metadata
    mapping and ``video`` the chosen file dict (needs 'name' and 'size').
    The record always has 'id', 'mp4' and 'mp4_size', followed by every key
    of METADATA_KEYS that has a non-empty value in ``metadata``.
    """
    record = {
        'id': identifier,
        'mp4': 'https://archive.org/download/%s/%s' % (identifier, video['name']),
        'mp4_size': video['size'],
    }
    for key in METADATA_KEYS:
        value = metadata.get(key)
        if value:
            record[key] = strip_brackets(value)
    return record


def collect_records():
    """Crawl the 'prelinger' collection and return records keyed by identifier.

    Items with a file in an accepted video format are reported on stdout
    (title, details URL, download URL) and added to the result.  Items
    without one are skipped, and their available formats are printed so
    that missing format names can be spotted.
    """
    records = {}
    # search_items() is the package's documented search entry point; building
    # search.Search directly needs a session argument in current releases.
    for hit in internetarchive.search_items('collection:prelinger'):
        item = internetarchive.get_item(hit['identifier'])
        matches = select_video_files(item.files)
        if matches:
            print(item.metadata['title'])
            print('https://archive.org/details/%s' % item.identifier)
            record = build_record(item.identifier, item.metadata, matches[0])
            print(record['mp4'])
            records[item.identifier] = record
        else:
            formats = sorted({f['format'] for f in item.files})
            if formats:
                print(item.identifier, item.files)
                print(formats)
    return records


def main():
    """Collect all records and write them to prelinger.json as a JSON list."""
    records = collect_records()
    with open('prelinger.json', 'w') as f:
        json.dump(list(records.values()), f, indent=2)


if __name__ == '__main__':
    main()
|