# -*- coding: utf-8 -*-

from datetime import datetime
from io import StringIO, BytesIO

from PIL import Image, ImageFile
import base64
import functools
import hashlib
import json
import os
import re
import socket
import stdnum.isbn
import subprocess
import sys
import time
import unicodedata

import ox
import OpenSSL.crypto
from OpenSSL.crypto import (
    dump_certificate,
    dump_privatekey,
    FILETYPE_PEM,
    load_certificate,
    load_privatekey,
    PKey,
    TYPE_RSA,
    X509,
    X509Extension
)
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519

from meta.utils import normalize_isbn, find_isbns, get_language, to_isbn13
from win32utils import get_short_path_name


import logging
logging.getLogger('PIL').setLevel(logging.ERROR)
logger = logging.getLogger(__name__)

ImageFile.LOAD_TRUNCATED_IMAGES = True

ENCODING = 'base64'


def valid_olid(id):
    return id.startswith('OL') and id.endswith('M')


def get_positions(ids, pos):
    '''
    >>> get_positions([1,2,3,4], [2,4])
    {2: 1, 4: 3}
    '''
    positions = {}
    for i in pos:
        try:
            positions[i] = ids.index(i)
        except ValueError:
            pass
    return positions


def get_by_key(objects, key, value):
    obj = [o for o in objects if o.get(key) == value]
    return obj and obj[0] or None


def get_by_id(objects, id):
    return get_by_key(objects, 'id', id)


def is_svg(data):
    return data and b'<svg' in data[:256]


def resize_image(data, width=None, size=None):
    if isinstance(data, bytes):
        data = BytesIO(data)
    else:
        data = StringIO(data)
    source = Image.open(data)
    #if source.mode not in ('1', 'CMYK', 'L', 'RGB', 'RGBA', 'RGBX', 'YCbCr'):
    if source.mode != 'RGB':
        source = source.convert('RGB')
    source_width = source.size[0]
    source_height = source.size[1]
    if size:
        if source_width > source_height:
            width = size
            height = int(width / (float(source_width) / source_height))
            height = height - height % 2
        else:
            height = size
            width = int(height * (float(source_width) / source_height))
            width = width - width % 2
    else:
        height = int(width / (float(source_width) / source_height))
        height = height - height % 2
    width = max(width, 1)
    height = max(height, 1)
    if width < source_width:
        resize_method = Image.LANCZOS
    else:
        resize_method = Image.BICUBIC
    output = source.resize((width, height), resize_method)
    o = BytesIO()
    output.save(o, format='jpeg')
    data = o.getvalue()
    o.close()
    return data
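

# Illustrative usage (not part of the original source): scale cover data to a
# fixed width, or fit it into a bounding box via size=; the result is always
# JPEG-encoded bytes.
#
#   thumb = resize_image(cover_data, width=256)
#   preview = resize_image(cover_data, size=1024)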


def sort_title(title):
    title = title.replace('Æ', 'Ae')
    if isinstance(title, str):
        title = str(title)
    title = ox.sort_string(title)
    #title
    title = re.sub(r'[\'!¿¡,\.;\-"\:\*\[\]]', '', title)
    return title.strip()


def get_position_by_id(list, key):
    for i in range(0, len(list)):
        if list[i]['id'] == key:
            return i
    return -1


def sign_cert(cert, key):
    # pyOpenSSL sign api does not allow NULL hash
    # return cert.sign(key, None)
    return OpenSSL.crypto._lib.X509_sign(cert._x509, key._pkey, OpenSSL.crypto._ffi.NULL)


def load_pem_key(pem):
    with open(pem) as fd:
        ca_key_pem = fd.read()
    key = load_privatekey(FILETYPE_PEM, ca_key_pem)
    if key.bits() != 256:
        raise Exception("Invalid key %s" % pem)
    key = key.to_cryptography_key()
    private_key = key.private_bytes_raw()
    public_key = key.public_key().public_bytes_raw()
    return private_key, public_key
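

# expand_private_key below turns a 32-byte Ed25519 seed into the 64-byte
# "expanded" form Tor expects for ED25519-V3 keys: SHA-512 the seed, then
# clamp the scalar half as specified in RFC 8032 (clear the low three bits of
# byte 0, clear the top bit and set the second-highest bit of byte 31).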


def expand_private_key(secret_key) -> bytes:
    hash = hashlib.sha512(secret_key).digest()
    hash = bytearray(hash)
    hash[0] &= 248
    hash[31] &= 127
    hash[31] |= 64
    return bytes(hash)
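

# get_onion implements the v3 onion address encoding from Tor's rend-spec-v3:
# the address is base32(pubkey || checksum || version), lowercased, with
# checksum = SHA3-256(".onion checksum" || pubkey || version)[:2].
# For a 32-byte Ed25519 public key this yields a 56-character string
# (append '.onion' to get the hostname).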
def get_onion(pubkey):
version_byte = b"\x03"
checksum_str = ".onion checksum".encode()
checksum = hashlib.sha3_256(checksum_str + pubkey + version_byte).digest()[:2]
return base64.b32encode(pubkey + checksum + version_byte).decode().lower()
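

# get_onion_key returns the (key type, key blob) pair in the format Tor uses
# for ED25519-V3 hidden service keys (e.g. with the control port's ADD_ONION
# command): 'ED25519-V3' plus the base64-encoded 64-byte expanded private key.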


def get_onion_key(private_key):
    onion_key = expand_private_key(private_key)
    key_type = 'ED25519-V3'
    key_content = base64.encodebytes(onion_key).decode().strip().replace('\n', '')
    return key_type, key_content


def get_user_id(key_path, cert_path, ca_key_path, ca_cert_path):
    if os.path.exists(ca_key_path):
        try:
            private_key, public_key = load_pem_key(ca_key_path)
        except:
            os.unlink(ca_key_path)
        else:
            user_id = get_onion(public_key)
    if not os.path.exists(ca_key_path):
        private_key = ed25519.Ed25519PrivateKey.generate()
        private_bytes = private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption()
        )
        with open(ca_key_path, 'wb') as fd:
            fd.write(private_bytes)
        public_key = private_key.public_key().public_bytes_raw()
        user_id = get_onion(public_key)
    if not os.path.exists(ca_cert_path) or \
            (datetime.now() - datetime.fromtimestamp(os.path.getmtime(ca_cert_path))).days > 5*365:
        with open(ca_key_path, 'rb') as key_file:
            key_data = key_file.read()
        cakey = load_privatekey(FILETYPE_PEM, key_data)
        ca = X509()
        ca.set_version(2)
        ca.set_serial_number(1)
        ca.get_subject().CN = user_id
        ca.gmtime_adj_notBefore(0)
        ca.gmtime_adj_notAfter(10 * 365 * 24 * 60 * 60)  # valid for ten years
        ca.set_issuer(ca.get_subject())
        ca.set_pubkey(cakey)
        ca.add_extensions([
            X509Extension(b"basicConstraints", False, b"CA:TRUE"),
            X509Extension(b"keyUsage", False, b"keyCertSign, cRLSign"),
            X509Extension(
                b"subjectKeyIdentifier", False, b"hash", subject=ca
            ),
        ])
        ca.add_extensions([
            X509Extension(
                b"authorityKeyIdentifier", False, b"keyid:always", issuer=ca
            )
        ])
        sign_cert(ca, cakey)
        with open(ca_cert_path, 'wb') as fd:
            fd.write(dump_certificate(FILETYPE_PEM, ca))
        if os.path.exists(cert_path):
            os.unlink(cert_path)
        if os.path.exists(key_path):
            os.unlink(key_path)
    else:
        with open(ca_cert_path) as fd:
            ca = load_certificate(FILETYPE_PEM, fd.read())
        with open(ca_key_path) as fd:
            cakey = load_privatekey(FILETYPE_PEM, fd.read())
    # create RSA intermediate certificate since clients don't quite like Ed25519 yet
    if not os.path.exists(cert_path) or \
            (datetime.now() - datetime.fromtimestamp(os.path.getmtime(cert_path))).days > 60:
        key = PKey()
        key.generate_key(TYPE_RSA, 2048)
        cert = X509()
        cert.set_version(2)
        cert.set_serial_number(2)
        cert.get_subject().CN = user_id + ".onion"
        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(90 * 24 * 60 * 60)
        cert.set_issuer(ca.get_subject())
        cert.set_pubkey(key)
        subject_alt_names = b"DNS: %s.onion" % user_id.encode()
        cert.add_extensions([
            X509Extension(b"basicConstraints", True, b"CA:FALSE"),
            X509Extension(b"extendedKeyUsage", True,
                b"serverAuth,clientAuth,emailProtection,timeStamping,msCodeInd,msCodeCom,msCTLSign,msSGC,msEFS,nsSGC"),
            X509Extension(b"keyUsage", False, b"keyCertSign, cRLSign"),
            X509Extension(b"subjectKeyIdentifier", False, b"hash", subject=ca),
            X509Extension(b"subjectAltName", critical=True, value=subject_alt_names),
        ])
        sign_cert(cert, cakey)
        with open(cert_path, 'wb') as fd:
            fd.write(dump_certificate(FILETYPE_PEM, cert))
            fd.write(dump_certificate(FILETYPE_PEM, ca))
        with open(key_path, 'wb') as fd:
            fd.write(dump_privatekey(FILETYPE_PEM, key))
    return user_id
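

# Illustrative call (paths are hypothetical, following oml's data directory layout):
#
#   user_id = get_user_id(
#       os.path.join(data_path, 'node.key'),
#       os.path.join(data_path, 'node.cert.pem'),
#       os.path.join(data_path, 'ca.key.pem'),
#       os.path.join(data_path, 'ca.cert.pem'),
#   )
#
# user_id is the v3 onion address (without the '.onion' suffix) derived from
# the Ed25519 CA key; the CA and intermediate certificates are (re)created as
# a side effect when missing or about to expire.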


def get_service_id(private_key_file=None, cert=None):
    '''
    service_id is the v3 onion address of the service, derived from its
    Ed25519 public key (see get_onion)
    '''
    if private_key_file:
        with open(private_key_file, 'rb') as key_file:
            key_type, key_content = key_file.read().split(b':', 1)
            private_key = base64.decodebytes(key_content)
            public_key = Ed25519().public_key_from_hash(private_key)
            service_id = get_onion(public_key)
    elif cert:
        cert_ = load_certificate(FILETYPE_PEM, cert)
        key = cert_.get_pubkey()
        public_key = key.to_cryptography_key().public_bytes_raw()
        service_id = get_onion(public_key)
    else:
        service_id = None
    return service_id
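

# Illustrative usage (file path is hypothetical): the private key file is
# expected to hold a single 'ED25519-V3:<base64 expanded key>' entry, i.e. the
# key_type/key_content pair produced by get_onion_key; alternatively a PEM
# certificate carrying the Ed25519 public key can be passed via cert=.
#
#   service_id = get_service_id(private_key_file=os.path.join(data_path, 'tor', 'private_key'))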


def update_dict(root, data):
    for key in data:
        keys = [part.replace('\0', '.') for part in key.replace('\\.', '\0').split('.')]
        value = data[key]
        p = root
        while len(keys) > 1:
            key = keys.pop(0)
            if isinstance(p, list):
                p = p[get_position_by_id(p, key)]
            else:
                if key not in p:
                    p[key] = {}
                p = p[key]
        if value is None and keys[0] in p:
            del p[keys[0]]
        else:
            p[keys[0]] = value
    if hasattr(root, '_save'):
        root._save()
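

# Illustrative example (not from the original source): keys are dotted paths,
# '\.' escapes a literal dot, and a value of None deletes an existing key.
#
#   >>> d = {}
#   >>> update_dict(d, {'a.b': 1, 'c\\.d': 2})
#   >>> d == {'a': {'b': 1}, 'c.d': 2}
#   True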


def remove_empty_folders(prefix, keep_root=False):
    empty = []
    for root, folders, files in os.walk(prefix):
        if len(files) == 1 and files[0] == '.DS_Store':
            os.unlink(os.path.join(root, files[0]))
            files = []
        if not folders and not files:
            if root != prefix or not keep_root:
                empty.append(root)
    for folder in empty:
        remove_empty_tree(folder)


def remove_empty_tree(leaf):
    while leaf:
        if not os.path.exists(leaf):
            leaf = os.path.dirname(leaf)
        elif os.path.isdir(leaf) and not os.listdir(leaf):
            logger.debug('rmdir %s', leaf)
            os.rmdir(leaf)
        else:
            break


try:
    utc_0 = int(time.mktime(datetime(1970, 1, 1).timetuple()))
except:
    utc_0 = int(time.mktime(time.gmtime()) - time.mktime(time.localtime()))


def datetime2ts(dt):
    return int(time.mktime(dt.utctimetuple())) - utc_0


def ts2datetime(ts):
    return datetime.utcfromtimestamp(float(ts))


def run(*cmd):
    p = subprocess.Popen(cmd, close_fds=True)
    p.wait()
    return p.returncode


def get(*cmd):
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, error = p.communicate()
    return stdout.decode()


def makefolder(path):
    dirname = os.path.dirname(path)
    if not os.path.exists(dirname):
        os.makedirs(dirname)


def open_file(path=None):
    cmd = []
    if sys.platform == 'darwin':
        cmd += ['open', path]
    elif sys.platform.startswith('linux'):
        if os.path.exists('/usr/bin/gio'):
            cmd += ['gio', 'open', path]
        else:
            cmd += ['xdg-open', path]
    elif sys.platform == 'win32':
        path = '\\'.join(path.split('/'))
        os.startfile(path)
        cmd = []
    else:
        logger.debug('unsupported platform %s', sys.platform)
    if cmd:
        subprocess.Popen(cmd, close_fds=True)


def open_folder(folder=None, path=None):
    cmd = []
    if path and not folder:
        folder = os.path.dirname(path)
    if folder and not path:
        path = folder
    if sys.platform == 'darwin':
        if folder and not path:
            path = folder
        cmd += ['open', '-R', path]
    elif sys.platform.startswith('linux'):
        if os.path.exists('/usr/bin/gio'):
            cmd += ['gio', 'open', folder]
        else:
            cmd += ['xdg-open', folder]
    elif sys.platform == 'win32':
        path = '\\'.join(path.split('/'))
        cmd = 'explorer.exe /select,"%s"' % path
    else:
        logger.debug('unsupported platform %s', sys.platform)
    if cmd:
        subprocess.Popen(cmd, close_fds=True)


def can_connect_dns(host="8.8.8.8", port=53):
    """
    host: 8.8.8.8 (google-public-dns-a.google.com)
    port: 53/tcp
    """
    import socks
    import state
    try:
        sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM, 6)
        sock.settimeout(2)
        socks_port = state.tor.socks_port if state.tor else 9150
        sock.set_proxy(socks.SOCKS5, "localhost", socks_port, True)
        sock.connect((host, port))
        return True
    except:
        #logger.debug('failed to connect', exc_info=True)
        pass
    return False


def _to_json(python_object):
    if isinstance(python_object, datetime):
        if python_object.year < 1900:
            tt = python_object.timetuple()
            return '%d-%02d-%02dT%02d:%02d:%02dZ' % tuple(list(tt)[:6])
        return python_object.strftime('%Y-%m-%dT%H:%M:%SZ')
    raise TypeError(u'%s %s is not JSON serializable' % (repr(python_object), type(python_object)))
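

# _to_json is meant to be passed as the default= serializer for json.dumps,
# e.g. json.dumps({'modified': datetime.utcnow()}, default=_to_json).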


def get_ratio(data):
    try:
        img = Image.open(BytesIO(data))
        return img.size[0]/img.size[1]
    except:
        return 1


def get_meta_hash(data):
    data = data.copy()
    if 'sharemetadata' in data:
        del data['sharemetadata']
    for key in list(data):
        if not data[key]:
            del data[key]
    return hashlib.sha1(json.dumps(data,
        ensure_ascii=False, sort_keys=True).encode()).hexdigest()
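

# Illustrative property (not from the original source): empty values and the
# 'sharemetadata' flag do not affect the hash, so e.g.
# get_meta_hash({'title': 'Ulysses', 'notes': ''}) == get_meta_hash({'title': 'Ulysses'}).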


def update_static():
    import settings
    import os
    import ox
    path = os.path.join(settings.static_path, 'js')
    files = sorted([
        file for file in os.listdir(path)
        if not file.startswith('.')
        and not file.startswith('oml.')
    ])
    ox.file.write_json(os.path.join(settings.static_path, 'json', 'js.json'), files, indent=4)
    ox.file.write_file(
        os.path.join(path, 'oml.min.js'),
        '\n'.join([
            ox.js.minify(ox.file.read_file(os.path.join(path, file)).decode('utf-8'))
            for file in files
        ])
    )


def check_pid(pid):
    if sys.platform == 'win32':
        import ctypes
        kernel32 = ctypes.windll.kernel32
        SYNCHRONIZE = 0x100000
        process = kernel32.OpenProcess(SYNCHRONIZE, 0, pid)
        if process != 0:
            kernel32.CloseHandle(process)
            return True
        else:
            return False
    else:
        try:
            os.kill(pid, 0)
        except:
            return False
        else:
            return True


def check_pidfile(pid):
    try:
        with open(pid) as fd:
            pid = int(fd.read())
    except:
        return False
    return check_pid(pid)


def ctl(*args):
    import settings
    if sys.platform == 'win32':
        platform_win32 = os.path.normpath(os.path.join(settings.top_dir, 'platform_win32'))
        python = os.path.join(platform_win32, 'pythonw.exe')
        cmd = [python, 'oml'] + list(args)
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
        subprocess.Popen(cmd, cwd=settings.base_dir, start_new_session=True, startupinfo=startupinfo)
    else:
        subprocess.Popen([os.path.join(settings.base_dir, 'ctl')] + list(args),
                         close_fds=True, start_new_session=True)


def ctl_output(*args):
    import settings
    if sys.platform == 'win32':
        platform_win32 = os.path.join('..', 'platform_win32')
        python = os.path.join(platform_win32, 'python.exe')
        cmd = [python, 'oml'] + list(args)
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
        p = subprocess.Popen(cmd, cwd=settings.base_dir, startupinfo=startupinfo,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        p = subprocess.Popen([os.path.join(settings.base_dir, 'ctl')] + list(args),
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    #logger.debug('ctl_output%s -> %s [%s]', args, stdout, stderr)
    return stdout.decode('utf-8').strip()


def user_sort_key(u):
    return ox.sort_string(str(u.get('index', '')) + 'Z' + (u.get('name') or ''))


def get_peer(peerid):
    import state
    import library
    if peerid not in state.peers:
        state.peers[peerid] = library.Peer(peerid)
    return state.peers[peerid]


def send_debug():
    import settings
    import tor_request
    import gzip
    import io
    url = 'http://rnogx24drkbnrxa3.onion/debug'
    headers = {
        'User-Agent': settings.USER_AGENT,
    }
    debug_log = os.path.join(settings.data_path, 'debug.log')
    last_debug = settings.server.get('last_debug')
    old = last_debug is not None
    try:
        if os.path.exists(debug_log):
            data = []
            with open(debug_log, 'r') as fd:
                for line in fd:
                    t = line.split(':DEBUG')[0]
                    if t.count('-') == 2:
                        timestamp = t
                        if old and timestamp > last_debug:
                            old = False
                    if not old:
                        data.append(line)
            data = ''.join(data)
            if data:
                bytes_io = io.BytesIO()
                gzip_file = gzip.GzipFile(fileobj=bytes_io, mode='wb')
                gzip_file.write(data.encode())
                gzip_file.close()
                result = bytes_io.getvalue()
                bytes_io.close()
                opener = tor_request.get_opener()
                opener.addheaders = list(zip(headers.keys(), headers.values()))
                r = opener.open(url, result)
                if r.status != 200:
                    logger.debug('failed to send debug information (server error)')
                else:
                    settings.server['last_debug'] = timestamp
    except:
        logger.error('failed to send debug information (connection error)', exc_info=True)


def iexists(path):
    parts = path.split(os.sep)
    name = parts[-1].lower()
    if len(parts) == 1:
        folder = '.'
    else:
        folder = os.path.dirname(path)
    try:
        files = os.listdir(folder)
    except FileNotFoundError:
        return False
    files = {os.path.basename(f).lower() for f in files}
    return name in files


def same_path(f1, f2):
    return unicodedata.normalize('NFC', f1) == unicodedata.normalize('NFC', f2)


def time_cache(max_age, maxsize=128, typed=False):
    def _decorator(fn):
        @functools.lru_cache(maxsize=maxsize, typed=typed)
        def _new(*args, __time_salt, **kwargs):
            return fn(*args, **kwargs)

        @functools.wraps(fn)
        def _wrapped(*args, **kwargs):
            return _new(*args, **kwargs, __time_salt=int(time.time() / max_age))

        return _wrapped
    return _decorator
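

# Illustrative usage (not from the original source): cached results are reused
# until the time bucket int(time.time() / max_age) changes, i.e. roughly once
# per max_age seconds per argument tuple.
#
#   @time_cache(60)
#   def expensive_lookup(isbn):
#       ...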


def migrate_userid(old_id, new_id):
    from db import run_sql
    import settings
    statements = [
        "UPDATE user SET id = '{nid}' WHERE id = '{oid}'",
        "UPDATE list SET user_id = '{nid}' WHERE user_id = '{oid}'",
        "UPDATE useritem SET user_id = '{nid}' WHERE user_id = '{oid}'",
        "UPDATE changelog SET user_id = '{nid}' WHERE user_id = '{oid}'",
    ]
    run_sql([
        sql.format(oid=old_id, nid=new_id)
        for sql in statements
    ])
    for ext in ('log', 'db', 'json'):
        old_file = os.path.join(settings.data_path, 'peers/%s.%s' % (old_id, ext))
        new_file = os.path.join(settings.data_path, 'peers/%s.%s' % (new_id, ext))
        if os.path.exists(old_file) and not os.path.exists(new_file):
            os.rename(old_file, new_file)