openmedialibrary/oml/utils.py

598 lines
18 KiB
Python

# -*- coding: utf-8 -*-
from datetime import datetime
from io import StringIO, BytesIO
from PIL import Image, ImageFile
import base64
import functools
import hashlib
import json
import os
import re
import socket
import stdnum.isbn
import subprocess
import sys
import time
import unicodedata
import ox
import OpenSSL.crypto
from OpenSSL.crypto import (
dump_certificate,
dump_privatekey,
FILETYPE_PEM,
load_certificate,
load_privatekey,
PKey,
TYPE_RSA,
X509,
X509Extension
)
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519
from meta.utils import normalize_isbn, find_isbns, get_language, to_isbn13
from win32utils import get_short_path_name
import logging
# only let PIL log records of level ERROR and above through
logging.getLogger('PIL').setLevel(logging.ERROR)
# logger for this module
logger = logging.getLogger(__name__)
# let PIL load image files that are truncated instead of raising an error
ImageFile.LOAD_TRUNCATED_IMAGES = True
# NOTE(review): not referenced in the visible part of this module -
# presumably used by importers of this module; confirm before removing
ENCODING = 'base64'
def valid_olid(id):
    '''
    Return True if ``id`` starts with 'OL' and ends with 'M'.

    >>> valid_olid('OL123M')
    True
    >>> valid_olid('OL123W')
    False
    '''
    has_prefix = id.startswith('OL')
    return has_prefix and id.endswith('M')
def get_positions(ids, pos):
    '''
    Map each value of ``pos`` that occurs in ``ids`` to its index in ``ids``.

    Values of ``pos`` that are not in ``ids`` are left out of the result.

    >>> get_positions([1,2,3,4], [2,4])
    {2: 1, 4: 3}
    >>> get_positions([1,2], [3])
    {}
    '''
    positions = {}
    for i in pos:
        try:
            positions[i] = ids.index(i)
        except ValueError:
            # i does not occur in ids: skip it
            continue
    return positions
def get_by_key(objects, key, value):
    '''
    Return the first object in ``objects`` with ``o.get(key) == value``.

    Returns None if no object matches.

    >>> get_by_key([{'id': 1}, {'id': 2}], 'id', 2)
    {'id': 2}
    '''
    # next() stops at the first match and, unlike ``x and y or None``,
    # also returns a matching object that happens to be falsy (e.g. {})
    return next((o for o in objects if o.get(key) == value), None)
def get_by_id(objects, id):
    '''
    Look up an object in ``objects`` by its 'id' entry.

    Shortcut for ``get_by_key(objects, 'id', id)``.
    '''
    return get_by_key(objects, key='id', value=id)
def is_svg(data):
    '''
    Check if ``b'<svg'`` occurs within the first 256 bytes of ``data``.

    Falsy ``data`` (None, b'') is returned unchanged.
    '''
    if not data:
        return data
    head = data[:256]
    return b'<svg' in head
def resize_image(data, width=None, size=None):
    '''
    Scale an image and return it encoded as JPEG bytes.

    data:  image data; bytes are wrapped in BytesIO, anything else in StringIO
    width: target width, used if ``size`` is not set
    size:  target length of the longer side, takes precedence over ``width``

    The aspect ratio is kept. The computed dimension is rounded down to an
    even number, and both dimensions are at least 1. Images that are not in
    RGB mode are converted to RGB first.
    '''
    stream = BytesIO(data) if isinstance(data, bytes) else StringIO(data)
    source = Image.open(stream)
    #if source.mode not in ('1', 'CMYK', 'L', 'RGB', 'RGBA', 'RGBX', 'YCbCr'):
    if source.mode != 'RGB':
        source = source.convert('RGB')
    source_width, source_height = source.size[0], source.size[1]
    aspect = float(source_width) / source_height

    if size and source_width <= source_height:
        # portrait or square: size is the height, derive the width
        height = size
        width = int(height * aspect)
        width -= width % 2
    else:
        # landscape (size is the width) or explicit width: derive the height
        if size:
            width = size
        height = int(width / aspect)
        height -= height % 2
    width = max(width, 1)
    height = max(height, 1)

    # LANCZOS when the width shrinks, BICUBIC otherwise
    resize_method = Image.LANCZOS if width < source_width else Image.BICUBIC
    output = source.resize((width, height), resize_method)
    with BytesIO() as buffer:
        output.save(buffer, format='jpeg')
        return buffer.getvalue()
def sort_title(title):
    '''
    Return the version of ``title`` that is used for sorting.

    'Æ' is replaced with 'Ae', the result is passed through
    ``ox.sort_string``, then the characters ``' ! ¿ ¡ , . ; - " : * [ ]``
    are removed and surrounding whitespace is stripped.
    '''
    title = title.replace('Æ', 'Ae')
    title = ox.sort_string(title)
    # raw string: the previous non-raw pattern relied on invalid escape
    # sequences (\. \- \: \* \[ \]) that newer Python versions warn about
    title = re.sub(r'''['!¿¡,.;\-":*\[\]]''', '', title)
    return title.strip()
def get_position_by_id(list, key):
    '''
    Return the index of the first item in ``list`` with ``item['id'] == key``.

    Returns -1 if there is no such item.
    '''
    for position, item in enumerate(list):
        if item['id'] == key:
            return position
    return -1
def sign_cert(cert, key):
    '''
    Sign ``cert`` with ``key`` without a digest and return the result of
    the underlying ``X509_sign`` call.
    '''
    # pyOpenSSL's sign API does not allow a NULL hash, so
    #   return cert.sign(key, None)
    # can not be used; call X509_sign from the bindings directly instead
    crypto = OpenSSL.crypto
    no_digest = crypto._ffi.NULL
    return crypto._lib.X509_sign(cert._x509, key._pkey, no_digest)
def load_pem_key(pem):
    '''
    Load the PEM private key at path ``pem``.

    Returns a tuple (private_key, public_key) with the raw bytes of the
    private key and of its public key.
    Raises Exception if the key is not a 256 bit key.
    '''
    with open(pem) as fd:
        pem_data = fd.read()
    pkey = load_privatekey(FILETYPE_PEM, pem_data)
    if pkey.bits() != 256:
        raise Exception("Invalid key %s" % pem)
    crypto_key = pkey.to_cryptography_key()
    raw_private = crypto_key.private_bytes_raw()
    raw_public = crypto_key.public_key().public_bytes_raw()
    return raw_private, raw_public
def expand_private_key(secret_key) -> bytes:
    '''
    Return the 64 byte SHA-512 digest of ``secret_key`` with the low
    3 bits of byte 0 cleared, the top bit of byte 31 cleared and the
    second highest bit of byte 31 set.
    '''
    digest = bytearray(hashlib.sha512(secret_key).digest())
    digest[0] &= 0b11111000
    digest[31] = (digest[31] & 0b01111111) | 0b01000000
    return bytes(digest)
def get_onion(pubkey):
    '''
    Return the lowercase base32 encoding of
    ``pubkey + checksum + version``, where version is the byte 0x03 and
    checksum is the first two bytes of
    ``sha3_256(b".onion checksum" + pubkey + version)``.

    The '.onion' suffix is not part of the returned string.
    '''
    version = b"\x03"
    digest = hashlib.sha3_256(b".onion checksum" + pubkey + version).digest()
    address = base64.b32encode(pubkey + digest[:2] + version)
    return address.decode().lower()
def get_onion_key(private_key):
    '''
    Return (key_type, key_content) for ``private_key``: key_type is
    'ED25519-V3', key_content is the base64 encoding (without newlines)
    of ``expand_private_key(private_key)``.
    '''
    expanded = expand_private_key(private_key)
    encoded = base64.encodebytes(expanded).decode()
    key_content = encoded.strip().replace('\n', '')
    return 'ED25519-V3', key_content
def get_user_id(key_path, cert_path, ca_key_path, ca_cert_path):
    '''
    Return the user id, creating or refreshing key and certificate files.

    The user id is ``get_onion()`` of the raw public key belonging to the
    private key stored at ``ca_key_path``. The following files are
    (re)created as needed:

    ca_key_path:  Ed25519 private key (PEM, PKCS8, not encrypted);
                  generated if the file is missing or can not be loaded
                  with ``load_pem_key`` (an unloadable file is deleted).
    ca_cert_path: self-signed CA certificate for that key; generated if
                  the file is missing or its mtime is older than 5 years.
                  Generating it removes ``cert_path`` and ``key_path``.
    cert_path:    RSA 2048 certificate signed with the CA key, followed by
                  the CA certificate; generated if the file is missing or
                  its mtime is older than 60 days.
    key_path:     private key of the certificate in ``cert_path``, written
                  together with it.
    '''
    if os.path.exists(ca_key_path):
        try:
            private_key, public_key = load_pem_key(ca_key_path)
        except Exception:
            # key can not be used: remove it, a new one is generated below
            logger.warning('failed to load %s, creating new key', ca_key_path, exc_info=True)
            os.unlink(ca_key_path)
        else:
            user_id = get_onion(public_key)

    if not os.path.exists(ca_key_path):
        private_key = ed25519.Ed25519PrivateKey.generate()
        private_bytes = private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption()
        )
        with open(ca_key_path, 'wb') as fd:
            fd.write(private_bytes)
        public_key = private_key.public_key().public_bytes_raw()
        user_id = get_onion(public_key)

    if not os.path.exists(ca_cert_path) or \
            (datetime.now() - datetime.fromtimestamp(os.path.getmtime(ca_cert_path))).days > 5*365:
        with open(ca_key_path, 'rb') as key_file:
            key_data = key_file.read()
        cakey = load_privatekey(FILETYPE_PEM, key_data)
        ca = X509()
        ca.set_version(2)
        ca.set_serial_number(1)
        ca.get_subject().CN = user_id
        ca.gmtime_adj_notBefore(0)
        # valid for 10 years (was 10 * 356 days, a typo for 365)
        ca.gmtime_adj_notAfter(10 * 365 * 24 * 60 * 60)
        ca.set_issuer(ca.get_subject())
        ca.set_pubkey(cakey)
        ca.add_extensions([
            X509Extension(b"basicConstraints", False, b"CA:TRUE"),
            X509Extension(b"keyUsage", False, b"keyCertSign, cRLSign"),
            X509Extension(
                b"subjectKeyIdentifier", False, b"hash", subject=ca
            ),
        ])
        # added in a second step, after subjectKeyIdentifier is set on ca
        ca.add_extensions([
            X509Extension(
                b"authorityKeyIdentifier", False, b"keyid:always", issuer=ca
            )
        ])
        sign_cert(ca, cakey)
        with open(ca_cert_path, 'wb') as fd:
            fd.write(dump_certificate(FILETYPE_PEM, ca))
        # certificate signed by the previous CA certificate has to be recreated
        if os.path.exists(cert_path):
            os.unlink(cert_path)
        if os.path.exists(key_path):
            os.unlink(key_path)
    else:
        with open(ca_cert_path) as fd:
            ca = load_certificate(FILETYPE_PEM, fd.read())
        with open(ca_key_path) as fd:
            cakey = load_privatekey(FILETYPE_PEM, fd.read())

    # create RSA intermediate certificate since clients don't quite like Ed25519 yet
    if not os.path.exists(cert_path) or \
            (datetime.now() - datetime.fromtimestamp(os.path.getmtime(cert_path))).days > 60:
        key = PKey()
        key.generate_key(TYPE_RSA, 2048)
        cert = X509()
        cert.set_version(2)
        cert.set_serial_number(2)
        cert.get_subject().CN = user_id + ".onion"
        cert.gmtime_adj_notBefore(0)
        # valid for 90 days, replaced after 60 days (see condition above)
        cert.gmtime_adj_notAfter(90 * 24 * 60 * 60)
        cert.set_issuer(ca.get_subject())
        cert.set_pubkey(key)
        subject_alt_names = b"DNS: %s.onion" % user_id.encode()
        # NOTE(review): keyUsage "keyCertSign, cRLSign" together with CA:FALSE
        # and subjectKeyIdentifier with subject=ca (not cert) look unintended;
        # left unchanged since peers may depend on it - confirm
        cert.add_extensions([
            X509Extension(b"basicConstraints", True, b"CA:FALSE"),
            X509Extension(b"extendedKeyUsage", True,
                b"serverAuth,clientAuth,emailProtection,timeStamping,msCodeInd,msCodeCom,msCTLSign,msSGC,msEFS,nsSGC"),
            X509Extension(b"keyUsage", False, b"keyCertSign, cRLSign"),
            X509Extension(b"subjectKeyIdentifier", False, b"hash", subject=ca),
            X509Extension(b"subjectAltName", critical=True, value=subject_alt_names),
        ])
        sign_cert(cert, cakey)
        with open(cert_path, 'wb') as fd:
            fd.write(dump_certificate(FILETYPE_PEM, cert))
            fd.write(dump_certificate(FILETYPE_PEM, ca))
        with open(key_path, 'wb') as fd:
            fd.write(dump_privatekey(FILETYPE_PEM, key))
    return user_id
def get_service_id(private_key_file=None, cert=None):
    '''
    Return the service id (see ``get_onion``) for a key file or certificate.

    private_key_file: path to a file containing ``<key type>:<base64 key>``;
                      the decoded key is turned into a public key with
                      ``Ed25519().public_key_from_hash``
    cert:             PEM certificate, only used if ``private_key_file`` is
                      not set; the raw public key of the certificate is used

    Returns None if neither argument is set.

    NOTE(review): ``Ed25519`` is not imported in the visible part of this
    module - confirm where it is supposed to come from.
    '''
    if private_key_file:
        with open(private_key_file, 'rb') as key_file:
            key_type, key_content = key_file.read().split(b':', 1)
        expanded_key = base64.decodebytes(key_content)
        raw_public = Ed25519().public_key_from_hash(expanded_key)
        return get_onion(raw_public)
    if cert:
        pubkey = load_certificate(FILETYPE_PEM, cert).get_pubkey()
        raw_public = pubkey.to_cryptography_key().public_bytes_raw()
        return get_onion(raw_public)
    return None
def update_dict(root, data):
    '''
    Apply the updates in ``data`` to the nested structure ``root`` in place.

    Each key of ``data`` is a path with parts separated by '.', use '\\.'
    for a literal dot inside a part. Missing intermediate dicts are created.
    In a list, the path part selects the item with that 'id'.
    A value of None removes an existing key, any other value (or None for
    a key that does not exist) is assigned.

    If ``root`` has a ``_save`` attribute it is called once at the end.

    Raises KeyError if a list on the path has no item with the given id.
    '''
    for key in data:
        keys = [part.replace('\0', '.') for part in key.replace('\\.', '\0').split('.')]
        value = data[key]
        p = root
        while len(keys) > 1:
            key = keys.pop(0)
            if isinstance(p, list):
                position = get_position_by_id(p, key)
                if position < 0:
                    # -1 would silently select (and modify) the last item
                    raise KeyError(key)
                p = p[position]
            else:
                if key not in p:
                    p[key] = {}
                p = p[key]
        if value is None and keys[0] in p:
            del p[keys[0]]
        else:
            p[keys[0]] = value
    if hasattr(root, '_save'):
        root._save()
def remove_empty_folders(prefix, keep_root=False):
    '''
    Remove empty folders below ``prefix``.

    A folder that only contains a '.DS_Store' file counts as empty, the
    file is deleted. Parents that become empty are removed too.

    keep_root: if True, ``prefix`` itself is never removed and nothing
               above it is touched. If False, ``prefix`` and empty
               folders above it are removed as well.
    '''
    empty = []
    for root, folders, files in os.walk(prefix):
        if len(files) == 1 and files[0] == '.DS_Store':
            os.unlink(os.path.join(root, files[0]))
            files = []
        if not folders and not files:
            if root != prefix or not keep_root:
                empty.append(root)
    # without a stop folder remove_empty_tree would climb up to prefix
    # and remove it once all its children are gone
    stop = prefix if keep_root else None
    for folder in empty:
        remove_empty_tree(folder, stop=stop)


def remove_empty_tree(leaf, stop=None):
    '''
    Remove ``leaf`` if it is an empty folder, then its parent if that is
    empty now, and so on. Stops at the first path that is not an empty
    folder.

    stop: optional folder at which to stop; it is never removed
    '''
    if stop is not None:
        stop = os.path.normpath(stop)
    while leaf:
        if stop is not None and os.path.normpath(leaf) == stop:
            break
        if not os.path.exists(leaf):
            # removed in the previous round (or never existed): go up
            leaf = os.path.dirname(leaf)
        elif os.path.isdir(leaf) and not os.listdir(leaf):
            logger.debug('rmdir %s', leaf)
            os.rmdir(leaf)
        else:
            break
# time.mktime() result for 1970-01-01 00:00 interpreted as local time
try:
    utc_0 = int(time.mktime(datetime(1970, 1, 1).timetuple()))
except (OverflowError, ValueError, OSError):
    # mktime can fail for this date (presumably on platforms that do not
    # support negative timestamps), fall back to the difference between
    # current UTC and current local time
    utc_0 = int(time.mktime(time.gmtime()) - time.mktime(time.localtime()))
def datetime2ts(dt):
    '''
    Return the UNIX timestamp (int seconds) of ``dt``.

    Naive datetimes are taken as UTC, aware datetimes are converted to UTC
    (``dt.utctimetuple()``).

    >>> datetime2ts(datetime(1970, 1, 1))
    0
    '''
    import calendar
    # timegm interprets the tuple as UTC. The previous mktime() minus local
    # offset computation was off where the local UTC offset at 1970-01-01
    # differs from the offset mktime uses for dt.
    return calendar.timegm(dt.utctimetuple())
def ts2datetime(ts):
    '''
    Return the naive UTC datetime for the UNIX timestamp ``ts``.

    ``ts`` can be anything float() accepts (int, float, numeric string).

    >>> ts2datetime(0)
    datetime.datetime(1970, 1, 1, 0, 0)
    '''
    from datetime import timezone
    # datetime.utcfromtimestamp is deprecated; convert with an explicit UTC
    # timezone and drop it again to keep returning a naive datetime
    return datetime.fromtimestamp(float(ts), timezone.utc).replace(tzinfo=None)
def run(*cmd):
    '''
    Run the command given as arguments, wait for it to finish and
    return its exit code.
    '''
    process = subprocess.Popen(cmd, close_fds=True)
    return process.wait()
def get(*cmd):
    '''
    Run the command given as arguments and return its decoded stdout.

    stderr is captured too and discarded.
    '''
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output = process.communicate()[0]
    return output.decode()
def makefolder(path):
    '''
    Make sure the folder that contains ``path`` exists.

    Creates ``os.path.dirname(path)`` including missing parents. Does
    nothing if ``path`` has no folder part or the folder exists already.
    '''
    dirname = os.path.dirname(path)
    # dirname is '' for a bare filename, os.makedirs('') would fail
    if dirname and not os.path.exists(dirname):
        # exist_ok: folder might get created by someone else in between
        os.makedirs(dirname, exist_ok=True)
def open_file(path=None):
    '''
    Open ``path`` with the default application of the platform.

    darwin: ``open``; linux: ``gio open`` if /usr/bin/gio exists, otherwise
    ``xdg-open``; win32: ``os.startfile`` with '/' replaced by '\\'.
    On other platforms only a debug message is logged.
    '''
    platform = sys.platform
    if platform == 'win32':
        os.startfile('\\'.join(path.split('/')))
        return
    if platform == 'darwin':
        command = ['open', path]
    elif platform.startswith('linux'):
        if os.path.exists('/usr/bin/gio'):
            command = ['gio', 'open', path]
        else:
            command = ['xdg-open', path]
    else:
        logger.debug('unsupported platform %s', sys.platform)
        return
    # started in the background, not waited for
    subprocess.Popen(command, close_fds=True)
def open_folder(folder=None, path=None):
    '''
    Show a folder in the file manager of the platform.

    folder: folder to open, defaults to the folder containing ``path``
    path:   file to reveal, defaults to ``folder``

    darwin: ``open -R path``; linux: ``gio open folder`` if /usr/bin/gio
    exists, otherwise ``xdg-open folder``; win32: ``explorer.exe /select``
    with '/' in ``path`` replaced by '\\'.
    On other platforms only a debug message is logged.
    '''
    if path and not folder:
        folder = os.path.dirname(path)
    if folder and not path:
        path = folder

    platform = sys.platform
    if platform == 'darwin':
        command = ['open', '-R', path]
    elif platform.startswith('linux'):
        if os.path.exists('/usr/bin/gio'):
            opener = ['gio', 'open']
        else:
            opener = ['xdg-open']
        command = opener + [folder]
    elif platform == 'win32':
        command = 'explorer.exe /select,"%s"' % '\\'.join(path.split('/'))
    else:
        logger.debug('unsupported platform %s', sys.platform)
        return
    # started in the background, not waited for
    subprocess.Popen(command, close_fds=True)
def can_connect_dns(host="8.8.8.8", port=53):
    """
    Check if a TCP connection to host:port can be made via the local
    SOCKS5 proxy (port ``state.tor.socks_port``, or 9150 if ``state.tor``
    is not set) within 2 seconds.

    host: 8.8.8.8 (google-public-dns-a.google.com)
    port: 53/tcp

    Returns True if the connection succeeded, False on any error.
    """
    import socks
    import state
    sock = None
    try:
        sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM, 6)
        sock.settimeout(2)
        socks_port = state.tor.socks_port if state.tor else 9150
        sock.set_proxy(socks.SOCKS5, "localhost", socks_port, True)
        sock.connect((host, port))
        return True
    except Exception:
        #logger.debug('failed to connect', exc_info=True)
        return False
    finally:
        # the socket is only used for probing, don't leak it
        if sock is not None:
            sock.close()
def _to_json(python_object):
if isinstance(python_object, datetime):
if python_object.year < 1900:
tt = python_object.timetuple()
return '%d-%02d-%02dT%02d:%02d%02dZ' % tuple(list(tt)[:6])
return python_object.strftime('%Y-%m-%dT%H:%M:%SZ')
raise TypeError(u'%s %s is not JSON serializable' % (repr(python_object), type(python_object)))
def get_ratio(data):
    '''
    Return width / height of the image in ``data`` (bytes).

    Returns 1 if the image can not be opened or the ratio can not be
    computed.
    '''
    try:
        with Image.open(BytesIO(data)) as img:
            return img.size[0]/img.size[1]
    except Exception:
        # not an image, broken image or height of 0
        return 1
def get_meta_hash(data):
    '''
    Return the sha1 hexdigest of the JSON encoding (sorted keys,
    non-ASCII characters not escaped) of ``data`` without the
    'sharemetadata' key and without keys that have a falsy value.

    ``data`` is not modified.
    '''
    relevant = {
        key: value
        for key, value in data.items()
        if key != 'sharemetadata' and value
    }
    encoded = json.dumps(relevant, ensure_ascii=False, sort_keys=True).encode()
    return hashlib.sha1(encoded).hexdigest()
def update_static():
    '''
    Rebuild the list of JavaScript files and the minified bundle.

    Takes all files in ``<static_path>/js`` whose name does not start with
    '.' or 'oml.', sorted by name, writes their names to
    ``<static_path>/json/js.json`` and their minified contents, joined by
    newlines, to ``<static_path>/js/oml.min.js``.
    '''
    import settings
    js_path = os.path.join(settings.static_path, 'js')
    names = sorted(
        name for name in os.listdir(js_path)
        if not name.startswith(('.', 'oml.'))
    )
    ox.file.write_json(os.path.join(settings.static_path, 'json', 'js.json'), names, indent=4)
    minified = []
    for name in names:
        source = ox.file.read_file(os.path.join(js_path, name)).decode('utf-8')
        minified.append(ox.js.minify(source))
    ox.file.write_file(os.path.join(js_path, 'oml.min.js'), '\n'.join(minified))
def check_pid(pid):
    '''
    Return True if a process with id ``pid`` exists, False otherwise.

    win32: tries to open the process with SYNCHRONIZE access.
    elsewhere: sends signal 0 with os.kill, which only checks for
    existence and permissions.
    '''
    if sys.platform == 'win32':
        import ctypes
        kernel32 = ctypes.windll.kernel32
        SYNCHRONIZE = 0x100000
        process = kernel32.OpenProcess(SYNCHRONIZE, 0, pid)
        if process != 0:
            kernel32.CloseHandle(process)
            return True
        return False
    try:
        os.kill(pid, 0)
    except PermissionError:
        # process exists, we are just not allowed to signal it
        return True
    except (OSError, TypeError, ValueError, OverflowError):
        # no such process or pid is not a usable process id
        return False
    return True
def check_pidfile(pid):
    '''
    Return True if the file at path ``pid`` contains the id of an
    existing process (see ``check_pid``).

    Returns False if the file can not be read or does not contain a number.
    '''
    try:
        with open(pid) as fd:
            pid = int(fd.read())
    except (OSError, ValueError):
        # missing/unreadable file or content that is not an integer
        return False
    return check_pid(pid)
def ctl(*args):
    '''
    Start the ctl command with ``args`` in the background.

    win32: runs ``pythonw.exe oml <args>`` from the platform_win32 folder
    next to ``settings.base_dir``, with hidden window and base_dir as
    working directory.
    elsewhere: runs ``<base_dir>/ctl <args>`` in a new session.

    Does not wait for the command and returns nothing.
    '''
    import settings
    arguments = list(args)
    if sys.platform != 'win32':
        command = [os.path.join(settings.base_dir, 'ctl')] + arguments
        subprocess.Popen(command, close_fds=True, start_new_session=True)
        return
    platform_win32 = os.path.normpath(os.path.join(settings.base_dir, '..', 'platform_win32'))
    command = [os.path.join(platform_win32, 'pythonw.exe'), 'oml'] + arguments
    # don't show a console window
    startupinfo = subprocess.STARTUPINFO()
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    startupinfo.wShowWindow = subprocess.SW_HIDE
    subprocess.Popen(command, cwd=settings.base_dir, start_new_session=True, startupinfo=startupinfo)
def ctl_output(*args):
    '''
    Run the ctl command with ``args``, wait for it and return its stdout
    decoded as UTF-8 with surrounding whitespace stripped.

    win32: runs ``python.exe oml <args>`` from the platform_win32 folder
    next to ``settings.base_dir``, with hidden window and base_dir as
    working directory.
    elsewhere: runs ``<base_dir>/ctl <args>``.
    '''
    import settings
    if sys.platform == 'win32':
        # absolute path like in ctl(): on Windows cwd is not used to look up
        # a relative executable path
        platform_win32 = os.path.normpath(os.path.join(settings.base_dir, '..', 'platform_win32'))
        python = os.path.join(platform_win32, 'python.exe')
        cmd = [python, 'oml'] + list(args)
        # don't show a console window
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE
        p = subprocess.Popen(cmd, cwd=settings.base_dir, startupinfo=startupinfo,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        p = subprocess.Popen([os.path.join(settings.base_dir, 'ctl')] + list(args),
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    #logger.debug('ctl_output%s -> %s [%s]', args, stdout, stderr)
    return stdout.decode('utf-8').strip()
def user_sort_key(u):
    '''
    Return the sort key for user dict ``u``: ``ox.sort_string`` of the
    user's 'index' (empty if missing), the letter 'Z' and the user's
    'name' (empty if missing or falsy).
    '''
    index = str(u.get('index', ''))
    name = u.get('name') or ''
    return ox.sort_string(index + 'Z' + name)
def get_peer(peerid):
    '''
    Return the entry for ``peerid`` from ``state.peers``. If there is
    none yet, a ``library.Peer(peerid)`` is created and stored first.
    '''
    import state
    import library
    peers = state.peers
    if peerid in peers:
        return peers[peerid]
    peers[peerid] = library.Peer(peerid)
    return peers[peerid]
def send_debug():
    '''
    Send new lines of ``<data_path>/debug.log``, gzip compressed, to the
    debug server via ``tor_request``.

    Lines up to the timestamp stored in ``settings.server['last_debug']``
    are skipped. After a successful upload (status 200) the last timestamp
    found in the log is stored there. Errors are logged, not raised.
    '''
    import settings
    import tor_request
    import gzip
    import io
    url = 'http://rnogx24drkbnrxa3.onion/debug'
    headers = {
        'User-Agent': settings.USER_AGENT,
    }
    debug_log = os.path.join(settings.data_path, 'debug.log')
    last_debug = settings.server.get('last_debug')
    # True while reading lines that have been sent before
    old = last_debug is not None
    # most recent timestamp seen in the log; lines without one
    # (presumably continuation lines) belong to the previous timestamp
    timestamp = None
    try:
        if os.path.exists(debug_log):
            data = []
            with open(debug_log, 'r') as fd:
                for line in fd:
                    t = line.split(':DEBUG')[0]
                    # assumes timestamps contain two dashes (YYYY-MM-DD ...)
                    if t.count('-') == 2:
                        timestamp = t
                    if old and timestamp is not None and timestamp > last_debug:
                        old = False
                    if not old:
                        data.append(line)
            data = ''.join(data)
            if data:
                bytes_io = io.BytesIO()
                with gzip.GzipFile(fileobj=bytes_io, mode='wb') as gzip_file:
                    gzip_file.write(data.encode())
                result = bytes_io.getvalue()
                bytes_io.close()
                opener = tor_request.get_opener()
                opener.addheaders = list(headers.items())
                r = opener.open(url, result)
                if r.status != 200:
                    logger.debug('failed to send debug information (server error)')
                elif timestamp is not None:
                    # without a timestamp there is nothing to remember
                    settings.server['last_debug'] = timestamp
    except Exception:
        logger.error('failed to send debug information (connection error)', exc_info=True)
def iexists(path):
    '''
    Case-insensitive check if ``path`` exists.

    Only the last part of ``path`` is compared ignoring case, the folder
    part has to match the way os.listdir resolves it. Returns False if
    the folder does not exist or is not a folder.
    '''
    # os.path.split handles all separators of the platform the same way
    # for folder and name
    folder, name = os.path.split(path)
    try:
        files = os.listdir(folder or '.')
    except (FileNotFoundError, NotADirectoryError):
        return False
    name = name.lower()
    return any(f.lower() == name for f in files)
def same_path(f1, f2):
    '''
    Return True if ``f1`` and ``f2`` are equal after Unicode NFC
    normalization.
    '''
    first, second = (unicodedata.normalize('NFC', name) for name in (f1, f2))
    return first == second
def time_cache(max_age, maxsize=128, typed=False):
    '''
    Decorator: functools.lru_cache with entries that expire.

    The current time divided by ``max_age`` (seconds), truncated to int,
    is added to the cache key, so a cached result is not used once that
    number has changed. ``maxsize`` and ``typed`` are passed on to
    functools.lru_cache.
    '''
    def decorate(fn):
        def call_without_salt(*args, __time_salt, **kwargs):
            # the salt only takes part in the cache key
            return fn(*args, **kwargs)

        cached = functools.lru_cache(maxsize=maxsize, typed=typed)(call_without_salt)

        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            salt = int(time.time() / max_age)
            return cached(*args, **kwargs, __time_salt=salt)

        return wrapper
    return decorate