minimal support for cbr/cbz documents

This commit is contained in:
j 2025-07-07 08:40:09 +01:00
commit 28f36879bc
14 changed files with 3312 additions and 6 deletions

106
pandora/document/cbr.py Normal file
View file

@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
import logging
import os
import zipfile
import ox
logger = logging.getLogger(__name__)
IMAGE_EXTENSIONS = ['.jpg', '.png', '.gif']
def filter_images(files):
    """Return the entries of *files* whose extension is a known image type.

    The extension of each name is lower-cased and looked up in
    IMAGE_EXTENSIONS; the relative order of *files* is preserved.
    """
    images = []
    for name in files:
        suffix = os.path.splitext(name)[-1].lower()
        if suffix in IMAGE_EXTENSIONS:
            images.append(name)
    return images
def filter_folders(files):
    """Drop every entry of *files* that acts as a folder for another entry.

    An entry counts as a folder when at least one entry in *files* starts
    with that entry followed by '/'. The remaining entries keep their
    original order.
    """
    def has_children(path):
        prefix = path + '/'
        return any(name.startswith(prefix) for name in files)

    return [path for path in files if not has_children(path)]
def detect_format(path):
    """Guess the comic archive type of the file at *path* from its first bytes.

    Returns 'cbz' when the file starts with b'PK', 'cbr' when it starts with
    b'Rar', and 'unknown' (after writing a debug log entry) otherwise.
    """
    with open(path, 'rb') as fd:
        head = fd.read(10)
    for magic, kind in ((b'PK', 'cbz'), (b'Rar', 'cbr')):
        if head.startswith(magic):
            return kind
    logger.debug('unknown cbr/cbz file %s - %s', head, path)
    return 'unknown'
def cover(path):
    """Return the cover data for the comic archive at *path*.

    Dispatches on the result of detect_format(): 'cbz' is handled by
    cover_cbz() and 'cbr' by cover_cbr(). Any other format yields None.
    """
    kind = detect_format(path)
    if kind == 'cbz':
        return cover_cbz(path)
    if kind == 'cbr':
        return cover_cbr(path)
    return None
def cover_cbr(path):
    """Return the data of the first image in the RAR archive at *path*.

    Member names are listed with unrardll, folder entries and non-image
    files are dropped, and the first remaining name in ox.sorted_strings()
    order is extracted.

    Returns None when unrardll cannot be imported, when the archive holds no
    image, or when listing/extraction fails.
    """
    try:
        from unrardll import names, extract_member
    except ImportError:
        logger.error('to extract covers from cbr files you have to install python3-unrardll: apt install python3-unrardll')
        return None
    try:
        files = filter_images(filter_folders(list(names(path))))
        if not files:
            return None
        first = ox.sorted_strings(files)[0]
        _, data = extract_member(path, lambda h: h['filename'] == first)
    except Exception:
        # Best effort: a broken archive must not break the caller, but unlike
        # a bare ``except`` this lets KeyboardInterrupt/SystemExit through and
        # keeps the traceback available at debug level.
        logger.debug('invalid cbr file %s', path, exc_info=True)
        return None
    return data
def cover_cbz(path):
    """Return the bytes of the first image in the ZIP archive at *path*.

    Member names are filtered with filter_images() and the first name in
    ox.sorted_strings() order is read from the archive.

    Returns None when the file is not a valid ZIP archive, when it contains
    no image, or when the selected member cannot be read.
    """
    logger.debug('cover %s', path)
    try:
        archive = zipfile.ZipFile(path)
    except zipfile.BadZipFile:
        logger.debug('invalid cbz file %s', path)
        return None
    # The context manager closes the archive's file handle on every exit path.
    with archive:
        files = filter_images(archive.namelist())
        if not files:
            return None
        first = ox.sorted_strings(files)[0]
        try:
            return archive.read(first)
        except Exception:
            # Best effort: a damaged or unsupported member yields no cover,
            # but the reason is kept in the debug log.
            logger.debug('failed to read %s from %s', first, path, exc_info=True)
            return None
def get_pages(path):
    """Return the number of image files in the comic archive at *path*.

    The archive type comes from detect_format(). Member names are listed
    from the ZIP or RAR archive and counted after filter_images().

    Returns 0 when the format is unknown or the archive cannot be listed.
    """
    files = []
    kind = detect_format(path)
    if kind == 'cbz':
        try:
            # ``with`` closes the archive's file handle once the names are read.
            with zipfile.ZipFile(path) as archive:
                files = archive.namelist()
        except zipfile.BadZipFile:
            # Fall through with an empty list so the caller gets 0 pages.
            logger.debug('invalid cbz file %s', path)
    elif kind == 'cbr':
        files = _list_cbr(path)
    return len(filter_images(files))


def _list_cbr(path):
    """Return the member names of the RAR archive at *path*, or [] on failure."""
    try:
        # The import stays inside the broad ``try`` on purpose: any failure
        # to load or use unrar should lead to the unrardll fallback below.
        from unrar import rarfile
        return rarfile.RarFile(path).namelist()
    except Exception:
        logger.debug('unrar could not list %s, trying unrardll', path, exc_info=True)
    try:
        # cover_cbr() reads RAR archives with unrardll; use it here as well so
        # the page count works when only that package is installed.
        from unrardll import names
        return list(names(path))
    except Exception:
        logger.debug('invalid cbr file %s', path, exc_info=True)
    return []
def info(path):
    """Return basic metadata for the comic archive at *path*.

    The result has two keys: 'title', the file name without its directory
    and extension, and 'pages', the value returned by get_pages(path).
    """
    filename = os.path.basename(path)
    title, _ext = os.path.splitext(filename)
    return {
        'title': title,
        'pages': get_pages(path),
    }

View file

@ -27,11 +27,12 @@ from archive.chunk import save_chunk
from user.models import Group
from user.utils import update_groups
from . import managers
from . import utils
from . import tasks
from . import cbr
from . import epub
from . import managers
from . import tasks
from . import txt
from . import utils
from .fulltext import FulltextMixin, FulltextPageMixin
User = get_user_model()
@ -182,6 +183,9 @@ class Document(models.Model, FulltextMixin):
elif self.extension == 'txt':
prefix = 4
value = self.pages
elif self.extension in ('cbr', 'cbz'):
prefix = 5
value = self.pages
elif self.extension == 'html':
prefix = 1
value = self.dimensions
@ -397,7 +401,13 @@ class Document(models.Model, FulltextMixin):
@property
def dimensions(self):
if self.extension in ('pdf', 'epub', 'txt'):
if self.extension in (
'cbr',
'cbz',
'epub',
'pdf',
'txt',
):
return self.pages
elif self.extension == 'html':
return len(self.data.get('text', '').split(' '))
@ -571,6 +581,15 @@ class Document(models.Model, FulltextMixin):
path = os.path.join(folder, '%dp%d,%s.jpg' % (size, page, ','.join(map(str, crop))))
if not os.path.exists(path):
resize_image(src, path, size=size)
elif self.extension in ('cbr', 'cbz'):
path = os.path.join(folder, '1024.jpg')
if os.path.exists(src) and not os.path.exists(path):
data = cbr.cover(src)
if data:
with open(path, "wb") as fd:
fd.write(data)
else:
return os.path.join(settings.STATIC_ROOT, 'png/document.png')
elif self.extension == 'epub':
path = os.path.join(folder, '1024.jpg')
if os.path.exists(src) and not os.path.exists(path):
@ -578,10 +597,14 @@ class Document(models.Model, FulltextMixin):
if data:
with open(path, "wb") as fd:
fd.write(data)
else:
return os.path.join(settings.STATIC_ROOT, 'png/document.png')
elif self.extension == 'txt':
path = os.path.join(folder, '1024.jpg')
if os.path.exists(src) and not os.path.exists(path):
txt.render(src, path)
if not os.path.exists(path):
return os.path.join(settings.STATIC_ROOT, 'png/document.png')
elif self.extension in ('jpg', 'png', 'gif', 'webp', 'heic', 'heif', 'cr2'):
if os.path.exists(src):
if size and page:
@ -625,6 +648,12 @@ class Document(models.Model, FulltextMixin):
self.width = -1
self.height = -1
self.pages = utils.pdfpages(self.file.path)
elif self.extension in ('cbr', 'cbz'):
from . import cbr
thumb = self.thumbnail(1024)
if thumb:
self.width, self.height = open_image_rgb(thumb).size
self.pages = cbr.get_pages(self.file.path)
elif self.extension == 'epub':
thumb = self.thumbnail(1024)
if thumb: