mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
More work on kepubify
This commit is contained in:
parent
9c5d1c0f4f
commit
e23dcffe43
@ -12,24 +12,30 @@
|
||||
# * Cover marking in the OPF
|
||||
# * Markup cleanup (remove various things that trip up the Kobo renderer)
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.ebooks.oeb.base import XHTML, XPath, escape_cdata
|
||||
from calibre.ebooks.oeb.base import OEB_DOCS, XHTML, XPath, escape_cdata
|
||||
from calibre.ebooks.oeb.parse_utils import barename, merge_multiple_html_heads_and_bodies
|
||||
from calibre.ebooks.oeb.polish.container import get_container
|
||||
from calibre.ebooks.oeb.polish.cover import find_cover_image, find_cover_image3, find_cover_page
|
||||
from calibre.ebooks.oeb.polish.parsing import parse
|
||||
from calibre.ebooks.oeb.polish.tts import lang_for_elem
|
||||
from calibre.ebooks.oeb.polish.utils import extract, insert_self_closing
|
||||
from calibre.spell.break_iterator import sentence_positions
|
||||
from calibre.srv.render_book import Profiler, calculate_number_of_workers
|
||||
from calibre.utils.localization import canonicalize_lang, get_lang
|
||||
|
||||
# NOTE(review): the first four names are not referenced in the visible hunks;
# the remarks on them are inferred from the names and values only, confirm at
# their use sites.
KOBO_CSS_CLASS = 'kobostylehacks'  # presumably the class marking Kobo-specific CSS
OUTER_DIV_ID = 'book-columns'  # presumably the id of the outer wrapper <div>
INNER_DIV_ID = 'book-inner'  # presumably the id of the inner wrapper <div>
KOBO_SPAN_CLASS = 'koboSpan'  # presumably the class of the spans wrapped around text
# Base file name (without extension) of the generated title page; it is also
# the substring remove_dummy_title_page() looks for in spine item names.
DUMMY_TITLE_PAGE_NAME = 'kobo-title-page-generated-by-calibre'
# presumably the tag names whose content is left untouched -- TODO confirm
# against the code that reads this set
SKIPPED_TAGS = frozenset((
    '', 'script', 'style', 'atom', 'pre', 'audio', 'video', 'svg', 'math'
))
|
||||
@ -224,6 +230,16 @@ def kepubify_html_data(raw: str | bytes, metadata_lang: str = 'en'):
|
||||
return root
|
||||
|
||||
|
||||
def kepubify_html_path(path: str, metadata_lang: str = 'en') -> None:
    '''
    Kepubify the HTML file at ``path``, rewriting it in place.

    The file is read as bytes, converted with :func:`kepubify_html_data`,
    serialized with :func:`serialize_html` and the result is written back
    over the original contents.

    :param path: Path of the HTML file to rewrite
    :param metadata_lang: Language forwarded to :func:`kepubify_html_data`
    '''
    with open(path, 'r+b') as f:
        raw = f.read()
        # Pass the language through: without it kepubify_html_data() falls
        # back to its own default of 'en' no matter what the caller supplied
        root = kepubify_html_data(raw, metadata_lang)
        raw = serialize_html(root)
        # Rewind and empty the file before writing, as the new data can be
        # shorter than the old
        f.seek(0)
        f.truncate()
        f.write(raw)
|
||||
|
||||
|
||||
def is_probably_a_title_page(root):
|
||||
for title in XPath('//h:title')(root):
|
||||
if title.text:
|
||||
@ -259,7 +275,7 @@ def add_dummy_title_page(container, cover_image_name):
|
||||
__CONTENT__
|
||||
</div></div></body></html>
|
||||
'''
|
||||
titlepage_name = container.add_file('kobo-title-page-generated-by-calibre.html', modify_name_if_needed=True)
|
||||
titlepage_name = container.add_file(f'{DUMMY_TITLE_PAGE_NAME}.html', modify_name_if_needed=True)
|
||||
if cover_image_name:
|
||||
cover_href = container.name_to_href(cover_image_name, titlepage_name)
|
||||
html = html.replace('__CONTENT__', f'<img src="{cover_href}" alt="cover" style="height: 100%" />')
|
||||
@ -272,6 +288,15 @@ def add_dummy_title_page(container, cover_image_name):
|
||||
''')
|
||||
with container.open(titlepage_name, 'w') as f:
|
||||
f.write(html)
|
||||
container.apply_unique_properties(titlepage_name, 'calibre:title-page')
|
||||
|
||||
|
||||
def remove_dummy_title_page(container):
    '''
    Remove the first linear spine item from ``container`` if its name
    contains DUMMY_TITLE_PAGE_NAME. Nothing else in the spine is examined.

    :param container: The book container to modify
    '''
    # Container.spine_names is a property that yields (name, is_linear)
    # pairs, so it is iterated directly rather than called
    for name, is_linear in container.spine_names:
        if not is_linear:
            continue
        if DUMMY_TITLE_PAGE_NAME in name:
            container.remove_item(name)
        # NOTE(review): nesting was reconstructed from a flattened diff view;
        # confirm that the loop is meant to stop at the first linear item
        # whether or not it matched
        break
|
||||
|
||||
|
||||
def first_spine_item_is_probably_cover(container) -> bool:
|
||||
@ -285,10 +310,55 @@ def first_spine_item_is_probably_cover(container) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def kepubify_container(container):
|
||||
lang = container.mi.language
|
||||
def kepubify_container(container, max_workers=0):
    '''
    Kepubify every HTML document in ``container``.

    Steps, in order: remove an existing dummy title page, mark the cover
    image (if one is found) with the ``cover-image`` property, add a dummy
    title page when no cover page is found and the first spine item does not
    look like a cover, then run :func:`kepubify_html_path` on every file
    whose media type is in OEB_DOCS.

    :param container: The book container to process
    :param max_workers: Forwarded to calculate_number_of_workers() to decide
        how many threads are used
    '''
    remove_dummy_title_page(container)
    metadata_lang = container.mi.language

    cover = find_cover_image(container) or find_cover_image3(container)
    if cover:
        container.apply_unique_properties(cover, 'cover-image')

    has_cover_page = find_cover_page(container) or first_spine_item_is_probably_cover(container)
    if not has_cover_page:
        add_dummy_title_page(container, cover)

    html_names = tuple(name for name, mime in container.mime_map.items() if mime in OEB_DOCS)
    num_workers = calculate_number_of_workers(html_names, container, max_workers)
    html_paths = tuple(container.name_to_abspath(name) for name in html_names)

    if num_workers >= 2:
        with ThreadPoolExecutor(max_workers=num_workers) as pool:
            pending = tuple(pool.submit(kepubify_html_path, p, metadata_lang) for p in html_paths)
            # Calling result() re-raises any exception from the worker thread
            for job in pending:
                job.result()
    else:
        for p in html_paths:
            kepubify_html_path(p, metadata_lang)
|
||||
|
||||
|
||||
def profile():
    '''
    Run :func:`main` with a single worker on the file named by the last
    command line argument, inside a Profiler() context.
    '''
    from calibre.ptempfile import TemporaryDirectory

    book_path = sys.argv[-1]
    # The temporary directory is entered for the duration of the run but its
    # path is not used here
    with TemporaryDirectory(), Profiler():
        main(book_path, max_workers=1)
|
||||
|
||||
|
||||
def develop():
    '''
    Convert the file named by the last command line argument with a single
    worker, extract the result into a temporary directory and wait for the
    user to press Enter before leaving the directory's context.
    '''
    from zipfile import ZipFile

    from calibre.ptempfile import TemporaryDirectory

    book_path = sys.argv[-1]
    with TemporaryDirectory() as tdir:
        kepub_path = main(book_path, max_workers=1)
        with ZipFile(kepub_path) as archive:
            archive.extractall(tdir)
        # Pause inside the with block so the extracted files can be inspected
        print('Extracted to:', tdir)
        input('Press Enter to quit')
|
||||
|
||||
|
||||
def main(path, max_workers=0):
    '''
    Kepubify the book at ``path`` and save it alongside the input, using the
    same base name with a ``.kepub`` extension.

    :param path: Path to the input book, opened with get_container() in tweak mode
    :param max_workers: Forwarded to :func:`kepubify_container`
    :return: The path of the file that was written
    '''
    book = get_container(path, tweak_mode=True)
    kepubify_container(book, max_workers=max_workers)
    # Swap whatever extension the input had for .kepub
    outpath = os.path.splitext(path)[0] + '.kepub'
    book.commit(output=outpath)
    return outpath
|
||||
|
||||
|
||||
# When run as a script, convert the file named by the last command line
# argument using main()'s default max_workers
if __name__ == '__main__':
    main(sys.argv[-1])
|
||||
|
Loading…
x
Reference in New Issue
Block a user