mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
More work on kepubify
This commit is contained in:
parent
9c5d1c0f4f
commit
e23dcffe43
@ -12,24 +12,30 @@
|
|||||||
# * Cover marking in the OPF
|
# * Cover marking in the OPF
|
||||||
# * Markup cleanup (remove various things that trip up the Kobo renderer)
|
# * Markup cleanup (remove various things that trip up the Kobo renderer)
|
||||||
|
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
from calibre.ebooks.oeb.base import XHTML, XPath, escape_cdata
|
from calibre.ebooks.oeb.base import OEB_DOCS, XHTML, XPath, escape_cdata
|
||||||
from calibre.ebooks.oeb.parse_utils import barename, merge_multiple_html_heads_and_bodies
|
from calibre.ebooks.oeb.parse_utils import barename, merge_multiple_html_heads_and_bodies
|
||||||
|
from calibre.ebooks.oeb.polish.container import get_container
|
||||||
from calibre.ebooks.oeb.polish.cover import find_cover_image, find_cover_image3, find_cover_page
|
from calibre.ebooks.oeb.polish.cover import find_cover_image, find_cover_image3, find_cover_page
|
||||||
from calibre.ebooks.oeb.polish.parsing import parse
|
from calibre.ebooks.oeb.polish.parsing import parse
|
||||||
from calibre.ebooks.oeb.polish.tts import lang_for_elem
|
from calibre.ebooks.oeb.polish.tts import lang_for_elem
|
||||||
from calibre.ebooks.oeb.polish.utils import extract, insert_self_closing
|
from calibre.ebooks.oeb.polish.utils import extract, insert_self_closing
|
||||||
from calibre.spell.break_iterator import sentence_positions
|
from calibre.spell.break_iterator import sentence_positions
|
||||||
|
from calibre.srv.render_book import Profiler, calculate_number_of_workers
|
||||||
from calibre.utils.localization import canonicalize_lang, get_lang
|
from calibre.utils.localization import canonicalize_lang, get_lang
|
||||||
|
|
||||||
KOBO_CSS_CLASS = 'kobostylehacks'
|
KOBO_CSS_CLASS = 'kobostylehacks'
|
||||||
OUTER_DIV_ID = 'book-columns'
|
OUTER_DIV_ID = 'book-columns'
|
||||||
INNER_DIV_ID = 'book-inner'
|
INNER_DIV_ID = 'book-inner'
|
||||||
KOBO_SPAN_CLASS = 'koboSpan'
|
KOBO_SPAN_CLASS = 'koboSpan'
|
||||||
|
DUMMY_TITLE_PAGE_NAME = 'kobo-title-page-generated-by-calibre'
|
||||||
SKIPPED_TAGS = frozenset((
|
SKIPPED_TAGS = frozenset((
|
||||||
'', 'script', 'style', 'atom', 'pre', 'audio', 'video', 'svg', 'math'
|
'', 'script', 'style', 'atom', 'pre', 'audio', 'video', 'svg', 'math'
|
||||||
))
|
))
|
||||||
@ -224,6 +230,16 @@ def kepubify_html_data(raw: str | bytes, metadata_lang: str = 'en'):
|
|||||||
return root
|
return root
|
||||||
|
|
||||||
|
|
||||||
|
def kepubify_html_path(path: str, metadata_lang: str = 'en'):
|
||||||
|
with open(path, 'r+b') as f:
|
||||||
|
raw = f.read()
|
||||||
|
root = kepubify_html_data(raw)
|
||||||
|
raw = serialize_html(root)
|
||||||
|
f.seek(0)
|
||||||
|
f.truncate()
|
||||||
|
f.write(raw)
|
||||||
|
|
||||||
|
|
||||||
def is_probably_a_title_page(root):
|
def is_probably_a_title_page(root):
|
||||||
for title in XPath('//h:title')(root):
|
for title in XPath('//h:title')(root):
|
||||||
if title.text:
|
if title.text:
|
||||||
@ -259,7 +275,7 @@ def add_dummy_title_page(container, cover_image_name):
|
|||||||
__CONTENT__
|
__CONTENT__
|
||||||
</div></div></body></html>
|
</div></div></body></html>
|
||||||
'''
|
'''
|
||||||
titlepage_name = container.add_file('kobo-title-page-generated-by-calibre.html', modify_name_if_needed=True)
|
titlepage_name = container.add_file(f'{DUMMY_TITLE_PAGE_NAME}.html', modify_name_if_needed=True)
|
||||||
if cover_image_name:
|
if cover_image_name:
|
||||||
cover_href = container.name_to_href(cover_image_name, titlepage_name)
|
cover_href = container.name_to_href(cover_image_name, titlepage_name)
|
||||||
html = html.replace('__CONTENT__', f'<img src="{cover_href}" alt="cover" style="height: 100%" />')
|
html = html.replace('__CONTENT__', f'<img src="{cover_href}" alt="cover" style="height: 100%" />')
|
||||||
@ -272,6 +288,15 @@ def add_dummy_title_page(container, cover_image_name):
|
|||||||
''')
|
''')
|
||||||
with container.open(titlepage_name, 'w') as f:
|
with container.open(titlepage_name, 'w') as f:
|
||||||
f.write(html)
|
f.write(html)
|
||||||
|
container.apply_unique_properties(titlepage_name, 'calibre:title-page')
|
||||||
|
|
||||||
|
|
||||||
|
def remove_dummy_title_page(container):
|
||||||
|
for name, is_linear in container.spine_names():
|
||||||
|
if is_linear:
|
||||||
|
if DUMMY_TITLE_PAGE_NAME in name:
|
||||||
|
container.remove_item(name)
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
def first_spine_item_is_probably_cover(container) -> bool:
|
def first_spine_item_is_probably_cover(container) -> bool:
|
||||||
@ -285,10 +310,55 @@ def first_spine_item_is_probably_cover(container) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def kepubify_container(container):
|
def kepubify_container(container, max_workers=0):
|
||||||
lang = container.mi.language
|
remove_dummy_title_page(container)
|
||||||
|
metadata_lang = container.mi.language
|
||||||
cover_image_name = find_cover_image(container) or find_cover_image3(container)
|
cover_image_name = find_cover_image(container) or find_cover_image3(container)
|
||||||
if cover_image_name:
|
if cover_image_name:
|
||||||
container.apply_unique_properties(cover_image_name, 'cover-image')
|
container.apply_unique_properties(cover_image_name, 'cover-image')
|
||||||
if not find_cover_page(container) and not first_spine_item_is_probably_cover(container):
|
if not find_cover_page(container) and not first_spine_item_is_probably_cover(container):
|
||||||
add_dummy_title_page(container, cover_image_name)
|
add_dummy_title_page(container, cover_image_name)
|
||||||
|
names_that_need_work = tuple(name for name, mt in container.mime_map.items() if mt in OEB_DOCS)
|
||||||
|
num_workers = calculate_number_of_workers(names_that_need_work, container, max_workers)
|
||||||
|
paths = tuple(map(container.name_to_abspath, names_that_need_work))
|
||||||
|
if num_workers < 2:
|
||||||
|
for path in paths:
|
||||||
|
kepubify_html_path(path, metadata_lang)
|
||||||
|
else:
|
||||||
|
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
||||||
|
futures = tuple(executor.submit(kepubify_html_path, path, metadata_lang) for path in paths)
|
||||||
|
for future in futures:
|
||||||
|
future.result()
|
||||||
|
|
||||||
|
|
||||||
|
def profile():
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
path = sys.argv[-1]
|
||||||
|
with TemporaryDirectory() as tdir, Profiler():
|
||||||
|
main(path, max_workers=1)
|
||||||
|
|
||||||
|
|
||||||
|
def develop():
|
||||||
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
path = sys.argv[-1]
|
||||||
|
with TemporaryDirectory() as tdir:
|
||||||
|
outpath = main(path, max_workers=1)
|
||||||
|
with ZipFile(outpath) as zf:
|
||||||
|
zf.extractall(tdir)
|
||||||
|
print('Extracted to:', tdir)
|
||||||
|
input('Press Enter to quit')
|
||||||
|
|
||||||
|
|
||||||
|
def main(path, max_workers=0):
|
||||||
|
container = get_container(path, tweak_mode=True)
|
||||||
|
kepubify_container(container, max_workers=max_workers)
|
||||||
|
base, ext = os.path.splitext(path)
|
||||||
|
outpath = base + '.kepub'
|
||||||
|
container.commit(output=outpath)
|
||||||
|
return outpath
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(sys.argv[-1])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user