mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on kepubify
This commit is contained in:
parent
42f4676771
commit
23323e3ecd
@ -16,7 +16,10 @@ import os
|
||||
import re
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import NamedTuple
|
||||
|
||||
from css_parser import parseString as parse_css_string
|
||||
from css_parser.css import CSSRule
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
@ -45,15 +48,21 @@ BLOCK_TAGS = frozenset((
|
||||
KOBO_CSS = 'div#book-inner { margin-top: 0; margin-bottom: 0; }'
|
||||
|
||||
|
||||
class Options(NamedTuple):
|
||||
extra_css: str = KOBO_CSS
|
||||
remove_widows_and_orphans: bool = False
|
||||
remove_at_page_rules: bool = False
|
||||
|
||||
|
||||
def outer_html(node):
|
||||
return etree.tostring(node, encoding='unicode', with_tail=False)
|
||||
|
||||
|
||||
def add_style(root, css=KOBO_CSS, cls=KOBO_CSS_CLASS) -> bool:
|
||||
def add_style(root, opts: Options, cls=KOBO_CSS_CLASS) -> bool:
|
||||
|
||||
def add(parent):
|
||||
e = parent.makeelement(XHTML('style'), type='text/css')
|
||||
e.text = css
|
||||
e.text = opts.extra_css
|
||||
e.set('class', cls)
|
||||
insert_self_closing(parent, e)
|
||||
|
||||
@ -196,9 +205,9 @@ def remove_kobo_spans(body: etree.Element) -> bool:
|
||||
return found
|
||||
|
||||
|
||||
def add_kobo_markup_to_html(root, metadata_lang):
|
||||
def add_kobo_markup_to_html(root, opts, metadata_lang):
|
||||
root_lang = canonicalize_lang(lang_for_elem(root, canonicalize_lang(metadata_lang or get_lang())) or 'en')
|
||||
add_style(root)
|
||||
add_style(root, opts)
|
||||
for body in XPath('./h:body')(root):
|
||||
inner = wrap_body_contents(body)
|
||||
add_kobo_spans(inner, lang_for_elem(body, root_lang))
|
||||
@ -218,22 +227,22 @@ def serialize_html(root) -> bytes:
|
||||
return b"<?xml version='1.0' encoding='utf-8'?>\n" + ans.encode('utf-8')
|
||||
|
||||
|
||||
def kepubify_parsed_html(root, metadata_lang: str = 'en'):
|
||||
def kepubify_parsed_html(root, opts: Options, metadata_lang: str = 'en'):
|
||||
remove_kobo_markup_from_html(root)
|
||||
merge_multiple_html_heads_and_bodies(root)
|
||||
add_kobo_markup_to_html(root, metadata_lang)
|
||||
add_kobo_markup_to_html(root, opts, metadata_lang)
|
||||
|
||||
|
||||
def kepubify_html_data(raw: str | bytes, metadata_lang: str = 'en'):
|
||||
def kepubify_html_data(raw: str | bytes, opts: Options = Options(), metadata_lang: str = 'en'):
|
||||
root = parse(raw)
|
||||
kepubify_parsed_html(root, metadata_lang)
|
||||
kepubify_parsed_html(root, opts, metadata_lang)
|
||||
return root
|
||||
|
||||
|
||||
def kepubify_html_path(path: str, metadata_lang: str = 'en'):
|
||||
def kepubify_html_path(path: str, metadata_lang: str = 'en', opts: Options = Options()):
|
||||
with open(path, 'r+b') as f:
|
||||
raw = f.read()
|
||||
root = kepubify_html_data(raw)
|
||||
root = kepubify_html_data(raw, opts, metadata_lang)
|
||||
raw = serialize_html(root)
|
||||
f.seek(0)
|
||||
f.truncate()
|
||||
@ -310,7 +319,7 @@ def first_spine_item_is_probably_title_page(container: Container) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def kepubify_container(container: Container, max_workers=0):
|
||||
def kepubify_container(container: Container, opts: Options, max_workers: int = 0) -> None:
|
||||
remove_dummy_title_page(container)
|
||||
metadata_lang = container.mi.language
|
||||
cover_image_name = find_cover_image(container) or find_cover_image3(container)
|
||||
@ -323,17 +332,17 @@ def kepubify_container(container: Container, max_workers=0):
|
||||
paths = tuple(map(container.name_to_abspath, names_that_need_work))
|
||||
if num_workers < 2:
|
||||
for path in paths:
|
||||
kepubify_html_path(path, metadata_lang)
|
||||
kepubify_html_path(path, metadata_lang, opts)
|
||||
else:
|
||||
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
||||
futures = tuple(executor.submit(kepubify_html_path, path, metadata_lang) for path in paths)
|
||||
futures = tuple(executor.submit(kepubify_html_path, path, metadata_lang, opts) for path in paths)
|
||||
for future in futures:
|
||||
future.result()
|
||||
|
||||
|
||||
def kepubify_path(path, outpath='', max_workers=0, allow_overwrite=False):
|
||||
def kepubify_path(path, outpath='', max_workers=0, allow_overwrite=False, opts: Options = Options()):
|
||||
container = get_container(path, tweak_mode=True)
|
||||
kepubify_container(container, max_workers=max_workers)
|
||||
kepubify_container(container, opts, max_workers=max_workers)
|
||||
base, ext = os.path.splitext(path)
|
||||
outpath = outpath or base + '.kepub'
|
||||
c = 0
|
||||
@ -344,6 +353,70 @@ def kepubify_path(path, outpath='', max_workers=0, allow_overwrite=False):
|
||||
return outpath
|
||||
|
||||
|
||||
def make_options(
|
||||
extra_css: str = '',
|
||||
affect_hyphenation: bool = False,
|
||||
disable_hyphenation: bool = False,
|
||||
hyphenation_min_chars: int = 6,
|
||||
hyphenation_min_chars_before: int = 3,
|
||||
hyphenation_min_chars_after: int = 3,
|
||||
hyphenation_limit_lines: int = 2,
|
||||
) -> Options:
|
||||
remove_widows_and_orphans = remove_at_page_rules = False
|
||||
if extra_css:
|
||||
sheet = parse_css_string(extra_css)
|
||||
for rule in sheet.cssRules:
|
||||
if rule.type == CSSRule.PAGE_RULE:
|
||||
remove_at_page_rules = True
|
||||
elif rule.type == CSSRule.STYLE_RULE:
|
||||
if rule.style['widows'] or rule.style['orphans']:
|
||||
remove_widows_and_orphans = True
|
||||
if remove_widows_and_orphans and remove_at_page_rules:
|
||||
break
|
||||
hyphen_css = ''
|
||||
if affect_hyphenation:
|
||||
if disable_hyphenation:
|
||||
hyphen_css = '''
|
||||
* {
|
||||
-webkit-hyphens: none !important;
|
||||
hyphens: none !important;
|
||||
}
|
||||
'''
|
||||
elif hyphenation_min_chars > 0:
|
||||
hyphen_css = f'''
|
||||
* {{
|
||||
/* Vendor-prefixed CSS properties for hyphenation. Keep -webkit first since
|
||||
* some user agents also recognize -webkit properties and will apply them.
|
||||
*/
|
||||
-webkit-hyphens: auto;
|
||||
-webkit-hyphenate-limit-after: {hyphenation_min_chars_after};
|
||||
-webkit-hyphenate-limit-before: {hyphenation_min_chars_before};
|
||||
-webkit-hyphenate-limit-chars: {hyphenation_min_chars} {hyphenation_min_chars_before} {hyphenation_min_chars_after};
|
||||
-webkit-hyphenate-limit-lines: {hyphenation_limit_lines};
|
||||
|
||||
/* CSS4 standard properties for hyphenation. If a property isn't represented
|
||||
* in the standard, don't put a vendor-prefixed property for it above.
|
||||
*/
|
||||
hyphens: auto;
|
||||
hyphenate-limit-chars: {hyphenation_min_chars} {hyphenation_min_chars_before} {hyphenation_min_chars_after};
|
||||
hyphenate-limit-lines: {hyphenation_limit_lines};
|
||||
hyphenate-limit-last: page;
|
||||
}}
|
||||
|
||||
h1, h2, h3, h4, h5, h6, td {{
|
||||
-webkit-hyphens: none !important;
|
||||
hyphens: none !important;
|
||||
}}
|
||||
'''
|
||||
if extra_css:
|
||||
extra_css = KOBO_CSS + '\n\n' + extra_css
|
||||
else:
|
||||
extra_css = KOBO_CSS
|
||||
if hyphen_css:
|
||||
extra_css += '\n\n' + hyphen_css
|
||||
return Options(extra_css=extra_css, remove_widows_and_orphans=remove_widows_and_orphans, remove_at_page_rules=remove_at_page_rules)
|
||||
|
||||
|
||||
def profile():
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
path = sys.argv[-1]
|
||||
|
Loading…
x
Reference in New Issue
Block a user