diff --git a/resources/mime.types b/resources/mime.types
index 45caa946f5..e1d47fd361 100644
--- a/resources/mime.types
+++ b/resources/mime.types
@@ -172,8 +172,8 @@ application/simple-filter+xml
 application/simple-message-summary
 application/simplesymbolcontainer
 application/slate
-application/smil				smi smil
-application/smil+xml				smi smil
+# application/smil obsoleted by smil+xml
+application/smil+xml				smil smi sml
 application/soap+fastinfoset
 application/soap+xml
 application/sparql-query			rq
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 63f9687763..133dc7ebe1 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -54,13 +54,14 @@ RE_NS = 'http://exslt.org/regular-expressions'
 MBP_NS = 'http://www.mobipocket.com'
 EPUB_NS = 'http://www.idpf.org/2007/ops'
 MATHML_NS = 'http://www.w3.org/1998/Math/MathML'
+SMIL_NS = 'http://www.w3.org/ns/SMIL'
 
 XPNSMAP = {
-    'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS, 'd09': DC09_NS,
-    'd10': DC10_NS, 'd11': DC11_NS, 'xsi': XSI_NS, 'dt': DCTERMS_NS,
-    'ncx': NCX_NS, 'svg': SVG_NS, 'xl': XLINK_NS, 're': RE_NS,
-    'mathml': MATHML_NS, 'mbp': MBP_NS, 'calibre': CALIBRE_NS,
-    'epub':EPUB_NS
+    'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS, 'd09': DC09_NS,
+    'd10': DC10_NS, 'd11': DC11_NS, 'xsi': XSI_NS, 'dt': DCTERMS_NS,
+    'ncx': NCX_NS, 'svg': SVG_NS, 'xl': XLINK_NS, 're': RE_NS,
+    'mathml': MATHML_NS, 'mbp': MBP_NS, 'calibre': CALIBRE_NS,
+    'epub':EPUB_NS, 'smil': SMIL_NS,
 }
 OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
 
@@ -100,6 +101,14 @@ def XLINK(name):
     return f'{{{XLINK_NS}}}{name}'
 
 
+def SMIL(name):
+    return f'{{{SMIL_NS}}}{name}'
+
+
+def EPUB(name):
+    return f'{{{EPUB_NS}}}{name}'
+
+
 def CALIBRE(name):
     return f'{{{CALIBRE_NS}}}{name}'
 
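The two new helpers follow the existing XHTML()/XLINK() pattern: they expand a local name into lxml's Clark notation, and the added 'smil' prefix makes the namespace available to the XPath() helper that compiles expressions against XPNSMAP. A minimal sketch of the intended usage (illustrative only, not part of the patch; smil_root stands for any parsed SMIL document):

from calibre.ebooks.oeb.base import EPUB, SMIL, XPath

SMIL('par')      # -> '{http://www.w3.org/ns/SMIL}par'
EPUB('textref')  # -> '{http://www.idpf.org/2007/ops}textref'

# Find every <par> element in a parsed SMIL file and read its epub:type
for par in XPath('//smil:par')(smil_root):
    print(par.get(EPUB('type')))
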
diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
index 807312a234..5e2f4180cd 100644
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -18,13 +18,13 @@ from calibre import detect_ncpus, force_unicode, prepare_string_for_xml
 from calibre.constants import iswindows
 from calibre.customize.ui import plugin_for_input_format
 from calibre.ebooks.oeb.base import (
-    OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath,
-    rewrite_links, urlunquote
+    EPUB, OEB_DOCS, OEB_STYLES, OPF, SMIL, XHTML, XHTML_NS, XLINK, XPath as _XPath,
+    rewrite_links, urlunquote,
 )
 from calibre.ebooks.oeb.iterator.book import extract_book
 from calibre.ebooks.oeb.polish.container import Container as ContainerBase
 from calibre.ebooks.oeb.polish.cover import (
-    find_cover_image, find_cover_image_in_page, find_cover_page
+    find_cover_image, find_cover_image_in_page, find_cover_page,
 )
 from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc
 from calibre.ebooks.oeb.polish.utils import guess_type
@@ -35,15 +35,13 @@ from calibre.utils.date import EPOCH
 from calibre.utils.filenames import rmtree
 from calibre.utils.ipc.simple_worker import start_pipe_worker
 from calibre.utils.logging import default_log
-from calibre.utils.serialize import (
-    json_dumps, json_loads, msgpack_dumps, msgpack_loads
-)
+from calibre.utils.serialize import json_dumps, json_loads, msgpack_dumps, msgpack_loads
 from calibre.utils.short_uuid import uuid4
 from calibre_extensions import speedup
 from calibre_extensions.fast_css_transform import transform_properties
 from polyglot.binary import (
     as_base64_unicode as encode_component, from_base64_bytes,
-    from_base64_unicode as decode_component
+    from_base64_unicode as decode_component,
 )
 from polyglot.builtins import as_bytes, iteritems
 from polyglot.urllib import quote, urlparse
@@ -324,6 +322,96 @@ def transform_svg_image(container, name, link_uid, virtualize_resources, virtual
     container.commit_item(name)
 
 
+def parse_smil_time(x):
+    # https://www.w3.org/TR/SMIL3/smil-timing.html#q22
+    parts = x.split(':')
+    seconds = 0
+    if len(parts) == 3:
+        hours, minutes, seconds = int(parts[0]), int(parts[1]), float(parts[2])
+        seconds = abs(hours) * 3600 + max(0, min(abs(minutes), 59)) * 60 + max(0, min(abs(seconds), 59))
+    elif len(parts) == 2:
+        minutes, seconds = int(parts[0]), float(parts[1])
+        seconds = max(0, min(abs(minutes), 59)) * 60 + max(0, min(abs(seconds), 59))
+    elif len(parts) == 1:
+        if x.endswith('ms'):
+            seconds = float(x[:-2]) * 0.001
+        elif x.endswith('s'):
+            seconds = float(x[:-1])
+        elif x.endswith('min'):
+            seconds = float(x[:-3]) * 60
+        elif x.endswith('h'):
+            seconds = float(x[:-1]) * 3600
+        else:
+            raise ValueError(f'Malformed SMIL time: {x}')
+    else:
+        raise ValueError(f'Malformed SMIL time: {x}')
+    return seconds
+
+
+
+def transform_smil(container, name, link_uid, virtualize_resources, virtualized_names, smil_map):
+    root = container.parsed(name)
+    text_tag, audio_tag = SMIL('text'), SMIL('audio')
+    body_tag, seq_tag, par_tag = SMIL('body'), SMIL('seq'), SMIL('par')
+    type_attr, textref_attr = EPUB('type'), EPUB('textref')
+
+    def make_par(par):
+        ans = {}
+        t = par.get(type_attr)
+        if t:
+            ans['type'] = t
+        for child in par.iterchildren('*'):
+            if child.tag == text_tag:
+                src = child.get('src')
+                if src:
+                    ans['text'] = [container.href_to_name(child.get('src'), name), src.partition('#')[2]]
+            elif child.tag == audio_tag:
+                src = child.get('src')
+                if src:
+                    ans['audio'] = container.href_to_name(src, name)
+                    b, e = child.get('clipBegin'), child.get('clipEnd')
+                    if b:
+                        ans['start'] = parse_smil_time(b)
+                    if e:
+                        ans['end'] = parse_smil_time(e)
+        return ans
+
+    def process_seq(seq_xml_element, tref, parent_seq=None):
+        target = container.href_to_name(tref, name)
+        seq = {'textref': [target, tref.partition('#')[2]], 'par': [], 'seq': []}
+        t = seq_xml_element.get(type_attr)
+        if t:
+            seq['type'] = t
+        if parent_seq is None:
+            parent_seq = smil_map.get(target)
+            if parent_seq is None:
+                smil_map[target] = parent_seq = {'textref': [target, ''], 'par':[], 'seq':[], 'type': 'root'}
+        parent_seq['seq'].append(seq)
+        for child in seq_xml_element.iterchildren('*'):
+            if child.tag == par_tag:
+                seq['par'].append(make_par(child))
+            elif child.tag == seq_tag:
+                tref = child.get(textref_attr)
+                if tref:
+                    process_seq(child, tref, seq)
+        if not seq['par']:
+            del seq['par']
+        if not seq['seq']:
+            del seq['seq']
+
+    for child in root.iterchildren('*'):
+        if child.tag == body_tag:
+            tref = child.get(textref_attr)
+            if tref:
+                process_seq(child, tref)
+            else:
+                for gc in child.iterchildren('*'):
+                    if gc.tag == seq_tag:
+                        tref = gc.get(textref_attr)
+                        if tref:
+                            process_seq(gc, tref)
+
+
 def transform_inline_styles(container, name, transform_sheet, transform_style):
     root = container.parsed(name)
     changed = False
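parse_smil_time() covers the SMIL clock values used in clipBegin/clipEnd: full and partial clock values as well as timecount values with an h/min/s/ms metric suffix (the two character 'ms' suffix has to be tested before the bare 's' suffix); bare numbers without a suffix are rejected. A few illustrative results, assuming the implementation above (not an exhaustive test):

parse_smil_time('0:05:01.2')  # 301.2   hh:mm:ss.fraction
parse_smil_time('4:33')       # 273.0   mm:ss
parse_smil_time('12.5s')      # 12.5
parse_smil_time('76ms')       # 0.076
parse_smil_time('0.5min')     # 30.0
parse_smil_time('1.5h')       # 5400.0
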
@@ -553,6 +641,7 @@ def process_book_files(names, container_dir, opfpath, virtualize_resources, link
     container.cloned = False
     link_to_map = {}
     html_data = {}
+    smil_map = {'__smil_file_names__': []}
     virtualized_names = set()
     for name in names:
         if name is None:
@@ -570,7 +659,10 @@ def process_book_files(names, container_dir, opfpath, virtualize_resources, link
             transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names)
         elif mt == 'image/svg+xml':
             transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names)
-    return link_to_map, html_data, virtualized_names
+        elif mt in ('application/smil', 'application/smil+xml'):
+            smil_map['__smil_file_names__'].append(name)
+            transform_smil(container, name, link_uid, virtualize_resources, virtualized_names, smil_map)
+    return link_to_map, html_data, virtualized_names, smil_map
 
 
 def process_exploded_book(
@@ -583,7 +675,7 @@ def process_exploded_book(
     is_comic = bool(getattr(input_plugin, 'is_image_collection', False))
 
     def needs_work(mt):
-        return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'
+        return mt in OEB_STYLES or mt in OEB_DOCS or mt in ('image/svg+xml', 'application/smil', 'application/smil+xml')
 
     def work_priority(name):
         # ensure workers with large files or stylesheets
@@ -666,14 +758,27 @@ def process_exploded_book(
             else:
                 dest[k] = v
 
-    for link_to_map, hdata, vnames in results:
+    final_smil_map = {}
+
+    def merge_smil_map(smil_map):
+        for n in smil_map.pop('__smil_file_names__'):
+            excluded_names.add(n)
+        for n, d in smil_map.items():
+            if d:
+                # This assumes all smil data for a spine item is in a single
+                # smil file, which is required per the spec
+                final_smil_map[n] = d
+
+    for link_to_map, hdata, vnames, smil_map in results:
         html_data.update(hdata)
         virtualized_names |= vnames
+        merge_smil_map(smil_map)
         for k, v in iteritems(link_to_map):
             if k in ltm:
                 merge_ltm(ltm[k], v)
             else:
                 ltm[k] = v
+    book_render_data['has_smil'] = bool(final_smil_map)
 
     def manifest_data(name):
         mt = (container.mime_map.get(name) or 'application/octet-stream').lower()
@@ -693,6 +798,9 @@ def process_exploded_book(
         if hm:
             book_render_data['has_maths'] = True
         ans['anchor_map'] = data['anchor_map']
+        smil_map = final_smil_map.get(name)
+        if smil_map:
+            ans['smil_map'] = smil_map
         return ans
 
     book_render_data['files'] = {name:manifest_data(name) for name in set(container.name_path_map) - excluded_names}
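For orientation, a sketch of the data shape this produces (file names, fragment ids and timings are invented for illustration). Suppose ch1.smil holds a single seq with epub:textref="ch1.xhtml" containing one par whose text element points at ch1.xhtml#p1 and whose audio element plays audio/ch1.m4a with clipBegin="0:00:01.5" and clipEnd="0:00:04.2". transform_smil() then records the following under final_smil_map['ch1.xhtml'], and manifest_data() attaches it to that spine item:

# book_render_data['files']['ch1.xhtml']['smil_map'], per the code above (sketch)
{
    'textref': ['ch1.xhtml', ''],
    'type': 'root',
    'par': [],                            # the synthetic root seq has no direct pars
    'seq': [{
        'textref': ['ch1.xhtml', ''],
        'par': [{
            'text': ['ch1.xhtml', 'p1'],  # [spine item name, fragment id]
            'audio': 'audio/ch1.m4a',
            'start': 1.5,                 # seconds, from clipBegin
            'end': 4.2,                   # seconds, from clipEnd
        }],
    }],
}

book_render_data['has_smil'] becomes True, and ch1.smil itself goes into excluded_names, so the raw SMIL file is dropped from the files manifest and only this digested form ends up in the render data.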