Implement parsing of SMIL files for the viewer

This commit is contained in:
Kovid Goyal 2023-10-19 13:55:22 +05:30
parent d61ad01055
commit f02948d4f3
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 134 additions and 17 deletions

View File

@ -172,8 +172,8 @@ application/simple-filter+xml
application/simple-message-summary application/simple-message-summary
application/simplesymbolcontainer application/simplesymbolcontainer
application/slate application/slate
application/smil smi smil # application/smil obsoleted by smil+xml
application/smil+xml smi smil application/smil+xml smil smi sml
application/soap+fastinfoset application/soap+fastinfoset
application/soap+xml application/soap+xml
application/sparql-query rq application/sparql-query rq

View File

@ -54,13 +54,14 @@ RE_NS = 'http://exslt.org/regular-expressions'
MBP_NS = 'http://www.mobipocket.com' MBP_NS = 'http://www.mobipocket.com'
EPUB_NS = 'http://www.idpf.org/2007/ops' EPUB_NS = 'http://www.idpf.org/2007/ops'
MATHML_NS = 'http://www.w3.org/1998/Math/MathML' MATHML_NS = 'http://www.w3.org/1998/Math/MathML'
SMIL_NS = 'http://www.w3.org/ns/SMIL'
XPNSMAP = { XPNSMAP = {
'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS, 'd09': DC09_NS, 'h': XHTML_NS, 'o1': OPF1_NS, 'o2': OPF2_NS, 'd09': DC09_NS,
'd10': DC10_NS, 'd11': DC11_NS, 'xsi': XSI_NS, 'dt': DCTERMS_NS, 'd10': DC10_NS, 'd11': DC11_NS, 'xsi': XSI_NS, 'dt': DCTERMS_NS,
'ncx': NCX_NS, 'svg': SVG_NS, 'xl': XLINK_NS, 're': RE_NS, 'ncx': NCX_NS, 'svg': SVG_NS, 'xl': XLINK_NS, 're': RE_NS,
'mathml': MATHML_NS, 'mbp': MBP_NS, 'calibre': CALIBRE_NS, 'mathml': MATHML_NS, 'mbp': MBP_NS, 'calibre': CALIBRE_NS,
'epub':EPUB_NS 'epub':EPUB_NS, 'smil': SMIL_NS,
} }
OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS} OPF1_NSMAP = {'dc': DC11_NS, 'oebpackage': OPF1_NS}
@ -100,6 +101,14 @@ def XLINK(name):
return f'{{{XLINK_NS}}}{name}' return f'{{{XLINK_NS}}}{name}'
def SMIL(name):
    """Return *name* qualified with the SMIL namespace, as ``{namespace}name``."""
    qualified = f'{{{SMIL_NS}}}{name}'
    return qualified
def EPUB(name):
    """Return *name* qualified with the EPUB namespace, as ``{namespace}name``."""
    qualified = f'{{{EPUB_NS}}}{name}'
    return qualified
def CALIBRE(name): def CALIBRE(name):
return f'{{{CALIBRE_NS}}}{name}' return f'{{{CALIBRE_NS}}}{name}'

View File

@ -18,13 +18,13 @@ from calibre import detect_ncpus, force_unicode, prepare_string_for_xml
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre.customize.ui import plugin_for_input_format from calibre.customize.ui import plugin_for_input_format
from calibre.ebooks.oeb.base import ( from calibre.ebooks.oeb.base import (
OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath, EPUB, OEB_DOCS, OEB_STYLES, OPF, SMIL, XHTML, XHTML_NS, XLINK, XPath as _XPath,
rewrite_links, urlunquote rewrite_links, urlunquote,
) )
from calibre.ebooks.oeb.iterator.book import extract_book from calibre.ebooks.oeb.iterator.book import extract_book
from calibre.ebooks.oeb.polish.container import Container as ContainerBase from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.cover import ( from calibre.ebooks.oeb.polish.cover import (
find_cover_image, find_cover_image_in_page, find_cover_page find_cover_image, find_cover_image_in_page, find_cover_page,
) )
from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ebooks.oeb.polish.utils import guess_type
@ -35,15 +35,13 @@ from calibre.utils.date import EPOCH
from calibre.utils.filenames import rmtree from calibre.utils.filenames import rmtree
from calibre.utils.ipc.simple_worker import start_pipe_worker from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.serialize import ( from calibre.utils.serialize import json_dumps, json_loads, msgpack_dumps, msgpack_loads
json_dumps, json_loads, msgpack_dumps, msgpack_loads
)
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
from calibre_extensions import speedup from calibre_extensions import speedup
from calibre_extensions.fast_css_transform import transform_properties from calibre_extensions.fast_css_transform import transform_properties
from polyglot.binary import ( from polyglot.binary import (
as_base64_unicode as encode_component, from_base64_bytes, as_base64_unicode as encode_component, from_base64_bytes,
from_base64_unicode as decode_component from_base64_unicode as decode_component,
) )
from polyglot.builtins import as_bytes, iteritems from polyglot.builtins import as_bytes, iteritems
from polyglot.urllib import quote, urlparse from polyglot.urllib import quote, urlparse
@ -324,6 +322,96 @@ def transform_svg_image(container, name, link_uid, virtualize_resources, virtual
container.commit_item(name) container.commit_item(name)
def parse_smil_time(x):
    """Convert a SMIL clock value to a number of seconds.

    Accepted forms (https://www.w3.org/TR/SMIL3/smil-timing.html#q22):

    * full clock value ``hours:minutes:seconds`` (seconds may be fractional)
    * partial clock value ``minutes:seconds``
    * timecount: a number followed by an optional metric, one of ``h``,
      ``min``, ``s`` or ``ms``; a bare number is taken as seconds

    In the clock forms the sign of each field is ignored and out-of-range
    minute/second fields are clamped to 59 rather than rejected.

    :param x: the clock value as a string
    :return: the time in seconds (int or float)
    :raises ValueError: if ``x`` is not a recognisable clock value
    """
    def clamp_minutes(value):
        return min(abs(value), 59)

    def clamp_seconds(value):
        # Valid seconds lie in [0, 60), so a fractional part such as 59.5
        # must be kept; only genuinely out-of-range values are clamped.
        value = abs(value)
        return value if value < 60 else 59

    text = x.strip()
    parts = text.split(':')
    if len(parts) == 3:
        hours, minutes, seconds = int(parts[0]), int(parts[1]), float(parts[2])
        return abs(hours) * 3600 + clamp_minutes(minutes) * 60 + clamp_seconds(seconds)
    if len(parts) == 2:
        minutes, seconds = int(parts[0]), float(parts[1])
        return clamp_minutes(minutes) * 60 + clamp_seconds(seconds)
    if len(parts) == 1:
        # 'ms' has to be tested before 's', since every 'ms' value also
        # ends with 's'.
        for metric, factor in (('ms', 0.001), ('min', 60), ('h', 3600), ('s', 1)):
            if text.endswith(metric):
                return float(text[:-len(metric)]) * factor
        try:
            # The metric is optional; without one the value is in seconds
            return float(text)
        except ValueError:
            raise ValueError(f'Malformed SMIL time: {x}') from None
    raise ValueError(f'Malformed SMIL time: {x}')
def transform_smil(container, name, link_uid, virtualize_resources, virtualized_names, smil_map):
    """Collect the media overlay data from the SMIL file ``name`` into ``smil_map``.

    ``smil_map`` is modified in place. It is keyed by the name that
    ``container.href_to_name()`` returns for a ``textref``; each value is a
    synthetic entry with ``'type': 'root'`` whose ``'seq'`` list holds the
    sequences found for that name. Nothing is returned.

    ``link_uid``, ``virtualize_resources`` and ``virtualized_names`` are not
    used by this function.
    """
    smil_root = container.parsed(name)
    type_attr = EPUB('type')
    textref_attr = EPUB('textref')
    body_tag = SMIL('body')
    seq_tag = SMIL('seq')
    par_tag = SMIL('par')
    text_tag = SMIL('text')
    audio_tag = SMIL('audio')

    def par_to_dict(par_elem):
        # Describe one <par>: its optional type, its text target as
        # [name, fragment] and its audio clip with optional start/end times.
        entry = {}
        par_type = par_elem.get(type_attr)
        if par_type:
            entry['type'] = par_type
        for node in par_elem.iterchildren('*'):
            if node.tag not in (text_tag, audio_tag):
                continue
            src = node.get('src')
            if not src:
                continue
            if node.tag == text_tag:
                entry['text'] = [container.href_to_name(src, name), src.partition('#')[2]]
                continue
            entry['audio'] = container.href_to_name(src, name)
            clip_begin = node.get('clipBegin')
            clip_end = node.get('clipEnd')
            if clip_begin:
                entry['start'] = parse_smil_time(clip_begin)
            if clip_end:
                entry['end'] = parse_smil_time(clip_end)
        return entry

    def add_seq(seq_elem, textref, parent=None):
        # Build the record for a sequence element, attach it to its parent
        # and recurse into nested sequences that carry a textref.
        target = container.href_to_name(textref, name)
        fragment = textref.partition('#')[2]
        record = {'textref': [target, fragment], 'par': [], 'seq': []}
        seq_type = seq_elem.get(type_attr)
        if seq_type:
            record['type'] = seq_type
        if parent is None:
            # Top level sequences hang off a per-target root entry that is
            # created the first time the target is seen.
            parent = smil_map.get(target)
            if parent is None:
                parent = {'textref': [target, ''], 'par': [], 'seq': [], 'type': 'root'}
                smil_map[target] = parent
        parent['seq'].append(record)
        for node in seq_elem.iterchildren('*'):
            if node.tag == par_tag:
                record['par'].append(par_to_dict(node))
            elif node.tag == seq_tag:
                nested_ref = node.get(textref_attr)
                if nested_ref:
                    add_seq(node, nested_ref, record)
        # Drop the child lists that ended up empty
        for key in ('par', 'seq'):
            if not record[key]:
                del record[key]

    for top in smil_root.iterchildren('*'):
        if top.tag != body_tag:
            continue
        body_ref = top.get(textref_attr)
        if body_ref:
            # A body with its own textref is treated as a sequence itself
            add_seq(top, body_ref)
            continue
        for node in top.iterchildren('*'):
            if node.tag != seq_tag:
                continue
            seq_ref = node.get(textref_attr)
            if seq_ref:
                add_seq(node, seq_ref)
def transform_inline_styles(container, name, transform_sheet, transform_style): def transform_inline_styles(container, name, transform_sheet, transform_style):
root = container.parsed(name) root = container.parsed(name)
changed = False changed = False
@ -553,6 +641,7 @@ def process_book_files(names, container_dir, opfpath, virtualize_resources, link
container.cloned = False container.cloned = False
link_to_map = {} link_to_map = {}
html_data = {} html_data = {}
smil_map = {'__smil_file_names__': []}
virtualized_names = set() virtualized_names = set()
for name in names: for name in names:
if name is None: if name is None:
@ -570,7 +659,10 @@ def process_book_files(names, container_dir, opfpath, virtualize_resources, link
transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names) transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names)
elif mt == 'image/svg+xml': elif mt == 'image/svg+xml':
transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names) transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names)
return link_to_map, html_data, virtualized_names elif mt in ('application/smil', 'application/smil+xml'):
smil_map['__smil_file_names__'].append(name)
transform_smil(container, name, link_uid, virtualize_resources, virtualized_names, smil_map)
return link_to_map, html_data, virtualized_names, smil_map
def process_exploded_book( def process_exploded_book(
@ -583,7 +675,7 @@ def process_exploded_book(
is_comic = bool(getattr(input_plugin, 'is_image_collection', False)) is_comic = bool(getattr(input_plugin, 'is_image_collection', False))
def needs_work(mt): def needs_work(mt):
return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml' return mt in OEB_STYLES or mt in OEB_DOCS or mt in ('image/svg+xml', 'application/smil', 'application/smil+xml')
def work_priority(name): def work_priority(name):
# ensure workers with large files or stylesheets # ensure workers with large files or stylesheets
@ -666,14 +758,27 @@ def process_exploded_book(
else: else:
dest[k] = v dest[k] = v
for link_to_map, hdata, vnames in results: final_smil_map = {}
def merge_smil_map(smil_map):
for n in smil_map.pop('__smil_file_names__'):
excluded_names.add(n)
for n, d in smil_map.items():
if d:
# This assumes all smil data for a spine item is in a single
# smil file, which is required per the spec
final_smil_map[n] = d
for link_to_map, hdata, vnames, smil_map in results:
html_data.update(hdata) html_data.update(hdata)
virtualized_names |= vnames virtualized_names |= vnames
merge_smil_map(smil_map)
for k, v in iteritems(link_to_map): for k, v in iteritems(link_to_map):
if k in ltm: if k in ltm:
merge_ltm(ltm[k], v) merge_ltm(ltm[k], v)
else: else:
ltm[k] = v ltm[k] = v
book_render_data['has_smil'] = bool(final_smil_map)
def manifest_data(name): def manifest_data(name):
mt = (container.mime_map.get(name) or 'application/octet-stream').lower() mt = (container.mime_map.get(name) or 'application/octet-stream').lower()
@ -693,6 +798,9 @@ def process_exploded_book(
if hm: if hm:
book_render_data['has_maths'] = True book_render_data['has_maths'] = True
ans['anchor_map'] = data['anchor_map'] ans['anchor_map'] = data['anchor_map']
smil_map = final_smil_map.get(name)
if smil_map:
ans['smil_map'] = smil_map
return ans return ans
book_render_data['files'] = {name:manifest_data(name) for name in set(container.name_path_map) - excluded_names} book_render_data['files'] = {name:manifest_data(name) for name in set(container.name_path_map) - excluded_names}