mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
...
This commit is contained in:
parent
5392f2e765
commit
27a855b477
@ -17,13 +17,26 @@ from lxml import etree
|
|||||||
from calibre import isbytestring, force_unicode
|
from calibre import isbytestring, force_unicode
|
||||||
from calibre.ebooks.mobi.utils import to_base
|
from calibre.ebooks.mobi.utils import to_base
|
||||||
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
|
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
|
||||||
extract, XHTML)
|
extract, XHTML, urlnormalize)
|
||||||
|
from calibre.ebooks.oeb.parse_utils import barename
|
||||||
|
|
||||||
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
||||||
|
|
||||||
# References to record numbers in KF8 are stored as base-32 encoded integers,
|
# References to record numbers in KF8 are stored as base-32 encoded integers,
|
||||||
# with 4 digits
|
# with 4 digits
|
||||||
to_ref = partial(to_base, base=32, min_num_digits=4)
|
to_ref = partial(to_base, base=32, min_num_digits=4)
|
||||||
|
# References in links are stored with 10 digits
|
||||||
|
to_href = partial(to_base, base=32, min_num_digits=10)
|
||||||
|
|
||||||
|
# Tags to which kindlegen adds the aid attribute.
# Kept in alphabetical order, grouped by first letter, so that additions and
# removals are easy to spot in a diff.
aid_able_tags = {
    'a', 'abbr', 'address', 'article', 'aside', 'audio',
    'b', 'bdo', 'blockquote', 'body', 'button',
    'cite', 'code',
    'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt',
    'em',
    'fieldset', 'figcaption', 'figure', 'footer',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup',
    'i', 'ins',
    'kbd',
    'label', 'legend', 'li',
    'map', 'mark', 'meter',
    'nav',
    'ol', 'output',
    'p', 'pre', 'progress',
    'q',
    'rp', 'rt',
    'samp', 'section', 'select', 'small', 'span', 'strong', 'sub',
    'summary', 'sup',
    'textarea', 'time',
    'ul',
    'var', 'video',
}
|
||||||
|
|
||||||
class KF8Writer(object):
|
class KF8Writer(object):
|
||||||
|
|
||||||
@ -37,6 +50,8 @@ class KF8Writer(object):
|
|||||||
self.replace_resource_links()
|
self.replace_resource_links()
|
||||||
self.extract_css_into_flows()
|
self.extract_css_into_flows()
|
||||||
self.extract_svg_into_flows()
|
self.extract_svg_into_flows()
|
||||||
|
self.replace_internal_links_with_placeholders()
|
||||||
|
self.insert_aid_attributes()
|
||||||
|
|
||||||
def dup_data(self):
|
def dup_data(self):
|
||||||
''' Duplicate data so that any changes we make to markup/CSS only
|
''' Duplicate data so that any changes we make to markup/CSS only
|
||||||
@ -112,7 +127,6 @@ class KF8Writer(object):
|
|||||||
|
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
root = self.data(item)
|
root = self.data(item)
|
||||||
if not hasattr(root, 'xpath'): continue
|
|
||||||
|
|
||||||
for link in XPath('//h:link[@href]')(root):
|
for link in XPath('//h:link[@href]')(root):
|
||||||
href = item.abshref(link.get('href'))
|
href = item.abshref(link.get('href'))
|
||||||
@ -143,7 +157,6 @@ class KF8Writer(object):
|
|||||||
def extract_svg_into_flows(self):
|
def extract_svg_into_flows(self):
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
root = self.data(item)
|
root = self.data(item)
|
||||||
if not hasattr(root, 'xpath'): continue
|
|
||||||
|
|
||||||
for svg in XPath('//svg:svg')(root):
|
for svg in XPath('//svg:svg')(root):
|
||||||
raw = etree.tostring(svg, encoding=unicode, with_tail=False)
|
raw = etree.tostring(svg, encoding=unicode, with_tail=False)
|
||||||
@ -156,4 +169,38 @@ class KF8Writer(object):
|
|||||||
p.insert(pos, img)
|
p.insert(pos, img)
|
||||||
extract(svg)
|
extract(svg)
|
||||||
|
|
||||||
|
def replace_internal_links_with_placeholders(self):
    '''
    Rewrite every <a href> that points at a spine item to a placeholder.

    The placeholder has the form ``kindle:pos:fid:0000:off:<n>`` where
    ``<n>`` is the running link counter encoded with ``to_href``. For each
    rewritten link, ``self.link_map`` maps the placeholder to the
    ``(href, fragment)`` pair it originally pointed at. Links whose
    normalized target is not the href of a spine item are left untouched.
    '''
    self.link_map = {}
    link_number = 0
    spine_hrefs = set(spine_item.href for spine_item in self.oeb.spine)

    for spine_item in self.oeb.spine:
        root = self.data(spine_item)

        for anchor in XPath('//h:a[@href]')(root):
            # The counter advances for every link, internal or not, so
            # placeholder numbers may have gaps.
            link_number += 1
            target = spine_item.abshref(anchor.get('href'))
            path, _, fragment = target.partition('#')
            path = urlnormalize(path)
            if path not in spine_hrefs:
                continue

            placeholder = 'kindle:pos:fid:0000:off:%s'%to_href(link_number)
            self.link_map[placeholder] = (path, fragment)
            anchor.set('href', placeholder)
|
||||||
|
|
||||||
|
def insert_aid_attributes(self):
    '''
    Add an ``aid`` attribute to elements in every spine item.

    An element receives an aid when it has an ``id`` attribute or when its
    lower-cased bare tag name is in ``aid_able_tags``. The aid is a base-32
    number: the spine index times one million, plus a per-item counter.
    ``self.id_map`` maps ``(item.href, id)`` to the aid of each element that
    has an id, and ``(item.href, '')`` to the aid of the <body> element.
    '''
    self.id_map = {}
    for spine_index, item in enumerate(self.oeb.spine):
        root = self.data(item)
        first_aid = spine_index * int(1e6)
        assigned = 0
        for elem in root.iterdescendants(etree.Element):
            elem_id = elem.attrib.get('id', None)
            if elem_id is None and barename(elem.tag).lower() not in aid_able_tags:
                continue

            elem.attrib['aid'] = to_base(first_aid + assigned, base=32)
            # Read the value back from the element so the map stores exactly
            # what the tree holds.
            stored_aid = elem.attrib['aid']
            if elem.tag == XHTML('body'):
                self.id_map[(item.href, '')] = stored_aid
            if elem_id is not None:
                self.id_map[(item.href, elem_id)] = stored_aid

            # NOTE(review): the diff view this was recovered from has lost
            # its indentation; the counter is assumed to advance only for
            # elements that actually received an aid -- confirm upstream.
            assigned += 1
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user