mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix an encoding error in html2epub and make splitting code more robust
This commit is contained in:
parent
371c1bee5b
commit
f976ad63be
@ -48,8 +48,7 @@ from calibre.ebooks.epub import initialize_container, PROFILES
|
|||||||
from calibre.ebooks.epub.split import split
|
from calibre.ebooks.epub.split import split
|
||||||
from calibre.ebooks.epub.fonts import Rationalizer
|
from calibre.ebooks.epub.fonts import Rationalizer
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre import walk
|
from calibre import walk, CurrentDir, to_unicode
|
||||||
from calibre import CurrentDir
|
|
||||||
|
|
||||||
content = functools.partial(os.path.join, u'content')
|
content = functools.partial(os.path.join, u'content')
|
||||||
|
|
||||||
@ -79,7 +78,7 @@ def check(opf_path, pretty_print):
|
|||||||
base = os.path.dirname(path)
|
base = os.path.dirname(path)
|
||||||
root = html.fromstring(open(content(path), 'rb').read())
|
root = html.fromstring(open(content(path), 'rb').read())
|
||||||
for element, attribute, link, pos in list(root.iterlinks()):
|
for element, attribute, link, pos in list(root.iterlinks()):
|
||||||
link = link.decode('utf-8')
|
link = to_unicode(link)
|
||||||
plink = Link(link, base)
|
plink = Link(link, base)
|
||||||
bad = False
|
bad = False
|
||||||
if plink.path is not None and not os.path.exists(plink.path):
|
if plink.path is not None and not os.path.exists(plink.path):
|
||||||
|
@ -138,7 +138,9 @@ class Splitter(LoggingInterface):
|
|||||||
|
|
||||||
for t in self.do_split(tree, split_point, before):
|
for t in self.do_split(tree, split_point, before):
|
||||||
r = t.getroot()
|
r = t.getroot()
|
||||||
size = len(tostring(r))
|
if self.is_page_empty(r):
|
||||||
|
continue
|
||||||
|
size = len(tostring(r))
|
||||||
if size <= self.opts.profile.flow_size:
|
if size <= self.opts.profile.flow_size:
|
||||||
self.trees.append(t)
|
self.trees.append(t)
|
||||||
#print tostring(t.getroot(), pretty_print=True)
|
#print tostring(t.getroot(), pretty_print=True)
|
||||||
@ -384,6 +386,9 @@ class Splitter(LoggingInterface):
|
|||||||
frag = None
|
frag = None
|
||||||
if len(href) > 1:
|
if len(href) > 1:
|
||||||
frag = href[1]
|
frag = href[1]
|
||||||
|
if frag not in self.anchor_map:
|
||||||
|
self.log_warning('\t\tUnable to re-map OPF link', href)
|
||||||
|
continue
|
||||||
new_file = self.anchor_map[frag]
|
new_file = self.anchor_map[frag]
|
||||||
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
|
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
|
||||||
|
|
||||||
@ -410,7 +415,11 @@ def fix_content_links(html_files, changes, opts):
|
|||||||
anchor = href[1] if len(href) > 1 else None
|
anchor = href[1] if len(href) > 1 else None
|
||||||
href = href[0]
|
href = href[0]
|
||||||
if href in split_files:
|
if href in split_files:
|
||||||
newf = anchor_maps[split_files.index(href)][anchor]
|
try:
|
||||||
|
newf = anchor_maps[split_files.index(href)][anchor]
|
||||||
|
except:
|
||||||
|
print '\t\tUnable to remap HTML link:', href, anchor
|
||||||
|
continue
|
||||||
frag = ('#'+anchor) if anchor else ''
|
frag = ('#'+anchor) if anchor else ''
|
||||||
a.set('href', newf+frag)
|
a.set('href', newf+frag)
|
||||||
changed = True
|
changed = True
|
||||||
@ -431,7 +440,10 @@ def fix_ncx(path, changes):
|
|||||||
anchor = href[1] if len(href) > 1 else None
|
anchor = href[1] if len(href) > 1 else None
|
||||||
href = href[0].split('/')[-1]
|
href = href[0].split('/')[-1]
|
||||||
if href in split_files:
|
if href in split_files:
|
||||||
newf = anchor_maps[split_files.index(href)][anchor]
|
try:
|
||||||
|
newf = anchor_maps[split_files.index(href)][anchor]
|
||||||
|
except:
|
||||||
|
print 'Unable to remap NCX link:', href, anchor
|
||||||
frag = ('#'+anchor) if anchor else ''
|
frag = ('#'+anchor) if anchor else ''
|
||||||
content.set('src', 'content/'+newf+frag)
|
content.set('src', 'content/'+newf+frag)
|
||||||
changed = True
|
changed = True
|
||||||
@ -470,4 +482,4 @@ def split(pathtoopf, opts, stylesheet_map):
|
|||||||
fix_ncx(item.get('href'), changes)
|
fix_ncx(item.get('href'), changes)
|
||||||
break
|
break
|
||||||
|
|
||||||
open(pathtoopf, 'wb').write(opf.render())
|
open(pathtoopf, 'wb').write(opf.render())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user