Fix an encoding error in html2epub and make splitting code more robust

This commit is contained in:
Kovid Goyal 2008-12-11 17:44:18 -08:00
parent 371c1bee5b
commit f976ad63be
2 changed files with 18 additions and 7 deletions

View File

@ -48,8 +48,7 @@ from calibre.ebooks.epub import initialize_container, PROFILES
from calibre.ebooks.epub.split import split from calibre.ebooks.epub.split import split
from calibre.ebooks.epub.fonts import Rationalizer from calibre.ebooks.epub.fonts import Rationalizer
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre import walk from calibre import walk, CurrentDir, to_unicode
from calibre import CurrentDir
content = functools.partial(os.path.join, u'content') content = functools.partial(os.path.join, u'content')
@ -79,7 +78,7 @@ def check(opf_path, pretty_print):
base = os.path.dirname(path) base = os.path.dirname(path)
root = html.fromstring(open(content(path), 'rb').read()) root = html.fromstring(open(content(path), 'rb').read())
for element, attribute, link, pos in list(root.iterlinks()): for element, attribute, link, pos in list(root.iterlinks()):
link = link.decode('utf-8') link = to_unicode(link)
plink = Link(link, base) plink = Link(link, base)
bad = False bad = False
if plink.path is not None and not os.path.exists(plink.path): if plink.path is not None and not os.path.exists(plink.path):

View File

@ -138,7 +138,9 @@ class Splitter(LoggingInterface):
for t in self.do_split(tree, split_point, before): for t in self.do_split(tree, split_point, before):
r = t.getroot() r = t.getroot()
size = len(tostring(r)) if self.is_page_empty(r):
continue
size = len(tostring(r))
if size <= self.opts.profile.flow_size: if size <= self.opts.profile.flow_size:
self.trees.append(t) self.trees.append(t)
#print tostring(t.getroot(), pretty_print=True) #print tostring(t.getroot(), pretty_print=True)
@ -384,6 +386,9 @@ class Splitter(LoggingInterface):
frag = None frag = None
if len(href) > 1: if len(href) > 1:
frag = href[1] frag = href[1]
if frag not in self.anchor_map:
self.log_warning('\t\tUnable to re-map OPF link', href)
continue
new_file = self.anchor_map[frag] new_file = self.anchor_map[frag]
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag))) ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
@ -410,7 +415,11 @@ def fix_content_links(html_files, changes, opts):
anchor = href[1] if len(href) > 1 else None anchor = href[1] if len(href) > 1 else None
href = href[0] href = href[0]
if href in split_files: if href in split_files:
newf = anchor_maps[split_files.index(href)][anchor] try:
newf = anchor_maps[split_files.index(href)][anchor]
except:
print '\t\tUnable to remap HTML link:', href, anchor
continue
frag = ('#'+anchor) if anchor else '' frag = ('#'+anchor) if anchor else ''
a.set('href', newf+frag) a.set('href', newf+frag)
changed = True changed = True
@ -431,7 +440,10 @@ def fix_ncx(path, changes):
anchor = href[1] if len(href) > 1 else None anchor = href[1] if len(href) > 1 else None
href = href[0].split('/')[-1] href = href[0].split('/')[-1]
if href in split_files: if href in split_files:
newf = anchor_maps[split_files.index(href)][anchor] try:
newf = anchor_maps[split_files.index(href)][anchor]
except:
print 'Unable to remap NCX link:', href, anchor
frag = ('#'+anchor) if anchor else '' frag = ('#'+anchor) if anchor else ''
content.set('src', 'content/'+newf+frag) content.set('src', 'content/'+newf+frag)
changed = True changed = True
@ -470,4 +482,4 @@ def split(pathtoopf, opts, stylesheet_map):
fix_ncx(item.get('href'), changes) fix_ncx(item.get('href'), changes)
break break
open(pathtoopf, 'wb').write(opf.render()) open(pathtoopf, 'wb').write(opf.render())