diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index dd52388763..66a3cebbae 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -48,8 +48,7 @@ from calibre.ebooks.epub import initialize_container, PROFILES from calibre.ebooks.epub.split import split from calibre.ebooks.epub.fonts import Rationalizer from calibre.constants import preferred_encoding -from calibre import walk -from calibre import CurrentDir +from calibre import walk, CurrentDir, to_unicode content = functools.partial(os.path.join, u'content') @@ -79,7 +78,7 @@ def check(opf_path, pretty_print): base = os.path.dirname(path) root = html.fromstring(open(content(path), 'rb').read()) for element, attribute, link, pos in list(root.iterlinks()): - link = link.decode('utf-8') + link = to_unicode(link) plink = Link(link, base) bad = False if plink.path is not None and not os.path.exists(plink.path): diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py index 76cf6b427b..ed2a78826f 100644 --- a/src/calibre/ebooks/epub/split.py +++ b/src/calibre/ebooks/epub/split.py @@ -138,7 +138,9 @@ class Splitter(LoggingInterface): for t in self.do_split(tree, split_point, before): r = t.getroot() - size = len(tostring(r)) + if self.is_page_empty(r): + continue + size = len(tostring(r)) if size <= self.opts.profile.flow_size: self.trees.append(t) #print tostring(t.getroot(), pretty_print=True) @@ -384,6 +386,9 @@ class Splitter(LoggingInterface): frag = None if len(href) > 1: frag = href[1] + if frag not in self.anchor_map: + self.log_warning('\t\tUnable to re-map OPF link', href) + continue new_file = self.anchor_map[frag] ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag))) @@ -410,7 +415,11 @@ def fix_content_links(html_files, changes, opts): anchor = href[1] if len(href) > 1 else None href = href[0] if href in split_files: - newf = anchor_maps[split_files.index(href)][anchor] + try: + newf = anchor_maps[split_files.index(href)][anchor] + except: + print '\t\tUnable to remap HTML link:', href, anchor + continue frag = ('#'+anchor) if anchor else '' a.set('href', newf+frag) changed = True @@ -431,7 +440,10 @@ def fix_ncx(path, changes): anchor = href[1] if len(href) > 1 else None href = href[0].split('/')[-1] if href in split_files: - newf = anchor_maps[split_files.index(href)][anchor] + try: + newf = anchor_maps[split_files.index(href)][anchor] + except: + print 'Unable to remap NCX link:', href, anchor frag = ('#'+anchor) if anchor else '' content.set('src', 'content/'+newf+frag) changed = True @@ -470,4 +482,4 @@ def split(pathtoopf, opts, stylesheet_map): fix_ncx(item.get('href'), changes) break - open(pathtoopf, 'wb').write(opf.render()) \ No newline at end of file + open(pathtoopf, 'wb').write(opf.render())