diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
index dd52388763..66a3cebbae 100644
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -48,8 +48,7 @@ from calibre.ebooks.epub import initialize_container, PROFILES
from calibre.ebooks.epub.split import split
from calibre.ebooks.epub.fonts import Rationalizer
from calibre.constants import preferred_encoding
-from calibre import walk
-from calibre import CurrentDir
+from calibre import walk, CurrentDir, to_unicode
content = functools.partial(os.path.join, u'content')
@@ -79,7 +78,7 @@ def check(opf_path, pretty_print):
base = os.path.dirname(path)
root = html.fromstring(open(content(path), 'rb').read())
for element, attribute, link, pos in list(root.iterlinks()):
- link = link.decode('utf-8')
+ link = to_unicode(link)
plink = Link(link, base)
bad = False
if plink.path is not None and not os.path.exists(plink.path):
diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py
index 76cf6b427b..ed2a78826f 100644
--- a/src/calibre/ebooks/epub/split.py
+++ b/src/calibre/ebooks/epub/split.py
@@ -138,7 +138,9 @@ class Splitter(LoggingInterface):
for t in self.do_split(tree, split_point, before):
r = t.getroot()
- size = len(tostring(r))
+ if self.is_page_empty(r):
+ continue
+ size = len(tostring(r))
if size <= self.opts.profile.flow_size:
self.trees.append(t)
#print tostring(t.getroot(), pretty_print=True)
@@ -384,6 +386,9 @@ class Splitter(LoggingInterface):
frag = None
if len(href) > 1:
frag = href[1]
+ if frag not in self.anchor_map:
+ self.log_warning('\t\tUnable to re-map OPF link', href)
+ continue
new_file = self.anchor_map[frag]
ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
@@ -410,7 +415,11 @@ def fix_content_links(html_files, changes, opts):
anchor = href[1] if len(href) > 1 else None
href = href[0]
if href in split_files:
- newf = anchor_maps[split_files.index(href)][anchor]
+ try:
+ newf = anchor_maps[split_files.index(href)][anchor]
+ except:
+ print '\t\tUnable to remap HTML link:', href, anchor
+ continue
frag = ('#'+anchor) if anchor else ''
a.set('href', newf+frag)
changed = True
@@ -431,7 +440,10 @@ def fix_ncx(path, changes):
anchor = href[1] if len(href) > 1 else None
href = href[0].split('/')[-1]
if href in split_files:
- newf = anchor_maps[split_files.index(href)][anchor]
+ try:
+ newf = anchor_maps[split_files.index(href)][anchor]
+ except:
+ print 'Unable to remap NCX link:', href, anchor
frag = ('#'+anchor) if anchor else ''
content.set('src', 'content/'+newf+frag)
changed = True
@@ -470,4 +482,4 @@ def split(pathtoopf, opts, stylesheet_map):
fix_ncx(item.get('href'), changes)
break
- open(pathtoopf, 'wb').write(opf.render())
\ No newline at end of file
+ open(pathtoopf, 'wb').write(opf.render())