diff --git a/src/calibre/ebooks/oeb/iterator/book.py b/src/calibre/ebooks/oeb/iterator/book.py
index 28dd37a88e..4ebd543aab 100644
--- a/src/calibre/ebooks/oeb/iterator/book.py
+++ b/src/calibre/ebooks/oeb/iterator/book.py
@@ -125,7 +125,7 @@ class EbookIterator(BookmarksMixin):
                 [i for i in self.opf.spine if not i.is_linear]
         self.spine = []
         Spiny = partial(SpineItem, read_anchor_map=read_anchor_map,
-                run_char_count=run_char_count)
+                run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
         is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
         for i in ordered:
             spath = i.path
diff --git a/src/calibre/ebooks/oeb/iterator/spine.py b/src/calibre/ebooks/oeb/iterator/spine.py
index 7b404d4f74..86ab7bcf78 100644
--- a/src/calibre/ebooks/oeb/iterator/spine.py
+++ b/src/calibre/ebooks/oeb/iterator/spine.py
@@ -36,14 +36,30 @@ def anchor_map(html):
 class SpineItem(unicode):
 
     def __new__(cls, path, mime_type=None, read_anchor_map=True,
-            run_char_count=True):
+            run_char_count=True, from_epub=False):
         ppath = path.partition('#')[0]
         if not os.path.exists(path) and os.path.exists(ppath):
             path = ppath
         obj = super(SpineItem, cls).__new__(cls, path)
         with open(path, 'rb') as f:
             raw = f.read()
-        raw, obj.encoding = xml_to_unicode(raw)
+        if from_epub:
+            # According to the spec, HTML in EPUB must be encoded in utf-8 or
+            # utf-16. Furthermore, there exist epub files produced by the usual
+            # incompetents that have utf-8 encoded HTML files that contain
+            # incorrect encoding declarations. See
+            # http://www.idpf.org/epub/20/spec/OPS_2.0.1_draft.htm#Section1.4.1.2
+            # http://www.idpf.org/epub/30/spec/epub30-publications.html#confreq-xml-enc
+            # https://bugs.launchpad.net/bugs/1188843
+            # So we first decode with utf-8 and only if that fails we try xml_to_unicode. This
+            # is the same algorithm as that used by the conversion pipeline (modulo
+            # some BOM based detection). Sigh.
+            try:
+                raw, obj.encoding = raw.decode('utf-8'), 'utf-8'
+            except UnicodeDecodeError:
+                raw, obj.encoding = xml_to_unicode(raw)
+        else:
+            raw, obj.encoding = xml_to_unicode(raw)
         obj.character_count = character_count(raw) if run_char_count else 10000
         obj.anchor_map = anchor_map(raw) if read_anchor_map else {}
         obj.start_page = -1
@@ -100,22 +116,24 @@ class IndexEntry(object):
             self.end_anchor = None
 
 
 def create_indexing_data(spine, toc):
-    if not toc: return
+    if not toc:
+        return
     f = partial(IndexEntry, spine)
     index_entries = list(map(f,
         (t for t in toc.flat() if t is not toc),
         (i-1 for i, t in enumerate(toc.flat()) if t is not toc)
         ))
     index_entries.sort(key=attrgetter('sort_key'))
-    [ i.find_end(index_entries) for i in index_entries ]
+    [i.find_end(index_entries) for i in index_entries]
 
     ie = namedtuple('IndexEntry', 'entry start_anchor end_anchor')
 
     for spine_pos, spine_item in enumerate(spine):
         for i in index_entries:
             if i.end_spine_pos < spine_pos or i.spine_pos > spine_pos:
-                continue # Does not touch this file
+                continue  # Does not touch this file
             start = i.anchor if i.spine_pos == spine_pos else None
             end = i.end_anchor if i.spine_pos == spine_pos else None
             spine_item.index_entries.append(ie(i, start, end))
+
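
For readers who want the new decoding behaviour in isolation, the sketch below restates the from_epub branch added to SpineItem.__new__ as a standalone function. It is not part of the diff; the helper name decode_epub_html is hypothetical, but the fallback order and the xml_to_unicode helper from calibre.ebooks.chardet match what the patch uses.

# Illustrative sketch only, not part of the diff above. decode_epub_html is a
# hypothetical name; the logic mirrors the from_epub branch in SpineItem.__new__.
from calibre.ebooks.chardet import xml_to_unicode

def decode_epub_html(raw, from_epub=True):
    # EPUB requires HTML to be utf-8 or utf-16, so try a strict utf-8 decode
    # first. Only if that fails fall back to xml_to_unicode(), which trusts
    # in-document encoding declarations that are sometimes wrong in the wild.
    if from_epub:
        try:
            return raw.decode('utf-8'), 'utf-8'
        except UnicodeDecodeError:
            pass
    return xml_to_unicode(raw)  # returns (text, encoding), as used in the diff

Trying strict utf-8 first means a spec-conforming EPUB never pays for encoding sniffing, and files with bogus declarations (as in Launchpad bug 1188843) are still decoded correctly; only genuinely non-utf-8 content falls through to xml_to_unicode.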