diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 2babb9182b..97a3842f1b 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -7,6 +7,7 @@ import sys, os, re, logging, time, mimetypes, \
 __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
 from htmlentitydefs import name2codepoint
 from math import floor
+from functools import partial
 
 warnings.simplefilter('ignore', DeprecationWarning)
 
@@ -446,6 +447,12 @@ def entity_to_unicode(match, exceptions=[], encoding='cp1252',
         return '&'+ent+';'
 
 _ent_pat = re.compile(r'&(\S+?);')
+# entity_to_unicode preset for XML output: result_exceptions is bound to a
+# table pairing each XML-special character with its predefined entity.
+xml_entity_to_unicode = partial(entity_to_unicode, result_exceptions={
+    '"': '&quot;', "'": '&apos;',
+    '<': '&lt;', '>': '&gt;',
+    '&': '&amp;'})
 
 def prepare_string_for_xml(raw, attribute=False):
     raw = _ent_pat.sub(entity_to_unicode, raw)
diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py
index 25341b120a..3afa6ce1f4 100644
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -43,11 +43,8 @@ def strip_encoding_declarations(raw):
     return raw
 
 def substitute_entites(raw):
-    from calibre import entity_to_unicode
-    from functools import partial
-    f = partial(entity_to_unicode, exceptions=
-                ['amp', 'apos', 'quot', 'lt', 'gt'])
-    return ENTITY_PATTERN.sub(f, raw)
+    from calibre import xml_entity_to_unicode  # presumably leaves & < > " ' escaped; confirm
+    return ENTITY_PATTERN.sub(xml_entity_to_unicode, raw)
 
 _CHARSET_ALIASES = { "macintosh" : "mac-roman",
                         "x-sjis" : "shift-jis" }
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index 1b266740d7..15e6391812 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''
 
-import functools, shutil, os, re, struct, textwrap, cStringIO, sys
+import shutil, os, re, struct, textwrap, cStringIO, sys
 
 try:
     from PIL import Image as PILImage
@@ -14,7 +14,7 @@ except ImportError:
 
 from lxml import html, etree
 
-from calibre import entity_to_unicode, CurrentDir
+from calibre import xml_entity_to_unicode, CurrentDir, entity_to_unicode
 from calibre.utils.filenames import ascii_filename
 from calibre.utils.date import parse_date
 from calibre.ptempfile import TemporaryDirectory
@@ -302,14 +302,7 @@ class MobiReader(object):
 
         for pat in ENCODING_PATS:
             self.processed_html = pat.sub('', self.processed_html)
-        e2u = functools.partial(entity_to_unicode,
-            result_exceptions={
-                '<' : u'&lt;',
-                '>' : u'&gt;',
-                '&' : u'&amp;',
-                '"' : u'&quot;',
-                "'" : u'&apos;'})
-        self.processed_html = re.sub(r'&(\S+?);', e2u,
+        self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
             self.processed_html)
         self.extract_images(processed_records, output_dir)
         self.replace_page_breaks()
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index fc0a832528..f770622952 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -771,18 +771,12 @@ class Manifest(object):
                 % (self.id, self.href, self.media_type)
 
         def _parse_xml(self, data):
-            data = xml_to_unicode(data, strip_encoding_pats=True)[0]
+            data = xml_to_unicode(data, strip_encoding_pats=True,
+                    assume_utf8=True, resolve_entities=True)[0]  # request entity resolution before parsing
             if not data:
                 return None
             parser = etree.XMLParser(recover=True)
-            try:
-                return etree.fromstring(data, parser=parser)
-            except etree.XMLSyntaxError, err:
-                if getattr(err, 'code', 0) == 26 or str(err).startswith('Entity'):
-                    data = xml_to_unicode(data, strip_encoding_pats=True,
-                            resolve_entities=True)[0]
-                    return etree.fromstring(data)
-                raise
+            return etree.fromstring(data, parser=parser)  # parser was built with recover=True
 
         def _parse_xhtml(self, data):
             self.oeb.log.debug('Parsing', self.href, '...')
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index d8ba3e5b77..d62c6353ea 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -115,7 +115,7 @@ class Split(object):
         for i, x in enumerate(page_breaks):
             x.set('id', x.get('id', 'calibre_pb_%d'%i))
             id = x.get('id')
-            page_breaks_.append((XPath('//*[@id="%s"]'%id),
+            page_breaks_.append((XPath('//*[@id=%r]'%id),
                 x.get('pb_before', False)))
             page_break_ids.append(id)