diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index 61abf7acb4..b205dac345 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -17,7 +17,7 @@ from lxml.html import soupparser
from lxml.etree import XPath
get_text = XPath("//text()")
-from calibre import LoggingInterface, unicode_path
+from calibre import LoggingInterface, unicode_path, entity_to_unicode
from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS
from calibre.utils.config import Config, StringConfig
from calibre.ebooks.metadata import MetaInformation
@@ -250,7 +250,7 @@ def opf_traverse(opf_reader, verbose=0, encoding=None):
class PreProcessor(object):
- PREPROCESS = []
+ PREPROCESS = [(re.compile(r'&(\S+?);'), entity_to_unicode)]
# Fix pdftohtml markup
PDFTOHTML = [