diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index ea968edd53..f13f5ee2f5 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -436,6 +436,7 @@ class Parser(PreProcessor, LoggingInterface):
''' Create lxml ElementTree from HTML '''
self.log_info('\tParsing '+os.sep.join(self.htmlfile.path.split(os.sep)[-3:]))
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
+ src = src[src.find('<'):]
src = self.preprocess(src)
# lxml chokes on unicode input when it contains encoding declarations
for pat in ENCODING_PATS: