Fix #1792 (non-standard line-endings for "\n" in .epubs)

This commit is contained in:
Kovid Goyal 2009-02-07 13:53:40 -08:00
parent 1145b768dc
commit 4df669b37c

View File

@@ -467,7 +467,7 @@ class Parser(PreProcessor, LoggingInterface):
if self.htmlfile.is_binary:
raise ValueError('Not a valid HTML file: '+self.htmlfile.path)
src = open(self.htmlfile.path, 'rb').read().decode(self.htmlfile.encoding, 'replace').strip()
-src = src.replace('\x00', '')
+src = src.replace('\x00', '').replace('\r', ' ')
src = self.preprocess(src)
# lxml chokes on unicode input when it contains encoding declarations
for pat in ENCODING_PATS: