mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #213
This commit is contained in:
parent
5a21453397
commit
ffd3ac2b21
@ -196,10 +196,18 @@ class HTMLConverter(object):
|
||||
raw = open(self.file_name, 'rb').read()
|
||||
if self.pdftohtml:
|
||||
nmassage.extend(HTMLConverter.PDFTOHTML)
|
||||
#raw = unicode(raw, 'utf8', 'replace')
|
||||
try:
|
||||
soup = BeautifulSoup(raw,
|
||||
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||
markupMassage=nmassage)
|
||||
except ConversionError, err:
|
||||
if 'Failed to coerce to unicode' in str(err):
|
||||
raw = unicode(raw, 'utf8', 'replace')
|
||||
soup = BeautifulSoup(raw,
|
||||
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||
markupMassage=nmassage)
|
||||
|
||||
if not self.baen and self.is_baen(soup):
|
||||
self.baen = True
|
||||
self.logger.info('Baen file detected. Re-parsing...')
|
||||
|
Loading…
x
Reference in New Issue
Block a user