mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #213
This commit is contained in:
parent
5a21453397
commit
ffd3ac2b21
@@ -196,10 +196,18 @@ class HTMLConverter(object):
|
|||||||
raw = open(self.file_name, 'rb').read()
|
raw = open(self.file_name, 'rb').read()
|
||||||
if self.pdftohtml:
|
if self.pdftohtml:
|
||||||
nmassage.extend(HTMLConverter.PDFTOHTML)
|
nmassage.extend(HTMLConverter.PDFTOHTML)
|
||||||
raw = unicode(raw, 'utf8', 'replace')
|
#raw = unicode(raw, 'utf8', 'replace')
|
||||||
soup = BeautifulSoup(raw,
|
try:
|
||||||
|
soup = BeautifulSoup(raw,
|
||||||
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||||
markupMassage=nmassage)
|
markupMassage=nmassage)
|
||||||
|
except ConversionError, err:
|
||||||
|
if 'Failed to coerce to unicode' in str(err):
|
||||||
|
raw = unicode(raw, 'utf8', 'replace')
|
||||||
|
soup = BeautifulSoup(raw,
|
||||||
|
convertEntities=BeautifulSoup.HTML_ENTITIES,
|
||||||
|
markupMassage=nmassage)
|
||||||
|
|
||||||
if not self.baen and self.is_baen(soup):
|
if not self.baen and self.is_baen(soup):
|
||||||
self.baen = True
|
self.baen = True
|
||||||
self.logger.info('Baen file detected. Re-parsing...')
|
self.logger.info('Baen file detected. Re-parsing...')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user