mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #5132 (CHM to EPUB for Nook fails)
This commit is contained in:
parent
f871846e18
commit
799c647c8d
@ -103,7 +103,8 @@ def _get_cover(soup, rdr):
|
|||||||
r[0] = img['src']
|
r[0] = img['src']
|
||||||
l = r.keys()
|
l = r.keys()
|
||||||
l.sort()
|
l.sort()
|
||||||
ans = r[l[0]]
|
if l:
|
||||||
|
ans = r[l[0]]
|
||||||
# this link comes from the internal html, which is in a subdir
|
# this link comes from the internal html, which is in a subdir
|
||||||
if ans is not None:
|
if ans is not None:
|
||||||
try:
|
try:
|
||||||
|
@ -7,7 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
|
|||||||
import os, re
|
import os, re
|
||||||
from mimetypes import guess_type as guess_mimetype
|
from mimetypes import guess_type as guess_mimetype
|
||||||
|
|
||||||
from BeautifulSoup import BeautifulSoup, NavigableString
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString
|
||||||
|
|
||||||
from calibre.utils.chm.chm import CHMFile
|
from calibre.utils.chm.chm import CHMFile
|
||||||
from calibre.utils.chm.chmlib import (
|
from calibre.utils.chm.chmlib import (
|
||||||
@ -17,6 +17,7 @@ from calibre.utils.chm.chmlib import (
|
|||||||
|
|
||||||
from calibre.utils.config import OptionParser
|
from calibre.utils.config import OptionParser
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
|
|
||||||
|
|
||||||
def match_string(s1, s2_already_lowered):
|
def match_string(s1, s2_already_lowered):
|
||||||
@ -145,8 +146,9 @@ class CHMReader(CHMFile):
|
|||||||
|
|
||||||
def _reformat(self, data):
|
def _reformat(self, data):
|
||||||
try:
|
try:
|
||||||
|
data = xml_to_unicode(data, strip_encoding_pats=True)[0]
|
||||||
soup = BeautifulSoup(data)
|
soup = BeautifulSoup(data)
|
||||||
except UnicodeEncodeError:
|
except ValueError:
|
||||||
# hit some strange encoding problems...
|
# hit some strange encoding problems...
|
||||||
print "Unable to parse html for cleaning, leaving it :("
|
print "Unable to parse html for cleaning, leaving it :("
|
||||||
return data
|
return data
|
||||||
@ -184,7 +186,7 @@ class CHMReader(CHMFile):
|
|||||||
# and some don't even have a src= ?!
|
# and some don't even have a src= ?!
|
||||||
pass
|
pass
|
||||||
# now give back some pretty html.
|
# now give back some pretty html.
|
||||||
return soup.prettify()
|
return soup.prettify('utf-8')
|
||||||
|
|
||||||
def Contents(self):
|
def Contents(self):
|
||||||
if self._contents is not None:
|
if self._contents is not None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user