mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Fix #355
This commit is contained in:
parent
67c0062b03
commit
a16d26a116
@ -313,8 +313,10 @@ def Book(options, logger, font_delta=0, header=None,
|
||||
raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
|
||||
return book, fonts
|
||||
|
||||
def entity_to_unicode(match):
|
||||
def entity_to_unicode(match, exceptions=[]):
|
||||
ent = match.group(1)
|
||||
if ent in exceptions:
|
||||
return '&'+ent+';'
|
||||
if ent.startswith(u'#x'):
|
||||
return unichr(int(ent[2:], 16))
|
||||
if ent.startswith(u'#'):
|
||||
@ -322,4 +324,4 @@ def entity_to_unicode(match):
|
||||
try:
|
||||
return unichr(name2codepoint[ent])
|
||||
except KeyError:
|
||||
return ent
|
||||
return '&'+ent+';'
|
||||
|
@ -25,6 +25,8 @@ from collections import deque
|
||||
from urllib import unquote
|
||||
from urlparse import urlparse
|
||||
from math import ceil, floor
|
||||
from functools import partial
|
||||
|
||||
try:
|
||||
from PIL import Image as PILImage
|
||||
except ImportError:
|
||||
@ -63,7 +65,6 @@ def munge_paths(basepath, url):
|
||||
path = os.path.join(os.path.dirname(basepath), path)
|
||||
return os.path.normpath(path), fragment
|
||||
|
||||
|
||||
class HTMLConverter(object):
|
||||
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
||||
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
|
||||
@ -84,7 +85,7 @@ class HTMLConverter(object):
|
||||
# Workaround bug in BeautifulSoup &nbsp; handling
|
||||
(re.compile(u'&nbsp;|&#160;|&#xa0;|\xa0', re.IGNORECASE), lambda match : u'\uffff'),
|
||||
# Replace entities
|
||||
(re.compile(ur'&(\S+?);'), entity_to_unicode),
|
||||
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode, exceptions=['lt', 'gt'])),
|
||||
]
|
||||
# Fix Baen markup
|
||||
BAEN = [
|
||||
|
Loading…
x
Reference in New Issue
Block a user