mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Fix #355
This commit is contained in:
parent
67c0062b03
commit
a16d26a116
@ -313,8 +313,10 @@ def Book(options, logger, font_delta=0, header=None,
|
|||||||
raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
|
raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
|
||||||
return book, fonts
|
return book, fonts
|
||||||
|
|
||||||
def entity_to_unicode(match):
|
def entity_to_unicode(match, exceptions=[]):
|
||||||
ent = match.group(1)
|
ent = match.group(1)
|
||||||
|
if ent in exceptions:
|
||||||
|
return '&'+ent+';'
|
||||||
if ent.startswith(u'#x'):
|
if ent.startswith(u'#x'):
|
||||||
return unichr(int(ent[2:], 16))
|
return unichr(int(ent[2:], 16))
|
||||||
if ent.startswith(u'#'):
|
if ent.startswith(u'#'):
|
||||||
@ -322,4 +324,4 @@ def entity_to_unicode(match):
|
|||||||
try:
|
try:
|
||||||
return unichr(name2codepoint[ent])
|
return unichr(name2codepoint[ent])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return ent
|
return '&'+ent+';'
|
||||||
|
@ -25,6 +25,8 @@ from collections import deque
|
|||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
from math import ceil, floor
|
from math import ceil, floor
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image as PILImage
|
from PIL import Image as PILImage
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -63,7 +65,6 @@ def munge_paths(basepath, url):
|
|||||||
path = os.path.join(os.path.dirname(basepath), path)
|
path = os.path.join(os.path.dirname(basepath), path)
|
||||||
return os.path.normpath(path), fragment
|
return os.path.normpath(path), fragment
|
||||||
|
|
||||||
|
|
||||||
class HTMLConverter(object):
|
class HTMLConverter(object):
|
||||||
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
||||||
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
|
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
|
||||||
@ -84,7 +85,7 @@ class HTMLConverter(object):
|
|||||||
# Workaround bug in BeautifulSoup handling
|
# Workaround bug in BeautifulSoup handling
|
||||||
(re.compile(u' | | |\xa0', re.IGNORECASE), lambda match : u'\uffff'),
|
(re.compile(u' | | |\xa0', re.IGNORECASE), lambda match : u'\uffff'),
|
||||||
# Replace entities
|
# Replace entities
|
||||||
(re.compile(ur'&(\S+?);'), entity_to_unicode),
|
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode, exceptions=['lt', 'gt'])),
|
||||||
]
|
]
|
||||||
# Fix Baen markup
|
# Fix Baen markup
|
||||||
BAEN = [
|
BAEN = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user