mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #545
This commit is contained in:
parent
70b2d0bec7
commit
190466ed59
@ -79,6 +79,11 @@ def strip_style_comments(match):
|
|||||||
src = src[:lindex] + src[rindex+2:]
|
src = src[:lindex] + src[rindex+2:]
|
||||||
return src
|
return src
|
||||||
|
|
||||||
|
def tag_regex(tagname):
    """Return regex source strings matching the open and close tags of tagname.

    The result is a dict with two keys:

    ``open``
        A non-capturing group that matches either ``<tagname ...>`` (the tag
        name followed by whitespace and any run of characters other than
        ``<`` and ``>``) or a bare ``<tagname>``. Whitespace is tolerated
        after ``<`` and before ``>``.
    ``close``
        A pattern that matches ``</tagname>``, tolerating whitespace after
        ``</`` and before ``>``.

    Neither pattern introduces a capturing group, so both can be spliced into
    a larger expression (e.g. ``'%(open)s(.*?)%(close)s' % tag_regex('a')``)
    without shifting that expression's group numbers.

    tagname is interpolated verbatim (it is not regex-escaped), and the
    strings are returned uncompiled; case sensitivity is decided by whichever
    flags the caller compiles them with.
    """
    names = dict(t=tagname)
    # Requiring whitespace or '>' straight after the name keeps a pattern for
    # 'a' from also matching longer tag names that merely start with 'a'.
    open_pat = r'(?:<\s*%(t)s\s+[^<>]*?>|<\s*%(t)s\s*>)' % names
    close_pat = r'</\s*%(t)s\s*>' % names
    return dict(open=open_pat, close=close_pat)
|
||||||
|
|
||||||
class HTMLConverter(object):
|
class HTMLConverter(object):
|
||||||
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
|
||||||
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
|
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
|
||||||
@ -94,8 +99,8 @@ class HTMLConverter(object):
|
|||||||
(re.compile(r"<\s*style.*?>(.*?)<\/\s*style\s*>", re.DOTALL|re.IGNORECASE),
|
(re.compile(r"<\s*style.*?>(.*?)<\/\s*style\s*>", re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: match.group().replace('<!--', '').replace('-->', '')),
|
lambda match: match.group().replace('<!--', '').replace('-->', '')),
|
||||||
# remove <p> tags from within <a> tags
|
# remove <p> tags from within <a> tags
|
||||||
(re.compile(r'<a.*?>(.*?)</a\s*>', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'%(open)s(.*?)%(close)s'%tag_regex('a'), re.DOTALL|re.IGNORECASE),
|
||||||
lambda match: re.compile(r'<\s*?p.*?>', re.IGNORECASE).sub('', match.group())),
|
lambda match: re.compile(r'%(open)s|%(close)s'%tag_regex('p'), re.IGNORECASE).sub('', match.group())),
|
||||||
|
|
||||||
# Replace common line break patterns with line breaks
|
# Replace common line break patterns with line breaks
|
||||||
(re.compile(r'<p>( |\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
|
(re.compile(r'<p>( |\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user