mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion: Fix heuristics processing incorrectly removing some <br> tags. Fixes #1205637 [break line/italic - wrong conversion](https://bugs.launchpad.net/calibre/+bug/1205637)
This commit is contained in:
parent
6ae397126b
commit
ade57f1447
@@ -436,12 +436,12 @@ class HeuristicProcessor(object):
|
|||||||
# Re-open self closing paragraph tags
|
# Re-open self closing paragraph tags
|
||||||
html = re.sub('<p[^>/]*/>', '<p> </p>', html)
|
html = re.sub('<p[^>/]*/>', '<p> </p>', html)
|
||||||
# Get rid of empty span, bold, font, em, & italics tags
|
# Get rid of empty span, bold, font, em, & italics tags
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
fmt_tags = 'font|[ibu]|em|strong'
|
||||||
html = re.sub(
|
open_fmt_pat, close_fmt_pat = r'<(?:{})(?:\s[^>]*)?>'.format(fmt_tags), '</(?:{})>'.format(fmt_tags)
|
||||||
r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
for i in range(2):
|
||||||
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
|
||||||
html = re.sub(
|
html = re.sub(
|
||||||
r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*</(font|[ibu]|em|strong)>\s*){0,2}\s*</(font|[ibu]|em|strong)>", " ", html)
|
r"\s*{open}\s*({open}\s*{close}\s*){{0,2}}\s*{close}".format(open=open_fmt_pat, close=close_fmt_pat) , " ", html)
|
||||||
# delete surrounding divs from empty paragraphs
|
# delete surrounding divs from empty paragraphs
|
||||||
html = re.sub('<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html)
|
html = re.sub('<div[^>]*>\s*<p[^>]*>\s*</p>\s*</div>', '<p> </p>', html)
|
||||||
# Empty heading tags
|
# Empty heading tags
|
||||||
|
Loading…
x
Reference in New Issue
Block a user