mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1091631 (Private bug)
This commit is contained in:
parent
0797811191
commit
925afbf8c1
@ -357,21 +357,24 @@ class HeuristicProcessor(object):
|
|||||||
line_opening = "<(p|div)[^>]*>\s*(?P<style_open><(span|[iub])[^>]*>)?\s*"
|
line_opening = "<(p|div)[^>]*>\s*(?P<style_open><(span|[iub])[^>]*>)?\s*"
|
||||||
txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
|
txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
|
||||||
|
|
||||||
unwrap_regex = lookahead+line_ending+blanklines+line_opening
|
|
||||||
em_en_unwrap_regex = em_en_lookahead+line_ending+blanklines+line_opening
|
|
||||||
shy_unwrap_regex = soft_hyphen+line_ending+blanklines+line_opening
|
|
||||||
|
|
||||||
if format == 'txt':
|
if format == 'txt':
|
||||||
unwrap_regex = lookahead+txt_line_wrap
|
unwrap_regex = lookahead+txt_line_wrap
|
||||||
em_en_unwrap_regex = em_en_lookahead+txt_line_wrap
|
em_en_unwrap_regex = em_en_lookahead+txt_line_wrap
|
||||||
shy_unwrap_regex = soft_hyphen+txt_line_wrap
|
shy_unwrap_regex = soft_hyphen+txt_line_wrap
|
||||||
content = unwrap_regex.sub(' ', content)
|
|
||||||
content = em_en_unwrap_regex.sub('', content)
|
|
||||||
content = shy_unwrap_regex.sub('', content)
|
|
||||||
else:
|
else:
|
||||||
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
|
unwrap_regex = lookahead+line_ending+blanklines+line_opening
|
||||||
em_en_unwrap = re.compile(u"%s" % em_en_unwrap_regex, re.UNICODE)
|
em_en_unwrap_regex = em_en_lookahead+line_ending+blanklines+line_opening
|
||||||
shy_unwrap = re.compile(u"%s" % shy_unwrap_regex, re.UNICODE)
|
shy_unwrap_regex = soft_hyphen+line_ending+blanklines+line_opening
|
||||||
|
|
||||||
|
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
|
||||||
|
em_en_unwrap = re.compile(u"%s" % em_en_unwrap_regex, re.UNICODE)
|
||||||
|
shy_unwrap = re.compile(u"%s" % shy_unwrap_regex, re.UNICODE)
|
||||||
|
|
||||||
|
if format == 'txt':
|
||||||
|
content = unwrap.sub(' ', content)
|
||||||
|
content = em_en_unwrap.sub('', content)
|
||||||
|
content = shy_unwrap.sub('', content)
|
||||||
|
else:
|
||||||
content = unwrap.sub(style_unwrap, content)
|
content = unwrap.sub(style_unwrap, content)
|
||||||
content = em_en_unwrap.sub(style_unwrap, content)
|
content = em_en_unwrap.sub(style_unwrap, content)
|
||||||
content = shy_unwrap.sub(style_unwrap, content)
|
content = shy_unwrap.sub(style_unwrap, content)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user