insert horizontal rules for softbreaks when option is enabled

This commit is contained in:
ldolse 2011-01-17 19:00:14 +08:00
parent 2ebf94812e
commit 05730e1886
2 changed files with 14 additions and 6 deletions

View File

@ -23,6 +23,7 @@ class HeuristicProcessor(object):
self.min_chapters = 1 self.min_chapters = 1
self.chapters_no_title = 0 self.chapters_no_title = 0
self.chapters_with_title = 0 self.chapters_with_title = 0
self.blanks_deleted = False
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL) self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sid=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE) self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sid=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE) self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
@ -422,6 +423,7 @@ class HeuristicProcessor(object):
# blank paragraphs then delete blank lines to clean up spacing # blank paragraphs then delete blank lines to clean up spacing
if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False): if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
self.log.debug("deleting blank lines") self.log.debug("deleting blank lines")
self.blanks_deleted = True
html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html) html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
html = self.blankreg.sub('', html) html = self.blankreg.sub('', html)
@ -479,6 +481,9 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'format_scene_breaks', False): if getattr(self.extra_opts, 'format_scene_breaks', False):
# Center separator lines # Center separator lines
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html) html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
if not self.blanks_deleted:
html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
html = re.sub('<p\s+id="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
if self.deleted_nbsps: if self.deleted_nbsps:
# put back non-breaking spaces in empty paragraphs to preserve original formatting # put back non-breaking spaces in empty paragraphs to preserve original formatting

View File

@ -530,17 +530,18 @@ more blank lines are a paragraph boundary::
TXT input supports a number of options to differentiate how paragraphs are detected. TXT input supports a number of options to differentiate how paragraphs are detected.
:guilabel:`Auto` :guilabel:`Paragraph Style: Auto`
Analyzes the text file and attempts to determine how paragraphs are defined. Analyzes the text file and attempts to automatically determine how paragraphs are defined. This
option will generally work fine, if you achieve undesirable results try one of the manual options.
:guilabel:`Treat each line as a paragraph` :guilabel:`Paragraph Style: Single`
Assumes that every line is a paragraph:: Assumes that every line is a paragraph::
This is the first. This is the first.
This is the second. This is the second.
This is the third. This is the third.
:guilabel:`Assume print formatting` :guilabel:`Paragraph Style: Print`
Assumes that every paragraph starts with an indent (either a tab or 2+ spaces). Paragraphs end when Assumes that every paragraph starts with an indent (either a tab or 2+ spaces). Paragraphs end when
the next line that starts with an indent is reached:: the next line that starts with an indent is reached::
@ -551,11 +552,13 @@ TXT input supports a number of options to differentiate how paragraphs are detec
This is the This is the
third. third.
:guilabel:`Unformatted` :guilabel:`Paragraph Style: Unformatted`
Assumes that the document has no formatting, but does use hard line breaks. Punctuation Assumes that the document has no formatting, but does use hard line breaks. Punctuation
and median line length are used to attempt to re-create paragraphs. and median line length are used to attempt to re-create paragraphs.
:guilabel:`Process using Textile` :guilabel:`Formatting Style: Auto`
:guilabel:`Formatting Style: Heuristic`
:guilabel:`Process using markdown` :guilabel:`Process using markdown`
|app| also supports running TXT input though a transformation preprocessor known as markdown. Markdown |app| also supports running TXT input though a transformation preprocessor known as markdown. Markdown