allow user applied styles to <hr> tags, updated comments/docs

2025-07-09 03:04:10 -04:00 · 2011-02-01 21:21:36 +08:00 · 2011-02-01 21:21:36 +08:00 · 48f202c7fd
commit 48f202c7fd
parent d75e17e6b4
2 changed files with 26 additions and 6 deletions
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@ -483,10 +483,23 @@ class HeuristicProcessor(object):
        return html

    def markup_user_break(self, replacement_break):
+        '''
+        Takes the string a user supplies and wraps it in markup that will be centered
+        with appropriate margins.  <hr> and <img> tags are allowed.  If the user specifies
+        a style with a width attribute in the <hr> tag, then matching margins are applied
+        to the wrapping div, because many ebook devices don't support margin:auto.
+        All other html is converted to text.
+        '''
        hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em">'
        if re.findall('(<|>)', replacement_break):
            if re.match('^<hr', replacement_break):
-                scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
+                if replacement_break.find('width') != -1:
+                   width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
+                   divpercent = (100 - width) / 2
+                   hr_open = re.sub('45', str(divpercent), hr_open)
+                   scene_break = hr_open+replacement_break+'</div>'
+                else:
+                    scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
            elif re.match('^<img', replacement_break):
                scene_break = self.scene_break_open+replacement_break+'</p>'
            else:
@ -622,9 +635,11 @@ class HeuristicProcessor(object):
            blanks_count = len(self.any_multi_blank.findall(html))
            if blanks_count >= 1:
                html = self.merge_blanks(html, blanks_count)
-            # Center separator lines, use a bit larger margin in this case
            scene_break_regex = self.line_open+'(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
            scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
+            # If the user has enabled scene break replacement, then either soft breaks
+            # or 'hard' scene breaks are replaced, depending on which is in use.
+            # Otherwise separator lines are centered, using a slightly larger margin.
            replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
            if replacement_break is not None:
                replacement_break = self.markup_user_break(replacement_break)
--- a/src/calibre/manual/conversion.rst
+++ b/src/calibre/manual/conversion.rst
@ -311,10 +311,15 @@ remove all non-breaking-space entities, or may include false positive matches re

 :guilabel:`Ensure scene breaks are consistently formatted`
    With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned.  
-    It also attempts to detect scene breaks defined by white space and replace them with a horizontal rule 15% of the
-    page width.  Some readers may find this desirable as these 'soft' scene breaks often become page breaks on readers, and 
-    thus become difficult to distinguish.
+    'Soft' scene break markers, i.e. scene breaks only defined by extra white space, are styled to ensure that they 
+    will not be displayed in conjunction with page breaks.

+:guilabel:`Replace scene breaks`
+    If this option is configured then |app| will replace scene break markers it finds with the replacement text specified by the
+    user. In general you should avoid using HTML tags; |app| will discard any tags and use pre-defined markup.  <hr />
+    tags, i.e. horizontal rules, are an exception.  These can optionally be specified with styles. If you choose to add your own
+    style, be sure to include the 'width' setting; otherwise the style information will be discarded.
+ 
 :guilabel:`Remove unnecessary hyphens`
    |app| will analyze all hyphenated content in the document when this option is enabled.  The document itself is used
    as a dictionary for analysis.  This allows |app| to accurately remove hyphens for any words in the document in any language, 
@ -628,7 +633,7 @@ between 0 and 1. The default is 0.45, just under the median line length. Lower t
 text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`.

 Also, they often have headers and footers as part of the document that will become included with the text.
-Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not
+Use the Search and Replace panel to remove headers and footers to mitigate this issue. If the headers and footers are not
 removed from the text it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read 
 :ref:`regexptutorial`.