mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
allow user applied styles to <hr> tags, updated comments/docs
This commit is contained in:
parent
d75e17e6b4
commit
48f202c7fd
@ -483,10 +483,23 @@ class HeuristicProcessor(object):
|
||||
return html
|
||||
|
||||
def markup_user_break(self, replacement_break):
|
||||
'''
|
||||
Takes string a user supplies and wraps it in markup that will be centered with
|
||||
appropriate margins. <hr> and <img> tags are allowed. If the user specifies
|
||||
a style with width attributes in the <hr> tag then the appropriate margins are
|
||||
applied to wrapping divs. This is because many ebook devices don't support margin:auto
|
||||
All other html is converted to text.
|
||||
'''
|
||||
hr_open = '<div id="scenebreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em">'
|
||||
if re.findall('(<|>)', replacement_break):
|
||||
if re.match('^<hr', replacement_break):
|
||||
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
|
||||
if replacement_break.find('width') != -1:
|
||||
width = int(re.sub('.*?width(:|=)(?P<wnum>\d+).*', '\g<wnum>', replacement_break))
|
||||
divpercent = (100 - width) / 2
|
||||
hr_open = re.sub('45', str(divpercent), hr_open)
|
||||
scene_break = hr_open+replacement_break+'</div>'
|
||||
else:
|
||||
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
|
||||
elif re.match('^<img', replacement_break):
|
||||
scene_break = self.scene_break_open+replacement_break+'</p>'
|
||||
else:
|
||||
@ -622,9 +635,11 @@ class HeuristicProcessor(object):
|
||||
blanks_count = len(self.any_multi_blank.findall(html))
|
||||
if blanks_count >= 1:
|
||||
html = self.merge_blanks(html, blanks_count)
|
||||
# Center separator lines, use a bit larger margin in this case
|
||||
scene_break_regex = self.line_open+'(?![\w\'\"])(?P<break>((?P<break_char>((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close
|
||||
scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
|
||||
# If the user has enabled scene break replacement, then either softbreaks
|
||||
# or 'hard' scene breaks are replaced, depending on which is in use
|
||||
# Otherwise separator lines are centered, use a bit larger margin in this case
|
||||
replacement_break = getattr(self.extra_opts, 'replace_scene_breaks', None)
|
||||
if replacement_break is not None:
|
||||
replacement_break = self.markup_user_break(replacement_break)
|
||||
|
@ -311,10 +311,15 @@ remove all non-breaking-space entities, or may include false positive matches re
|
||||
|
||||
:guilabel:`Ensure scene breaks are consistently formatted`
|
||||
With this option |app| will attempt to detect common scene-break markers and ensure that they are center aligned.
|
||||
It also attempts to detect scene breaks defined by white space and replace them with a horizontal rule 15% of the
|
||||
page width. Some readers may find this desirable as these 'soft' scene breaks often become page breaks on readers, and
|
||||
thus become difficult to distinguish.
|
||||
'Soft' scene break markers, i.e. scene breaks only defined by extra white space, are styled to ensure that they
|
||||
will not be displayed in conjunction with page breaks.
|
||||
|
||||
:guilabel:`Replace scene breaks`
|
||||
If this option is configured then |app| will replace scene break markers it finds with the replacement text specified by the
|
||||
user. In general you should avoid using HTML tags; |app| will discard any tags and use pre-defined markup. <hr />
|
||||
tags, i.e. horizontal rules, are an exception. These can optionally be specified with styles; if you choose to add your own
|
||||
style, be sure to include the 'width' setting; otherwise the style information will be discarded.
|
||||
|
||||
:guilabel:`Remove unnecessary hyphens`
|
||||
|app| will analyze all hyphenated content in the document when this option is enabled. The document itself is used
|
||||
as a dictionary for analysis. This allows |app| to accurately remove hyphens for any words in the document in any language,
|
||||
@ -628,7 +633,7 @@ between 0 and 1. The default is 0.45, just under the median line length. Lower t
|
||||
text in the unwrapping. Increase to include less. You can adjust this value in the conversion settings under :guilabel:`PDF Input`.
|
||||
|
||||
Also, they often have headers and footers as part of the document that will become included with the text.
|
||||
Use the options to remove headers and footers to mitigate this issue. If the headers and footers are not
|
||||
Use the Search and Replace panel to remove headers and footers to mitigate this issue. If the headers and footers are not
|
||||
removed from the text, it can throw off the paragraph unwrapping. To learn how to use the header and footer removal options, read
|
||||
:ref:`regexptutorial`.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user