mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
merged new heuristics options
This commit is contained in:
commit
eda2122b7b
@ -126,8 +126,21 @@ def add_pipeline_options(parser, plumber):
|
||||
'margin_top', 'margin_left', 'margin_right',
|
||||
'margin_bottom', 'change_justification',
|
||||
'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size',
|
||||
'asciiize', 'remove_header', 'header_regex',
|
||||
'remove_footer', 'footer_regex',
|
||||
'asciiize',
|
||||
]
|
||||
),
|
||||
|
||||
'HEURISTICS' : (
|
||||
_('Modify the document text and strucutre using common patterns.'),
|
||||
[
|
||||
'enable_heuristics', 'markup_chapter_headings',
|
||||
'italicize_common_cases', 'fix_indents',
|
||||
'html_unwrap_factor', 'unwrap_lines',
|
||||
'delete_blank_paragraphs', 'format_scene_breaks',
|
||||
'dehyphenate',
|
||||
'sr1_search', 'sr1_replace',
|
||||
'sr2_search', 'sr2_replace',
|
||||
'sr3_search', 'sr3_replace',
|
||||
]
|
||||
),
|
||||
|
||||
@ -137,7 +150,6 @@ def add_pipeline_options(parser, plumber):
|
||||
'chapter', 'chapter_mark',
|
||||
'prefer_metadata_cover', 'remove_first_image',
|
||||
'insert_metadata', 'page_breaks_before',
|
||||
'preprocess_html', 'html_unwrap_factor',
|
||||
]
|
||||
),
|
||||
|
||||
@ -164,8 +176,9 @@ def add_pipeline_options(parser, plumber):
|
||||
|
||||
}
|
||||
|
||||
group_order = ['', 'LOOK AND FEEL', 'STRUCTURE DETECTION',
|
||||
'TABLE OF CONTENTS', 'METADATA', 'DEBUG']
|
||||
group_order = ['', 'LOOK AND FEEL', 'HEURISTICS',
|
||||
'STRUCTURE DETECTION', 'TABLE OF CONTENTS',
|
||||
'METADATA', 'DEBUG']
|
||||
|
||||
for group in group_order:
|
||||
desc, options = groups[group]
|
||||
|
@ -376,23 +376,6 @@ OptionRecommendation(name='insert_metadata',
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='preprocess_html',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Attempt to detect and correct hard line breaks and other '
|
||||
'problems in the source file. This may make things worse, so use '
|
||||
'with care.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='html_unwrap_factor',
|
||||
recommended_value=0.40, level=OptionRecommendation.LOW,
|
||||
help=_('Scale used to determine the length at which a line should '
|
||||
'be unwrapped if preprocess is enabled. Valid values are a decimal between 0 and 1. The '
|
||||
'default is 0.40, just below the median line length. This will unwrap typical books '
|
||||
' with hard line breaks, but should be reduced if the line length is variable.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='smarten_punctuation',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Convert plain quotes, dashes and ellipsis to their '
|
||||
@ -401,32 +384,6 @@ OptionRecommendation(name='smarten_punctuation',
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='remove_header',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Use a regular expression to try and remove the header.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='header_regex',
|
||||
recommended_value='(?i)(?<=<hr>)((\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?\d+<br>\s*.*?\s*)|(\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?.*?<br>\s*\d+))(?=<br>)',
|
||||
level=OptionRecommendation.LOW,
|
||||
help=_('The regular expression to use to remove the header.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='remove_footer',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Use a regular expression to try and remove the footer.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='footer_regex',
|
||||
recommended_value='(?i)(?<=<hr>)((\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?\d+<br>\s*.*?\s*)|(\s*<a name=\d+></a>((<img.+?>)*<br>\s*)?.*?<br>\s*\d+))(?=<br>)',
|
||||
level=OptionRecommendation.LOW,
|
||||
help=_('The regular expression to use to remove the footer.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='read_metadata_from_opf',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
short_switch='m',
|
||||
@ -527,6 +484,80 @@ OptionRecommendation(name='timestamp',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the book timestamp (used by the date column in calibre).')),
|
||||
|
||||
OptionRecommendation(name='enable_heuristics',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Enable heurisic processing. This option must be set for any '
|
||||
'heuristic processing to take place.')),
|
||||
|
||||
OptionRecommendation(name='markup_chapter_headings',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Detect chapter headings and sub headings. Change '
|
||||
'them to h1 and h2 tags.')),
|
||||
|
||||
OptionRecommendation(name='italicize_common_cases',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Look for common words and patterns that denote '
|
||||
'italics and italicize them.')),
|
||||
|
||||
OptionRecommendation(name='fix_indents',
|
||||
recommended_value=True, level=OptionRecommendation.LOW,
|
||||
help=_('Turn indentation created from multiple entities '
|
||||
'into CSS indents.')),
|
||||
|
||||
OptionRecommendation(name='html_unwrap_factor',
|
||||
recommended_value=0.40, level=OptionRecommendation.LOW,
|
||||
help=_('Scale used to determine the length at which a line should '
|
||||
'be unwrapped. Valid values are a decimal between 0 and 1. The '
|
||||
'default is 0.4, just below the median line length.')),
|
||||
|
||||
OptionRecommendation(name='unwrap_lines',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Unwrap lines.')),
|
||||
|
||||
OptionRecommendation(name='delete_blank_paragraphs',
|
||||
recommended_value=True, level=OptionRecommendation.LOW,
|
||||
help=_('Remove empyt paragraphs from the document')),
|
||||
|
||||
OptionRecommendation(name='format_scene_breaks',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Replace soft scene breaks that use multiple blank lines '
|
||||
'with horizontal rules.')),
|
||||
|
||||
OptionRecommendation(name='dehyphenate',
|
||||
recommended_value=True, level=OptionRecommendation.LOW,
|
||||
help=_('Combine words that are separated by a hyphen. '
|
||||
'This is for cases where a word is hyphenated across '
|
||||
'two lines to denote the characters from a single word.')),
|
||||
|
||||
OptionRecommendation(name='sr1_search',
|
||||
recommended_value='', level=OptionRecommendation.LOW,
|
||||
help=_('Search pattern (regular expression) to be replaced with '
|
||||
'sr1-replace.')),
|
||||
|
||||
OptionRecommendation(name='sr1_replace',
|
||||
recommended_value='', level=OptionRecommendation.LOW,
|
||||
help=_('Replace characters (can be lambda expression) to '
|
||||
'replace the text found with sr1-search.')),
|
||||
|
||||
OptionRecommendation(name='sr2_search',
|
||||
recommended_value='', level=OptionRecommendation.LOW,
|
||||
help=_('Search pattern (regular expression) to be replaced with '
|
||||
'sr2-replace.')),
|
||||
|
||||
OptionRecommendation(name='sr2_replace',
|
||||
recommended_value='', level=OptionRecommendation.LOW,
|
||||
help=_('Replace characters (can be lambda expression) to '
|
||||
'replace the text found with sr2-search.')),
|
||||
|
||||
OptionRecommendation(name='sr3_search',
|
||||
recommended_value='', level=OptionRecommendation.LOW,
|
||||
help=_('Search pattern (regular expression) to be replaced with '
|
||||
'sr3-replace.')),
|
||||
|
||||
OptionRecommendation(name='sr3_replace',
|
||||
recommended_value='', level=OptionRecommendation.LOW,
|
||||
help=_('Replace characters (can be lambda expression) to '
|
||||
'replace the text found with sr3-search.')),
|
||||
]
|
||||
# }}}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user