diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 1034511016..e227ad2c8e 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -411,6 +411,18 @@ OptionRecommendation(name='asciiize',
         )
         ),
 
+OptionRecommendation(name='keep_ligatures',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Preserve ligatures present in the input document. '
+                'A ligature is a special rendering of a pair of '
+                'characters like ff, fi, fl et cetera. '
+                'Most readers do not have support for '
+                'ligatures in their default fonts, so they are '
+                'unlikely to render correctly. By default, calibre '
+                'will turn a ligature into the corresponding pair of normal '
+                'characters. This option will preserve them instead.')
+        ),
+
 OptionRecommendation(name='title',
     recommended_value=None, level=OptionRecommendation.LOW,
     help=_('Set the title.')),
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index ada4f1a3af..a42f0fc73b 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -18,6 +18,28 @@ convert_entities = functools.partial(entity_to_unicode,
         exceptions=['quot', 'apos', 'lt', 'gt', 'amp', '#60', '#62'])
 _span_pat = re.compile('', re.DOTALL|re.IGNORECASE)
 
+# Maps each ligature character to the plain character sequence that
+# replaces it when ligatures are not being preserved.
+LIGATURES = {
+        u'\u00c6': u'AE',
+        u'\u00e6': u'ae',
+        u'\u0152': u'OE',
+        u'\u0153': u'oe',
+        u'\u0132': u'IJ',
+        u'\u0133': u'ij',
+        u'\u1D6B': u'ue',
+        u'\uFB00': u'ff',
+        u'\uFB01': u'fi',
+        u'\uFB02': u'fl',
+        u'\uFB03': u'ffi',
+        u'\uFB04': u'ffl',
+        u'\uFB05': u'ft',
+        u'\uFB06': u'st',
+        }
+
+# Matches any single key of LIGATURES. The keys are single characters with
+# no regex metacharacters, so they are joined without escaping.
+_ligpat = re.compile(u'|'.join(LIGATURES))
 
 def sanitize_head(match):
     x = match.group(1)
@@ -228,6 +250,12 @@ class HTMLPreProcessor(object):
         else:
             rules = []
 
+        # The remove_header check below reads extra_opts via getattr() with
+        # a default; do the same here so a missing keep_ligatures option
+        # falls back to expanding ligatures instead of raising.
+        if not getattr(self.extra_opts, 'keep_ligatures', False):
+            html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
+
         end_rules = []
         if getattr(self.extra_opts, 'remove_header', None):
             try:
diff --git a/src/calibre/gui2/convert/look_and_feel.py b/src/calibre/gui2/convert/look_and_feel.py
index 8ef1f77351..e18657cf69 100644
--- a/src/calibre/gui2/convert/look_and_feel.py
+++ b/src/calibre/gui2/convert/look_and_feel.py
@@ -24,7 +24,7 @@ class LookAndFeelWidget(Widget, Ui_Form):
                     'linearize_tables',
                     'disable_font_rescaling', 'insert_blank_line',
                     'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding',
-                    'asciiize']
+                    'asciiize', 'keep_ligatures']
                 )
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/look_and_feel.ui b/src/calibre/gui2/convert/look_and_feel.ui
index 6fbf4e11cd..764226012b 100644
--- a/src/calibre/gui2/convert/look_and_feel.ui
+++ b/src/calibre/gui2/convert/look_and_feel.ui
@@ -31,7 +31,7 @@ - + pt
@@ -63,7 +63,7 @@ - +
@@ -84,7 +84,7 @@ ... - + :/images/wizard.svg:/images/wizard.svg
@@ -107,7 +107,7 @@ - + pt
@@ -127,60 +127,50 @@ - + - - + + + + Remove &spacing between paragraphs + + + + + - + - Remove &spacing between paragraphs + Indent size: + + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - Qt::Horizontal + + + <p>When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent. - - - 40 - 20 - + + em - - - - - - - - Indent size: - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - - - - - <p>When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent. - - - em - - - 1 - - - - + + 1 + + + + + + Text justification: + + +
@@ -188,14 +178,7 @@ - - - - &Transliterate unicode characters to ASCII. - - - - + Extra &CSS
@@ -207,21 +190,7 @@ - - - - Insert &blank line - - - - - - - Text justification: - - - - + 2
@@ -243,6 +212,27 @@ + + + + &Transliterate unicode characters to ASCII + + + + + + + Insert &blank line + + + + + + + Keep &ligatures + + +