mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion pipeline: calibre will now automatically replace all ligatures in the input document with the normal character sequence they are meant to represent. This can be turned off via an option under Look & Feel, in the Conversion settings. Fixes #5553 (Text errors converting from .pdf)
This commit is contained in:
parent
12a466e2ff
commit
a747879afc
@ -411,6 +411,18 @@ OptionRecommendation(name='asciiize',
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
|
|
||||||
|
OptionRecommendation(name='keep_ligatures',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Preserve ligatures present in the input document. '
|
||||||
|
'A ligature is a special rendering of a pair of '
|
||||||
|
'characters like ff, fi, fl et cetera. '
|
||||||
|
'Most readers do not have support for '
|
||||||
|
'ligatures in their default fonts, so they are '
|
||||||
|
'unlikely to render correctly. By default, calibre '
|
||||||
|
'will turn a ligature into the corresponding pair of normal '
|
||||||
|
'characters. This option will preserve them instead.')
|
||||||
|
),
|
||||||
|
|
||||||
OptionRecommendation(name='title',
|
OptionRecommendation(name='title',
|
||||||
recommended_value=None, level=OptionRecommendation.LOW,
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the title.')),
|
help=_('Set the title.')),
|
||||||
|
@ -18,6 +18,24 @@ convert_entities = functools.partial(entity_to_unicode, exceptions=['quot',
|
|||||||
'apos', 'lt', 'gt', 'amp', '#60', '#62'])
|
'apos', 'lt', 'gt', 'amp', '#60', '#62'])
|
||||||
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
|
||||||
|
|
||||||
|
LIGATURES = {
|
||||||
|
u'\u00c6': u'AE',
|
||||||
|
u'\u00e6': u'ae',
|
||||||
|
u'\u0152': u'OE',
|
||||||
|
u'\u0153': u'oe',
|
||||||
|
u'\u0132': u'IJ',
|
||||||
|
u'\u0133': u'ij',
|
||||||
|
u'\u1D6B': u'ue',
|
||||||
|
u'\uFB00': u'ff',
|
||||||
|
u'\uFB01': u'fi',
|
||||||
|
u'\uFB02': u'fl',
|
||||||
|
u'\uFB03': u'ffi',
|
||||||
|
u'\uFB04': u'ffl',
|
||||||
|
u'\uFB05': u'ft',
|
||||||
|
u'\uFB06': u'st',
|
||||||
|
}
|
||||||
|
|
||||||
|
_ligpat = re.compile(u'|'.join(LIGATURES))
|
||||||
|
|
||||||
def sanitize_head(match):
|
def sanitize_head(match):
|
||||||
x = match.group(1)
|
x = match.group(1)
|
||||||
@ -228,6 +246,9 @@ class HTMLPreProcessor(object):
|
|||||||
else:
|
else:
|
||||||
rules = []
|
rules = []
|
||||||
|
|
||||||
|
if not self.extra_opts.keep_ligatures:
|
||||||
|
html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
|
||||||
|
|
||||||
end_rules = []
|
end_rules = []
|
||||||
if getattr(self.extra_opts, 'remove_header', None):
|
if getattr(self.extra_opts, 'remove_header', None):
|
||||||
try:
|
try:
|
||||||
|
@ -24,7 +24,7 @@ class LookAndFeelWidget(Widget, Ui_Form):
|
|||||||
'linearize_tables',
|
'linearize_tables',
|
||||||
'disable_font_rescaling', 'insert_blank_line',
|
'disable_font_rescaling', 'insert_blank_line',
|
||||||
'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding',
|
'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding',
|
||||||
'asciiize']
|
'asciiize', 'keep_ligatures']
|
||||||
)
|
)
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="2">
|
<item row="1" column="1" colspan="2">
|
||||||
<widget class="QDoubleSpinBox" name="opt_base_font_size">
|
<widget class="QDoubleSpinBox" name="opt_base_font_size">
|
||||||
<property name="suffix">
|
<property name="suffix">
|
||||||
<string> pt</string>
|
<string> pt</string>
|
||||||
@ -63,7 +63,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="2" column="1" colspan="2">
|
<item row="2" column="1" colspan="3">
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout">
|
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||||
<item>
|
<item>
|
||||||
<widget class="QLineEdit" name="opt_font_size_mapping">
|
<widget class="QLineEdit" name="opt_font_size_mapping">
|
||||||
@ -84,7 +84,7 @@
|
|||||||
<string>...</string>
|
<string>...</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="icon">
|
<property name="icon">
|
||||||
<iconset>
|
<iconset resource="../../../../resources/images.qrc">
|
||||||
<normaloff>:/images/wizard.svg</normaloff>:/images/wizard.svg</iconset>
|
<normaloff>:/images/wizard.svg</normaloff>:/images/wizard.svg</iconset>
|
||||||
</property>
|
</property>
|
||||||
<property name="iconSize">
|
<property name="iconSize">
|
||||||
@ -107,7 +107,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="2">
|
<item row="3" column="1" colspan="2">
|
||||||
<widget class="QDoubleSpinBox" name="opt_line_height">
|
<widget class="QDoubleSpinBox" name="opt_line_height">
|
||||||
<property name="suffix">
|
<property name="suffix">
|
||||||
<string> pt</string>
|
<string> pt</string>
|
||||||
@ -127,60 +127,50 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="4" column="1" colspan="2">
|
<item row="4" column="1" colspan="3">
|
||||||
<widget class="QLineEdit" name="opt_input_encoding"/>
|
<widget class="QLineEdit" name="opt_input_encoding"/>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0" colspan="3">
|
<item row="5" column="0" colspan="2">
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout_3">
|
<widget class="QCheckBox" name="opt_remove_paragraph_spacing">
|
||||||
|
<property name="text">
|
||||||
|
<string>Remove &spacing between paragraphs</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="5" column="2" colspan="2">
|
||||||
|
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="opt_remove_paragraph_spacing">
|
<widget class="QLabel" name="label_4">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Remove &spacing between paragraphs</string>
|
<string>Indent size:</string>
|
||||||
|
</property>
|
||||||
|
<property name="alignment">
|
||||||
|
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<spacer name="horizontalSpacer">
|
<widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size">
|
||||||
<property name="orientation">
|
<property name="toolTip">
|
||||||
<enum>Qt::Horizontal</enum>
|
<string><p>When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string>
|
||||||
</property>
|
</property>
|
||||||
<property name="sizeHint" stdset="0">
|
<property name="suffix">
|
||||||
<size>
|
<string> em</string>
|
||||||
<width>40</width>
|
|
||||||
<height>20</height>
|
|
||||||
</size>
|
|
||||||
</property>
|
</property>
|
||||||
</spacer>
|
<property name="decimals">
|
||||||
</item>
|
<number>1</number>
|
||||||
<item>
|
</property>
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
</widget>
|
||||||
<item>
|
|
||||||
<widget class="QLabel" name="label_4">
|
|
||||||
<property name="text">
|
|
||||||
<string>Indent size:</string>
|
|
||||||
</property>
|
|
||||||
<property name="alignment">
|
|
||||||
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item>
|
|
||||||
<widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size">
|
|
||||||
<property name="toolTip">
|
|
||||||
<string><p>When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string>
|
|
||||||
</property>
|
|
||||||
<property name="suffix">
|
|
||||||
<string> em</string>
|
|
||||||
</property>
|
|
||||||
<property name="decimals">
|
|
||||||
<number>1</number>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
</layout>
|
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="6" column="0">
|
||||||
|
<widget class="QLabel" name="label_5">
|
||||||
|
<property name="text">
|
||||||
|
<string>Text justification:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
<item row="7" column="0">
|
<item row="7" column="0">
|
||||||
<widget class="QCheckBox" name="opt_linearize_tables">
|
<widget class="QCheckBox" name="opt_linearize_tables">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
@ -188,14 +178,7 @@
|
|||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="9" column="0">
|
<item row="9" column="0" colspan="4">
|
||||||
<widget class="QCheckBox" name="opt_asciiize">
|
|
||||||
<property name="text">
|
|
||||||
<string>&Transliterate unicode characters to ASCII.</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="10" column="0" colspan="3">
|
|
||||||
<widget class="QGroupBox" name="groupBox">
|
<widget class="QGroupBox" name="groupBox">
|
||||||
<property name="title">
|
<property name="title">
|
||||||
<string>Extra &CSS</string>
|
<string>Extra &CSS</string>
|
||||||
@ -207,21 +190,7 @@
|
|||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="8" column="0">
|
<item row="6" column="2" colspan="2">
|
||||||
<widget class="QCheckBox" name="opt_insert_blank_line">
|
|
||||||
<property name="text">
|
|
||||||
<string>Insert &blank line</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="6" column="0">
|
|
||||||
<widget class="QLabel" name="label_5">
|
|
||||||
<property name="text">
|
|
||||||
<string>Text justification:</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="6" column="2">
|
|
||||||
<widget class="QComboBox" name="opt_change_justification">
|
<widget class="QComboBox" name="opt_change_justification">
|
||||||
<property name="currentIndex">
|
<property name="currentIndex">
|
||||||
<number>2</number>
|
<number>2</number>
|
||||||
@ -243,6 +212,27 @@
|
|||||||
</item>
|
</item>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="7" column="1" colspan="3">
|
||||||
|
<widget class="QCheckBox" name="opt_asciiize">
|
||||||
|
<property name="text">
|
||||||
|
<string>&Transliterate unicode characters to ASCII</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="8" column="0">
|
||||||
|
<widget class="QCheckBox" name="opt_insert_blank_line">
|
||||||
|
<property name="text">
|
||||||
|
<string>Insert &blank line</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="8" column="1" colspan="2">
|
||||||
|
<widget class="QCheckBox" name="opt_keep_ligatures">
|
||||||
|
<property name="text">
|
||||||
|
<string>Keep &ligatures</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<resources>
|
<resources>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user