From 90aac28ab2426e435406a8b22baec37eadd858f4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 11 Nov 2021 11:24:51 +0530 Subject: [PATCH] Allow using HTML transform rules during conversion as well --- src/calibre/ebooks/conversion/cli.py | 24 ++++++++++++++++++++-- src/calibre/ebooks/conversion/config.py | 2 +- src/calibre/ebooks/conversion/plumber.py | 15 +++++++++++++- src/calibre/ebooks/html_transform_rules.py | 9 ++++++++ src/calibre/gui2/convert/look_and_feel.py | 6 +++--- src/calibre/gui2/convert/look_and_feel.ui | 18 +++++++++++++++- src/calibre/gui2/css_transform_rules.py | 6 ++++-- src/calibre/gui2/html_transform_rules.py | 2 +- 8 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index f05067bafb..6929811b10 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -101,7 +101,7 @@ def option_recommendation_to_cli_option(add_option, rec): attrs.pop('type', '') if opt.name == 'read_metadata_from_opf': switches.append('--from-opf') - if opt.name == 'transform_css_rules': + elif opt.name == 'transform_css_rules': attrs['help'] = _( 'Path to a file containing rules to transform the CSS styles' ' in this book. The easiest way to create such a file is to' @@ -110,6 +110,15 @@ def option_recommendation_to_cli_option(add_option, rec): ' dialog. Once you create the rules, you can use the "Export" button' ' to save them to a file.' ) + elif opt.name == 'transform_html_rules': + attrs['help'] = _( + 'Path to a file containing rules to transform the HTML' + ' in this book. The easiest way to create such a file is to' + ' use the wizard for creating rules in the calibre GUI. Access' + ' it in the "Look & feel->Transform HTML" section of the conversion' + ' dialog. Once you create the rules, you can use the "Export" button' + ' to save them to a file.' 
+ ) if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True: switches = ['--disable-'+opt.long_switch] add_option(Option(*switches, **attrs)) @@ -192,7 +201,7 @@ def add_pipeline_options(parser, plumber): 'font_size_mapping', 'embed_font_family', 'subset_embedded_fonts', 'embed_all_fonts', 'line_height', 'minimum_line_height', - 'linearize_tables', + 'linearize_tables', 'transform_html_rules', 'extra_css', 'filter_css', 'transform_css_rules', 'expand_css', 'smarten_punctuation', 'unsmarten_punctuation', 'margin_top', 'margin_left', 'margin_right', @@ -388,6 +397,17 @@ def main(args=sys.argv): log.error(title) log.error(msg) return 1 + if opts.transform_html_rules: + from calibre.ebooks.html_transform_rules import import_rules, validate_rule + with open(opts.transform_html_rules, 'rb') as tcr: + opts.transform_html_rules = rules = list(import_rules(tcr.read())) + for rule in rules: + title, msg = validate_rule(rule) + if title and msg: + log.error('Failed to parse HTML transform rules') + log.error(title) + log.error(msg) + return 1 recommendations = [(n.dest, getattr(opts, n.dest), OptionRecommendation.HIGH) diff --git a/src/calibre/ebooks/conversion/config.py b/src/calibre/ebooks/conversion/config.py index 3d0828da1e..e09a369dcc 100644 --- a/src/calibre/ebooks/conversion/config.py +++ b/src/calibre/ebooks/conversion/config.py @@ -246,7 +246,7 @@ OPTIONS = { 'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size', 'insert_blank_line_size', 'input_encoding', 'filter_css', 'expand_css', 'asciiize', 'keep_ligatures', 'linearize_tables', - 'transform_css_rules'), + 'transform_css_rules', 'transform_html_rules'), 'metadata': ('prefer_metadata_cover',), diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 8de61ce6a6..53e428425f 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -369,6 +369,12 @@ OptionRecommendation(name='transform_css_rules', ' 
rules are applied after all other CSS processing is done.') ), +OptionRecommendation(name='transform_html_rules', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Rules for transforming the HTML in this book. These' + ' rules are applied after the HTML is parsed, but before any other transformations.') + ), + OptionRecommendation(name='filter_css', recommended_value=None, level=OptionRecommendation.LOW, help=_('A comma separated list of CSS properties that ' @@ -881,7 +887,7 @@ OptionRecommendation(name='search_replace', if name in {'sr1_search', 'sr1_replace', 'sr2_search', 'sr2_replace', 'sr3_search', 'sr3_replace', 'filter_css', 'comments'}: if not a and not b: return True - if name in {'transform_css_rules', 'search_replace'}: + if name in {'transform_css_rules', 'transform_html_rules', 'search_replace'}: if b == '[]': b = None return a == b @@ -1133,6 +1139,13 @@ OptionRecommendation(name='search_replace', self.oeb.plumber_output_format = self.output_fmt or '' + if self.opts.transform_html_rules: + transform_html_rules = self.opts.transform_html_rules + if isinstance(transform_html_rules, string_or_bytes): + transform_html_rules = json.loads(transform_html_rules) + from calibre.ebooks.html_transform_rules import transform_conversion_book + transform_conversion_book(self.oeb, self.opts, transform_html_rules) + from calibre.ebooks.oeb.transforms.data_url import DataURL DataURL()(self.oeb, self.opts) from calibre.ebooks.oeb.transforms.guide import Clean diff --git a/src/calibre/ebooks/html_transform_rules.py b/src/calibre/ebooks/html_transform_rules.py index 117cbed21c..f2db71440b 100644 --- a/src/calibre/ebooks/html_transform_rules.py +++ b/src/calibre/ebooks/html_transform_rules.py @@ -438,6 +438,15 @@ def transform_container(container, serialized_rules, names=()): return doc_changed +def transform_conversion_book(oeb, opts, serialized_rules): + rules = tuple(Rule(r) for r in serialized_rules) + for item in oeb.spine: + root = item.data + if 
not hasattr(root, 'xpath'): + continue + transform_doc(root, rules) + + def rule_to_text(rule): text = _('If the tag {match_type} {query}').format( match_type=MATCH_TYPE_MAP[rule['match_type']].text, query=rule.get('query') or '') diff --git a/src/calibre/gui2/convert/look_and_feel.py b/src/calibre/gui2/convert/look_and_feel.py index 516cdde411..5556626238 100644 --- a/src/calibre/gui2/convert/look_and_feel.py +++ b/src/calibre/gui2/convert/look_and_feel.py @@ -70,7 +70,7 @@ class LookAndFeelWidget(Widget, Ui_Form): val = str(g.text()).strip() val = [x.strip() for x in val.split(',' if ',' in val else ' ') if x.strip()] return ', '.join(val) or None - if g is self.opt_transform_css_rules: + if g is self.opt_transform_css_rules or g is self.opt_transform_html_rules: return json.dumps(g.rules) return Widget.get_value_handler(self, g) @@ -95,7 +95,7 @@ class LookAndFeelWidget(Widget, Ui_Form): w.setChecked(False) self.filter_css_others.setText(', '.join(items)) return True - if g is self.opt_transform_css_rules: + if g is self.opt_transform_css_rules or g is self.opt_transform_html_rules: g.rules = json.loads(val) if val else [] return True @@ -106,7 +106,7 @@ class LookAndFeelWidget(Widget, Ui_Form): w.stateChanged.connect(slot) self.filter_css_others.textChanged.connect(slot) return - if gui_obj is self.opt_transform_css_rules: + if gui_obj is self.opt_transform_css_rules or gui_obj is self.opt_transform_html_rules: gui_obj.changed.connect(slot) return raise NotImplementedError() diff --git a/src/calibre/gui2/convert/look_and_feel.ui b/src/calibre/gui2/convert/look_and_feel.ui index 48a49cb04d..6962ad6439 100644 --- a/src/calibre/gui2/convert/look_and_feel.ui +++ b/src/calibre/gui2/convert/look_and_feel.ui @@ -10,7 +10,7 @@ 619 - + 0 @@ -503,6 +503,16 @@ + + + Transform &HTML + + + + + + + @@ -524,6 +534,12 @@
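    <header>calibre/gui2/css_transform_rules.h</header>
    <container>1</container>
   </customwidget>
+  <customwidget>
+   <class>HtmlRulesWidget</class>
+   <extends>QWidget</extends>
+   <header>calibre/gui2/html_transform_rules.h</header>
+   <container>1</container>
+  </customwidget>
  </customwidgets>
  <resources/>
  <connections/>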
diff --git a/src/calibre/gui2/css_transform_rules.py b/src/calibre/gui2/css_transform_rules.py index 2073cc0b18..d769441662 100644 --- a/src/calibre/gui2/css_transform_rules.py +++ b/src/calibre/gui2/css_transform_rules.py @@ -317,15 +317,17 @@ class RulesWidget(QWidget, SaveLoadMixin): # {{{ 'There are no rules to export'), show=True) path = choose_save_file(self, self.DIR_SAVE_NAME, _('Choose file for exported rules'), initial_filename=self.INITIAL_FILE_NAME) if path: - raw = self.export_func(rules) + f = self.__class__.export_func + raw = f(rules) with open(path, 'wb') as f: f.write(raw) def import_rules(self): paths = choose_files(self, self.DIR_SAVE_NAME, _('Choose file to import rules from'), select_only_single_file=True) if paths: + func = self.__class__.import_func with open(paths[0], 'rb') as f: - rules = self.import_func(f.read()) + rules = func(f.read()) self.rules_widget.rules = list(rules) + list(self.rules_widget.rules) self.changed.emit() diff --git a/src/calibre/gui2/html_transform_rules.py b/src/calibre/gui2/html_transform_rules.py index 89e5b1a9d8..61a41eff12 100644 --- a/src/calibre/gui2/html_transform_rules.py +++ b/src/calibre/gui2/html_transform_rules.py @@ -357,7 +357,7 @@ class RulesDialog(RulesDialogBase): # {{{ # }}} -class RulesWidget(RulesWidgetBase): # {{{ +class HtmlRulesWidget(RulesWidgetBase): # {{{ PREFS_NAME = 'html-transform-rules' INITIAL_FILE_NAME = 'html-rules.txt' DIR_SAVE_NAME = 'export-html-transform-rules'