From f85ba4e3261b4e64c84722087471824fbf12278e Mon Sep 17 00:00:00 2001 From: John Schember Date: Fri, 14 Jan 2011 21:15:34 -0500 Subject: [PATCH] Fix sr key. Change footer and header removal to generic search and replace options. --- src/calibre/ebooks/conversion/cli.py | 2 +- src/calibre/ebooks/conversion/plumber.py | 9 ++--- src/calibre/ebooks/conversion/preprocess.py | 42 +++++++++++---------- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 91f0f95348..db1ec0857d 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -183,7 +183,7 @@ def add_pipeline_options(parser, plumber): } group_order = ['', 'LOOK AND FEEL', 'HEURISTICS', - 'SEARCH AND REPLACE' 'STRUCTURE DETECTION', + 'SEARCH AND REPLACE', 'STRUCTURE DETECTION', 'TABLE OF CONTENTS', 'METADATA', 'DEBUG'] for group in group_order: diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 2e88baea4e..a12dbd48e1 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -540,8 +540,7 @@ OptionRecommendation(name='sr1_search', OptionRecommendation(name='sr1_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters (can be lambda expression) to ' - 'replace the text found with sr1-search.')), + help=_('Replace characters to replace the text found with sr1-search.')), OptionRecommendation(name='sr2_search', recommended_value='', level=OptionRecommendation.LOW, @@ -550,8 +549,7 @@ OptionRecommendation(name='sr2_search', OptionRecommendation(name='sr2_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters (can be lambda expression) to ' - 'replace the text found with sr2-search.')), + help=_('Replace characters to replace the text found with sr2-search.')), OptionRecommendation(name='sr3_search', recommended_value='', level=OptionRecommendation.LOW, @@ -560,8 +558,7 @@ OptionRecommendation(name='sr3_search', OptionRecommendation(name='sr3_replace', recommended_value='', level=OptionRecommendation.LOW, - help=_('Replace characters (can be lambda expression) to ' - 'replace the text found with sr3-search.')), + help=_('Replace characters to replace the text found with sr3-search.')), ] # }}} diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 08a46cb8d9..35a311d58f 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -436,27 +436,29 @@ class HTMLPreProcessor(object): if not getattr(self.extra_opts, 'keep_ligatures', False): html = _ligpat.sub(lambda m:LIGATURES[m.group()], html) + if getattr(self.extra_opts, 'sr3_search', None): + try: + rules.insert(0, (re.compile(self.extra_opts.sr3_search), self.extra_opts.sr3_replace)) + except: + import traceback + print 'Failed to parse sr3-search regexp' + traceback.print_exc() + if getattr(self.extra_opts, 'sr2_search', None): + try: + rules.insert(0, (re.compile(self.extra_opts.sr2_search), self.extra_opts.sr2_replace)) + except: + import traceback + print 'Failed to parse sr2-search regexp' + traceback.print_exc() + if getattr(self.extra_opts, 'sr1_search', None): + try: + rules.insert(0, (re.compile(self.extra_opts.sr1_search), self.extra_opts.sr1_replace)) + except: + import traceback + print 'Failed to parse sr1-search regexp' + traceback.print_exc() + end_rules = [] - if getattr(self.extra_opts, 'remove_header', None): - try: - rules.insert(0, - (re.compile(self.extra_opts.header_regex), lambda match : '') - ) - except: - import traceback - print 'Failed to parse remove_header regexp' - traceback.print_exc() - - if getattr(self.extra_opts, 'remove_footer', None): - try: - rules.insert(0, - (re.compile(self.extra_opts.footer_regex), lambda match : '') - ) - except: - import traceback - print 'Failed to parse remove_footer regexp' - traceback.print_exc() - # delete soft hyphens - moved here so it's executed after header/footer removal if is_pdftohtml: # unwrap/delete soft hyphens