diff --git a/setup/hosting.py b/setup/hosting.py index 394d32702e..33bb3bff04 100644 --- a/setup/hosting.py +++ b/setup/hosting.py @@ -26,7 +26,7 @@ def login_to_google(username, password): br.form['Email'] = username br.form['Passwd'] = password raw = br.submit().read() - if re.search(br'.*?Account Settings', raw) is None: + if re.search(br'(?i).*?Account Settings', raw) is None: x = re.search(br'(?is).*?', raw) if x is not None: print ('Title of post login page: %s'%x.group()) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 559402ca1c..877b15c24a 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -156,9 +156,10 @@ def add_pipeline_options(parser, plumber): 'SEARCH AND REPLACE' : ( _('Modify the document text and structure using user defined patterns.'), [ - 'sr1_search', 'sr1_replace', - 'sr2_search', 'sr2_replace', - 'sr3_search', 'sr3_replace', + 'sr1_search', 'sr1_replace', + 'sr2_search', 'sr2_replace', + 'sr3_search', 'sr3_replace', + 'search_replace', ] ), @@ -211,6 +212,7 @@ def add_pipeline_options(parser, plumber): if rec.level < rec.HIGH: option_recommendation_to_cli_option(add_option, rec) + def option_parser(): parser = OptionParser(usage=USAGE) parser.add_option('--list-recipes', default=False, action='store_true', @@ -271,6 +273,34 @@ def abspath(x): return x return os.path.abspath(os.path.expanduser(x)) +def read_sr_patterns(path, log=None): + import json, re, codecs + pats = [] + with codecs.open(path, 'r', 'utf-8') as f: + pat = None + for line in f.readlines(): + if line.endswith(u'\n'): + line = line[:-1] + + if pat is None: + if not line.strip(): + continue + try: + re.compile(line) + except: + msg = u'Invalid regular expression: %r from file: %r'%( + line, path) + if log is not None: + log.error(msg) + raise SystemExit(1) + else: + raise ValueError(msg) + pat = line + else: + pats.append((pat, line)) + pat = None + return json.dumps(pats) + def main(args=sys.argv): log = Log() parser, plumber = create_option_parser(args, log) @@ -278,6 +308,9 @@ def main(args=sys.argv): for x in ('read_metadata_from_opf', 'cover'): if getattr(opts, x, None) is not None: setattr(opts, x, abspath(getattr(opts, x))) + if opts.search_replace: + opts.search_replace = read_sr_patterns(opts.search_replace, log) + recommendations = [(n.dest, getattr(opts, n.dest), OptionRecommendation.HIGH) \ for n in parser.options_iter() diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 8bb4fdd891..dbba38e987 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -626,6 +626,14 @@ OptionRecommendation(name='sr3_search', OptionRecommendation(name='sr3_replace', recommended_value='', level=OptionRecommendation.LOW, help=_('Replacement to replace the text found with sr3-search.')), + +OptionRecommendation(name='search_replace', + recommended_value=None, level=OptionRecommendation.LOW, help=_( + 'Path to a file containing search and replace regular expressions. ' + 'The file must contain alternating lines of regular expression ' + 'followed by replacement pattern (which can be an empty line). ' + 'The regular expression must be in the python regex syntax and ' + 'the file must be UTF-8 encoded.')), ] # }}} diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 617de18555..c526cba8a9 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import functools, re +import functools, re, json from calibre import entity_to_unicode, as_unicode @@ -515,18 +515,31 @@ class HTMLPreProcessor(object): if not getattr(self.extra_opts, 'keep_ligatures', False): html = _ligpat.sub(lambda m:LIGATURES[m.group()], html) - for search, replace in [['sr3_search', 'sr3_replace'], ['sr2_search', 'sr2_replace'], ['sr1_search', 'sr1_replace']]: + # Function for processing search and replace + def do_search_replace(search_pattern, replace_txt): + try: + search_re = re.compile(search_pattern) + if not replace_txt: + replace_txt = '' + rules.insert(0, (search_re, replace_txt)) + except Exception as e: + self.log.error('Failed to parse %r regexp because %s' % + (search, as_unicode(e))) + + # search / replace using the sr?_search / sr?_replace options + for i in range(1, 4): + search, replace = 'sr%d_search'%i, 'sr%d_replace'%i search_pattern = getattr(self.extra_opts, search, '') + replace_txt = getattr(self.extra_opts, replace, '') if search_pattern: - try: - search_re = re.compile(search_pattern) - replace_txt = getattr(self.extra_opts, replace, '') - if not replace_txt: - replace_txt = '' - rules.insert(0, (search_re, replace_txt)) - except Exception as e: - self.log.error('Failed to parse %r regexp because %s' % - (search, as_unicode(e))) + do_search_replace(search_pattern, replace_txt) + + # multi-search / replace using the search_replace option + search_replace = getattr(self.extra_opts, 'search_replace', None) + if search_replace: + search_replace = json.loads(search_replace) + for search_pattern, replace_txt in search_replace: + do_search_replace(search_pattern, replace_txt) end_rules = [] # delete soft hyphens - moved here so it's executed after header/footer removal diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index d334816985..b27a97d899 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -641,6 +641,26 @@ def choose_files(window, name, title, return fd.get_files() return None +def choose_save_file(window, name, title, filters=[], all_files=True): + ''' + Ask user to choose a file to save to. Can be a non-existent file. + :param filters: list of allowable extensions. Each element of the list + must be a 2-tuple with first element a string describing + the type of files to be filtered and second element a list + of extensions. + :param all_files: If True add All files to filters. + ''' + mode = QFileDialog.AnyFile + fd = FileDialog(title=title, name=name, filters=filters, + parent=window, add_all_files_filter=all_files, mode=mode) + fd.setParent(None) + ans = None + if fd.accepted: + ans = fd.get_files() + if ans: + ans = ans[0] + return ans + def choose_images(window, name, title, select_only_single_file=True): mode = QFileDialog.ExistingFile if select_only_single_file else QFileDialog.ExistingFiles fd = FileDialog(title=title, name=name, diff --git a/src/calibre/gui2/convert/__init__.py b/src/calibre/gui2/convert/__init__.py index 222d410522..84667aa740 100644 --- a/src/calibre/gui2/convert/__init__.py +++ b/src/calibre/gui2/convert/__init__.py @@ -233,19 +233,22 @@ class Widget(QWidget): pass def setup_help(self, help_provider): - w = textwrap.TextWrapper(80) for name in self._options: g = getattr(self, 'opt_'+name, None) if g is None: continue help = help_provider(name) if not help: continue + if self.setup_help_handler(g, help): continue g._help = help - htext = u'
%s
'%prepare_string_for_xml( - '\n'.join(w.wrap(help))) - g.setToolTip(htext) - g.setWhatsThis(htext) - g.__class__.enterEvent = lambda obj, event: self.set_help(getattr(obj, '_help', obj.toolTip())) + self.setup_widget_help(g) + + def setup_widget_help(self, g): + w = textwrap.TextWrapper(80) + htext = u'
%s
'%prepare_string_for_xml('\n'.join(w.wrap(g._help))) + g.setToolTip(htext) + g.setWhatsThis(htext) + g.__class__.enterEvent = lambda obj, event: self.set_help(getattr(obj, '_help', obj.toolTip())) def set_value_handler(self, g, val): @@ -261,6 +264,9 @@ class Widget(QWidget): def post_get_value(self, g): pass + def setup_help_handler(self, g, help): + return False + def break_cycles(self): self.db = None diff --git a/src/calibre/gui2/convert/regex_builder.py b/src/calibre/gui2/convert/regex_builder.py index bbbef7e741..bd699f08a1 100644 --- a/src/calibre/gui2/convert/regex_builder.py +++ b/src/calibre/gui2/convert/regex_builder.py @@ -129,6 +129,8 @@ class RegexBuilder(QDialog, Ui_RegexBuilder): d.exec_() if d.result() == QDialog.Accepted: format = d.format() + else: + return False if not format: error_dialog(self, _('No formats available'), @@ -226,6 +228,9 @@ class RegexEdit(QWidget, Ui_Edit): def set_doc(self, doc): self.doc_cache = doc + def set_regex(self, regex): + self.edit.setText(regex) + def break_cycles(self): self.db = self.doc_cache = None diff --git a/src/calibre/gui2/convert/search_and_replace.py b/src/calibre/gui2/convert/search_and_replace.py index b9e2644008..704b851eda 100644 --- a/src/calibre/gui2/convert/search_and_replace.py +++ b/src/calibre/gui2/convert/search_and_replace.py @@ -1,14 +1,18 @@ # -*- coding: utf-8 -*- __license__ = 'GPL 3' -__copyright__ = '2011, John Schember ' +__copyright__ = '2011, John Schember , 2012 Eli Algranti ' __docformat__ = 'restructuredtext en' -import re +import re, codecs, json + +from PyQt4.Qt import Qt, QTableWidgetItem from calibre.gui2.convert.search_and_replace_ui import Ui_Form from calibre.gui2.convert import Widget -from calibre.gui2 import error_dialog +from calibre.gui2 import (error_dialog, question_dialog, choose_files, + choose_save_file) +from calibre import as_unicode class SearchAndReplaceWidget(Widget, Ui_Form): @@ -19,26 +23,113 @@ class SearchAndReplaceWidget(Widget, Ui_Form): STRIP_TEXT_FIELDS = False def __init__(self, parent, get_option, get_help, db=None, book_id=None): + # Dummy attributes to fool the Widget() option handler code. We handle + # everything in our *handler methods. + for i in range(1, 4): + x = 'sr%d_'%i + for y in ('search', 'replace'): + z = x + y + setattr(self, 'opt_'+z, z) + self.opt_search_replace = 'search_replace' + Widget.__init__(self, parent, - ['sr1_search', 'sr1_replace', + ['search_replace', + 'sr1_search', 'sr1_replace', 'sr2_search', 'sr2_replace', 'sr3_search', 'sr3_replace'] ) self.db, self.book_id = db, book_id - self.initialize_options(get_option, get_help, db, book_id) - self.opt_sr1_search.set_msg(_('&Search Regular Expression')) - self.opt_sr1_search.set_book_id(book_id) - self.opt_sr1_search.set_db(db) - self.opt_sr2_search.set_msg(_('&Search Regular Expression')) - self.opt_sr2_search.set_book_id(book_id) - self.opt_sr2_search.set_db(db) - self.opt_sr3_search.set_msg(_('&Search Regular Expression')) - self.opt_sr3_search.set_book_id(book_id) - self.opt_sr3_search.set_db(db) - self.opt_sr1_search.doc_update.connect(self.update_doc) - self.opt_sr2_search.doc_update.connect(self.update_doc) - self.opt_sr3_search.doc_update.connect(self.update_doc) + self.sr_search.set_msg(_('&Search Regular Expression')) + self.sr_search.set_book_id(book_id) + self.sr_search.set_db(db) + + self.sr_search.doc_update.connect(self.update_doc) + + proto = QTableWidgetItem() + proto.setFlags(Qt.ItemFlags(Qt.ItemIsSelectable + Qt.ItemIsEnabled)) + self.search_replace.setItemPrototype(proto) + self.search_replace.setColumnCount(2) + self.search_replace.setColumnWidth(0, 300) + self.search_replace.setColumnWidth(1, 300) + self.search_replace.setHorizontalHeaderLabels([ + _('Search Regular Expression'), _('Replacement Text')]) + + self.sr_add.clicked.connect(self.sr_add_clicked) + self.sr_change.clicked.connect(self.sr_change_clicked) + self.sr_remove.clicked.connect(self.sr_remove_clicked) + self.sr_load.clicked.connect(self.sr_load_clicked) + self.sr_save.clicked.connect(self.sr_save_clicked) + self.search_replace.currentCellChanged.connect(self.sr_currentCellChanged) + + self.initialize_options(get_option, get_help, db, book_id) + + def sr_add_clicked(self): + if self.sr_search.regex: + row = self.sr_add_row(self.sr_search.regex, self.sr_replace.text()) + self.search_replace.setCurrentCell(row, 0) + + def sr_add_row(self, search, replace): + row = self.search_replace.rowCount() + self.search_replace.setRowCount(row + 1) + newItem = self.search_replace.itemPrototype().clone() + newItem.setText(search) + self.search_replace.setItem(row,0, newItem) + newItem = self.search_replace.itemPrototype().clone() + newItem.setText(replace) + self.search_replace.setItem(row,1, newItem) + return row + + def sr_change_clicked(self): + row = self.search_replace.currentRow() + if row >= 0: + self.search_replace.item(row, 0).setText(self.sr_search.regex) + self.search_replace.item(row, 1).setText(self.sr_replace.text()) + self.search_replace.setCurrentCell(row, 0) + + def sr_remove_clicked(self): + row = self.search_replace.currentRow() + if row >= 0: + self.search_replace.removeRow(row) + self.search_replace.setCurrentCell(row-1, 0) + + def sr_load_clicked(self): + files = choose_files(self, 'sr_saved_patterns', + _('Load Calibre Search-Replace definitions file'), + filters=[ + (_('Calibre Search-Replace definitions file'), ['csr']) + ], select_only_single_file=True) + if files: + from calibre.ebooks.conversion.cli import read_sr_patterns + try: + self.set_value(self.opt_search_replace, + read_sr_patterns(files[0])) + except Exception as e: + error_dialog(self, _('Failed to read'), + _('Failed to load patterns from %s, click Show details' + ' to learn more.')%files[0], det_msg=as_unicode(e), + show=True) + + def sr_save_clicked(self): + filename = choose_save_file(self, 'sr_saved_patterns', + _('Save Calibre Search-Replace definitions file'), + filters=[ + (_('Calibre Search-Replace definitions file'), ['csr']) + ]) + if filename: + with codecs.open(filename, 'w', 'utf-8') as f: + for search, replace in self.get_definitions(): + f.write(search + u'\n' + replace + u'\n\n') + + def sr_currentCellChanged(self, row, column, previousRow, previousColumn) : + if row >= 0: + self.sr_change.setEnabled(True) + self.sr_remove.setEnabled(True) + self.sr_search.set_regex(self.search_replace.item(row, 0).text()) + self.sr_replace.setText(self.search_replace.item(row, 1).text()) + else: + self.sr_change.setEnabled(False) + self.sr_remove.setEnabled(False) def break_cycles(self): Widget.break_cycles(self) @@ -49,29 +140,121 @@ class SearchAndReplaceWidget(Widget, Ui_Form): except: pass - d(self.opt_sr1_search) - d(self.opt_sr2_search) - d(self.opt_sr3_search) + d(self.sr_search) - self.opt_sr1_search.break_cycles() - self.opt_sr2_search.break_cycles() - self.opt_sr3_search.break_cycles() + self.sr_search.break_cycles() def update_doc(self, doc): - self.opt_sr1_search.set_doc(doc) - self.opt_sr2_search.set_doc(doc) - self.opt_sr3_search.set_doc(doc) + self.sr_search.set_doc(doc) def pre_commit_check(self): - for x in ('sr1_search', 'sr2_search', 'sr3_search'): - x = getattr(self, 'opt_'+x) + definitions = self.get_definitions() + + # Verify the search/replace in the edit widgets has been + # included to the list of search/replace definitions + + edit_search = self.sr_search.regex + + if edit_search: + edit_replace = unicode(self.sr_replace.text()) + found = False + for search, replace in definitions: + if search == edit_search and replace == edit_replace: + found = True + break + if not found and not question_dialog(self, + _('Unused Search & Replace definition'), + _('The search / replace definition being edited ' + ' has not been added to the list of definitions. ' + 'Do you wish to continue with the conversion ' + '(the definition will not be used)?')): + return False + + # Verify all search expressions are valid + for search, replace in definitions: try: - pat = unicode(x.regex) - re.compile(pat) + re.compile(search) except Exception as err: error_dialog(self, _('Invalid regular expression'), _('Invalid regular expression: %s')%err, show=True) return False + return True + # Options handling + + def connect_gui_obj_handler(self, g, slot): + if g is self.opt_search_replace: + self.search_replace.cellChanged.connect(slot) + + def get_value_handler(self, g): + if g is self.opt_search_replace: + return json.dumps(self.get_definitions()) + return None + + def get_definitions(self): + ans = [] + for row in xrange(0, self.search_replace.rowCount()): + colItems = [] + for col in xrange(0, self.search_replace.columnCount()): + colItems.append(unicode(self.search_replace.item(row, col).text())) + ans.append(colItems) + return ans + + def set_value_handler(self, g, val): + if g is not self.opt_search_replace: + return True + + try: + rowItems = json.loads(val) + if not isinstance(rowItems, list): + rowItems = [] + except: + rowItems = [] + + if len(rowItems) == 0: + self.search_replace.clearContents() + + self.search_replace.setRowCount(len(rowItems)) + + for row, colItems in enumerate(rowItems): + for col, cellValue in enumerate(colItems): + newItem = self.search_replace.itemPrototype().clone() + newItem.setText(cellValue) + self.search_replace.setItem(row,col, newItem) + return True + + def apply_recommendations(self, recs): + ''' + Handle the legacy sr* options that may have been previously saved. They + are applied only if the new search_replace option has not been set in + recs. + ''' + new_val = None + legacy = {} + for name, val in recs.items(): + if name == 'search_replace': + new_val = val + if name in getattr(recs, 'disabled_options', []): + self.search_replace.setDisabled(True) + elif name.startswith('sr'): + legacy[name] = val if val else '' + + if new_val is None and legacy: + for i in range(1, 4): + x = 'sr%d'%i + s, r = x+'_search', x+'_replace' + s, r = legacy.get(s, ''), legacy.get(r, '') + if s: + self.sr_add_row(s, r) + if new_val is not None: + self.set_value(self.opt_search_replace, new_val) + + def setup_help_handler(self, g, help): + if g is self.opt_search_replace: + self.search_replace._help = _( + 'The list of search/replace definitions that will be applied ' + 'to this conversion.') + self.setup_widget_help(self.search_replace) + return True diff --git a/src/calibre/gui2/convert/search_and_replace.ui b/src/calibre/gui2/convert/search_and_replace.ui index 03a74b5ebd..453de79053 100644 --- a/src/calibre/gui2/convert/search_and_replace.ui +++ b/src/calibre/gui2/convert/search_and_replace.ui @@ -6,7 +6,7 @@ 0 0 - 468 + 667 451 @@ -32,14 +32,14 @@ - First expression + Search/Replace Definition Edit QLayout::SetMinimumSize - + 0 @@ -60,12 +60,12 @@ &Replacement Text - opt_sr1_replace + sr_replace - + 0 @@ -78,117 +78,70 @@ - - - - 0 - 0 - + + + -1 - - Second Expression + + 0 - - - QLayout::SetMinimumSize - - - - - - 0 - 0 - - - - - - - - - 0 - 0 - - - - &Replacement Text - - - opt_sr2_replace - - - - - - - - 0 - 0 - - - - - - + + + + Add + + + + + + + false + + + Change + + + + + + + false + + + Remove + + + + + + + Load + + + + + + + Save + + + + - - - - 0 - 0 - + + + QAbstractItemView::SingleSelection - - Third expression + + QAbstractItemView::SelectRows - - - QLayout::SetMinimumSize - - - - - - 0 - 0 - - - - - - - - - 0 - 0 - - - - &Replacement Text - - - opt_sr3_replace - - - - - - - - 0 - 0 - - - - - - <p>Search and replace uses <i>regular expressions</i>. See the <a href="http://manual.calibre-ebook.com/regexp.html">regular expressions tutorial</a> to get started with regular expressions. Also clicking the wizard buttons below will allow you to test your regular expression against the current input document. + <p>Search and replace uses <i>regular expressions</i>. See the <a href="http://manual.calibre-ebook.com/regexp.html">regular expressions tutorial</a> to get started with regular expressions. Also clicking the wizard button below will allow you to test your regular expression against the current input document. When you are happy with an expression, click the Add button to add it to the list of expressions. true