Conversion: Use the same regular expression engine as is used by the Edit Book tool. The new engine has much better support for Unicode characters/character classes.

This commit is contained in:
Kovid Goyal 2017-04-16 17:43:17 +05:30
parent cb77fecb5c
commit c6d46ceffa
5 changed files with 34 additions and 21 deletions

View File

@ -541,8 +541,9 @@ class HTMLPreProcessor(object):
# Function for processing search and replace
def do_search_replace(search_pattern, replace_txt):
from calibre.ebooks.conversion.search_replace import compile_regular_expression
try:
search_re = re.compile(search_pattern)
search_re = compile_regular_expression(search_pattern)
if not replace_txt:
replace_txt = ''
rules.insert(0, (search_re, replace_txt))
@ -617,7 +618,7 @@ class HTMLPreProcessor(object):
for rule in rules + end_rules:
try:
html = rule[0].sub(rule[1], html)
except re.error as e:
except Exception as e:
if rule in user_sr_rules:
self.log.error(
'User supplied search & replace rule: %s -> %s '
@ -678,5 +679,3 @@ class HTMLPreProcessor(object):
html = html.replace(char, asciichar)
return html

View File

@ -0,0 +1,19 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import regex
# Default flags handed to regex.compile() by compile_regular_expression()
# when the caller does not supply its own.
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.MULTILINE | regex.UNICODE
# Compiled patterns keyed by (flags, text); filled in by
# compile_regular_expression().
regex_cache = {}
def compile_regular_expression(text, flags=REGEX_FLAGS):
    """Compile ``text`` with the ``regex`` module, memoizing the result.

    Compiled patterns are stored in the module-level ``regex_cache`` keyed by
    ``(flags, text)``, so repeated calls with the same arguments return the
    same pattern object instead of compiling again.

    :param text: The regular expression source to compile.
    :param flags: Flags passed to ``regex.compile``; defaults to
        ``REGEX_FLAGS``.
    :return: The compiled pattern object.

    Any error raised by ``regex.compile`` for an invalid expression
    propagates to the caller, and nothing is cached in that case.
    """
    key = flags, text
    ans = regex_cache.get(key)
    if ans is None:
        ans = regex_cache[key] = regex.compile(text, flags=flags)
    # Hand back the cached object; compiling again here would make the
    # cache above pointless.
    return ans

View File

@ -4,7 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re, os
import os
from PyQt5.Qt import (QDialog, QWidget, QDialogButtonBox,
QBrush, QTextCursor, QTextEdit, QByteArray, Qt, pyqtSignal)
@ -15,6 +15,7 @@ from calibre.gui2 import error_dialog, choose_files, gprefs
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.constants import iswindows
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
from calibre.ebooks.conversion.search_replace import compile_regular_expression
from calibre.ptempfile import TemporaryFile
@ -60,7 +61,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
regex = unicode(self.regex.text())
if regex:
try:
re.compile(regex)
compile_regular_expression(regex)
self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgba(0,255,0,20%); }')
return True
except:
@ -87,7 +88,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
extsel.cursor = cursor
extsel.format.setBackground(QBrush(Qt.yellow))
try:
for match in re.finditer(regex, text):
for match in compile_regular_expression(regex).finditer(text):
es = QTextEdit.ExtraSelection(extsel)
es.cursor.setPosition(match.start(), QTextCursor.MoveAnchor)
es.cursor.setPosition(match.end(), QTextCursor.KeepAnchor)

View File

@ -4,7 +4,7 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>, 2012 Eli Algranti <idea00@hotmail.com>'
__docformat__ = 'restructuredtext en'
import re, codecs, json
import codecs, json
from PyQt5.Qt import Qt, QTableWidgetItem
@ -14,6 +14,7 @@ from calibre.gui2 import (error_dialog, question_dialog, choose_files,
choose_save_file)
from calibre import as_unicode
from calibre.utils.localization import localize_user_manual_link
from calibre.ebooks.conversion.search_replace import compile_regular_expression
class SearchAndReplaceWidget(Widget, Ui_Form):
@ -209,7 +210,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
# Verify all search expressions are valid
for search, replace in definitions:
try:
re.compile(search)
compile_regular_expression(search)
except Exception as err:
error_dialog(self, _('Invalid regular expression'),
_('Invalid regular expression: %s')%err, show=True)
@ -300,4 +301,3 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
'to this conversion.')
self.setup_widget_help(self.search_replace)
return True

View File

@ -33,10 +33,9 @@ from calibre.gui2.tweak_book.function_replace import (
from calibre.gui2.tweak_book.widgets import BusyCursor
from calibre.gui2.widgets2 import FlowLayout, HistoryComboBox
from calibre.utils.icu import primary_contains
from calibre.ebooks.conversion.search_replace import REGEX_FLAGS, compile_regular_expression
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.MULTILINE | regex.UNICODE
# The search panel {{{
@ -454,9 +453,6 @@ class SearchWidget(QWidget):
# }}}
regex_cache = {}
class SearchPanel(QWidget): # {{{
search_triggered = pyqtSignal(object)
@ -1295,10 +1291,8 @@ def get_search_regex(state):
flags |= regex.DOTALL
if state['direction'] == 'up':
flags |= regex.REVERSE
ans = regex_cache.get((flags, raw), None)
if ans is None:
try:
ans = regex_cache[(flags, raw)] = regex.compile(raw, flags=flags)
ans = compile_regular_expression(raw, flags=flags)
except regex.error as e:
raise InvalidRegex(raw, e)