mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion: Use the same regular expression engine that the Edit Book tool uses. The new engine has much better support for Unicode characters and character classes.
This commit is contained in:
parent
cb77fecb5c
commit
c6d46ceffa
@ -541,8 +541,9 @@ class HTMLPreProcessor(object):
|
||||
# Function for processing search and replace
|
||||
|
||||
def do_search_replace(search_pattern, replace_txt):
|
||||
from calibre.ebooks.conversion.search_replace import compile_regular_expression
|
||||
try:
|
||||
search_re = re.compile(search_pattern)
|
||||
search_re = compile_regular_expression(search_pattern)
|
||||
if not replace_txt:
|
||||
replace_txt = ''
|
||||
rules.insert(0, (search_re, replace_txt))
|
||||
@ -617,7 +618,7 @@ class HTMLPreProcessor(object):
|
||||
for rule in rules + end_rules:
|
||||
try:
|
||||
html = rule[0].sub(rule[1], html)
|
||||
except re.error as e:
|
||||
except Exception as e:
|
||||
if rule in user_sr_rules:
|
||||
self.log.error(
|
||||
'User supplied search & replace rule: %s -> %s '
|
||||
@ -678,5 +679,3 @@ class HTMLPreProcessor(object):
|
||||
html = html.replace(char, asciichar)
|
||||
|
||||
return html
|
||||
|
||||
|
||||
|
19
src/calibre/ebooks/conversion/search_replace.py
Normal file
19
src/calibre/ebooks/conversion/search_replace.py
Normal file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import regex
|
||||
|
||||
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.MULTILINE | regex.UNICODE
|
||||
|
||||
regex_cache = {}
|
||||
|
||||
|
||||
def compile_regular_expression(text, flags=REGEX_FLAGS):
    """Compile *text* with the ``regex`` engine, memoizing the result.

    Compiled patterns are stored in the module-level ``regex_cache``
    dictionary, keyed on ``(flags, text)``, so asking for the same
    expression with the same flags again returns the already compiled
    pattern object instead of compiling it a second time.

    :param text: The regular expression source.
    :param flags: Flags passed to ``regex.compile()``. Defaults to
        ``REGEX_FLAGS``.
    :return: The compiled pattern object.

    Whatever ``regex.compile()`` raises for an invalid expression propagates
    to the caller; nothing is cached in that case.
    """
    key = flags, text
    ans = regex_cache.get(key)
    if ans is None:
        ans = regex_cache[key] = regex.compile(text, flags=flags)
    # Return the cached object. Compiling again here would make the cache
    # write-only and repeat the compilation work on every call.
    return ans
|
@ -4,7 +4,7 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, os
|
||||
import os
|
||||
|
||||
from PyQt5.Qt import (QDialog, QWidget, QDialogButtonBox,
|
||||
QBrush, QTextCursor, QTextEdit, QByteArray, Qt, pyqtSignal)
|
||||
@ -15,6 +15,7 @@ from calibre.gui2 import error_dialog, choose_files, gprefs
|
||||
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
||||
from calibre.constants import iswindows
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
from calibre.ebooks.conversion.search_replace import compile_regular_expression
|
||||
from calibre.ptempfile import TemporaryFile
|
||||
|
||||
|
||||
@ -60,7 +61,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
|
||||
regex = unicode(self.regex.text())
|
||||
if regex:
|
||||
try:
|
||||
re.compile(regex)
|
||||
compile_regular_expression(regex)
|
||||
self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgba(0,255,0,20%); }')
|
||||
return True
|
||||
except:
|
||||
@ -87,7 +88,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
|
||||
extsel.cursor = cursor
|
||||
extsel.format.setBackground(QBrush(Qt.yellow))
|
||||
try:
|
||||
for match in re.finditer(regex, text):
|
||||
for match in compile_regular_expression(regex).finditer(text):
|
||||
es = QTextEdit.ExtraSelection(extsel)
|
||||
es.cursor.setPosition(match.start(), QTextCursor.MoveAnchor)
|
||||
es.cursor.setPosition(match.end(), QTextCursor.KeepAnchor)
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>, 2012 Eli Algranti <idea00@hotmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, codecs, json
|
||||
import codecs, json
|
||||
|
||||
from PyQt5.Qt import Qt, QTableWidgetItem
|
||||
|
||||
@ -14,6 +14,7 @@ from calibre.gui2 import (error_dialog, question_dialog, choose_files,
|
||||
choose_save_file)
|
||||
from calibre import as_unicode
|
||||
from calibre.utils.localization import localize_user_manual_link
|
||||
from calibre.ebooks.conversion.search_replace import compile_regular_expression
|
||||
|
||||
|
||||
class SearchAndReplaceWidget(Widget, Ui_Form):
|
||||
@ -209,7 +210,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
|
||||
# Verify all search expressions are valid
|
||||
for search, replace in definitions:
|
||||
try:
|
||||
re.compile(search)
|
||||
compile_regular_expression(search)
|
||||
except Exception as err:
|
||||
error_dialog(self, _('Invalid regular expression'),
|
||||
_('Invalid regular expression: %s')%err, show=True)
|
||||
@ -300,4 +301,3 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
|
||||
'to this conversion.')
|
||||
self.setup_widget_help(self.search_replace)
|
||||
return True
|
||||
|
||||
|
@ -33,10 +33,9 @@ from calibre.gui2.tweak_book.function_replace import (
|
||||
from calibre.gui2.tweak_book.widgets import BusyCursor
|
||||
from calibre.gui2.widgets2 import FlowLayout, HistoryComboBox
|
||||
from calibre.utils.icu import primary_contains
|
||||
from calibre.ebooks.conversion.search_replace import REGEX_FLAGS, compile_regular_expression
|
||||
|
||||
|
||||
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.MULTILINE | regex.UNICODE
|
||||
|
||||
# The search panel {{{
|
||||
|
||||
|
||||
@ -454,9 +453,6 @@ class SearchWidget(QWidget):
|
||||
# }}}
|
||||
|
||||
|
||||
regex_cache = {}
|
||||
|
||||
|
||||
class SearchPanel(QWidget): # {{{
|
||||
|
||||
search_triggered = pyqtSignal(object)
|
||||
@ -1295,10 +1291,8 @@ def get_search_regex(state):
|
||||
flags |= regex.DOTALL
|
||||
if state['direction'] == 'up':
|
||||
flags |= regex.REVERSE
|
||||
ans = regex_cache.get((flags, raw), None)
|
||||
if ans is None:
|
||||
try:
|
||||
ans = regex_cache[(flags, raw)] = regex.compile(raw, flags=flags)
|
||||
ans = compile_regular_expression(raw, flags=flags)
|
||||
except regex.error as e:
|
||||
raise InvalidRegex(raw, e)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user