mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion: Use the same regular expression engine as is used by the Edit Book tool. The new engine has much better support for unicode characters/character classes.
This commit is contained in:
parent
cb77fecb5c
commit
c6d46ceffa
@ -541,8 +541,9 @@ class HTMLPreProcessor(object):
|
|||||||
# Function for processing search and replace
|
# Function for processing search and replace
|
||||||
|
|
||||||
def do_search_replace(search_pattern, replace_txt):
|
def do_search_replace(search_pattern, replace_txt):
|
||||||
|
from calibre.ebooks.conversion.search_replace import compile_regular_expression
|
||||||
try:
|
try:
|
||||||
search_re = re.compile(search_pattern)
|
search_re = compile_regular_expression(search_pattern)
|
||||||
if not replace_txt:
|
if not replace_txt:
|
||||||
replace_txt = ''
|
replace_txt = ''
|
||||||
rules.insert(0, (search_re, replace_txt))
|
rules.insert(0, (search_re, replace_txt))
|
||||||
@ -617,7 +618,7 @@ class HTMLPreProcessor(object):
|
|||||||
for rule in rules + end_rules:
|
for rule in rules + end_rules:
|
||||||
try:
|
try:
|
||||||
html = rule[0].sub(rule[1], html)
|
html = rule[0].sub(rule[1], html)
|
||||||
except re.error as e:
|
except Exception as e:
|
||||||
if rule in user_sr_rules:
|
if rule in user_sr_rules:
|
||||||
self.log.error(
|
self.log.error(
|
||||||
'User supplied search & replace rule: %s -> %s '
|
'User supplied search & replace rule: %s -> %s '
|
||||||
@ -678,5 +679,3 @@ class HTMLPreProcessor(object):
|
|||||||
html = html.replace(char, asciichar)
|
html = html.replace(char, asciichar)
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
|
||||||
|
19
src/calibre/ebooks/conversion/search_replace.py
Normal file
19
src/calibre/ebooks/conversion/search_replace.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
import regex
|
||||||
|
|
||||||
|
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.MULTILINE | regex.UNICODE
|
||||||
|
|
||||||
|
regex_cache = {}
|
||||||
|
|
||||||
|
|
||||||
|
def compile_regular_expression(text, flags=REGEX_FLAGS):
    """Compile ``text`` with the ``regex`` module, memoizing the result.

    Compiled patterns are stored in the module-level ``regex_cache`` keyed by
    ``(flags, text)``, so repeated calls with the same arguments return the
    same pattern object instead of compiling it again.

    :param text: The regular expression source.
    :param flags: ``regex`` flags to compile with; defaults to ``REGEX_FLAGS``.
    :return: The compiled pattern object.

    Any exception raised by ``regex.compile`` for an invalid expression
    propagates to the caller; nothing is cached in that case.
    """
    key = flags, text
    ans = regex_cache.get(key)
    if ans is None:
        ans = regex_cache[key] = regex.compile(text, flags=flags)
    # Return the cached object: calling regex.compile() again here would
    # make the cache above pointless.
    return ans
|
@ -4,7 +4,7 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, os
|
import os
|
||||||
|
|
||||||
from PyQt5.Qt import (QDialog, QWidget, QDialogButtonBox,
|
from PyQt5.Qt import (QDialog, QWidget, QDialogButtonBox,
|
||||||
QBrush, QTextCursor, QTextEdit, QByteArray, Qt, pyqtSignal)
|
QBrush, QTextCursor, QTextEdit, QByteArray, Qt, pyqtSignal)
|
||||||
@ -15,6 +15,7 @@ from calibre.gui2 import error_dialog, choose_files, gprefs
|
|||||||
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
||||||
from calibre.constants import iswindows
|
from calibre.constants import iswindows
|
||||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||||
|
from calibre.ebooks.conversion.search_replace import compile_regular_expression
|
||||||
from calibre.ptempfile import TemporaryFile
|
from calibre.ptempfile import TemporaryFile
|
||||||
|
|
||||||
|
|
||||||
@ -60,7 +61,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
|
|||||||
regex = unicode(self.regex.text())
|
regex = unicode(self.regex.text())
|
||||||
if regex:
|
if regex:
|
||||||
try:
|
try:
|
||||||
re.compile(regex)
|
compile_regular_expression(regex)
|
||||||
self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgba(0,255,0,20%); }')
|
self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgba(0,255,0,20%); }')
|
||||||
return True
|
return True
|
||||||
except:
|
except:
|
||||||
@ -87,7 +88,7 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
|
|||||||
extsel.cursor = cursor
|
extsel.cursor = cursor
|
||||||
extsel.format.setBackground(QBrush(Qt.yellow))
|
extsel.format.setBackground(QBrush(Qt.yellow))
|
||||||
try:
|
try:
|
||||||
for match in re.finditer(regex, text):
|
for match in compile_regular_expression(regex).finditer(text):
|
||||||
es = QTextEdit.ExtraSelection(extsel)
|
es = QTextEdit.ExtraSelection(extsel)
|
||||||
es.cursor.setPosition(match.start(), QTextCursor.MoveAnchor)
|
es.cursor.setPosition(match.start(), QTextCursor.MoveAnchor)
|
||||||
es.cursor.setPosition(match.end(), QTextCursor.KeepAnchor)
|
es.cursor.setPosition(match.end(), QTextCursor.KeepAnchor)
|
||||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>, 2012 Eli Algranti <idea00@hotmail.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>, 2012 Eli Algranti <idea00@hotmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, codecs, json
|
import codecs, json
|
||||||
|
|
||||||
from PyQt5.Qt import Qt, QTableWidgetItem
|
from PyQt5.Qt import Qt, QTableWidgetItem
|
||||||
|
|
||||||
@ -14,6 +14,7 @@ from calibre.gui2 import (error_dialog, question_dialog, choose_files,
|
|||||||
choose_save_file)
|
choose_save_file)
|
||||||
from calibre import as_unicode
|
from calibre import as_unicode
|
||||||
from calibre.utils.localization import localize_user_manual_link
|
from calibre.utils.localization import localize_user_manual_link
|
||||||
|
from calibre.ebooks.conversion.search_replace import compile_regular_expression
|
||||||
|
|
||||||
|
|
||||||
class SearchAndReplaceWidget(Widget, Ui_Form):
|
class SearchAndReplaceWidget(Widget, Ui_Form):
|
||||||
@ -209,7 +210,7 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
|
|||||||
# Verify all search expressions are valid
|
# Verify all search expressions are valid
|
||||||
for search, replace in definitions:
|
for search, replace in definitions:
|
||||||
try:
|
try:
|
||||||
re.compile(search)
|
compile_regular_expression(search)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
error_dialog(self, _('Invalid regular expression'),
|
error_dialog(self, _('Invalid regular expression'),
|
||||||
_('Invalid regular expression: %s')%err, show=True)
|
_('Invalid regular expression: %s')%err, show=True)
|
||||||
@ -300,4 +301,3 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
|
|||||||
'to this conversion.')
|
'to this conversion.')
|
||||||
self.setup_widget_help(self.search_replace)
|
self.setup_widget_help(self.search_replace)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -33,10 +33,9 @@ from calibre.gui2.tweak_book.function_replace import (
|
|||||||
from calibre.gui2.tweak_book.widgets import BusyCursor
|
from calibre.gui2.tweak_book.widgets import BusyCursor
|
||||||
from calibre.gui2.widgets2 import FlowLayout, HistoryComboBox
|
from calibre.gui2.widgets2 import FlowLayout, HistoryComboBox
|
||||||
from calibre.utils.icu import primary_contains
|
from calibre.utils.icu import primary_contains
|
||||||
|
from calibre.ebooks.conversion.search_replace import REGEX_FLAGS, compile_regular_expression
|
||||||
|
|
||||||
|
|
||||||
REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.MULTILINE | regex.UNICODE
|
|
||||||
|
|
||||||
# The search panel {{{
|
# The search panel {{{
|
||||||
|
|
||||||
|
|
||||||
@ -454,9 +453,6 @@ class SearchWidget(QWidget):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
regex_cache = {}
|
|
||||||
|
|
||||||
|
|
||||||
class SearchPanel(QWidget): # {{{
|
class SearchPanel(QWidget): # {{{
|
||||||
|
|
||||||
search_triggered = pyqtSignal(object)
|
search_triggered = pyqtSignal(object)
|
||||||
@ -1295,10 +1291,8 @@ def get_search_regex(state):
|
|||||||
flags |= regex.DOTALL
|
flags |= regex.DOTALL
|
||||||
if state['direction'] == 'up':
|
if state['direction'] == 'up':
|
||||||
flags |= regex.REVERSE
|
flags |= regex.REVERSE
|
||||||
ans = regex_cache.get((flags, raw), None)
|
|
||||||
if ans is None:
|
|
||||||
try:
|
try:
|
||||||
ans = regex_cache[(flags, raw)] = regex.compile(raw, flags=flags)
|
ans = compile_regular_expression(raw, flags=flags)
|
||||||
except regex.error as e:
|
except regex.error as e:
|
||||||
raise InvalidRegex(raw, e)
|
raise InvalidRegex(raw, e)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user