From 7213c1e4b61cb13ca40d01040461a08915be7573 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 1 Jun 2010 18:40:12 -0600
Subject: [PATCH] Regex builder: Convert entities so people don't use them in building their regexes. Fixes #5549 (Not removing header/footer)

---
 src/calibre/gui2/convert/regex_builder.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/gui2/convert/regex_builder.py b/src/calibre/gui2/convert/regex_builder.py
index 58e1d1ae45..6fa0fa5fe4 100644
--- a/src/calibre/gui2/convert/regex_builder.py
+++ b/src/calibre/gui2/convert/regex_builder.py
@@ -14,6 +14,7 @@ from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
 from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
 from calibre.gui2 import error_dialog, choose_files
 from calibre.ebooks.oeb.iterator import EbookIterator
+from calibre.ebooks.conversion.preprocess import convert_entities
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 
 class RegexBuilder(QDialog, Ui_RegexBuilder):
@@ -87,8 +88,10 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
         self.iterator = EbookIterator(pathtoebook)
         self.iterator.__enter__(only_input_plugin=True)
         text = [u'']
+        ent_pat = re.compile(r'&(\S+?);')
         for path in self.iterator.spine:
             html = open(path, 'rb').read().decode('utf-8', 'replace')
+            html = ent_pat.sub(convert_entities, html)
             text.append(html)
         self.preview.setPlainText('\n---\n'.join(text))
 