mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Regex builder: Convert entities so people don't use them in building their regexes. Fixes #5549 (Not removing header/footer)
This commit is contained in:
parent
900ff7204b
commit
7213c1e4b6
@@ -14,6 +14,7 @@ from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
|
|||||||
from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
|
from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
|
||||||
from calibre.gui2 import error_dialog, choose_files
|
from calibre.gui2 import error_dialog, choose_files
|
||||||
from calibre.ebooks.oeb.iterator import EbookIterator
|
from calibre.ebooks.oeb.iterator import EbookIterator
|
||||||
|
from calibre.ebooks.conversion.preprocess import convert_entities
|
||||||
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
|
||||||
|
|
||||||
class RegexBuilder(QDialog, Ui_RegexBuilder):
|
class RegexBuilder(QDialog, Ui_RegexBuilder):
|
||||||
@@ -87,8 +88,10 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
|
|||||||
self.iterator = EbookIterator(pathtoebook)
|
self.iterator = EbookIterator(pathtoebook)
|
||||||
self.iterator.__enter__(only_input_plugin=True)
|
self.iterator.__enter__(only_input_plugin=True)
|
||||||
text = [u'']
|
text = [u'']
|
||||||
|
ent_pat = re.compile(r'&(\S+?);')
|
||||||
for path in self.iterator.spine:
|
for path in self.iterator.spine:
|
||||||
html = open(path, 'rb').read().decode('utf-8', 'replace')
|
html = open(path, 'rb').read().decode('utf-8', 'replace')
|
||||||
|
html = ent_pat.sub(convert_entities, html)
|
||||||
text.append(html)
|
text.append(html)
|
||||||
self.preview.setPlainText('\n---\n'.join(text))
|
self.preview.setPlainText('\n---\n'.join(text))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user