GUI: Regex builder for removing header and footer.

This commit is contained in:
John Schember 2009-09-05 16:48:32 -04:00
parent 1e9d9dc242
commit ec48f4029b
7 changed files with 247 additions and 33 deletions

View File

@ -77,6 +77,7 @@ class Widget(QWidget):
def get_value(self, g):
from calibre.gui2.convert.xpath_wizard import XPathEdit
from calibre.gui2.convert.regex_builder import RegexEdit
ret = self.get_value_handler(g)
if ret != 'this is a dummy return value, xcswx1avcx4x':
return ret
@ -94,12 +95,15 @@ class Widget(QWidget):
return bool(g.isChecked())
elif isinstance(g, XPathEdit):
return g.xpath if g.xpath else None
elif isinstance(g, RegexEdit):
return g.regex if g.regex else None
else:
raise Exception('Can\'t get value from %s'%type(g))
def set_value(self, g, val):
from calibre.gui2.convert.xpath_wizard import XPathEdit
from calibre.gui2.convert.regex_builder import RegexEdit
if self.set_value_handler(g, val):
return
if isinstance(g, (QSpinBox, QDoubleSpinBox)):
@ -116,7 +120,7 @@ class Widget(QWidget):
g.setCurrentIndex(idx)
elif isinstance(g, QCheckBox):
g.setCheckState(Qt.Checked if bool(val) else Qt.Unchecked)
elif isinstance(g, XPathEdit):
elif isinstance(g, (XPathEdit, RegexEdit)):
g.edit.setText(val if val else '')
else:
raise Exception('Can\'t set value %s in %s'%(repr(val),

View File

@ -0,0 +1,138 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re
from PyQt4.QtCore import SIGNAL, Qt
from PyQt4.QtGui import QDialog, QWidget, QDialogButtonBox, QFileDialog, \
QBrush, QSyntaxHighlighter, QTextCharFormat
from calibre.gui2.convert.regex_builder_ui import Ui_RegexBuilder
from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
from calibre.gui2 import qstring_to_unicode
from calibre.gui2 import error_dialog
from calibre.ebooks.oeb.iterator import EbookIterator
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
class RegexHighlighter(QSyntaxHighlighter):
    '''
    Highlight every match of a regular expression in the attached document.

    Matches are drawn with a yellow background. After each block is
    processed the signal ``regex_valid(PyQt_PyObject)`` is emitted with a
    bool: ``False`` when the current expression could not be compiled,
    ``True`` otherwise (including when no expression is set).
    '''

    def __init__(self, *args):
        '''Forward ``*args`` to QSyntaxHighlighter and start with no regex.'''
        QSyntaxHighlighter.__init__(self, *args)
        self.regex = u''

    def update_regex(self, regex):
        '''
        Store ``regex`` (converted with qstring_to_unicode) and re-run the
        highlighting over the whole document.
        '''
        self.regex = qstring_to_unicode(regex)
        self.rehighlight()

    def highlightBlock(self, text):
        '''
        Apply the highlight format to each regex match found in ``text``
        and emit ``regex_valid`` with the outcome of compiling the regex.
        '''
        valid_regex = True
        text = qstring_to_unicode(text)

        if self.regex:
            try:
                # Only the compile step is guarded: the pattern is typed by
                # the user and is expected to be invalid while being edited.
                pattern = re.compile(self.regex)
            except Exception:
                # Exception rather than a bare except so KeyboardInterrupt
                # and SystemExit still propagate. Broader than re.error on
                # purpose: this runs on every keystroke and must not let a
                # pathological pattern escape into the Qt callback.
                valid_regex = False
            else:
                match_format = QTextCharFormat()
                match_format.setBackground(QBrush(Qt.yellow))
                for mo in pattern.finditer(text):
                    self.setFormat(mo.start(), mo.end() - mo.start(),
                            match_format)

        self.emit(SIGNAL('regex_valid(PyQt_PyObject)'), valid_regex)
class RegexBuilder(QDialog, Ui_RegexBuilder):
    '''
    Dialog for writing a regular expression against a preview of a book.

    The text of the book is shown in ``self.preview`` and every match of the
    expression typed into ``self.regex`` is highlighted by a
    RegexHighlighter. The line edit background signals whether the
    expression is currently valid.
    '''

    def __init__(self, db, book_id, regex, *args):
        '''
        :param db: Database used to look up the formats of ``book_id``. When
                   falsy an Open button is added so a file can be chosen.
        :param book_id: Id of the book to preview. When falsy an Open button
                        is added instead of loading a book.
        :param regex: Initial expression placed in the line edit.
        :param args: Forwarded to QDialog.
        '''
        QDialog.__init__(self, *args)
        self.setupUi(self)

        self.regex.setText(regex)
        self.regex_valid(True)

        self.highlighter = RegexHighlighter(self.preview.document())

        if not db or not book_id:
            self.button_box.addButton(QDialogButtonBox.Open)
        else:
            self.select_format(db, book_id)

        self.connect(self.button_box, SIGNAL('clicked(QAbstractButton*)'),
                self.button_clicked)
        self.connect(self.regex, SIGNAL('textChanged(QString)'),
                self.highlighter.update_regex)
        self.connect(self.highlighter, SIGNAL('regex_valid(PyQt_PyObject)'),
                self.regex_valid)

        # The initial text was set before textChanged was connected, so the
        # highlighter has not seen it yet. Hand it over explicitly so an
        # existing expression is highlighted as soon as the dialog opens.
        self.highlighter.update_regex(self.regex.text())

    def regex_valid(self, valid):
        '''Colour the regex line edit white when ``valid``, red otherwise.'''
        if valid:
            self.regex.setStyleSheet('QLineEdit { color: black; background-color: white; }')
        else:
            self.regex.setStyleSheet('QLineEdit { color: black; background-color: rgb(255,0,0,20%); }')

    def select_format(self, db, book_id):
        '''
        Pick one of the formats of ``book_id`` and load it into the preview.

        A single available format is used directly; with several the user
        is asked to choose. If no format ends up selected an error is shown
        and the dialog is rejected.
        '''
        fmt = None
        # Guard against a falsy return (e.g. None) before calling .upper()
        # and drop empty entries produced by splitting an empty string.
        available = db.formats(book_id, index_is_id=True) or ''
        formats = [x for x in available.upper().split(',') if x]

        if len(formats) == 1:
            fmt = formats[0]
        elif len(formats) > 1:
            d = ChooseFormatDialog(self, _('Choose the format to view'), formats)
            d.exec_()
            if d.result() == QDialog.Accepted:
                fmt = d.format()

        if not fmt:
            # NOTE(review): assumes error_dialog() returns an unshown dialog
            # that has to be executed to become visible -- confirm against
            # calibre.gui2.error_dialog.
            error_dialog(self, _('No formats available'), _('Cannot build regex using the GUI builder without a book.')).exec_()
            # reject() must be called on this instance; calling it on the
            # QDialog class without an instance raises TypeError.
            self.reject()
        else:
            self.open_book(db.format_abspath(book_id, fmt, index_is_id=True))

    def open_book(self, pathtoebook):
        '''
        Read every spine item of the book at ``pathtoebook`` and show the
        decoded text, joined by blank lines, in the preview.
        '''
        self.iterator = EbookIterator(pathtoebook)
        self.iterator.__enter__()
        try:
            text = [u'']
            for path in self.iterator.spine:
                stream = open(path, 'rb')
                try:
                    raw = stream.read()
                finally:
                    # Close each spine file instead of leaving it to the
                    # garbage collector.
                    stream.close()
                text.append(raw.decode(path.encoding, 'replace'))
        finally:
            # Pair __enter__ with __exit__ so whatever the iterator acquired
            # is released once the text has been copied out.
            self.iterator.__exit__(None, None, None)
        self.preview.setPlainText('\n\n'.join(text))

    def button_clicked(self, button):
        '''
        Handle clicks in the button box: Open lets the user pick a book
        file to preview, Ok accepts the dialog.
        '''
        if button == self.button_box.button(QDialogButtonBox.Open):
            import os
            # Start in the real home directory; a literal '~' is not a path
            # and must not be passed through the translation function.
            name = qstring_to_unicode(QFileDialog.getOpenFileName(self,
                _('Open book'), os.path.expanduser('~')))
            if name:
                self.open_book(name)
        elif button == self.button_box.button(QDialogButtonBox.Ok):
            self.accept()
class RegexEdit(QWidget, Ui_Edit):
    '''
    Line edit for a regular expression with a button that opens the
    RegexBuilder dialog.

    ``db`` and ``book_id`` are handed to the builder; both default to None
    and are set with :meth:`set_db` and :meth:`set_book_id`.
    '''

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        self.setupUi(self)

        self.db = None
        self.book_id = None

        self.connect(self.button, SIGNAL('clicked()'), self.builder)

    def builder(self):
        '''Run the builder dialog and adopt its regex if it was accepted.'''
        dialog = RegexBuilder(self.db, self.book_id, self.edit.text(), self)
        if dialog.exec_() != dialog.Accepted:
            return
        self.edit.setText(dialog.regex.text())

    def set_msg(self, msg):
        '''Set the text of the label shown with the edit.'''
        self.msg.setText(msg)

    def set_book_id(self, book_id):
        '''Remember the id of the book passed to the builder.'''
        self.book_id = book_id

    def set_db(self, db):
        '''Remember the database passed to the builder.'''
        self.db = db

    @property
    def text(self):
        '''Current content of the line edit as a unicode string.'''
        current = self.edit.text()
        return unicode(current)

    @property
    def regex(self):
        '''Alias for :attr:`text`.'''
        return self.text

    def check(self):
        '''Always reports success; no validation is performed here.'''
        return True

View File

@ -0,0 +1,80 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Qt Designer form for the RegexBuilder dialog.

Grid layout:
row 0: "Regex:" label and the QLineEdit named "regex"
row 1: "Preview" group box holding the read-only QPlainTextEdit "preview"
row 2: button box "button_box" with Cancel and Ok

Only the rejected() signal of the button box is connected in this file;
no accepted() connection is declared here.
-->
<ui version="4.0">
<class>RegexBuilder</class>
<widget class="QDialog" name="RegexBuilder">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>662</width>
<height>505</height>
</rect>
</property>
<property name="windowTitle">
<string>Regex Builder</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="1" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Preview</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QPlainTextEdit" name="preview">
<property name="undoRedoEnabled">
<bool>false</bool>
</property>
<property name="readOnly">
<bool>true</bool>
</property>
<property name="textInteractionFlags">
<set>Qt::TextSelectableByMouse</set>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item row="2" column="1">
<widget class="QDialogButtonBox" name="button_box">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="standardButtons">
<set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Regex:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QLineEdit" name="regex"/>
</item>
</layout>
</widget>
<resources/>
<connections>
<!-- Cancel closes the dialog through QDialog's reject() slot. -->
<connection>
<sender>button_box</sender>
<signal>rejected()</signal>
<receiver>RegexBuilder</receiver>
<slot>reject()</slot>
<hints>
<hint type="sourcelabel">
<x>316</x>
<y>260</y>
</hint>
<hint type="destinationlabel">
<x>286</x>
<y>274</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -34,11 +34,18 @@ class StructureDetectionWidget(Widget, Ui_Form):
self.opt_chapter.set_msg(_('Detect chapters at (XPath expression):'))
self.opt_page_breaks_before.set_msg(_('Insert page breaks before '
'(XPath expression):'))
self.opt_header_regex.set_msg(_('Header regular expression:'))
self.opt_header_regex.set_book_id(book_id)
self.opt_header_regex.set_db(db)
self.opt_footer_regex.set_msg(_('Footer regular expression:'))
self.opt_footer_regex.set_book_id(book_id)
self.opt_footer_regex.set_db(db)
def pre_commit_check(self):
for x in ('header_regex', 'footer_regex'):
x = getattr(self, 'opt_'+x)
try:
pat = unicode(x.text())
pat = unicode(x.regex)
re.compile(pat)
except Exception, err:
error_dialog(self, _('Invalid regular expression'),

View File

@ -28,8 +28,7 @@
</widget>
</item>
<item row="1" column="1">
<widget class="QComboBox" name="opt_chapter_mark">
</widget>
<widget class="QComboBox" name="opt_chapter_mark"/>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="opt_remove_first_image">
@ -45,27 +44,17 @@
</property>
</widget>
</item>
<item row="8" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>&amp;Footer regular expression:</string>
</property>
<property name="buddy">
<cstring>opt_footer_regex</cstring>
</property>
</widget>
</item>
<item row="10" column="0" colspan="2">
<item row="8" column="0" colspan="2">
<widget class="QCheckBox" name="opt_preprocess_html">
<property name="text">
<string>&amp;Preprocess input file to possibly improve structure detection</string>
</property>
</widget>
</item>
<item row="11" column="0" colspan="2">
<item row="9" column="0" colspan="2">
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
</item>
<item row="12" column="0">
<item row="10" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -78,17 +67,7 @@
</property>
</spacer>
</item>
<item row="5" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>&amp;Header regular expression:</string>
</property>
<property name="buddy">
<cstring>opt_header_regex</cstring>
</property>
</widget>
</item>
<item row="7" column="0">
<item row="6" column="0">
<widget class="QCheckBox" name="opt_remove_footer">
<property name="text">
<string>Remove F&amp;ooter</string>
@ -102,11 +81,11 @@
</property>
</widget>
</item>
<item row="9" column="0" colspan="2">
<widget class="QLineEdit" name="opt_footer_regex"/>
<item row="5" column="0" colspan="2">
<widget class="RegexEdit" name="opt_header_regex" native="true"/>
</item>
<item row="6" column="0" colspan="2">
<widget class="QLineEdit" name="opt_header_regex"/>
<item row="7" column="0" colspan="2">
<widget class="RegexEdit" name="opt_footer_regex" native="true"/>
</item>
</layout>
</widget>
@ -117,6 +96,12 @@
<header>convert/xpath_wizard.h</header>
<container>1</container>
</customwidget>
<customwidget>
<class>RegexEdit</class>
<extends>QWidget</extends>
<header>regex_builder.h</header>
<container>1</container>
</customwidget>
</customwidgets>
<resources/>
<connections/>

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog, QWidget, SIGNAL, Qt, QDialogButtonBox, QVBoxLayout
from calibre.gui2.convert.xpath_wizard_ui import Ui_Form
from calibre.gui2.convert.xpath_edit_ui import Ui_Form as Ui_Edit
from calibre.gui2.convert.xexp_edit_ui import Ui_Form as Ui_Edit
class WizardWidget(QWidget, Ui_Form):