sync heuristics GUI

This commit is contained in:
ldolse 2011-01-15 10:48:36 +08:00
commit e99ab61dd0
11 changed files with 391 additions and 153 deletions

View File

@ -131,13 +131,19 @@ def add_pipeline_options(parser, plumber):
),
'HEURISTICS' : (
_('Modify the document text and strucutre using common patterns.'),
_('Modify the document text and structure using common patterns.'),
[
'enable_heuristics', 'markup_chapter_headings',
'italicize_common_cases', 'fix_indents',
'html_unwrap_factor', 'unwrap_lines',
'delete_blank_paragraphs', 'format_scene_breaks',
'dehyphenate',
]
),
'SEARCH AND REPLACE' : (
_('Modify the document text and structure using user defined patterns.'),
[
'sr1_search', 'sr1_replace',
'sr2_search', 'sr2_replace',
'sr3_search', 'sr3_replace',
@ -177,8 +183,8 @@ def add_pipeline_options(parser, plumber):
}
group_order = ['', 'LOOK AND FEEL', 'HEURISTICS',
'STRUCTURE DETECTION', 'TABLE OF CONTENTS',
'METADATA', 'DEBUG']
'SEARCH AND REPLACE' 'STRUCTURE DETECTION',
'TABLE OF CONTENTS', 'METADATA', 'DEBUG']
for group in group_order:
desc, options = groups[group]

View File

@ -500,7 +500,7 @@ OptionRecommendation(name='italicize_common_cases',
'italics and italicize them.')),
OptionRecommendation(name='fix_indents',
recommended_value=True, level=OptionRecommendation.LOW,
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Turn indentation created from multiple &nbsp; entities '
'into CSS indents.')),
@ -517,7 +517,7 @@ OptionRecommendation(name='unwrap_lines',
help=_('Unwrap lines using punctuation and other formatting clues.')),
OptionRecommendation(name='delete_blank_paragraphs',
recommended_value=True, level=OptionRecommendation.LOW,
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Remove empty paragraphs from the document when they exist between '
'every other paragraph')),
@ -528,7 +528,7 @@ OptionRecommendation(name='format_scene_breaks',
'horizontal rules.')),
OptionRecommendation(name='dehyphenate',
recommended_value=True, level=OptionRecommendation.LOW,
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Analyses hyphenated words throughout the document. The '
'document itself is used as a dictionary to determine whether hyphens '
'should be retained or removed.')),

View File

@ -11,6 +11,8 @@ from calibre.gui2.convert.single import Config, sort_formats_by_preference, \
from calibre.customize.ui import available_output_formats
from calibre.gui2 import ResizableDialog
from calibre.gui2.convert.look_and_feel import LookAndFeelWidget
from calibre.gui2.convert.heuristics import HeuristicsWidget
from calibre.gui2.convert.search_and_replace import SearchAndReplaceWidget
from calibre.gui2.convert.page_setup import PageSetupWidget
from calibre.gui2.convert.structure_detection import StructureDetectionWidget
from calibre.gui2.convert.toc import TOCWidget
@ -69,6 +71,8 @@ class BulkConfig(Config):
self.setWindowTitle(_('Bulk Convert'))
lf = widget_factory(LookAndFeelWidget)
hw = widget_factory(HeuristicsWidget)
sr = widget_factory(SearchAndReplaceWidget)
ps = widget_factory(PageSetupWidget)
sd = widget_factory(StructureDetectionWidget)
toc = widget_factory(TOCWidget)
@ -90,7 +94,7 @@ class BulkConfig(Config):
if not c: break
self.stack.removeWidget(c)
widgets = [lf, ps, sd, toc]
widgets = [lf, hw, sr, ps, sd, toc]
if output_widget is not None:
widgets.append(output_widget)
for w in widgets:

View File

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import Qt
from calibre.gui2.convert.heuristics_ui import Ui_Form
from calibre.gui2.convert import Widget
class HeuristicsWidget(Widget, Ui_Form):
    """Conversion settings page for the heuristic processing options.

    The master ``opt_enable_heuristics`` check box gates every other option
    on the page; the unwrap factor spin box is additionally gated by
    ``opt_unwrap_lines``.
    """

    TITLE = _('Heuristics')
    HELP = _('Modify the document text and structure using common patterns.')
    COMMIT_NAME = 'heuristics'

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        """Register the option names, load their values and wire the signals."""
        option_names = [
            'enable_heuristics', 'markup_chapter_headings',
            'italicize_common_cases', 'fix_indents',
            'html_unwrap_factor', 'unwrap_lines',
            'delete_blank_paragraphs', 'format_scene_breaks',
            'dehyphenate',
        ]
        Widget.__init__(self, parent, option_names)
        self.db = db
        self.book_id = book_id
        self.initialize_options(get_option, get_help, db, book_id)

        master = self.opt_enable_heuristics
        master.stateChanged.connect(self.enable_heuristics)
        self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)

        # Bring the enabled state of the page in line with the loaded value.
        self.enable_heuristics(master.checkState())

    def break_cycles(self):
        """Run the base class cleanup, then drop this widget's signal connections."""
        Widget.break_cycles(self)
        for box in (self.opt_enable_heuristics, self.opt_unwrap_lines):
            box.stateChanged.disconnect()

    def set_value_handler(self, g, val):
        """Show a missing unwrap factor as 0.0.

        Returns True when the value was handled here, otherwise None.
        """
        if val is not None or g is not self.opt_html_unwrap_factor:
            return None
        g.setValue(0.0)
        return True

    def enable_heuristics(self, state):
        """Enable or disable the individual options to follow the master box."""
        active = state == Qt.Checked
        for name in ('markup_chapter_headings', 'italicize_common_cases',
                     'fix_indents', 'delete_blank_paragraphs',
                     'format_scene_breaks', 'dehyphenate', 'unwrap_lines'):
            getattr(self, 'opt_' + name).setEnabled(active)
        # The factor is only meaningful while line unwrapping itself is ticked.
        factor_active = (
            active and self.opt_unwrap_lines.checkState() == Qt.Checked)
        self.opt_html_unwrap_factor.setEnabled(factor_active)

    def enable_unwrap(self, state):
        """Enable the unwrap factor spin box only while unwrapping is ticked."""
        self.opt_html_unwrap_factor.setEnabled(state == Qt.Checked)

View File

@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Qt .ui form description (ui version 4.0) for a QWidget named "Form".
Layout: one master check box (opt_enable_heuristics) above a "Heuristics"
group box whose grid holds the individual opt_* check boxes and the
opt_html_unwrap_factor spin box with its huf_label.
-->
<ui version="4.0">
<class>Form</class>
<widget class="QWidget" name="Form">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>657</width>
<height>479</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QCheckBox" name="opt_enable_heuristics">
<property name="text">
<string>&amp;Preprocess input file to possibly improve structure detection</string>
</property>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Heuristics</string>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<item row="0" column="0" colspan="2">
<widget class="QCheckBox" name="opt_unwrap_lines">
<property name="text">
<string>Unwrap lines</string>
</property>
</widget>
</item>
<!-- Row 1: spacer, label and spin box for the unwrap factor, then a trailing spacer (declared further down as column 3). -->
<item row="1" column="0">
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>131</width>
<height>22</height>
</size>
</property>
</spacer>
</item>
<item row="1" column="1">
<widget class="QLabel" name="huf_label">
<property name="text">
<string>Line &amp;un-wrap factor during preprocess:</string>
</property>
<property name="buddy">
<cstring>opt_html_unwrap_factor</cstring>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QDoubleSpinBox" name="opt_html_unwrap_factor">
<property name="toolTip">
<string/>
</property>
<property name="maximum">
<double>1.000000000000000</double>
</property>
<property name="singleStep">
<double>0.050000000000000</double>
</property>
<property name="value">
<double>0.400000000000000</double>
</property>
</widget>
</item>
<item row="2" column="0" colspan="3">
<widget class="QCheckBox" name="opt_markup_chapter_headings">
<property name="text">
<string>Detect and markup unformatted chapter headings and sub headings</string>
</property>
</widget>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_delete_blank_paragraphs">
<property name="text">
<string>Delete blank lines between paragraphs</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_format_scene_breaks">
<property name="text">
<string>Ensure scene breaks are consistently formatted</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="opt_dehyphenate">
<property name="text">
<string>Remove unnecessary hyphens</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="2">
<widget class="QCheckBox" name="opt_italicize_common_cases">
<property name="text">
<string>Italicize common words and patterns</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="2">
<widget class="QCheckBox" name="opt_fix_indents">
<property name="text">
<string>Replace entity indents with CSS indents</string>
</property>
</widget>
</item>
<item row="8" column="0">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>131</width>
<height>35</height>
</size>
</property>
</spacer>
</item>
<item row="1" column="3">
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<resources/>
<!--
Both connections forward opt_enable_heuristics.toggled(bool) to
setEnabled(bool): the first on opt_html_unwrap_factor, the second on
huf_label.
NOTE(review): presumably this form is the one loaded by HeuristicsWidget,
which also sets the enabled state of opt_html_unwrap_factor from Python
depending on opt_unwrap_lines. These connections look like they could
re-enable the spin box when heuristics are switched on while "Unwrap lines"
is unchecked; confirm whether the sender should be opt_unwrap_lines instead.
-->
<connections>
<connection>
<sender>opt_enable_heuristics</sender>
<signal>toggled(bool)</signal>
<receiver>opt_html_unwrap_factor</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>328</x>
<y>87</y>
</hint>
<hint type="destinationlabel">
<x>481</x>
<y>113</y>
</hint>
</hints>
</connection>
<connection>
<sender>opt_enable_heuristics</sender>
<signal>toggled(bool)</signal>
<receiver>huf_label</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>295</x>
<y>88</y>
</hint>
<hint type="destinationlabel">
<x>291</x>
<y>105</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re
from calibre.gui2.convert.search_and_replace_ui import Ui_Form
from calibre.gui2.convert import Widget
from calibre.gui2 import error_dialog
class SearchAndReplaceWidget(Widget, Ui_Form):
    """Conversion settings page with three user defined search/replace pairs.

    Each of the six options is backed by an ``opt_<name>`` editor widget
    declared in the accompanying UI form.
    """

    TITLE = _('Search and Replace')
    HELP = _('Modify the document text and structure using user defined patterns.')
    COMMIT_NAME = 'search_and_replace'

    # Option names in display order; the matching widgets are 'opt_' + name.
    OPTION_NAMES = ('sr1_search', 'sr1_replace',
                    'sr2_search', 'sr2_replace',
                    'sr3_search', 'sr3_replace')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        """Register the options, load their values and label each editor."""
        Widget.__init__(self, parent, list(self.OPTION_NAMES))
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)
        # Kept as literal _() calls rather than a loop so each label stays a
        # plain translatable string.
        self.opt_sr1_search.set_msg(_('Search regular expression 1:'))
        self.opt_sr1_replace.set_msg(_('Replace regular expression 1:'))
        self.opt_sr2_search.set_msg(_('Search regular expression 2:'))
        self.opt_sr2_replace.set_msg(_('Replace regular expression 2:'))
        self.opt_sr3_search.set_msg(_('Search regular expression 3:'))
        self.opt_sr3_replace.set_msg(_('Replace regular expression 3:'))

    def break_cycles(self):
        """Run the base class cleanup, then the cleanup of every editor."""
        Widget.break_cycles(self)
        for name in self.OPTION_NAMES:
            getattr(self, 'opt_' + name).break_cycles()

    def pre_commit_check(self):
        """Check that every editor holds a compilable regular expression.

        Shows an error dialog and returns False for the first expression
        that fails to compile; returns True when all of them compile.
        """
        # Names use underscores to match the opt_* widget attributes; the
        # hyphenated spellings used previously made getattr() raise
        # AttributeError.
        # NOTE(review): the *_replace fields are compiled as patterns too,
        # so a replacement such as r'\1' is rejected -- confirm whether only
        # the *_search fields should be validated.
        for name in self.OPTION_NAMES:
            editor = getattr(self, 'opt_' + name)
            try:
                pat = unicode(editor.regex)
                re.compile(pat)
            except Exception as err:
                error_dialog(self, _('Invalid regular expression'),
                             _('Invalid regular expression: %s')%err).exec_()
                return False
        # Report success explicitly; falling off the end would return None,
        # which is falsy.
        return True

View File

@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Qt .ui form description (ui version 4.0) for a QWidget named "Form".
A single vertical layout stacks six RegexEdit custom widgets:
opt_sr1_search / opt_sr1_replace through opt_sr3_search / opt_sr3_replace.
-->
<ui version="4.0">
<class>Form</class>
<widget class="QWidget" name="Form">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>657</width>
<height>479</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="RegexEdit" name="opt_sr1_search" native="true"/>
</item>
<item>
<widget class="RegexEdit" name="opt_sr1_replace" native="true"/>
</item>
<item>
<widget class="RegexEdit" name="opt_sr2_search" native="true"/>
</item>
<item>
<widget class="RegexEdit" name="opt_sr2_replace" native="true"/>
</item>
<item>
<widget class="RegexEdit" name="opt_sr3_search" native="true"/>
</item>
<item>
<widget class="RegexEdit" name="opt_sr3_replace" native="true"/>
</item>
</layout>
</widget>
<!-- RegexEdit is declared as a custom container widget extending QWidget, with header regex_builder.h. -->
<customwidgets>
<customwidget>
<class>RegexEdit</class>
<extends>QWidget</extends>
<header>regex_builder.h</header>
<container>1</container>
</customwidget>
</customwidgets>
<resources/>
<connections/>
</ui>

View File

@ -16,6 +16,8 @@ from calibre.ebooks.conversion.config import GuiRecommendations, save_specifics,
from calibre.gui2.convert.single_ui import Ui_Dialog
from calibre.gui2.convert.metadata import MetadataWidget
from calibre.gui2.convert.look_and_feel import LookAndFeelWidget
from calibre.gui2.convert.heuristics import HeuristicsWidget
from calibre.gui2.convert.search_and_replace import SearchAndReplaceWidget
from calibre.gui2.convert.page_setup import PageSetupWidget
from calibre.gui2.convert.structure_detection import StructureDetectionWidget
from calibre.gui2.convert.toc import TOCWidget
@ -170,6 +172,8 @@ class Config(ResizableDialog, Ui_Dialog):
self.mw = widget_factory(MetadataWidget)
self.setWindowTitle(_('Convert')+ ' ' + unicode(self.mw.title.text()))
lf = widget_factory(LookAndFeelWidget)
hw = widget_factory(HeuristicsWidget)
sr = widget_factory(SearchAndReplaceWidget)
ps = widget_factory(PageSetupWidget)
sd = widget_factory(StructureDetectionWidget)
toc = widget_factory(TOCWidget)
@ -203,7 +207,7 @@ class Config(ResizableDialog, Ui_Dialog):
if not c: break
self.stack.removeWidget(c)
widgets = [self.mw, lf, ps, sd, toc]
widgets = [self.mw, lf, hw, sr, ps, sd, toc]
if input_widget is not None:
widgets.append(input_widget)
if output_widget is not None:

View File

@ -6,8 +6,6 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.gui2.convert.structure_detection_ui import Ui_Form
from calibre.gui2.convert import Widget
from calibre.gui2 import error_dialog
@ -24,12 +22,8 @@ class StructureDetectionWidget(Widget, Ui_Form):
Widget.__init__(self, parent,
['chapter', 'chapter_mark',
'remove_first_image',
'insert_metadata', 'page_breaks_before',
'preprocess_html', 'remove_header', 'header_regex',
'remove_footer', 'footer_regex','html_unwrap_factor']
'insert_metadata', 'page_breaks_before']
)
self.opt_html_unwrap_factor.setEnabled(False)
self.huf_label.setEnabled(False)
self.db, self.book_id = db, book_id
for x in ('pagebreak', 'rule', 'both', 'none'):
self.opt_chapter_mark.addItem(x)
@ -37,28 +31,11 @@ class StructureDetectionWidget(Widget, Ui_Form):
self.opt_chapter.set_msg(_('Detect chapters at (XPath expression):'))
self.opt_page_breaks_before.set_msg(_('Insert page breaks before '
'(XPath expression):'))
self.opt_header_regex.set_msg(_('Header regular expression:'))
self.opt_header_regex.set_book_id(book_id)
self.opt_header_regex.set_db(db)
self.opt_footer_regex.set_msg(_('Footer regular expression:'))
self.opt_footer_regex.set_book_id(book_id)
self.opt_footer_regex.set_db(db)
def break_cycles(self):
Widget.break_cycles(self)
self.opt_header_regex.break_cycles()
self.opt_footer_regex.break_cycles()
def pre_commit_check(self):
for x in ('header_regex', 'footer_regex'):
x = getattr(self, 'opt_'+x)
try:
pat = unicode(x.regex)
re.compile(pat)
except Exception, err:
error_dialog(self, _('Invalid regular expression'),
_('Invalid regular expression: %s')%err).exec_()
return False
for x in ('chapter', 'page_breaks_before'):
x = getattr(self, 'opt_'+x)
if not x.check():
@ -66,8 +43,3 @@ class StructureDetectionWidget(Widget, Ui_Form):
_('The XPath expression %s is invalid.')%x.text).exec_()
return False
return True
def set_value_handler(self, g, val):
if val is None and g is self.opt_html_unwrap_factor:
g.setValue(0.0)
return True

View File

@ -14,10 +14,10 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="1" colspan="2">
<item row="0" column="0" colspan="3">
<widget class="XPathEdit" name="opt_chapter" native="true"/>
</item>
<item row="1" column="0" colspan="2">
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>Chapter &amp;mark:</string>
@ -27,7 +27,7 @@
</property>
</widget>
</item>
<item row="1" column="2">
<item row="1" column="1" colspan="2">
<widget class="QComboBox" name="opt_chapter_mark">
<property name="minimumContentsLength">
<number>20</number>
@ -41,17 +41,17 @@
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_insert_metadata">
<property name="text">
<string>Insert &amp;metadata as page at start of book</string>
</property>
</widget>
</item>
<item row="11" column="0" colspan="3">
<item row="5" column="0" colspan="3">
<widget class="XPathEdit" name="opt_page_breaks_before" native="true"/>
</item>
<item row="12" column="0" colspan="3">
<item row="6" column="0" colspan="3">
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
@ -64,72 +64,6 @@
</property>
</spacer>
</item>
<item row="8" column="0" colspan="2">
<widget class="QCheckBox" name="opt_remove_footer">
<property name="text">
<string>Remove F&amp;ooter</string>
</property>
</widget>
</item>
<item row="6" column="0" colspan="2">
<widget class="QCheckBox" name="opt_remove_header">
<property name="text">
<string>Remove H&amp;eader</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="3">
<widget class="RegexEdit" name="opt_header_regex" native="true"/>
</item>
<item row="9" column="0" colspan="3">
<widget class="RegexEdit" name="opt_footer_regex" native="true"/>
</item>
<item row="4" column="1">
<widget class="QLabel" name="huf_label">
<property name="text">
<string>Line &amp;un-wrap factor during preprocess:</string>
</property>
<property name="buddy">
<cstring>opt_html_unwrap_factor</cstring>
</property>
</widget>
</item>
<item row="4" column="2">
<widget class="QDoubleSpinBox" name="opt_html_unwrap_factor">
<property name="toolTip">
<string/>
</property>
<property name="maximum">
<double>1.000000000000000</double>
</property>
<property name="singleStep">
<double>0.050000000000000</double>
</property>
<property name="value">
<double>0.400000000000000</double>
</property>
</widget>
</item>
<item row="4" column="0">
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_preprocess_html">
<property name="text">
<string>&amp;Preprocess input file to possibly improve structure detection</string>
</property>
</widget>
</item>
</layout>
</widget>
<customwidgets>
@ -139,46 +73,7 @@
<header>convert/xpath_wizard.h</header>
<container>1</container>
</customwidget>
<customwidget>
<class>RegexEdit</class>
<extends>QWidget</extends>
<header>regex_builder.h</header>
<container>1</container>
</customwidget>
</customwidgets>
<resources/>
<connections>
<connection>
<sender>opt_preprocess_html</sender>
<signal>toggled(bool)</signal>
<receiver>opt_html_unwrap_factor</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>328</x>
<y>87</y>
</hint>
<hint type="destinationlabel">
<x>481</x>
<y>113</y>
</hint>
</hints>
</connection>
<connection>
<sender>opt_preprocess_html</sender>
<signal>toggled(bool)</signal>
<receiver>huf_label</receiver>
<slot>setEnabled(bool)</slot>
<hints>
<hint type="sourcelabel">
<x>295</x>
<y>88</y>
</hint>
<hint type="destinationlabel">
<x>291</x>
<y>105</y>
</hint>
</hints>
</connection>
</connections>
<connections/>
</ui>

View File

@ -12,6 +12,8 @@ from calibre.ebooks.conversion.plumber import Plumber
from calibre.utils.logging import Log
from calibre.gui2.preferences.conversion_ui import Ui_Form
from calibre.gui2.convert.look_and_feel import LookAndFeelWidget
from calibre.gui2.convert.heuristics import HeuristicsWidget
from calibre.gui2.convert.search_and_replace import SearchAndReplaceWidget
from calibre.gui2.convert.page_setup import PageSetupWidget
from calibre.gui2.convert.structure_detection import StructureDetectionWidget
from calibre.gui2.convert.toc import TOCWidget
@ -82,7 +84,8 @@ class Base(ConfigWidgetBase, Ui_Form):
class CommonOptions(Base):
def load_conversion_widgets(self):
self.conversion_widgets = [LookAndFeelWidget, PageSetupWidget,
self.conversion_widgets = [LookAndFeelWidget, HeuristicsWidget,
SearchAndReplaceWidget, PageSetupWidget,
StructureDetectionWidget, TOCWidget]
class InputOptions(Base):