diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index dc6c0f8b52..3c52ec2d7b 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -315,6 +315,31 @@ OptionRecommendation(name='preprocess_html',
)
),
+OptionRecommendation(name='remove_header',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Use a regular expression to try and remove the header.'
+ )
+ ),
+
+OptionRecommendation(name='header_regex',
+ recommended_value='(?i)(?<=
)((\s*(()*
\s*)?\d+
\s*.*?\s*)|(\s*(()*
\s*)?.*?
\s*\d+))(?=
)',
+ level=OptionRecommendation.LOW,
+ help=_('The regular expression to use to remove the header.'
+ )
+ ),
+
+OptionRecommendation(name='remove_footer',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Use a regular expression to try and remove the footer.'
+ )
+ ),
+
+OptionRecommendation(name='footer_regex',
+ recommended_value='(?i)(?<=
)((\s*(()*
\s*)?\d+
\s*.*?\s*)|(\s*(()*
\s*)?.*?
\s*\d+))(?=
)',
+ level=OptionRecommendation.LOW,
+ help=_('The regular expression to use to remove the footer.'
+ )
+ ),
OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW,
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index f9788fdba8..69d6f1e511 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -185,17 +185,7 @@ class HTMLPreProcessor(object):
elif self.is_book_designer(html):
rules = self.BOOK_DESIGNER
elif self.is_pdftohtml(html):
- start_rules = []
end_rules = []
-
- if getattr(self.extra_opts, 'remove_header', None):
- start_rules.append(
- (re.compile(getattr(self.extra_opts, 'header_regex')), lambda match : '')
- )
- if getattr(self.extra_opts, 'remove_footer', None):
- start_rules.append(
- (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
- )
if getattr(self.extra_opts, 'unwrap_factor', None):
length = line_length(html, getattr(self.extra_opts, 'unwrap_factor'))
if length:
@@ -204,10 +194,21 @@ class HTMLPreProcessor(object):
(re.compile(r'(?<=.{%i}[a-z\.,;:)-IA])\s*(?P(i|b|u)>)?\s*()\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
)
- rules = start_rules + self.PDFTOHTML + end_rules
+ rules = self.PDFTOHTML + end_rules
else:
rules = []
- for rule in self.PREPROCESS + rules:
+
+ pre_rules = []
+ if getattr(self.extra_opts, 'remove_header', None):
+ pre_rules.append(
+ (re.compile(getattr(self.extra_opts, 'header_regex')), lambda match : '')
+ )
+ if getattr(self.extra_opts, 'remove_footer', None):
+ pre_rules.append(
+ (re.compile(getattr(self.extra_opts, 'footer_regex')), lambda match : '')
+ )
+
+ for rule in self.PREPROCESS + pre_rules + rules:
html = rule[0].sub(rule[1], html)
# Handle broken XHTML w/ SVG (ugh)
diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py
index e17d50869e..58abbd635c 100644
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/pdf/input.py
@@ -24,16 +24,6 @@ class PDFInput(InputFormatPlugin):
help=_('Scale used to determine the length at which a line should '
'be unwrapped. Valid values are a decimal between 0 and 1. The '
'default is 0.5, this is the median line length.')),
- OptionRecommendation(name='remove_header', recommended_value=False,
- help=_('Use a regular expression to try and remove the header.')),
- OptionRecommendation(name='header_regex',
- recommended_value='(?i)(?<=
)((\s*(()*
\s*)?\d+
\s*.*?\s*)|(\s*(()*
\s*)?.*?
\s*\d+))(?=
)',
- help=_('The regular expression to use to remove the header.')),
- OptionRecommendation(name='remove_footer', recommended_value=False,
- help=_('Use a regular expression to try and remove the footer.')),
- OptionRecommendation(name='footer_regex',
- recommended_value='(?i)(?<=
)((\s*(()*
\s*)?\d+
\s*.*?\s*)|(\s*(()*
\s*)?.*?
\s*\d+))(?=
)',
- help=_('The regular expression to use to remove the footer.')),
])
def convert(self, stream, options, file_ext, log,
diff --git a/src/calibre/gui2/convert/pdf_input.py b/src/calibre/gui2/convert/pdf_input.py
index bfd658526c..e4a9541823 100644
--- a/src/calibre/gui2/convert/pdf_input.py
+++ b/src/calibre/gui2/convert/pdf_input.py
@@ -4,13 +4,8 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember '
__docformat__ = 'restructuredtext en'
-import re
-
-from PyQt4.Qt import SIGNAL
-
from calibre.gui2.convert.pdf_input_ui import Ui_Form
from calibre.gui2.convert import Widget
-from calibre.gui2 import qstring_to_unicode, error_dialog
class PluginWidget(Widget, Ui_Form):
@@ -19,31 +14,6 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, 'pdf_input',
- ['no_images', 'unwrap_factor', 'remove_header', 'header_regex',
- 'remove_footer', 'footer_regex'])
+ ['no_images', 'unwrap_factor'])
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)
-
- self.opt_header_regex.setEnabled(self.opt_remove_header.isChecked())
- self.opt_footer_regex.setEnabled(self.opt_remove_footer.isChecked())
-
- self.connect(self.opt_remove_header, SIGNAL('stateChanged(int)'), self.header_regex_state)
- self.connect(self.opt_remove_footer, SIGNAL('stateChanged(int)'), self.footer_regex_state)
-
- def header_regex_state(self, state):
- self.opt_header_regex.setEnabled(state)
-
- def footer_regex_state(self, state):
- self.opt_footer_regex.setEnabled(state)
-
- def pre_commit_check(self):
- for x in ('header_regex', 'footer_regex'):
- x = getattr(self, 'opt_'+x)
- try:
- pat = qstring_to_unicode(x.text())
- re.compile(pat)
- except Exception, err:
- error_dialog(self, _('Invalid regular expression'),
- _('Invalid regular expression: %s')%err).exec_()
- return False
- return True
diff --git a/src/calibre/gui2/convert/pdf_input.ui b/src/calibre/gui2/convert/pdf_input.ui
index d34c6d404b..40f480b15d 100644
--- a/src/calibre/gui2/convert/pdf_input.ui
+++ b/src/calibre/gui2/convert/pdf_input.ui
@@ -14,14 +14,14 @@
Form
- -
+
-
Line Un-Wrapping Factor:
- -
+
-
Qt::Vertical
@@ -34,7 +34,7 @@
- -
+
-
1.000000000000000
@@ -47,33 +47,13 @@
- -
+
-
No Images
- -
-
-
- Remove Header
-
-
-
- -
-
-
- Remove Footer
-
-
-
- -
-
-
- -
-
-
diff --git a/src/calibre/gui2/convert/structure_detection.py b/src/calibre/gui2/convert/structure_detection.py
index 66dff86aca..ee0a389478 100644
--- a/src/calibre/gui2/convert/structure_detection.py
+++ b/src/calibre/gui2/convert/structure_detection.py
@@ -6,10 +6,13 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
+import re
+
+from PyQt4.Qt import SIGNAL
from calibre.gui2.convert.structure_detection_ui import Ui_Form
from calibre.gui2.convert import Widget
-from calibre.gui2 import error_dialog
+from calibre.gui2 import error_dialog, qstring_to_unicode
class StructureDetectionWidget(Widget, Ui_Form):
@@ -23,7 +26,8 @@ class StructureDetectionWidget(Widget, Ui_Form):
['chapter', 'chapter_mark',
'remove_first_image',
'insert_metadata', 'page_breaks_before',
- 'preprocess_html']
+ 'preprocess_html', 'remove_header', 'header_regex',
+ 'remove_footer', 'footer_regex']
)
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)
@@ -31,8 +35,16 @@ class StructureDetectionWidget(Widget, Ui_Form):
self.opt_page_breaks_before.set_msg(_('Insert page breaks before '
'(XPath expression):'))
-
def pre_commit_check(self):
+ for x in ('header_regex', 'footer_regex'):
+ x = getattr(self, 'opt_'+x)
+ try:
+ pat = qstring_to_unicode(x.text())
+ re.compile(pat)
+ except Exception, err:
+ error_dialog(self, _('Invalid regular expression'),
+ _('Invalid regular expression: %s')%err).exec_()
+ return False
for x in ('chapter', 'page_breaks_before'):
x = getattr(self, 'opt_'+x)
if not x.check():
diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui
index 768b430c5a..eebc0f0d53 100644
--- a/src/calibre/gui2/convert/structure_detection.ui
+++ b/src/calibre/gui2/convert/structure_detection.ui
@@ -14,6 +14,9 @@
Form
+ -
+
+
-
@@ -62,20 +65,27 @@
- -
+
-
+
+
+ &Footer regular expression:
+
+
+ opt_footer_regex
+
+
+
+ -
&Preprocess input file to possibly improve structure detection
- -
+
-
- -
-
-
- -
+
-
Qt::Vertical
@@ -88,6 +98,36 @@
+ -
+
+
+ &Header regular expression:
+
+
+ opt_header_regex
+
+
+
+ -
+
+
+ Remove F&ooter
+
+
+
+ -
+
+
+ Remove H&eader
+
+
+
+ -
+
+
+ -
+
+