diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 0ceed67bf9..b35a163044 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -454,18 +454,13 @@ class HTMLPreProcessor(object): html = _ligpat.sub(lambda m:LIGATURES[m.group()], html) for search, replace in [['sr3_search', 'sr3_replace'], ['sr2_search', 'sr2_replace'], ['sr1_search', 'sr1_replace']]: - replace_pattern = '' - if getattr(self.extra_opts, search, None): - search_pattern = getattr(self.extra_opts, search, None) - if getattr(self.extra_opts, replace, None): - replace_pattern = getattr(self.extra_opts, replace, None) + search_pattern = getattr(self.extra_opts, search, '') + if search_pattern: try: - rules.insert(0, (re.compile(search_pattern), replace_pattern)) - except: - import traceback - print 'Failed to parse sr3-search regexp' - traceback.print_exc() - + search_re = re.compile(search_pattern) + rules.insert(0, (search_re, getattr(self.extra_opts, replace, ''))) + except Exception as e: + self.log.error('Failed to parse %s regexp because %s' % (search, e)) end_rules = [] # delete soft hyphens - moved here so it's executed after header/footer removal diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 4a118d423c..15522d25e6 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -35,12 +35,12 @@ class HeuristicProcessor(object): title = match.group('title') if not title: self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + unicode(self.html_preprocess_sections) + + self.log.debug("marked " + unicode(self.html_preprocess_sections) + " chapters. - " + unicode(chap)) return '

'+chap+'

\n' else: self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + unicode(self.html_preprocess_sections) + + self.log.debug("marked " + unicode(self.html_preprocess_sections) + " chapters & titles. - " + unicode(chap) + ", " + unicode(title)) return '

'+chap+'

\n

'+title+'

\n' @@ -48,7 +48,7 @@ class HeuristicProcessor(object): chap = match.group('section') styles = match.group('styles') self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log("marked " + unicode(self.html_preprocess_sections) + + self.log.debug("marked " + unicode(self.html_preprocess_sections) + " section markers based on punctuation. - " + unicode(chap)) return '<'+styles+' style="page-break-before:always">'+chap @@ -91,7 +91,7 @@ class HeuristicProcessor(object): line_end = line_end_ere.findall(raw) tot_htm_ends = len(htm_end) tot_ln_fds = len(line_end) - self.log("There are " + unicode(tot_ln_fds) + " total Line feeds, and " + + self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " + unicode(tot_htm_ends) + " marked up endings") if percent > 1: @@ -100,7 +100,7 @@ class HeuristicProcessor(object): percent = 0 min_lns = tot_ln_fds * percent - self.log("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup") + self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup") if min_lns > tot_htm_ends: return True @@ -171,7 +171,7 @@ class HeuristicProcessor(object): #print "minimum chapters required are: "+str(self.min_chapters) heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) - self.log("found " + unicode(self.html_preprocess_sections) + " pre-existing headings") + self.log.debug("found " + unicode(self.html_preprocess_sections) + " pre-existing headings") # Build the Regular Expressions in pieces init_lookahead = "(?=<(p|div))" @@ -221,7 +221,7 @@ class HeuristicProcessor(object): break full_chapter_line = chapter_line_open+chapter_header_open+chapter_type+chapter_header_close+chapter_line_close n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) - self.log("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message) + self.log.debug("Marked " + unicode(self.html_preprocess_sections) + " headings, " + log_message) if lookahead_ignorecase: chapter_marker = init_lookahead+full_chapter_line+blank_lines+n_lookahead_open+n_lookahead+n_lookahead_close+opt_title_open+title_line_open+title_header_open+default_title+title_header_close+title_line_close+opt_title_close chapdetect = re.compile(r'%s' % chapter_marker, re.IGNORECASE) @@ -244,7 +244,7 @@ class HeuristicProcessor(object): words_per_chptr = wordcount if words_per_chptr > 0 and self.html_preprocess_sections > 0: words_per_chptr = wordcount / self.html_preprocess_sections - self.log("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters") + self.log.debug("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters") return html def punctuation_unwrap(self, length, content, format): @@ -291,7 +291,7 @@ class HeuristicProcessor(object): def markup_pre(self, html): pre = re.compile(r'
', re.IGNORECASE)
         if len(pre.findall(html)) >= 1:
-            self.log("Running Text Processing")
+            self.log.debug("Running Text Processing")
             outerhtml = re.compile(r'.*?(?<=
)(?P.*)(?=
).*', re.IGNORECASE|re.DOTALL) html = outerhtml.sub(self.txt_process, html) else: @@ -311,7 +311,7 @@ class HeuristicProcessor(object): txtindent = re.compile(ur'[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE) html = txtindent.sub(self.insert_indent, html) if self.found_indents > 1: - self.log("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles") + self.log.debug("replaced "+unicode(self.found_indents)+ " nbsp indents with inline styles") return html def cleanup_markup(self, html): @@ -351,7 +351,7 @@ class HeuristicProcessor(object): blanklines = self.blankreg.findall(html) lines = self.linereg.findall(html) if len(lines) > 1: - self.log("There are " + unicode(len(blanklines)) + " blank lines. " + + self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " + unicode(float(len(blanklines)) / float(len(lines))) + " percent blank") if float(len(blanklines)) / float(len(lines)) > 0.40: @@ -367,18 +367,18 @@ class HeuristicProcessor(object): def __call__(self, html): - self.log("********* Heuristic processing HTML *********") + self.log.debug("********* Heuristic processing HTML *********") # Count the words in the document to estimate how many chapters to look for and whether # other types of processing are attempted try: self.totalwords = self.get_word_count(html) except: - self.log("Can't get wordcount") + self.log.warn("Can't get wordcount") print "found "+unicode(self.totalwords)+" words in the flow" if self.totalwords < 50: - self.log("flow is too short, not running heuristics") + self.log.warn("flow is too short, not running heuristics") return html # Arrange line feeds and

tags so the line_length and no_markup functions work correctly @@ -391,7 +391,7 @@ class HeuristicProcessor(object): #
 tags), check and  mark up line endings if required before proceeding
             # fix indents must run after this step
             if self.no_markup(html, 0.1):
-                self.log("not enough paragraph markers, adding now")
+                self.log.debug("not enough paragraph markers, adding now")
                 # markup using text processing
                 html = self.markup_pre(html)
 
@@ -421,7 +421,7 @@ class HeuristicProcessor(object):
         # If more than 40% of the lines are empty paragraphs and the user has enabled delete
         # blank paragraphs then delete blank lines to clean up spacing
         if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
-            self.log("deleting blank lines")
+            self.log.debug("deleting blank lines")
             html = self.multi_blank.sub('\n

', html) html = self.blankreg.sub('', html) @@ -435,18 +435,18 @@ class HeuristicProcessor(object): # more of the lines break in the same region of the document then unwrapping is required docanalysis = DocAnalysis(format, html) hardbreaks = docanalysis.line_histogram(.50) - self.log("Hard line breaks check returned "+unicode(hardbreaks)) + self.log.debug("Hard line breaks check returned "+unicode(hardbreaks)) # Calculate Length unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) - self.log("Median line length is " + unicode(length) + ", calculated with " + format + " format") + self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format") ###### Unwrap lines ###### if getattr(self.extra_opts, 'unwrap_lines', False): # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor if hardbreaks or unwrap_factor < 0.4: - self.log("Unwrapping required, unwrapping Lines") + self.log.debug("Unwrapping required, unwrapping Lines") # Dehyphenate with line length limiters dehyphenator = Dehyphenator() html = dehyphenator(html,'html', length) @@ -457,14 +457,14 @@ class HeuristicProcessor(object): if getattr(self.extra_opts, 'dehyphenate', False): # dehyphenate in cleanup mode to fix anything previous conversions/editing missed - self.log("Fixing hyphenated content") + self.log.debug("Fixing hyphenated content") dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log) html = dehyphenator(html,'html_cleanup', length) html = dehyphenator(html, 'individual_words', length) # If still no sections after unwrapping mark split points on lines with no punctuation if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): - self.log("Looking for more split points based on punctuation," + self.log.debug("Looking for more split points based on punctuation," " currently have " + unicode(self.html_preprocess_sections)) chapdetect3 = re.compile(r'<(?P(p|div)[^>]*)>\s*(?P
(]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(]*>)?\s*(<[ibu][^>]*>){0,2}\s*(]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*()?(){0,2}\s*()?\s*(){0,2}\s*()?\s*)', re.IGNORECASE) html = chapdetect3.sub(self.chapter_break, html) diff --git a/src/calibre/gui2/convert/pdb_output.py b/src/calibre/gui2/convert/pdb_output.py index ec6b7abb08..bf1d5048e2 100644 --- a/src/calibre/gui2/convert/pdb_output.py +++ b/src/calibre/gui2/convert/pdb_output.py @@ -6,8 +6,6 @@ __docformat__ = 'restructuredtext en' from calibre.gui2.convert.pdb_output_ui import Ui_Form from calibre.gui2.convert import Widget -from calibre.ebooks.pdb import FORMAT_WRITERS -from calibre.gui2.widgets import BasicComboModel format_model = None @@ -21,17 +19,8 @@ class PluginWidget(Widget, Ui_Form): def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, ['format', 'inline_toc', 'pdb_output_encoding']) self.db, self.book_id = db, book_id + + for x in get_option('format').option.choices: + self.opt_format.addItem(x) + self.initialize_options(get_option, get_help, db, book_id) - - default = self.opt_format.currentText() - - global format_model - if format_model is None: - format_model = BasicComboModel(FORMAT_WRITERS.keys()) - self.format_model = format_model - self.opt_format.setModel(self.format_model) - - default_index = self.opt_format.findText(default) - format_index = self.opt_format.findText('doc') - self.opt_format.setCurrentIndex(default_index if default_index != -1 else format_index if format_index != -1 else 0) - diff --git a/src/calibre/gui2/convert/pdf_output.py b/src/calibre/gui2/convert/pdf_output.py index 5d6a595079..1c526939c2 100644 --- a/src/calibre/gui2/convert/pdf_output.py +++ b/src/calibre/gui2/convert/pdf_output.py @@ -6,8 +6,6 @@ __docformat__ = 'restructuredtext en' from calibre.gui2.convert.pdf_output_ui import Ui_Form from calibre.gui2.convert import Widget -from calibre.ebooks.pdf.pageoptions import PAPER_SIZES, ORIENTATIONS -from calibre.gui2.widgets import BasicComboModel paper_size_model = None orientation_model = None @@ -23,28 +21,11 @@ class PluginWidget(Widget, Ui_Form): Widget.__init__(self, parent, ['paper_size', 'orientation', 'preserve_cover_aspect_ratio']) self.db, self.book_id = db, book_id + + for x in get_option('paper_size').option.choices: + self.opt_paper_size.addItem(x) + for x in get_option('orientation').option.choices: + self.opt_orientation.addItem(x) + self.initialize_options(get_option, get_help, db, book_id) - - default_paper_size = self.opt_paper_size.currentText() - default_orientation = self.opt_orientation.currentText() - - global paper_size_model - if paper_size_model is None: - paper_size_model = BasicComboModel(PAPER_SIZES.keys()) - self.paper_size_model = paper_size_model - self.opt_paper_size.setModel(self.paper_size_model) - - default_paper_size_index = self.opt_paper_size.findText(default_paper_size) - letter_index = self.opt_paper_size.findText('letter') - self.opt_paper_size.setCurrentIndex(default_paper_size_index if default_paper_size_index != -1 else letter_index if letter_index != -1 else 0) - - global orientation_model - if orientation_model is None: - orientation_model = BasicComboModel(ORIENTATIONS.keys()) - self.orientation_model = orientation_model - self.opt_orientation.setModel(self.orientation_model) - - default_orientation_index = self.opt_orientation.findText(default_orientation) - orientation_index = self.opt_orientation.findText('portrait') - self.opt_orientation.setCurrentIndex(default_orientation_index if default_orientation_index != -1 else orientation_index if orientation_index != -1 else 0) - + \ No newline at end of file diff --git a/src/calibre/gui2/convert/txt_output.py b/src/calibre/gui2/convert/txt_output.py index 9a228bd4cf..a16dd68014 100644 --- a/src/calibre/gui2/convert/txt_output.py +++ b/src/calibre/gui2/convert/txt_output.py @@ -4,10 +4,10 @@ __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' +from PyQt4.Qt import Qt + from calibre.gui2.convert.txt_output_ui import Ui_Form from calibre.gui2.convert import Widget -from calibre.ebooks.txt.newlines import TxtNewlines -from calibre.gui2.widgets import BasicComboModel newline_model = None @@ -23,17 +23,27 @@ class PluginWidget(Widget, Ui_Form): ['newline', 'max_line_length', 'force_max_line_length', 'inline_toc', 'markdown_format', 'keep_links', 'keep_image_references', 'txt_output_encoding']) - self.db, self.book_id = db, book_id + self.db, self.book_id = db, book_id + for x in get_option('newline').option.choices: + self.opt_newline.addItem(x) self.initialize_options(get_option, get_help, db, book_id) - default = self.opt_newline.currentText() + self.opt_markdown_format.stateChanged.connect(self.enable_markdown_format) + self.enable_markdown_format(self.opt_markdown_format.checkState()) - global newline_model - if newline_model is None: - newline_model = BasicComboModel(TxtNewlines.NEWLINE_TYPES.keys()) - self.newline_model = newline_model - self.opt_newline.setModel(self.newline_model) - - default_index = self.opt_newline.findText(default) - system_index = self.opt_newline.findText('system') - self.opt_newline.setCurrentIndex(default_index if default_index != -1 else system_index if system_index != -1 else 0) + def break_cycles(self): + Widget.break_cycles(self) + + try: + self.opt_markdown_format.stateChanged.disconnect() + except: + pass + + def enable_markdown_format(self, state): + if state == Qt.Checked: + state = True + else: + state = False + self.opt_keep_links.setEnabled(state) + self.opt_keep_image_references.setEnabled(state) + \ No newline at end of file diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py index f2ff783a76..28c5de4322 100644 --- a/src/calibre/gui2/widgets.py +++ b/src/calibre/gui2/widgets.py @@ -311,32 +311,6 @@ class FontFamilyModel(QAbstractListModel): def index_of(self, family): return self.families.index(family.strip()) -class BasicComboModel(QAbstractListModel): - - def __init__(self, items, *args): - QAbstractListModel.__init__(self, *args) - self.items = [i for i in items] - self.items.sort() - - def rowCount(self, *args): - return len(self.items) - - def data(self, index, role): - try: - item = self.items[index.row()] - except: - traceback.print_exc() - return NONE - if role == Qt.DisplayRole: - return QVariant(item) - if role == Qt.FontRole: - return QVariant(QFont(item)) - return NONE - - def index_of(self, item): - return self.items.index(item.strip()) - - class BasicListItem(QListWidgetItem): def __init__(self, text, user_data=None):