diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 4663eeccdf..aabb1b8bc4 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -54,7 +54,7 @@ class HeuristicProcessor(object): return '<'+styles+' style="page-break-before:always">'+chap def analyze_title_matches(self, match): - chap = match.group('chap') + #chap = match.group('chap') title = match.group('title') if not title: self.chapters_no_title = self.chapters_no_title + 1 @@ -102,8 +102,7 @@ class HeuristicProcessor(object): min_lns = tot_ln_fds * percent #self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup") - if min_lns > tot_htm_ends: - return True + return min_lns > tot_htm_ends def dump(self, raw, where): import os @@ -136,7 +135,7 @@ class HeuristicProcessor(object): 'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.', 'Mlle.', 'Mons.', 'PS.', 'PPS.', ] - + ITALICIZE_STYLE_PATS = [ r'(?msu)(?<=\s)_(?P\S[^_]{0,40}?\S)?_(?=\s)', r'(?msu)(?<=\s)/(?P\S[^/]{0,40}?\S)?/(?=\s)', @@ -150,7 +149,7 @@ class HeuristicProcessor(object): r'(?msu)(?<=\s)/:(?P\S[^:/]{0,40}?\S)?:/(?=\s)', r'(?msu)(?<=\s)\|:(?P\S[^:\|]{0,40}?\S)?:\|(?=\s)', ] - + for word in ITALICIZE_WORDS: html = html.replace(word, '%s' % word) @@ -242,7 +241,7 @@ class HeuristicProcessor(object): lp_title = default_title else: lp_title = simple_title - + if ignorecase: arg_ignorecase = r'(?i)' else: @@ -250,7 +249,7 @@ class HeuristicProcessor(object): if title_req: lp_opt_title_open = '' - lp_opt_title_close = '' + lp_opt_title_close = '' else: lp_opt_title_open = opt_title_open lp_opt_title_close = opt_title_close @@ -399,7 +398,7 @@ class HeuristicProcessor(object): if len(lines) > 1: self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " + unicode(float(len(blanklines)) / float(len(lines))) + " percent blank") - + if float(len(blanklines)) / float(len(lines)) > 0.40: return True else: @@ -460,7 +459,7 @@ class HeuristicProcessor(object): if getattr(self.extra_opts, 'markup_chapter_headings', False): html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs) - if getattr(self.extra_opts, 'italicize_common_cases', False): + if getattr(self.extra_opts, 'italicize_common_cases', False): html = self.markup_italicis(html) # If more than 40% of the lines are empty paragraphs and the user has enabled delete @@ -487,7 +486,7 @@ class HeuristicProcessor(object): unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format") - + ###### Unwrap lines ###### if getattr(self.extra_opts, 'unwrap_lines', False): # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index ff8955939e..ff901c3715 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -7,8 +7,6 @@ __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.conversion.utils import HeuristicProcessor - class LITInput(InputFormatPlugin): diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 8188027e01..4ce3618441 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -3,7 +3,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re from calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py index dd14de2d20..5b99b19e74 100644 --- a/src/calibre/ebooks/txt/input.py +++ b/src/calibre/ebooks/txt/input.py @@ -53,6 +53,7 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + self.log = log log.debug('Reading text from file...') txt = stream.read() @@ -106,7 +107,7 @@ class TXTInput(InputFormatPlugin): log.debug('Auto detected paragraph type as %s' % options.paragraph_type) # Dehyphenate - dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None)) + dehyphenator = Dehyphenator(options.verbose, log=self.log) txt = dehyphenator(txt,'txt', length) # We don't check for block because the processor assumes block. diff --git a/src/calibre/gui2/convert/heuristics.py b/src/calibre/gui2/convert/heuristics.py index 6739c199b7..0655d7400f 100644 --- a/src/calibre/gui2/convert/heuristics.py +++ b/src/calibre/gui2/convert/heuristics.py @@ -25,21 +25,21 @@ class HeuristicsWidget(Widget, Ui_Form): ) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) - + self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics) self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap) - + self.enable_heuristics(self.opt_enable_heuristics.checkState()) def break_cycles(self): Widget.break_cycles(self) - + try: self.opt_enable_heuristics.stateChanged.disconnect() self.opt_unwrap_lines.stateChanged.disconnect() except: pass - + def set_value_handler(self, g, val): if val is None and g is self.opt_html_unwrap_factor: g.setValue(0.0) @@ -57,7 +57,7 @@ class HeuristicsWidget(Widget, Ui_Form): self.opt_format_scene_breaks.setEnabled(state) self.opt_dehyphenate.setEnabled(state) self.opt_renumber_headings.setEnabled(state) - + self.opt_unwrap_lines.setEnabled(state) if state and self.opt_unwrap_lines.checkState() == Qt.Checked: self.opt_html_unwrap_factor.setEnabled(True) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 734d8cd56c..28b5e178ac 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -19,7 +19,7 @@ from calibre.devices.scanner import DeviceScanner from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \ warning_dialog, info_dialog, choose_dir from calibre.ebooks.metadata import authors_to_string -from calibre import preferred_encoding, prints, force_unicode +from calibre import preferred_encoding, prints, force_unicode, as_unicode from calibre.utils.filenames import ascii_filename from calibre.devices.errors import FreeSpaceError from calibre.devices.apple.driver import ITUNES_ASYNC @@ -68,13 +68,7 @@ class DeviceJob(BaseJob): # {{{ if self._aborted: return self.failed = True - try: - ex = unicode(err) - except: - try: - ex = str(err).decode(preferred_encoding, 'replace') - except: - ex = repr(err) + ex = as_unicode(err) self._details = ex + '\n\n' + \ traceback.format_exc() self.exception = err