This commit is contained in:
Sengian 2011-01-18 23:24:21 +01:00
commit a827fcb5e1
6 changed files with 18 additions and 27 deletions

View File

@ -54,7 +54,7 @@ class HeuristicProcessor(object):
return '<'+styles+' style="page-break-before:always">'+chap
def analyze_title_matches(self, match):
chap = match.group('chap')
#chap = match.group('chap')
title = match.group('title')
if not title:
self.chapters_no_title = self.chapters_no_title + 1
@ -102,8 +102,7 @@ class HeuristicProcessor(object):
min_lns = tot_ln_fds * percent
#self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
if min_lns > tot_htm_ends:
return True
return min_lns > tot_htm_ends
def dump(self, raw, where):
import os
@ -136,7 +135,7 @@ class HeuristicProcessor(object):
'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
'Mlle.', 'Mons.', 'PS.', 'PPS.',
]
ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
@ -150,7 +149,7 @@ class HeuristicProcessor(object):
r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
]
for word in ITALICIZE_WORDS:
html = html.replace(word, '<i>%s</i>' % word)
@ -242,7 +241,7 @@ class HeuristicProcessor(object):
lp_title = default_title
else:
lp_title = simple_title
if ignorecase:
arg_ignorecase = r'(?i)'
else:
@ -250,7 +249,7 @@ class HeuristicProcessor(object):
if title_req:
lp_opt_title_open = ''
lp_opt_title_close = ''
lp_opt_title_close = ''
else:
lp_opt_title_open = opt_title_open
lp_opt_title_close = opt_title_close
@ -399,7 +398,7 @@ class HeuristicProcessor(object):
if len(lines) > 1:
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40:
return True
else:
@ -460,7 +459,7 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'markup_chapter_headings', False):
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
if getattr(self.extra_opts, 'italicize_common_cases', False):
if getattr(self.extra_opts, 'italicize_common_cases', False):
html = self.markup_italicis(html)
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
@ -487,7 +486,7 @@ class HeuristicProcessor(object):
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor)
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
###### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False):
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor

View File

@ -7,8 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.conversion.utils import HeuristicProcessor
class LITInput(InputFormatPlugin):

View File

@ -3,7 +3,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin):

View File

@ -53,6 +53,7 @@ class TXTInput(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
self.log = log
log.debug('Reading text from file...')
txt = stream.read()
@ -106,7 +107,7 @@ class TXTInput(InputFormatPlugin):
log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
# Dehyphenate
dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
dehyphenator = Dehyphenator(options.verbose, log=self.log)
txt = dehyphenator(txt,'txt', length)
# We don't check for block because the processor assumes block.

View File

@ -25,21 +25,21 @@ class HeuristicsWidget(Widget, Ui_Form):
)
self.db, self.book_id = db, book_id
self.initialize_options(get_option, get_help, db, book_id)
self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
self.enable_heuristics(self.opt_enable_heuristics.checkState())
def break_cycles(self):
Widget.break_cycles(self)
try:
self.opt_enable_heuristics.stateChanged.disconnect()
self.opt_unwrap_lines.stateChanged.disconnect()
except:
pass
def set_value_handler(self, g, val):
if val is None and g is self.opt_html_unwrap_factor:
g.setValue(0.0)
@ -57,7 +57,7 @@ class HeuristicsWidget(Widget, Ui_Form):
self.opt_format_scene_breaks.setEnabled(state)
self.opt_dehyphenate.setEnabled(state)
self.opt_renumber_headings.setEnabled(state)
self.opt_unwrap_lines.setEnabled(state)
if state and self.opt_unwrap_lines.checkState() == Qt.Checked:
self.opt_html_unwrap_factor.setEnabled(True)

View File

@ -19,7 +19,7 @@ from calibre.devices.scanner import DeviceScanner
from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
warning_dialog, info_dialog, choose_dir
from calibre.ebooks.metadata import authors_to_string
from calibre import preferred_encoding, prints, force_unicode
from calibre import preferred_encoding, prints, force_unicode, as_unicode
from calibre.utils.filenames import ascii_filename
from calibre.devices.errors import FreeSpaceError
from calibre.devices.apple.driver import ITUNES_ASYNC
@ -68,13 +68,7 @@ class DeviceJob(BaseJob): # {{{
if self._aborted:
return
self.failed = True
try:
ex = unicode(err)
except:
try:
ex = str(err).decode(preferred_encoding, 'replace')
except:
ex = repr(err)
ex = as_unicode(err)
self._details = ex + '\n\n' + \
traceback.format_exc()
self.exception = err