This commit is contained in:
Kovid Goyal 2011-01-18 14:52:35 -07:00
parent f56b7453b1
commit 37ee06549e
3 changed files with 8 additions and 11 deletions

View File

@ -54,7 +54,7 @@ class HeuristicProcessor(object):
return '<'+styles+' style="page-break-before:always">'+chap return '<'+styles+' style="page-break-before:always">'+chap
def analyze_title_matches(self, match): def analyze_title_matches(self, match):
chap = match.group('chap') #chap = match.group('chap')
title = match.group('title') title = match.group('title')
if not title: if not title:
self.chapters_no_title = self.chapters_no_title + 1 self.chapters_no_title = self.chapters_no_title + 1
@ -136,7 +136,7 @@ class HeuristicProcessor(object):
'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.', 'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
'Mlle.', 'Mons.', 'PS.', 'PPS.', 'Mlle.', 'Mons.', 'PS.', 'PPS.',
] ]
ITALICIZE_STYLE_PATS = [ ITALICIZE_STYLE_PATS = [
r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)', r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)', r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
@ -150,7 +150,7 @@ class HeuristicProcessor(object):
r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)', r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)', r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
] ]
for word in ITALICIZE_WORDS: for word in ITALICIZE_WORDS:
html = html.replace(word, '<i>%s</i>' % word) html = html.replace(word, '<i>%s</i>' % word)
@ -242,7 +242,7 @@ class HeuristicProcessor(object):
lp_title = default_title lp_title = default_title
else: else:
lp_title = simple_title lp_title = simple_title
if ignorecase: if ignorecase:
arg_ignorecase = r'(?i)' arg_ignorecase = r'(?i)'
else: else:
@ -250,7 +250,7 @@ class HeuristicProcessor(object):
if title_req: if title_req:
lp_opt_title_open = '' lp_opt_title_open = ''
lp_opt_title_close = '' lp_opt_title_close = ''
else: else:
lp_opt_title_open = opt_title_open lp_opt_title_open = opt_title_open
lp_opt_title_close = opt_title_close lp_opt_title_close = opt_title_close
@ -399,7 +399,7 @@ class HeuristicProcessor(object):
if len(lines) > 1: if len(lines) > 1:
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " + self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank") unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40: if float(len(blanklines)) / float(len(lines)) > 0.40:
return True return True
else: else:
@ -460,7 +460,7 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'markup_chapter_headings', False): if getattr(self.extra_opts, 'markup_chapter_headings', False):
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs) html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
if getattr(self.extra_opts, 'italicize_common_cases', False): if getattr(self.extra_opts, 'italicize_common_cases', False):
html = self.markup_italicis(html) html = self.markup_italicis(html)
# If more than 40% of the lines are empty paragraphs and the user has enabled delete # If more than 40% of the lines are empty paragraphs and the user has enabled delete
@ -487,7 +487,7 @@ class HeuristicProcessor(object):
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor) length = docanalysis.line_length(unwrap_factor)
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format") self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
###### Unwrap lines ###### ###### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False): if getattr(self.extra_opts, 'unwrap_lines', False):
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor # only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor

View File

@ -7,8 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.conversion.utils import HeuristicProcessor
class LITInput(InputFormatPlugin): class LITInput(InputFormatPlugin):

View File

@ -3,7 +3,6 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import re
from calibre.customize.conversion import InputFormatPlugin from calibre.customize.conversion import InputFormatPlugin
class MOBIInput(InputFormatPlugin): class MOBIInput(InputFormatPlugin):