mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
...
This commit is contained in:
parent
01584b0784
commit
3c45dba7cc
@ -175,7 +175,7 @@ class Dehyphenator(object):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, verbose=0, log=None):
|
def __init__(self, verbose=0, log=None):
|
||||||
self.log = default_log if log is None else log
|
self.log = log
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
# Add common suffixes to the regex below to increase the likelihood of a match -
|
# Add common suffixes to the regex below to increase the likelihood of a match -
|
||||||
# don't add suffixes which are also complete words, such as 'able' or 'sex'
|
# don't add suffixes which are also complete words, such as 'able' or 'sex'
|
||||||
|
@ -92,8 +92,8 @@ class HeuristicProcessor(object):
|
|||||||
line_end = line_end_ere.findall(raw)
|
line_end = line_end_ere.findall(raw)
|
||||||
tot_htm_ends = len(htm_end)
|
tot_htm_ends = len(htm_end)
|
||||||
tot_ln_fds = len(line_end)
|
tot_ln_fds = len(line_end)
|
||||||
self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
|
#self.log.debug("There are " + unicode(tot_ln_fds) + " total Line feeds, and " +
|
||||||
unicode(tot_htm_ends) + " marked up endings")
|
# unicode(tot_htm_ends) + " marked up endings")
|
||||||
|
|
||||||
if percent > 1:
|
if percent > 1:
|
||||||
percent = 1
|
percent = 1
|
||||||
@ -101,7 +101,7 @@ class HeuristicProcessor(object):
|
|||||||
percent = 0
|
percent = 0
|
||||||
|
|
||||||
min_lns = tot_ln_fds * percent
|
min_lns = tot_ln_fds * percent
|
||||||
self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
|
#self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
|
||||||
if min_lns > tot_htm_ends:
|
if min_lns > tot_htm_ends:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -118,11 +118,11 @@ class TXTInput(InputFormatPlugin):
|
|||||||
txt = separate_paragraphs_print_formatted(txt)
|
txt = separate_paragraphs_print_formatted(txt)
|
||||||
|
|
||||||
if options.paragraph_type == 'unformatted':
|
if options.paragraph_type == 'unformatted':
|
||||||
from calibre.ebooks.conversion.utils import PreProcessor
|
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
||||||
# get length
|
# get length
|
||||||
|
|
||||||
# unwrap lines based on punctuation
|
# unwrap lines based on punctuation
|
||||||
preprocessor = PreProcessor(options, log=getattr(self, 'log', None))
|
preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
|
||||||
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
|
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
|
||||||
|
|
||||||
flow_size = getattr(options, 'flow_size', 0)
|
flow_size = getattr(options, 'flow_size', 0)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user