mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge
This commit is contained in:
commit
a827fcb5e1
@ -54,7 +54,7 @@ class HeuristicProcessor(object):
|
|||||||
return '<'+styles+' style="page-break-before:always">'+chap
|
return '<'+styles+' style="page-break-before:always">'+chap
|
||||||
|
|
||||||
def analyze_title_matches(self, match):
|
def analyze_title_matches(self, match):
|
||||||
chap = match.group('chap')
|
#chap = match.group('chap')
|
||||||
title = match.group('title')
|
title = match.group('title')
|
||||||
if not title:
|
if not title:
|
||||||
self.chapters_no_title = self.chapters_no_title + 1
|
self.chapters_no_title = self.chapters_no_title + 1
|
||||||
@ -102,8 +102,7 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
min_lns = tot_ln_fds * percent
|
min_lns = tot_ln_fds * percent
|
||||||
#self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
|
#self.log.debug("There must be fewer than " + unicode(min_lns) + " unmarked lines to add markup")
|
||||||
if min_lns > tot_htm_ends:
|
return min_lns > tot_htm_ends
|
||||||
return True
|
|
||||||
|
|
||||||
def dump(self, raw, where):
|
def dump(self, raw, where):
|
||||||
import os
|
import os
|
||||||
@ -136,7 +135,7 @@ class HeuristicProcessor(object):
|
|||||||
'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
|
'nota bene', 'Nota bene', 'Ste.', 'Mme.', 'Mdme.',
|
||||||
'Mlle.', 'Mons.', 'PS.', 'PPS.',
|
'Mlle.', 'Mons.', 'PS.', 'PPS.',
|
||||||
]
|
]
|
||||||
|
|
||||||
ITALICIZE_STYLE_PATS = [
|
ITALICIZE_STYLE_PATS = [
|
||||||
r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
|
r'(?msu)(?<=\s)_(?P<words>\S[^_]{0,40}?\S)?_(?=\s)',
|
||||||
r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
|
r'(?msu)(?<=\s)/(?P<words>\S[^/]{0,40}?\S)?/(?=\s)',
|
||||||
@ -150,7 +149,7 @@ class HeuristicProcessor(object):
|
|||||||
r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
|
r'(?msu)(?<=\s)/:(?P<words>\S[^:/]{0,40}?\S)?:/(?=\s)',
|
||||||
r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
|
r'(?msu)(?<=\s)\|:(?P<words>\S[^:\|]{0,40}?\S)?:\|(?=\s)',
|
||||||
]
|
]
|
||||||
|
|
||||||
for word in ITALICIZE_WORDS:
|
for word in ITALICIZE_WORDS:
|
||||||
html = html.replace(word, '<i>%s</i>' % word)
|
html = html.replace(word, '<i>%s</i>' % word)
|
||||||
|
|
||||||
@ -242,7 +241,7 @@ class HeuristicProcessor(object):
|
|||||||
lp_title = default_title
|
lp_title = default_title
|
||||||
else:
|
else:
|
||||||
lp_title = simple_title
|
lp_title = simple_title
|
||||||
|
|
||||||
if ignorecase:
|
if ignorecase:
|
||||||
arg_ignorecase = r'(?i)'
|
arg_ignorecase = r'(?i)'
|
||||||
else:
|
else:
|
||||||
@ -250,7 +249,7 @@ class HeuristicProcessor(object):
|
|||||||
|
|
||||||
if title_req:
|
if title_req:
|
||||||
lp_opt_title_open = ''
|
lp_opt_title_open = ''
|
||||||
lp_opt_title_close = ''
|
lp_opt_title_close = ''
|
||||||
else:
|
else:
|
||||||
lp_opt_title_open = opt_title_open
|
lp_opt_title_open = opt_title_open
|
||||||
lp_opt_title_close = opt_title_close
|
lp_opt_title_close = opt_title_close
|
||||||
@ -399,7 +398,7 @@ class HeuristicProcessor(object):
|
|||||||
if len(lines) > 1:
|
if len(lines) > 1:
|
||||||
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
|
self.log.debug("There are " + unicode(len(blanklines)) + " blank lines. " +
|
||||||
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
|
unicode(float(len(blanklines)) / float(len(lines))) + " percent blank")
|
||||||
|
|
||||||
if float(len(blanklines)) / float(len(lines)) > 0.40:
|
if float(len(blanklines)) / float(len(lines)) > 0.40:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
@ -460,7 +459,7 @@ class HeuristicProcessor(object):
|
|||||||
if getattr(self.extra_opts, 'markup_chapter_headings', False):
|
if getattr(self.extra_opts, 'markup_chapter_headings', False):
|
||||||
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
|
html = self.markup_chapters(html, self.totalwords, blanks_between_paragraphs)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'italicize_common_cases', False):
|
if getattr(self.extra_opts, 'italicize_common_cases', False):
|
||||||
html = self.markup_italicis(html)
|
html = self.markup_italicis(html)
|
||||||
|
|
||||||
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
|
# If more than 40% of the lines are empty paragraphs and the user has enabled delete
|
||||||
@ -487,7 +486,7 @@ class HeuristicProcessor(object):
|
|||||||
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
|
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
|
||||||
length = docanalysis.line_length(unwrap_factor)
|
length = docanalysis.line_length(unwrap_factor)
|
||||||
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
|
self.log.debug("Median line length is " + unicode(length) + ", calculated with " + format + " format")
|
||||||
|
|
||||||
###### Unwrap lines ######
|
###### Unwrap lines ######
|
||||||
if getattr(self.extra_opts, 'unwrap_lines', False):
|
if getattr(self.extra_opts, 'unwrap_lines', False):
|
||||||
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
|
# only go through unwrapping code if the histogram shows unwrapping is required or if the user decreased the default unwrap_factor
|
||||||
|
@ -7,8 +7,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
from calibre.ebooks.conversion.utils import HeuristicProcessor
|
|
||||||
|
|
||||||
|
|
||||||
class LITInput(InputFormatPlugin):
|
class LITInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
|
||||||
class MOBIInput(InputFormatPlugin):
|
class MOBIInput(InputFormatPlugin):
|
||||||
|
@ -53,6 +53,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
|
|
||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
|
self.log = log
|
||||||
log.debug('Reading text from file...')
|
log.debug('Reading text from file...')
|
||||||
|
|
||||||
txt = stream.read()
|
txt = stream.read()
|
||||||
@ -106,7 +107,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
|
log.debug('Auto detected paragraph type as %s' % options.paragraph_type)
|
||||||
|
|
||||||
# Dehyphenate
|
# Dehyphenate
|
||||||
dehyphenator = Dehyphenator(options.verbose, log=getattr(self, 'log', None))
|
dehyphenator = Dehyphenator(options.verbose, log=self.log)
|
||||||
txt = dehyphenator(txt,'txt', length)
|
txt = dehyphenator(txt,'txt', length)
|
||||||
|
|
||||||
# We don't check for block because the processor assumes block.
|
# We don't check for block because the processor assumes block.
|
||||||
|
@ -25,21 +25,21 @@ class HeuristicsWidget(Widget, Ui_Form):
|
|||||||
)
|
)
|
||||||
self.db, self.book_id = db, book_id
|
self.db, self.book_id = db, book_id
|
||||||
self.initialize_options(get_option, get_help, db, book_id)
|
self.initialize_options(get_option, get_help, db, book_id)
|
||||||
|
|
||||||
self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
|
self.opt_enable_heuristics.stateChanged.connect(self.enable_heuristics)
|
||||||
self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
|
self.opt_unwrap_lines.stateChanged.connect(self.enable_unwrap)
|
||||||
|
|
||||||
self.enable_heuristics(self.opt_enable_heuristics.checkState())
|
self.enable_heuristics(self.opt_enable_heuristics.checkState())
|
||||||
|
|
||||||
def break_cycles(self):
|
def break_cycles(self):
|
||||||
Widget.break_cycles(self)
|
Widget.break_cycles(self)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.opt_enable_heuristics.stateChanged.disconnect()
|
self.opt_enable_heuristics.stateChanged.disconnect()
|
||||||
self.opt_unwrap_lines.stateChanged.disconnect()
|
self.opt_unwrap_lines.stateChanged.disconnect()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def set_value_handler(self, g, val):
|
def set_value_handler(self, g, val):
|
||||||
if val is None and g is self.opt_html_unwrap_factor:
|
if val is None and g is self.opt_html_unwrap_factor:
|
||||||
g.setValue(0.0)
|
g.setValue(0.0)
|
||||||
@ -57,7 +57,7 @@ class HeuristicsWidget(Widget, Ui_Form):
|
|||||||
self.opt_format_scene_breaks.setEnabled(state)
|
self.opt_format_scene_breaks.setEnabled(state)
|
||||||
self.opt_dehyphenate.setEnabled(state)
|
self.opt_dehyphenate.setEnabled(state)
|
||||||
self.opt_renumber_headings.setEnabled(state)
|
self.opt_renumber_headings.setEnabled(state)
|
||||||
|
|
||||||
self.opt_unwrap_lines.setEnabled(state)
|
self.opt_unwrap_lines.setEnabled(state)
|
||||||
if state and self.opt_unwrap_lines.checkState() == Qt.Checked:
|
if state and self.opt_unwrap_lines.checkState() == Qt.Checked:
|
||||||
self.opt_html_unwrap_factor.setEnabled(True)
|
self.opt_html_unwrap_factor.setEnabled(True)
|
||||||
|
@ -19,7 +19,7 @@ from calibre.devices.scanner import DeviceScanner
|
|||||||
from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
|
from calibre.gui2 import config, error_dialog, Dispatcher, dynamic, \
|
||||||
warning_dialog, info_dialog, choose_dir
|
warning_dialog, info_dialog, choose_dir
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
from calibre.ebooks.metadata import authors_to_string
|
||||||
from calibre import preferred_encoding, prints, force_unicode
|
from calibre import preferred_encoding, prints, force_unicode, as_unicode
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.devices.errors import FreeSpaceError
|
from calibre.devices.errors import FreeSpaceError
|
||||||
from calibre.devices.apple.driver import ITUNES_ASYNC
|
from calibre.devices.apple.driver import ITUNES_ASYNC
|
||||||
@ -68,13 +68,7 @@ class DeviceJob(BaseJob): # {{{
|
|||||||
if self._aborted:
|
if self._aborted:
|
||||||
return
|
return
|
||||||
self.failed = True
|
self.failed = True
|
||||||
try:
|
ex = as_unicode(err)
|
||||||
ex = unicode(err)
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
ex = str(err).decode(preferred_encoding, 'replace')
|
|
||||||
except:
|
|
||||||
ex = repr(err)
|
|
||||||
self._details = ex + '\n\n' + \
|
self._details = ex + '\n\n' + \
|
||||||
traceback.format_exc()
|
traceback.format_exc()
|
||||||
self.exception = err
|
self.exception = err
|
||||||
|
Loading…
x
Reference in New Issue
Block a user