Tweak to append separator character to completed text. TXT input: apply paragraph transformations when specified, or detected with auto, no matter the formatting type specified or detected.

This commit is contained in:
Kovid Goyal 2011-02-04 23:26:43 -07:00
commit 79bb1a7cb5
3 changed files with 47 additions and 26 deletions

View File

@ -30,6 +30,13 @@ defaults.
series_index_auto_increment = 'next' series_index_auto_increment = 'next'
# Should the completion separator be appended
# to the end of the completed text to
# automatically begin a new completion operation.
# Can be either True or False
completer_append_separator = False
# The algorithm used to copy author to author_sort # The algorithm used to copy author to author_sort
# Possible values are: # Possible values are:
# invert: use "fn ln" -> "ln, fn" (the original algorithm) # invert: use "fn ln" -> "ln, fn" (the original algorithm)

View File

@ -57,6 +57,7 @@ class TXTInput(InputFormatPlugin):
log.debug('Reading text from file...') log.debug('Reading text from file...')
txt = stream.read() txt = stream.read()
# Get the encoding of the document. # Get the encoding of the document.
if options.input_encoding: if options.input_encoding:
ienc = options.input_encoding ienc = options.input_encoding
@ -70,13 +71,16 @@ class TXTInput(InputFormatPlugin):
log.debug('No input encoding specified and could not auto detect using %s' % ienc) log.debug('No input encoding specified and could not auto detect using %s' % ienc)
txt = txt.decode(ienc, 'replace') txt = txt.decode(ienc, 'replace')
# Replace entities
txt = _ent_pat.sub(xml_entity_to_unicode, txt) txt = _ent_pat.sub(xml_entity_to_unicode, txt)
# Normalize line endings # Normalize line endings
txt = normalize_line_endings(txt) txt = normalize_line_endings(txt)
# Detect formatting
if options.formatting_type == 'auto': if options.formatting_type == 'auto':
options.formatting_type = detect_formatting_type(txt) options.formatting_type = detect_formatting_type(txt)
log.debug('Auto detected formatting as %s' % options.formatting_type)
if options.formatting_type == 'heuristic': if options.formatting_type == 'heuristic':
setattr(options, 'enable_heuristics', True) setattr(options, 'enable_heuristics', True)
@ -105,41 +109,43 @@ class TXTInput(InputFormatPlugin):
docanalysis = DocAnalysis('txt', txt) docanalysis = DocAnalysis('txt', txt)
length = docanalysis.line_length(.5) length = docanalysis.line_length(.5)
# Reformat paragraphs to block formatting based on the detected type.
# We don't check for block because the processor assumes block.
# single and print are transformed to block for processing.
if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
txt = separate_paragraphs_single_line(txt)
elif options.paragraph_type == 'print':
txt = separate_paragraphs_print_formatted(txt)
elif options.paragraph_type == 'unformatted':
from calibre.ebooks.conversion.utils import HeuristicProcessor
# unwrap lines based on punctuation
preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
# Process the text using the appropriate text processor.
html = ''
if options.formatting_type == 'markdown': if options.formatting_type == 'markdown':
log.debug('Running text though markdown conversion...') log.debug('Running text through markdown conversion...')
try: try:
html = convert_markdown(txt, disable_toc=options.markdown_disable_toc) html = convert_markdown(txt, disable_toc=options.markdown_disable_toc)
except RuntimeError: except RuntimeError:
raise ValueError('This txt file has malformed markup, it cannot be' raise ValueError('This txt file has malformed markup, it cannot be'
' converted by calibre. See http://daringfireball.net/projects/markdown/syntax') ' converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
elif options.formatting_type == 'textile': elif options.formatting_type == 'textile':
log.debug('Running text though textile conversion...') log.debug('Running text through textile conversion...')
html = convert_textile(txt) html = convert_textile(txt)
else: else:
# Dehyphenate log.debug('Running text through basic conversion...')
dehyphenator = Dehyphenator(options.verbose, log=self.log) if options.formatting_type == 'heuristic':
txt = dehyphenator(txt,'txt', length) # Dehyphenate
dehyphenator = Dehyphenator(options.verbose, log=self.log)
# We don't check for block because the processor assumes block. txt = dehyphenator(txt,'txt', length)
# single and print at transformed to block for processing.
if options.paragraph_type == 'single' or options.paragraph_type == 'unformatted':
txt = separate_paragraphs_single_line(txt)
elif options.paragraph_type == 'print':
txt = separate_paragraphs_print_formatted(txt)
if options.paragraph_type == 'unformatted':
from calibre.ebooks.conversion.utils import HeuristicProcessor
# get length
# unwrap lines based on punctuation
preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
flow_size = getattr(options, 'flow_size', 0) flow_size = getattr(options, 'flow_size', 0)
html = convert_basic(txt, epub_split_size_kb=flow_size) html = convert_basic(txt, epub_split_size_kb=flow_size)
# Run the HTMLized text through the html processing plugin.
from calibre.customize.ui import plugin_for_input_format from calibre.customize.ui import plugin_for_input_format
html_input = plugin_for_input_format('html') html_input = plugin_for_input_format('html')
for opt in html_input.options: for opt in html_input.options:
@ -158,6 +164,7 @@ class TXTInput(InputFormatPlugin):
htmlfile.write(html.encode('utf-8')) htmlfile.write(html.encode('utf-8'))
odi = options.debug_pipeline odi = options.debug_pipeline
options.debug_pipeline = None options.debug_pipeline = None
# Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log, oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{}) {})
options.debug_pipeline = odi options.debug_pipeline = odi

View File

@ -10,6 +10,7 @@ from PyQt4.Qt import QLineEdit, QListView, QAbstractListModel, Qt, QTimer, \
QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \ QApplication, QPoint, QItemDelegate, QStyleOptionViewItem, \
QStyle, QEvent, pyqtSignal QStyle, QEvent, pyqtSignal
from calibre.utils.config import tweaks
from calibre.utils.icu import sort_key, lower from calibre.utils.icu import sort_key, lower
from calibre.gui2 import NONE from calibre.gui2 import NONE
from calibre.gui2.widgets import EnComboBox from calibre.gui2.widgets import EnComboBox
@ -231,12 +232,18 @@ class MultiCompleteLineEdit(QLineEdit):
cursor_pos = self.cursorPosition() cursor_pos = self.cursorPosition()
before_text = unicode(self.text())[:cursor_pos] before_text = unicode(self.text())[:cursor_pos]
after_text = unicode(self.text())[cursor_pos:] after_text = unicode(self.text())[cursor_pos:]
after_parts = after_text.split(self.sep)
if len(after_parts) < 3 and not after_parts[-1].strip():
after_text = u''
prefix_len = len(before_text.split(self.sep)[-1].lstrip()) prefix_len = len(before_text.split(self.sep)[-1].lstrip())
return prefix_len, \ if tweaks['completer_append_separator']:
before_text[:cursor_pos - prefix_len] + text + after_text prefix_len = len(before_text.split(self.sep)[-1].lstrip())
completed_text = before_text[:cursor_pos - prefix_len] + text + self.sep + ' ' + after_text
prefix_len = prefix_len - len(self.sep) - 1
if prefix_len < 0:
prefix_len = 0
else:
prefix_len = len(before_text.split(self.sep)[-1].lstrip())
completed_text = before_text[:cursor_pos - prefix_len] + text + after_text
return prefix_len, completed_text
def completion_selected(self, text): def completion_selected(self, text):
prefix_len, ctext = self.get_completed_text(text) prefix_len, ctext = self.get_completed_text(text)