mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
started updating manuals, fix lit postprocess to handle content in pre tags correctly
This commit is contained in:
parent
8e1dfbc6fd
commit
2ebf94812e
@ -492,7 +492,9 @@ OptionRecommendation(name='enable_heuristics',
|
||||
OptionRecommendation(name='markup_chapter_headings',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Detect unformatted chapter headings and sub headings. Change '
|
||||
'them to h2 and h3 tags.')),
|
||||
'them to h2 and h3 tags. This setting will not create a TOC, '
|
||||
'but can be used in conjunction with structure detection to create '
|
||||
'one.')),
|
||||
|
||||
OptionRecommendation(name='italicize_common_cases',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
@ -501,7 +503,7 @@ OptionRecommendation(name='italicize_common_cases',
|
||||
|
||||
OptionRecommendation(name='fix_indents',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Turn indentation created from multiple entities '
|
||||
help=_('Turn indentation created from multiple non-breaking space entities '
|
||||
'into CSS indents.')),
|
||||
|
||||
OptionRecommendation(name='html_unwrap_factor',
|
||||
|
@ -22,7 +22,7 @@ class LITInput(InputFormatPlugin):
|
||||
from calibre.ebooks.lit.reader import LitReader
|
||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||
self.log = log
|
||||
return create_oebbook(log, stream, options, self, reader=LitReader)
|
||||
return create_oebbook(log, stream, options, reader=LitReader)
|
||||
|
||||
def postprocess_book(self, oeb, opts, log):
|
||||
from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
|
||||
@ -39,10 +39,13 @@ class LITInput(InputFormatPlugin):
|
||||
body = body[0]
|
||||
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
||||
pre = body[0]
|
||||
from calibre.ebooks.txt.processor import convert_basic
|
||||
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
|
||||
separate_paragraphs_single_line
|
||||
from lxml import etree
|
||||
import copy
|
||||
html = convert_basic(pre.text).replace('<html>',
|
||||
html = separate_paragraphs_single_line(pre.text)
|
||||
html = preserve_spaces(html)
|
||||
html = convert_basic(html).replace('<html>',
|
||||
'<html xmlns="%s">'%XHTML_NS)
|
||||
root = etree.fromstring(html)
|
||||
body = XPath('//h:body')(root)
|
||||
|
@ -255,6 +255,46 @@ you are producing are meant for a particular device type, choose the correspondi
|
||||
|
||||
The Output profile also controls the screen size. This will cause, for example, images to be auto-resized to fit the screen in some output formats. So choose the profile of a device whose screen size is similar to that of your device.
|
||||
|
||||
.. _heuristic-processing:
|
||||
|
||||
Heuristic Processing
|
||||
---------------------
|
||||
|
||||
:guilabel:`Preprocess input`
|
||||
This option activates various algorithms that try to detect and correct common cases of
|
||||
badly formatted input documents, such as hard line breaks or large blocks of text with no formatting.
|
||||
Turn this option on if your input document suffers from bad formatting. But be aware that in
|
||||
some cases, this option can lead to worse results, so use with care.
|
||||
|
||||
:guilabel:`Line-unwrap factor`
|
||||
This option controls the algorithm |app| uses to remove hard line breaks. For example, if the value of this
|
||||
option is 0.4, that means calibre will remove hard line breaks from the end of lines whose lengths are less
|
||||
than the length of 40% of all lines in the document.
|
||||
|
||||
:guilabel:`Unwrap lines`
|
||||
Lorem ipsum
|
||||
|
||||
:guilabel:`Detect and markup unformatted chapter headings and sub headings`
|
||||
Lorem ipsum
|
||||
|
||||
:guilabel:`Renumber sequences of <h1> or <h2> tags to prevent splitting`
|
||||
Lorem ipsum
|
||||
|
||||
:guilabel:`Delete blank lines between paragraphs`
|
||||
Lorem ipsum
|
||||
|
||||
:guilabel:`Ensure scene breaks are consistently formatted`
|
||||
Lorem ipsum
|
||||
|
||||
:guilabel:`Remove unnecessary hyphens`
|
||||
Lorem ipsum
|
||||
|
||||
:guilabel:`Italicize common words and patterns`
|
||||
Lorem ipsum
|
||||
|
||||
:guilabel:`Replace entity indents with CSS indents`
|
||||
Lorem ipsum
|
||||
|
||||
.. _structure-detection:
|
||||
|
||||
Structure Detection
|
||||
@ -330,16 +370,6 @@ There are a few more options in this section.
|
||||
two covers. This option will simply remove the first image from the source document, thereby
|
||||
ensuring that the converted book has only one cover, the one specified in |app|.
|
||||
|
||||
:guilabel:`Preprocess input`
|
||||
This option activates various algorithms that try to detect and correct common cases of
|
||||
badly formatted input documents. Things like hard line breaks, large blocks of text with no formatting, etc.
|
||||
Turn this option on if your input document suffers from bad formatting. But be aware that in
|
||||
some cases, this option can lead to worse results, so use with care.
|
||||
|
||||
:guilabel:`Line-unwrap factor`
|
||||
This option controls the algorithm |app| uses to remove hard line breaks. For example, if the value of this
|
||||
option is 0.4, that means calibre will remove hard line breaks from the end of lines whose lengths are less
|
||||
than the length of 40% of all lines in the document.
|
||||
|
||||
Table of Contents
|
||||
------------------
|
||||
@ -500,6 +530,9 @@ more blank lines are a paragraph boundary::
|
||||
|
||||
TXT input supports a number of options to differentiate how paragraphs are detected.
|
||||
|
||||
:guilabel:`Auto`
|
||||
Analyzes the text file and attempts to determine how paragraphs are defined.
|
||||
|
||||
:guilabel:`Treat each line as a paragraph`
|
||||
Assumes that every line is a paragraph::
|
||||
|
||||
@ -518,6 +551,12 @@ TXT input supports a number of options to differentiate how paragraphs are detec
|
||||
This is the
|
||||
third.
|
||||
|
||||
:guilabel:`Unformatted`
|
||||
Assumes that the document has no formatting, but does use hard line breaks. Punctuation
|
||||
and median line length are used to attempt to re-create paragraphs.
|
||||
|
||||
:guilabel:`Process using Textile`
|
||||
|
||||
:guilabel:`Process using markdown`
|
||||
|app| also supports running TXT input through a transformation preprocessor known as markdown. Markdown
|
||||
allows for basic formatting to be added to TXT documents, such as bold, italics, section headings, tables,
|
||||
|
Loading…
x
Reference in New Issue
Block a user