mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
started updating manuals, fix lit postprocess to handle content in pre tags correctly
This commit is contained in:
parent
8e1dfbc6fd
commit
2ebf94812e
@ -492,7 +492,9 @@ OptionRecommendation(name='enable_heuristics',
|
|||||||
OptionRecommendation(name='markup_chapter_headings',
|
OptionRecommendation(name='markup_chapter_headings',
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
help=_('Detect unformatted chapter headings and sub headings. Change '
|
help=_('Detect unformatted chapter headings and sub headings. Change '
|
||||||
'them to h2 and h3 tags.')),
|
'them to h2 and h3 tags. This setting will not create a TOC, '
|
||||||
|
'but can be used in conjunction with structure detection to create '
|
||||||
|
'one.')),
|
||||||
|
|
||||||
OptionRecommendation(name='italicize_common_cases',
|
OptionRecommendation(name='italicize_common_cases',
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
@ -501,7 +503,7 @@ OptionRecommendation(name='italicize_common_cases',
|
|||||||
|
|
||||||
OptionRecommendation(name='fix_indents',
|
OptionRecommendation(name='fix_indents',
|
||||||
recommended_value=False, level=OptionRecommendation.LOW,
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
help=_('Turn indentation created from multiple entities '
|
help=_('Turn indentation created from multiple non-breaking space entities '
|
||||||
'into CSS indents.')),
|
'into CSS indents.')),
|
||||||
|
|
||||||
OptionRecommendation(name='html_unwrap_factor',
|
OptionRecommendation(name='html_unwrap_factor',
|
||||||
|
@ -22,7 +22,7 @@ class LITInput(InputFormatPlugin):
|
|||||||
from calibre.ebooks.lit.reader import LitReader
|
from calibre.ebooks.lit.reader import LitReader
|
||||||
from calibre.ebooks.conversion.plumber import create_oebbook
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
self.log = log
|
self.log = log
|
||||||
return create_oebbook(log, stream, options, self, reader=LitReader)
|
return create_oebbook(log, stream, options, reader=LitReader)
|
||||||
|
|
||||||
def postprocess_book(self, oeb, opts, log):
|
def postprocess_book(self, oeb, opts, log):
|
||||||
from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
|
from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
|
||||||
@ -39,10 +39,13 @@ class LITInput(InputFormatPlugin):
|
|||||||
body = body[0]
|
body = body[0]
|
||||||
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
||||||
pre = body[0]
|
pre = body[0]
|
||||||
from calibre.ebooks.txt.processor import convert_basic
|
from calibre.ebooks.txt.processor import convert_basic, preserve_spaces, \
|
||||||
|
separate_paragraphs_single_line
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import copy
|
import copy
|
||||||
html = convert_basic(pre.text).replace('<html>',
|
html = separate_paragraphs_single_line(pre.text)
|
||||||
|
html = preserve_spaces(html)
|
||||||
|
html = convert_basic(html).replace('<html>',
|
||||||
'<html xmlns="%s">'%XHTML_NS)
|
'<html xmlns="%s">'%XHTML_NS)
|
||||||
root = etree.fromstring(html)
|
root = etree.fromstring(html)
|
||||||
body = XPath('//h:body')(root)
|
body = XPath('//h:body')(root)
|
||||||
|
@ -255,6 +255,46 @@ you are producing are meant for a particular device type, choose the correspondi
|
|||||||
|
|
||||||
The Output profile also controls the screen size. This will cause, for example, images to be auto-resized to fit the screen in some output formats. So choose a profile of a device that has a screen size similar to your device.
|
The Output profile also controls the screen size. This will cause, for example, images to be auto-resized to fit the screen in some output formats. So choose a profile of a device that has a screen size similar to your device.
|
||||||
|
|
||||||
|
.. _heuristic-processing:
|
||||||
|
|
||||||
|
Heuristic Processing
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
:guilabel:`Preprocess input`
|
||||||
|
This option activates various algorithms that try to detect and correct common cases of
|
||||||
|
badly formatted input documents. Things like hard line breaks, large blocks of text with no formatting, etc.
|
||||||
|
Turn this option on if your input document suffers from bad formatting. But be aware that in
|
||||||
|
some cases, this option can lead to worse results, so use with care.
|
||||||
|
|
||||||
|
:guilabel:`Line-unwrap factor`
|
||||||
|
This option controls the algorithm |app| uses to remove hard line breaks. For example, if the value of this
|
||||||
|
option is 0.4, that means calibre will remove hard line breaks from the end of lines whose lengths are less
|
||||||
|
than the length of 40% of all lines in the document.
|
||||||
|
|
||||||
|
:guilabel:`Unwrap lines`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
|
:guilabel:`Detect and markup unformatted chapter headings and sub headings`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
|
:guilabel:`Renumber sequences of <h1> or <h2> tags to prevent splitting`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
|
:guilabel:`Delete blank lines between paragraphs`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
|
:guilabel:`Ensure scene breaks are consistently formatted`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
|
:guilabel:`Remove unnecessary hyphens`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
|
:guilabel:`Italicize common words and patterns`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
|
:guilabel:`Replace entity indents with CSS indents`
|
||||||
|
Lorem ipsum
|
||||||
|
|
||||||
.. _structure-detection:
|
.. _structure-detection:
|
||||||
|
|
||||||
Structure Detection
|
Structure Detection
|
||||||
@ -330,16 +370,6 @@ There are a few more options in this section.
|
|||||||
two covers. This option will simply remove the first image from the source document, thereby
|
two covers. This option will simply remove the first image from the source document, thereby
|
||||||
ensuring that the converted book has only one cover, the one specified in |app|.
|
ensuring that the converted book has only one cover, the one specified in |app|.
|
||||||
|
|
||||||
:guilabel:`Preprocess input`
|
|
||||||
This option activates various algorithms that try to detect and correct common cases of
|
|
||||||
badly formatted input documents. Things like hard line breaks, large blocks of text with no formatting, etc.
|
|
||||||
Turn this option on if your input document suffers from bad formatting. But be aware that in
|
|
||||||
some cases, this option can lead to worse results, so use with care.
|
|
||||||
|
|
||||||
:guilabel:`Line-unwrap factor`
|
|
||||||
This option controls the algorithm |app| uses to remove hard line breaks. For example, if the value of this
|
|
||||||
option is 0.4, that means calibre will remove hard line breaks from the end of lines whose lengths are less
|
|
||||||
than the length of 40% of all lines in the document.
|
|
||||||
|
|
||||||
Table of Contents
|
Table of Contents
|
||||||
------------------
|
------------------
|
||||||
@ -500,6 +530,9 @@ more blank lines are a paragraph boundary::
|
|||||||
|
|
||||||
TXT input supports a number of options to differentiate how paragraphs are detected.
|
TXT input supports a number of options to differentiate how paragraphs are detected.
|
||||||
|
|
||||||
|
:guilabel:`Auto`
|
||||||
|
Analyzes the text file and attempts to determine how paragraphs are defined.
|
||||||
|
|
||||||
:guilabel:`Treat each line as a paragraph`
|
:guilabel:`Treat each line as a paragraph`
|
||||||
Assumes that every line is a paragraph::
|
Assumes that every line is a paragraph::
|
||||||
|
|
||||||
@ -518,6 +551,12 @@ TXT input supports a number of options to differentiate how paragraphs are detec
|
|||||||
This is the
|
This is the
|
||||||
third.
|
third.
|
||||||
|
|
||||||
|
:guilabel:`Unformatted`
|
||||||
|
Assumes that the document has no formatting, but does use hard line breaks. Punctuation
|
||||||
|
and median line length are used to attempt to re-create paragraphs.
|
||||||
|
|
||||||
|
:guilabel:`Process using Textile`
|
||||||
|
|
||||||
:guilabel:`Process using markdown`
|
:guilabel:`Process using markdown`
|
||||||
|app| also supports running TXT input through a transformation preprocessor known as markdown. Markdown
|
|app| also supports running TXT input through a transformation preprocessor known as markdown. Markdown
|
||||||
allows for basic formatting to be added to TXT documents, such as bold, italics, section headings, tables,
|
allows for basic formatting to be added to TXT documents, such as bold, italics, section headings, tables,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user