mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
LIT Input: Fix smarten punctuation not working for text-based (as opposed to HTML-based) LIT files. Fixes #1460998 [Private bug](https://bugs.launchpad.net/calibre/+bug/1460998)
This commit is contained in:
parent
dd1c304626
commit
cce2f70507
@ -26,7 +26,8 @@ class LITInput(InputFormatPlugin):
|
|||||||
from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
|
from calibre.ebooks.oeb.base import XHTML_NS, XPath, XHTML
|
||||||
for item in oeb.spine:
|
for item in oeb.spine:
|
||||||
root = item.data
|
root = item.data
|
||||||
if not hasattr(root, 'xpath'): continue
|
if not hasattr(root, 'xpath'):
|
||||||
|
continue
|
||||||
for bad in ('metadata', 'guide'):
|
for bad in ('metadata', 'guide'):
|
||||||
metadata = XPath('//h:'+bad)(root)
|
metadata = XPath('//h:'+bad)(root)
|
||||||
if metadata:
|
if metadata:
|
||||||
@ -42,11 +43,16 @@ class LITInput(InputFormatPlugin):
|
|||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import copy
|
import copy
|
||||||
|
self.log('LIT file with all text in singe <pre> tag detected')
|
||||||
html = separate_paragraphs_single_line(pre.text)
|
html = separate_paragraphs_single_line(pre.text)
|
||||||
html = convert_basic(html).replace('<html>',
|
html = convert_basic(html).replace('<html>',
|
||||||
'<html xmlns="%s">'%XHTML_NS)
|
'<html xmlns="%s">'%XHTML_NS)
|
||||||
html = xml_to_unicode(html, strip_encoding_pats=True,
|
html = xml_to_unicode(html, strip_encoding_pats=True,
|
||||||
resolve_entities=True)[0]
|
resolve_entities=True)[0]
|
||||||
|
if opts.smarten_punctuation:
|
||||||
|
# SmartyPants skips text inside <pre> tags
|
||||||
|
from calibre.ebooks.conversion.preprocess import smarten_punctuation
|
||||||
|
html = smarten_punctuation(html, self.log)
|
||||||
root = etree.fromstring(html)
|
root = etree.fromstring(html)
|
||||||
body = XPath('//h:body')(root)
|
body = XPath('//h:body')(root)
|
||||||
pre.tag = XHTML('div')
|
pre.tag = XHTML('div')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user