This commit is contained in:
Kovid Goyal 2013-12-10 22:30:55 +05:30
parent ed50bc7ed5
commit faf9fc0e1b
2 changed files with 12 additions and 17 deletions

View File

@ -11,8 +11,8 @@ import re
from lxml.etree import XMLParser, fromstring, XMLSyntaxError from lxml.etree import XMLParser, fromstring, XMLSyntaxError
import cssutils import cssutils
from calibre import force_unicode
from calibre.ebooks.html_entities import html5_entities from calibre.ebooks.html_entities import html5_entities
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style as fix_style_tag
from calibre.ebooks.oeb.polish.utils import PositionFinder from calibre.ebooks.oeb.polish.utils import PositionFinder
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR from calibre.ebooks.oeb.polish.check.base import BaseError, WARN, ERROR
from calibre.ebooks.oeb.base import OEB_DOCS from calibre.ebooks.oeb.base import OEB_DOCS
@ -23,15 +23,6 @@ ALL_ENTITIES = HTML_ENTITTIES | XML_ENTITIES
replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES)))) replace_pat = re.compile('&(%s);' % '|'.join(re.escape(x) for x in sorted((HTML_ENTITTIES - XML_ENTITIES))))
def fix_style_tag(container, style):
    """Re-serialize the CSS held in a ``<style>`` element, in place.

    The element's text is parsed with ``container.parse_css`` and replaced by
    the parsed sheet's ``cssText``, surrounded by newlines and followed by the
    whitespace that trails the last line of the text preceding the tag
    (presumably so the closing tag keeps the opening tag's indentation).

    :param container: Object providing ``parse_css(text)``.
    :param style: The ``<style>`` element to rewrite; must have a parent.
    """
    prev = style.getprevious()
    # The text in front of the tag is the parent's ``text`` for a first child
    # and the previous sibling's ``tail`` otherwise.
    ws = style.getparent().text if prev is None else prev.tail
    # ``text``/``tail`` are None when there is nothing before the tag, and
    # ``''.splitlines()`` is an empty list; both cases mean "no indent"
    # rather than an AttributeError/IndexError.
    lines = (ws or '').splitlines()
    last_line = lines[-1] if lines else ''
    # Keep only the trailing whitespace of that last line.
    indent = last_line[len(last_line.rstrip()):]
    sheet = container.parse_css(style.text)
    style.text = '\n' + force_unicode(sheet.cssText, 'utf-8') + '\n' + indent
class XMLParseError(BaseError): class XMLParseError(BaseError):
is_parsing_error = True is_parsing_error = True

View File

@ -150,6 +150,16 @@ def pretty_block(parent, level=1, indent=' '):
child.tail = '' child.tail = ''
child.tail = child.tail + nn + (indent * l) child.tail = child.tail + nn + (indent * l)
def pretty_script_or_style(container, child):
    """Re-indent the text of a ``<script>`` or ``<style>`` element, in place.

    Elements without text are left untouched. For tags whose name ends in
    ``style`` the text is first passed through ``pretty_css``. The text is
    then dedented, every non-empty line is prefixed with the indent returned
    by ``indent_for_tag(child)``, and a leading newline is added. Finally
    ``set_indent(child, 'text', indent)`` is called on the element.

    :param container: Passed through to ``pretty_css``.
    :param child: The element whose ``text`` is rewritten.
    """
    if not child.text:
        return

    indent = indent_for_tag(child)
    body = child.text
    if child.tag.endswith('style'):
        body = force_unicode(pretty_css(container, '', body), 'utf-8')

    # Strip the common leading whitespace before applying the tag's indent.
    body = textwrap.dedent(body)
    reindented = []
    for line in body.splitlines():
        # Blank lines stay blank so no trailing whitespace is introduced.
        reindented.append(indent + line if line else '')
    child.text = '\n' + '\n'.join(reindented)
    set_indent(child, 'text', indent)
def pretty_html_tree(container, root): def pretty_html_tree(container, root):
root.text = '\n\n' root.text = '\n\n'
for child in root: for child in root:
@ -161,13 +171,7 @@ def pretty_html_tree(container, root):
# Handle <script> and <style> tags # Handle <script> and <style> tags
for child in root.xpath('//*[local-name()="script" or local-name()="style"]'): for child in root.xpath('//*[local-name()="script" or local-name()="style"]'):
if child.text: pretty_script_or_style(container, child)
indent = indent_for_tag(child)
if child.tag.endswith('style'):
child.text = force_unicode(pretty_css(container, '', child.text), 'utf-8')
child.text = textwrap.dedent(child.text)
child.text = '\n' + '\n'.join([(indent + x) if x else '' for x in child.text.splitlines()])
set_indent(child, 'text', indent)
def fix_html(container, raw): def fix_html(container, raw):
root = container.parse_xhtml(raw) root = container.parse_xhtml(raw)