py3: Ensure cssText is of well-known type

This commit is contained in:
Kovid Goyal 2019-04-28 15:10:18 +05:30
parent e99c7fd296
commit 5b3376c3f5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
18 changed files with 68 additions and 56 deletions

View File

@ -83,7 +83,10 @@ class FB2Input(InputFormatPlugin):
log.debug('Parsing stylesheet...')
stylesheet = parser.parseString(text)
stylesheet.namespaces['h'] = XHTML_NS
css = unicode_type(stylesheet.cssText).replace('h|style', 'h|span')
css = stylesheet.cssText
if isinstance(css, bytes):
css = css.decode('utf-8', 'replace')
css = css.replace('h|style', 'h|span')
css = re.sub(r'name\s*=\s*', 'class=', css)
self.extract_embedded_content(doc)
log.debug('Converting XML to HTML...')

View File

@ -210,7 +210,7 @@ class PDFOutput(OutputFormatPlugin):
def process_fonts(self):
''' Make sure all fonts are embeddable. Also remove some fonts that cause problems. '''
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.base import urlnormalize, css_text
from calibre.utils.fonts.utils import remove_embed_restriction
processed = set()
@ -244,7 +244,7 @@ class PDFOutput(OutputFormatPlugin):
s = rule.style
f = s.getProperty(u'font-family')
if f is not None:
font_families = parse_font_family(f.propertyValue.cssText)
font_families = parse_font_family(css_text(f.propertyValue))
ff = [x for x in font_families if x.lower() != u'courier']
if len(ff) != len(font_families):
if 'courier' not in self.filtered_font_warnings:

View File

@ -21,7 +21,7 @@ from calibre.ebooks.oeb.base import (
XHTML, XHTML_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.utils.logging import default_log
from polyglot.builtins import unicode_type, string_or_bytes
from polyglot.builtins import unicode_type, string_or_bytes, as_bytes
from polyglot.urllib import urldefrag
SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
@ -135,7 +135,7 @@ class OEB2HTML(object):
css = b''
for item in oeb_book.manifest:
if item.media_type == 'text/css':
css += item.data.cssText + b'\n\n'
css += as_bytes(item.data.cssText) + b'\n\n'
return css
def prepare_string_for_html(self, raw):

View File

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
from lxml import etree
from calibre.ebooks.oeb.base import (urlnormalize, XPath, XHTML_NS, XHTML,
XHTML_MIME)
XHTML_MIME, css_text)
DEFAULT_TITLE = __('Table of Contents')
@ -85,7 +85,7 @@ class TOCAdder(object):
embed_css = ''
s = getattr(oeb, 'store_embed_font_rules', None)
if getattr(s, 'body_font_family', None):
css = [x.cssText for x in s.rules] + [
css = [css_text(x) for x in s.rules] + [
'body { font-family: %s }'%s.body_font_family]
embed_css = '\n\n'.join(css)
@ -139,5 +139,3 @@ class TOCAdder(object):
if self.added_toc_guide_entry:
self.oeb.guide.remove('toc')
self.added_toc_guide_entry = False

View File

@ -110,6 +110,13 @@ self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b
'video', 'title', 'script', 'style'}
def css_text(x):
    ''' Return the ``cssText`` attribute of x as a text string.

    If ``x.cssText`` is a bytes object it is decoded as UTF-8, with
    undecodable bytes replaced rather than raising an error. Any other
    value is returned unchanged.

    :param x: Any object that has a ``cssText`` attribute.
    :return: ``x.cssText``, decoded to text if it was bytes.
    '''
    ans = x.cssText
    if isinstance(ans, bytes):
        # 'replace' keeps serialization best-effort: malformed bytes become
        # U+FFFD instead of raising UnicodeDecodeError
        ans = ans.decode('utf-8', 'replace')
    return ans
def as_string_type(pat, for_unicode):
if for_unicode:
if isinstance(pat, bytes):
@ -284,9 +291,7 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
el.text):
stylesheet = parser.parseString(el.text, validate=False)
replaceUrls(stylesheet, link_repl_func)
repl = stylesheet.cssText
if isbytestring(repl):
repl = repl.decode('utf-8')
repl = css_text(stylesheet)
el.text = '\n'+ clean_xml_chars(repl) + '\n'
text = el.get('style')
@ -297,10 +302,8 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
# Parsing errors are raised by css_parser
continue
replaceUrls(stext, link_repl_func)
repl = stext.cssText.replace('\n', ' ').replace('\r',
repl = css_text(stext).replace('\n', ' ').replace('\r',
' ')
if isbytestring(repl):
repl = repl.decode('utf-8')
el.set('style', repl)
@ -1088,7 +1091,7 @@ class Manifest(object):
if isinstance(data, unicode_type):
return data
if hasattr(data, 'cssText'):
return unicode_type(data.cssText, 'utf-8', 'replace')
return css_text(data)
return unicode_type(data)
@property

View File

@ -13,6 +13,7 @@ from functools import wraps
from css_parser.css import PropertyValue
from css_parser import profile as cssprofiles, CSSParser
from tinycss.fonts3 import parse_font, serialize_font_family
from calibre.ebooks.oeb.base import css_text
DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll', # {{{
'background-color': 'transparent', 'background-image': 'none',
@ -62,9 +63,9 @@ BORDER_PROPS = ('color', 'style', 'width')
def normalize_edge(name, cssvalue):
style = {}
if isinstance(cssvalue, PropertyValue):
primitives = [v.cssText for v in cssvalue]
primitives = [css_text(v) for v in cssvalue]
else:
primitives = [cssvalue.cssText]
primitives = [css_text(cssvalue)]
if len(primitives) == 1:
value, = primitives
values = (value, value, value, value)
@ -96,14 +97,14 @@ def simple_normalizer(prefix, names, check_inherit=True):
def normalize_simple_composition(name, cssvalue, composition, check_inherit=True):
if check_inherit and cssvalue.cssText == 'inherit':
if check_inherit and css_text(cssvalue) == 'inherit':
style = {k:'inherit' for k in composition}
else:
style = {k:DEFAULTS[k] for k in composition}
try:
primitives = [v.cssText for v in cssvalue]
primitives = [css_text(v) for v in cssvalue]
except TypeError:
primitives = [cssvalue.cssText]
primitives = [css_text(cssvalue)]
while primitives:
value = primitives.pop()
for key in composition:
@ -119,7 +120,7 @@ font_composition = ('font-style', 'font-variant', 'font-weight', 'font-size', 'l
def normalize_font(cssvalue, font_family_as_list=False):
# See https://developer.mozilla.org/en-US/docs/Web/CSS/font
composition = font_composition
val = cssvalue.cssText
val = css_text(cssvalue)
if val == 'inherit':
ans = {k:'inherit' for k in composition}
elif val in {'caption', 'icon', 'menu', 'message-box', 'small-caption', 'status-bar'}:
@ -416,7 +417,7 @@ def test_normalization(return_tests=False): # {{{
css = '; '.join('border-%s-%s: %s' % (edge, p, v) for edge in ('top',) for p, v in zip(BORDER_PROPS, vals.split()))
style = parseStyle(css)
condense_rule(style)
self.assertEqual(style.cssText, 'border-top: %s' % vals)
self.assertEqual(css_text(style), 'border-top: %s' % vals)
css += ';' + '; '.join('border-%s-%s: %s' % (edge, p, v) for edge in ('right', 'left', 'bottom') for p, v in
zip(BORDER_PROPS, vals.replace('red', 'green').split()))
style = parseStyle(css)

View File

@ -15,7 +15,7 @@ from css_parser.css import CSSStyleSheet, CSSRule, Property
from css_selectors import Select, INAPPROPRIATE_PSEUDO_CLASSES, SelectorError
from calibre import as_unicode
from calibre.ebooks.css_transform_rules import all_properties
from calibre.ebooks.oeb.base import OEB_STYLES, XHTML
from calibre.ebooks.oeb.base import OEB_STYLES, XHTML, css_text
from calibre.ebooks.oeb.normalize_css import normalizers, DEFAULTS
from calibre.ebooks.oeb.stylizer import media_ok, INHERITED
from tinycss.fonts3 import serialize_font_family, parse_font_family
@ -120,8 +120,8 @@ class Values(tuple):
def cssText(self):
' This will return either a string or a tuple of strings '
if len(self) == 1:
return self[0].cssText
return tuple(x.cssText for x in self)
return css_text(self[0])
return tuple(css_text(x) for x in self)
def normalize_style_declaration(decl, sheet_name):
@ -129,7 +129,7 @@ def normalize_style_declaration(decl, sheet_name):
for prop in iterdeclaration(decl):
if prop.name == 'font-family':
# Needed because of https://bitbucket.org/cthedot/cssutils/issues/66/incorrect-handling-of-spaces-in-font
prop.propertyValue.cssText = serialize_font_family(parse_font_family(prop.propertyValue.cssText))
prop.propertyValue.cssText = serialize_font_family(parse_font_family(css_text(prop.propertyValue)))
ans[prop.name] = Values(prop.propertyValue, sheet_name, prop.priority)
return ans

View File

@ -13,7 +13,7 @@ from css_parser.css import CSSRule, CSSStyleDeclaration
from css_selectors import parse, SelectorSyntaxError
from calibre import force_unicode
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XHTML
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XHTML, css_text
from calibre.ebooks.oeb.normalize_css import normalize_filter_css, normalizers
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style, pretty_xml_tree, serialize
from calibre.utils.icu import numeric_sort_key
@ -329,9 +329,9 @@ def remove_property_value(prop, predicate):
if len(removed_vals) == len(prop.propertyValue):
prop.parent.removeProperty(prop.name)
else:
x = prop.propertyValue.cssText
x = css_text(prop.propertyValue)
for v in removed_vals:
x = x.replace(v.cssText, '').strip()
x = x.replace(css_text(v), '').strip()
prop.propertyValue.cssText = x
return bool(removed_vals)

View File

@ -6,6 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from calibre.ebooks.oeb.base import css_text
from calibre.ebooks.oeb.polish.container import OEB_STYLES, OEB_DOCS
from calibre.ebooks.oeb.normalize_css import normalize_font
from tinycss.fonts3 import parse_font_family, parse_font, serialize_font_family, serialize_font
@ -27,7 +28,7 @@ def font_family_data_from_declaration(style, families):
font_families = [unquote(x) for x in f]
f = style.getProperty('font-family')
if f is not None:
font_families = parse_font_family(f.propertyValue.cssText)
font_families = parse_font_family(css_text(f.propertyValue))
for f in font_families:
families[f] = families.get(f, False)
@ -40,7 +41,7 @@ def font_family_data_from_sheet(sheet, families):
elif rule.type == rule.FONT_FACE_RULE:
ff = rule.style.getProperty('font-family')
if ff is not None:
for f in parse_font_family(ff.propertyValue.cssText):
for f in parse_font_family(css_text(ff.propertyValue)):
families[f] = True
@ -67,7 +68,7 @@ def change_font_in_declaration(style, old_name, new_name=None):
changed = False
ff = style.getProperty('font-family')
if ff is not None:
fams = parse_font_family(ff.propertyValue.cssText)
fams = parse_font_family(css_text(ff.propertyValue))
nfams = list(filter(None, [new_name if x == old_name else x for x in fams]))
if fams != nfams:
if nfams:
@ -77,7 +78,7 @@ def change_font_in_declaration(style, old_name, new_name=None):
changed = True
ff = style.getProperty('font')
if ff is not None:
props = parse_font(ff.propertyValue.cssText)
props = parse_font(css_text(ff.propertyValue))
fams = props.get('font-family') or []
nfams = list(filter(None, [new_name if x == old_name else x for x in fams]))
if fams != nfams:
@ -112,7 +113,7 @@ def change_font_in_sheet(container, sheet, old_name, new_name, sheet_name):
elif rule.type == rule.FONT_FACE_RULE:
ff = rule.style.getProperty('font-family')
if ff is not None:
families = {x for x in parse_font_family(ff.propertyValue.cssText)}
families = {x for x in parse_font_family(css_text(ff.propertyValue))}
if old_name in families:
changed = True
removals.append(rule)
@ -148,7 +149,7 @@ def change_font(container, old_name, new_name=None):
if style:
style = container.parse_css(style, is_declaration=True)
if change_font_in_declaration(style, old_name, new_name):
style = style.cssText.strip().rstrip(';').strip()
style = css_text(style).strip().rstrip(';').strip()
if style:
elem.set('style', style)
else:

View File

@ -14,6 +14,7 @@ from collections import Counter, defaultdict
from calibre import sanitize_file_name
from calibre.ebooks.chardet import strip_encoding_declarations
from calibre.ebooks.oeb.base import css_text
from calibre.ebooks.oeb.polish.css import iter_declarations, remove_property_value
from calibre.ebooks.oeb.polish.utils import extract
from polyglot.urllib import urlparse, urlunparse
@ -371,7 +372,7 @@ def remove_links_to(container, predicate):
if tag.text and (tag.get('type') or 'text/css').lower() == 'text/css':
sheet = container.parse_css(tag.text)
if remove_links_in_sheet(partial(container.href_to_name, base=name), sheet, predicate):
tag.text = sheet.cssText
tag.text = css_text(sheet)
removed = True
for tag in styleattrpath(root):
style = tag.get('style')
@ -379,7 +380,7 @@ def remove_links_to(container, predicate):
style = container.parse_css(style, is_declaration=True)
if remove_links_in_declaration(partial(container.href_to_name, base=name), style, predicate):
removed = True
tag.set('style', style.cssText)
tag.set('style', css_text(style))
elif mt in OEB_STYLES:
removed = remove_links_in_sheet(partial(container.href_to_name, base=name), container.parsed(name), predicate)
if removed:

View File

@ -13,7 +13,7 @@ from functools import partial
from lxml.etree import tostring
import regex
from calibre.ebooks.oeb.base import XHTML
from calibre.ebooks.oeb.base import XHTML, css_text
from calibre.ebooks.oeb.polish.cascade import iterrules, resolve_styles, iterdeclaration
from calibre.utils.icu import ord_string, safe_chr
from polyglot.builtins import unicode_type
@ -204,7 +204,7 @@ class StatsCollector(object):
cssdict = {}
for prop in iterdeclaration(rule.style):
if prop.name == 'font-family':
cssdict['font-family'] = [icu_lower(x) for x in parse_font_family(prop.propertyValue.cssText)]
cssdict['font-family'] = [icu_lower(x) for x in parse_font_family(css_text(prop.propertyValue))]
elif prop.name.startswith('font-'):
cssdict[prop.name] = prop.propertyValue[0].value
elif prop.name == 'src':
@ -215,7 +215,7 @@ class StatsCollector(object):
cssdict['src'] = fname
break
else:
container.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % prop.propertyValue.cssText)
container.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % css_text(prop.propertyValue))
if 'src' not in cssdict:
continue
ff = cssdict.get('font-family')

View File

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import os, sys
from calibre import prints, as_unicode
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XPath, css_text
from calibre.ebooks.oeb.polish.container import OEB_FONTS
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.utils.fonts.sfnt.subset import subset
@ -102,7 +102,7 @@ def subset_all_fonts(container, font_stats, report):
if style.get('type', 'text/css') == 'text/css' and style.text:
sheet = container.parse_css(style.text, name)
if remove_font_face_rules(container, sheet, remove, name):
style.text = sheet.cssText
style.text = css_text(sheet)
container.dirty(name)
if total_old > 0:
report(_('Reduced total font size to %.1f%% of original')%(

View File

@ -297,6 +297,8 @@ class Stylizer(object):
self.logger.warn('CSS import of non-CSS file %r' % path)
return (None, None)
data = item.data.cssText
if not isinstance(data, bytes):
data = data.encode('utf-8')
return ('utf-8', data)
def flatten_rule(self, rule, href, index, is_user_agent_sheet=False):
@ -491,6 +493,8 @@ class Style(object):
val = [val]
for c in val:
c = c.cssText
if isinstance(c, bytes):
c = c.decode('utf-8', 'replace')
if validate_color(c):
col = c
break

View File

@ -17,7 +17,7 @@ from css_parser.css import Property
from calibre import guess_type
from calibre.ebooks import unit_convert
from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
namespace, barename, XPath)
namespace, barename, XPath, css_text)
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.utils.filenames import ascii_filename, ascii_text
from calibre.utils.icu import numeric_sort_key
@ -138,7 +138,7 @@ class EmbedFontsCSSRules(object):
return None
if not self.href:
iid, href = oeb.manifest.generate(u'page_styles', u'page_styles.css')
rules = [x.cssText for x in self.rules]
rules = [css_text(x) for x in self.rules]
rules = u'\n\n'.join(rules)
sheet = css_parser.parseString(rules, validate=False)
self.href = oeb.manifest.add(iid, href, guess_type(href)[0],
@ -637,7 +637,7 @@ class CSSFlattener(object):
items = sorted(stylizer.page_rule.items())
css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
css = ('@page {\n%s\n}\n'%css) if items else ''
rules = [r.cssText for r in stylizer.font_face_rules + self.embed_font_rules]
rules = [css_text(r) for r in stylizer.font_face_rules + self.embed_font_rules]
raw = '\n\n'.join(rules)
css += '\n\n' + raw
global_css[css].append(item)

View File

@ -15,7 +15,7 @@ from collections import OrderedDict
from lxml.etree import XPath as _XPath
from lxml import etree
from calibre import as_unicode
from calibre import as_unicode, force_unicode
from calibre.ebooks.epub import rules
from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
urldefrag, rewrite_links, urlunquote, XHTML, urlnormalize)
@ -84,10 +84,10 @@ class Split(object):
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
OEB_STYLES]
for rule in rules(stylesheets):
before = getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower()
after = getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower()
before = force_unicode(getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower())
after = force_unicode(getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower())
try:
if before and before not in {'avoid', 'auto', 'inherit'}:
self.page_break_selectors.add((rule.selectorText, True))

View File

@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
from collections import defaultdict
from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.base import urlnormalize, css_text
from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
from polyglot.builtins import iteritems, itervalues, unicode_type, range
from tinycss.fonts3 import parse_font_family
@ -30,7 +30,7 @@ def get_font_properties(rule, default=None):
val = s.getProperty(q).propertyValue[0]
val = getattr(val, g)
if q == 'font-family':
val = parse_font_family(s.getProperty(q).propertyValue.cssText)
val = parse_font_family(css_text(s.getProperty(q).propertyValue))
if val and val[0] == 'inherit':
val = None
except (IndexError, KeyError, AttributeError, TypeError, ValueError):

View File

@ -15,6 +15,7 @@ from PyQt5.Qt import QTextEdit, Qt, QTextCursor
from calibre import prepare_string_for_xml, xml_entity_to_unicode
from calibre.ebooks.oeb.polish.container import OEB_DOCS
from calibre.ebooks.oeb.base import css_text
from calibre.gui2 import error_dialog
from calibre.gui2.tweak_book.editor.syntax.html import ATTR_NAME, ATTR_END, ATTR_START, ATTR_VALUE
from calibre.gui2.tweak_book import tprefs, current_container
@ -280,7 +281,7 @@ def set_style_property(tag, property_name, value, editor):
c = editor.textCursor()
def css(d):
return d.cssText.replace('\n', ' ')
return css_text(d).replace('\n', ' ')
if block is None or offset is None:
d = parseStyle('')
d.setProperty(property_name, value)

View File

@ -18,7 +18,7 @@ from PyQt5.Qt import (
import regex
from calibre import prepare_string_for_xml
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, css_text
from calibre.ebooks.oeb.polish.replace import get_recommended_folders
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.gui2.tweak_book import (
@ -445,7 +445,7 @@ class TextEdit(PlainTextEdit):
c.movePosition(c.Start), c.movePosition(c.End, c.KeepAnchor)
text = unicode_type(c.selectedText()).replace(PARAGRAPH_SEPARATOR, '\n').rstrip('\0')
from calibre.ebooks.oeb.polish.css import sort_sheet
text = sort_sheet(current_container(), text).cssText
text = css_text(sort_sheet(current_container(), text))
c.insertText(text)
c.movePosition(c.Start)
c.endEditBlock()