Fix Irish Independent and update bundled cssutils

This commit is contained in:
Kovid Goyal 2009-12-23 08:17:56 -07:00
parent 17202ef371
commit 154466cbbc
9 changed files with 626 additions and 85 deletions

View File

@ -15,6 +15,7 @@ class IrishIndependent(BasicNewsRecipe):
max_articles_per_feed = 100
remove_tags_before = dict(id='article')
remove_tags_after = [dict(name='div', attrs={'class':'toolsBottom'})]
no_stylesheets = True
remove_tags = [
dict(name='div',attrs={'class':'toolsBottom'}),
dict(name='div',attrs={'class':'toolsTop'}),

View File

@ -412,7 +412,7 @@ def check_isbn13(isbn):
return None
def check_isbn(isbn):
isbn = re.sub(r'[^0-9X]', '', isbn).upper()
isbn = re.sub(r'[^0-9X]', '', isbn.upper())
if len(isbn) == 10:
return check_isbn10(isbn)
if len(isbn) == 13:

View File

@ -17,7 +17,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
A Python package to parse and build CSS Cascading Style Sheets. DOM only, not any rendering facilities!
A Python package to parse and build CSS Cascading Style Sheets. DOM only, not
any rendering facilities!
Based upon and partly implementing the following specifications :
@ -26,30 +27,47 @@ Based upon and partly implementing the following specifications :
`CSS 2.1 Errata <http://www.w3.org/Style/css2-updates/CR-CSS21-20070719-errata.html>`__
A few errata, mainly the definition of CHARSET_SYM tokens
`CSS3 Module: Syntax <http://www.w3.org/TR/css3-syntax/>`__
Used in parts since cssutils 0.9.4. cssutils tries to use the features from CSS 2.1 and CSS 3 with preference to CSS3 but as this is not final yet some parts are from CSS 2.1
Used in parts since cssutils 0.9.4. cssutils tries to use the features from
CSS 2.1 and CSS 3 with preference to CSS3 but as this is not final yet some
parts are from CSS 2.1
`MediaQueries <http://www.w3.org/TR/css3-mediaqueries/>`__
MediaQueries are part of ``stylesheets.MediaList`` since v0.9.4, used in @import and @media rules.
MediaQueries are part of ``stylesheets.MediaList`` since v0.9.4, used in
@import and @media rules.
`Namespaces <http://dev.w3.org/csswg/css3-namespace/>`__
Added in v0.9.1, updated to definition in CSSOM in v0.9.4, updated in 0.9.5 for dev version
Added in v0.9.1, updated to definition in CSSOM in v0.9.4, updated in 0.9.5
for dev version
`Selectors <http://www.w3.org/TR/css3-selectors/>`__
The selector syntax defined here (and not in CSS 2.1) should be parsable with cssutils (*should* mind though ;) )
The selector syntax defined here (and not in CSS 2.1) should be parsable
with cssutils (*should* mind though ;) )
`DOM Level 2 Style CSS <http://www.w3.org/TR/DOM-Level-2-Style/css.html>`__
DOM for package css
`DOM Level 2 Style Stylesheets <http://www.w3.org/TR/DOM-Level-2-Style/stylesheets.html>`__
DOM for package stylesheets
`CSSOM <http://dev.w3.org/csswg/cssom/>`__
A few details (mainly the NamespaceRule DOM) is taken from here. Plan is to move implementation to the stuff defined here which is newer but still no REC so might change anytime...
A few details (mainly the NamespaceRule DOM) are taken from here. The plan
is to move the implementation to what is defined here, which is newer but
still not a REC and so might change at any time...
The cssutils tokenizer is a customized implementation of `CSS3 Module: Syntax (W3C Working Draft 13 August 2003) <http://www.w3.org/TR/css3-syntax/>`__ which itself is based on the CSS 2.1 tokenizer. It tries to be as compliant as possible but uses some (helpful) parts of the CSS 2.1 tokenizer.
The cssutils tokenizer is a customized implementation of `CSS3 Module: Syntax
(W3C Working Draft 13 August 2003) <http://www.w3.org/TR/css3-syntax/>`__ which
itself is based on the CSS 2.1 tokenizer. It tries to be as compliant as
possible but uses some (helpful) parts of the CSS 2.1 tokenizer.
I guess cssutils is neither CSS 2.1 nor CSS 3 compliant but tries to at least be able to parse both grammars including some more real world cases (some CSS hacks are actually parsed and serialized). Both official grammars are not final nor bugfree but still feasible. cssutils aim is not to be fully compliant to any CSS specification (the specifications seem to be in a constant flow anyway) but cssutils *should* be able to read and write as many as possible CSS stylesheets "in the wild" while at the same time implement the official APIs which are well documented. Some minor extensions are provided as well.
I guess cssutils is neither CSS 2.1 nor CSS 3 compliant, but it tries to at
least be able to parse both grammars including some more real world cases (some
CSS hacks are actually parsed and serialized). Neither official grammar is
final or bug-free, but both are still feasible. The aim of cssutils is not to be
fully compliant with any CSS specification (the specifications seem to be in
constant flux anyway), but cssutils *should* be able to read and write as many
CSS stylesheets "in the wild" as possible while at the same time implementing
the official APIs, which are well documented. Some minor extensions are provided as well.
Please visit http://cthedot.de/cssutils/ for more details.
Tested with Python 2.5 on Windows Vista mainly.
Tested with Python 2.6 on Windows 7 mainly.
This library may be used ``from cssutils import *`` which
@ -70,13 +88,16 @@ Usage may be::
__all__ = ['css', 'stylesheets', 'CSSParser', 'CSSSerializer']
__docformat__ = 'restructuredtext'
__author__ = 'Christof Hoeke with contributions by Walter Doerwald'
__date__ = '$LastChangedDate:: 2009-10-17 15:12:28 -0600 #$:'
__date__ = '$LastChangedDate:: 2009-11-26 16:31:32 -0700 #$:'
VERSION = '0.9.7a1'
__version__ = '%s $Id: __init__.py 1877 2009-10-17 21:12:28Z cthedot $' % VERSION
__version__ = '%s $Id: __init__.py 1892 2009-11-26 23:31:32Z cthedot $' % VERSION
import codec
import os.path
import urllib
import urlparse
import xml.dom
# order of imports is important (partly circular)
@ -230,19 +251,22 @@ def getUrls(sheet):
if u is not None:
yield u
def replaceUrls(sheet, replacer):
def replaceUrls(sheet, replacer, ignoreImportRules=False):
"""Replace all URLs in :class:`cssutils.css.CSSImportRule` or
:class:`cssutils.css.CSSValue` objects of given `sheet`.
:param sheet:
:class:`cssutils.css.CSSStyleSheet` which is changed
:param replacer:
a function which is called with a single argument `urlstring` which is
the current value of each url() excluding ``url(`` and ``)`` and
a function which is called with a single argument `urlstring` which
is the current value of each url() excluding ``url(`` and ``)`` and
surrounding single or double quotes.
:param ignoreImportRules:
if ``True`` does not call `replacer` with URLs from @import rules.
"""
for importrule in (r for r in sheet if r.type == r.IMPORT_RULE):
importrule.href = replacer(importrule.href)
if not ignoreImportRules:
for importrule in (r for r in sheet if r.type == r.IMPORT_RULE):
importrule.href = replacer(importrule.href)
def setProperty(v):
if v.CSS_PRIMITIVE_VALUE == v.cssValueType and\
@ -273,7 +297,7 @@ def resolveImports(sheet, target=None):
@import rules which use media information are tried to be wrapped into
@media rules so keeping the media information. This may not work in
all instances (if e.g. an @import rule itself contains an @import rule
with different media infos or if it is contains rules which may not be
with different media infos or if it contains rules which may not be
used inside an @media block like @namespace rules.). In these cases
the @import rule is kept as in the original sheet and a WARNING is issued.
@ -281,43 +305,110 @@ def resolveImports(sheet, target=None):
in this given :class:`cssutils.css.CSSStyleSheet` all import rules are
resolved and added to a resulting *flat* sheet.
:param target:
A :class:`cssutils.css.CSSStyleSheet` object which will be the resulting
*flat* sheet if given
:returns: given `target` or a new :class:`cssutils.css.CSSStyleSheet` object
A :class:`cssutils.css.CSSStyleSheet` object which will be the
resulting *flat* sheet if given
:returns: given `target` or a new :class:`cssutils.css.CSSStyleSheet`
object
"""
if not target:
target = css.CSSStyleSheet()
target = css.CSSStyleSheet(href=sheet.href,
media=sheet.media,
title=sheet.title)
def getReplacer(targetbase):
"Return a replacer which uses base to return adjusted URLs"
basesch, baseloc, basepath, basequery, basefrag = urlparse.urlsplit(targetbase)
basepath, basepathfilename = os.path.split(basepath)
def replacer(url):
scheme, location, path, query, fragment = urlparse.urlsplit(url)
if not scheme and not location and not path.startswith(u'/'):
# relative
path, filename = os.path.split(path)
combined = os.path.normpath(os.path.join(basepath, path, filename))
return urllib.pathname2url(combined)
else:
# keep anything absolute
return url
return replacer
#target.add(css.CSSComment(cssText=u'/* START %s */' % sheet.href))
for rule in sheet.cssRules:
if rule.type == rule.CHARSET_RULE:
pass
elif rule.type == rule.IMPORT_RULE:
log.info(u'Processing @import %r' % rule.href, neverraise=True)
if rule.styleSheet:
target.add(css.CSSComment(cssText=u'/* START @import "%s" */' % rule.href))
if rule.media.mediaText == 'all':
t = target
else:
log.info(u'Replacing @import media with @media: %s' %
rule.media.mediaText, neverraise=True)
t = css.CSSMediaRule(rule.media.mediaText)
# add all rules of @import to current sheet
target.add(css.CSSComment(cssText=u'/* START @import "%s" */'
% rule.href))
try:
resolveImports(rule.styleSheet, t)
# nested imports
importedSheet = resolveImports(rule.styleSheet)
except xml.dom.HierarchyRequestErr, e:
log.warn(u'Cannot resolve @import: %s' %
e, neverraise=True)
log.warn(u'@import: Cannot resolve target, keeping rule: %s'
% e, neverraise=True)
target.add(rule)
else:
if t != target:
target.add(t)
t.add(css.CSSComment(cssText=u'/* END "%s" */' % rule.href))
# adjust relative URI references
log.info(u'@import: Adjusting paths for %r' % rule.href,
neverraise=True)
replaceUrls(importedSheet,
getReplacer(rule.href),
ignoreImportRules=True)
# might have to wrap rules in @media if media given
if rule.media.mediaText == u'all':
mediaproxy = None
else:
keepimport = False
for r in importedSheet:
# check if rules present which may not be
# combined with media
if r.type not in (r.COMMENT,
r.STYLE_RULE,
r.IMPORT_RULE):
keepimport = True
break
if keepimport:
log.warn(u'Cannot combine imported sheet with'
u' given media as other rules then'
u' comments or stylerules found %r,'
u' keeping %r' % (r,
rule.cssText),
neverraise=True)
target.add(rule)
continue
# wrap in @media if media is not `all`
log.info(u'@import: Wrapping some rules in @media '
u' to keep media: %s'
% rule.media.mediaText, neverraise=True)
mediaproxy = css.CSSMediaRule(rule.media.mediaText)
for r in importedSheet:
if mediaproxy:
mediaproxy.add(r)
else:
# add to top sheet directly but are difficult anyway
target.add(r)
if mediaproxy:
target.add(mediaproxy)
else:
log.error(u'Cannot get referenced stylesheet %r' %
rule.href, neverraise=True)
# keep @import as it is
log.error(u'Cannot get referenced stylesheet %r, keeping rule'
% rule.href, neverraise=True)
target.add(rule)
else:
target.add(rule)
return target

View File

@ -51,7 +51,7 @@ TODO:
"""
__all__ = ['CSSStyleDeclaration', 'Property']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssstyledeclaration.py 1870 2009-10-17 19:56:59Z cthedot $'
__version__ = '$Id: cssstyledeclaration.py 1879 2009-11-17 20:35:04Z cthedot $'
from cssproperties import CSS2Properties
from property import Property
@ -581,6 +581,9 @@ class CSSStyleDeclaration(CSS2Properties, cssutils.util.Base2):
if isinstance(name, Property):
newp = name
name = newp.literalname
elif not value:
# an empty string or None effectively removes the property
return self.removeProperty(name)
else:
newp = Property(name, value, priority)
if not newp.wellformed:

View File

@ -1,7 +1,7 @@
"""CSSUnknownRule implements DOM Level 2 CSS CSSUnknownRule."""
__all__ = ['CSSUnknownRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssunknownrule.py 1638 2009-01-13 20:39:33Z cthedot $'
__version__ = '$Id: cssunknownrule.py 1897 2009-12-17 22:09:06Z cthedot $'
import cssrule
import cssutils
@ -98,6 +98,20 @@ class CSSUnknownRule(cssrule.CSSRule):
token=token)
return expected
def FUNCTION(expected, seq, token, tokenizer=None):
# handled as opening (
type_, val, line, col = token
val = self._tokenvalue(token)
if expected != 'EOF':
new['nesting'].append(u'(')
seq.append(val, type_, line=line, col=col)
return expected
else:
new['wellformed'] = False
self._log.error(u'CSSUnknownRule: Expected end of rule.',
token=token)
return expected
def EOF(expected, seq, token, tokenizer=None):
"close all blocks and return 'EOF'"
for x in reversed(new['nesting']):
@ -154,6 +168,7 @@ class CSSUnknownRule(cssrule.CSSRule):
seq=newseq, tokenizer=tokenizer,
productions={'CHAR': CHAR,
'EOF': EOF,
'FUNCTION': FUNCTION,
'INVALID': INVALID,
'STRING': STRING,
'URI': URI,

View File

@ -1,7 +1,7 @@
"""Property is a single CSS property in a CSSStyleDeclaration."""
__all__ = ['Property']
__docformat__ = 'restructuredtext'
__version__ = '$Id: property.py 1868 2009-10-17 19:36:54Z cthedot $'
__version__ = '$Id: property.py 1878 2009-11-17 20:16:26Z cthedot $'
from cssutils.helper import Deprecated
from cssvalue import CSSValue
@ -43,7 +43,7 @@ class Property(cssutils.util.Base):
;
"""
def __init__(self, name=None, value=None, priority=u'',
def __init__(self, name=None, value=None, priority=u'',
_mediaQuery=False, parent=None):
"""
:param name:
@ -56,7 +56,7 @@ class Property(cssutils.util.Base):
:param _mediaQuery:
if ``True`` value is optional (used by MediaQuery)
:param parent:
the parent object, normally a
the parent object, normally a
:class:`cssutils.css.CSSStyleDeclaration`
"""
super(Property, self).__init__()
@ -68,19 +68,16 @@ class Property(cssutils.util.Base):
self.__nametoken = None
self._name = u''
self._literalname = u''
self.seqs[1] = CSSValue(parent=self)
if name:
self.name = name
if value:
self.cssValue = value
else:
self.seqs[1] = CSSValue(parent=self)
self._priority = u''
self._literalpriority = u''
if priority:
self.priority = priority
def __repr__(self):
return "cssutils.css.%s(name=%r, value=%r, priority=%r)" % (
self.__class__.__name__,
@ -143,12 +140,12 @@ class Property(cssutils.util.Base):
self._log.error(u'Property: No property value found: %r.' %
self._valuestr(cssText), colontoken)
if wellformed:
if wellformed:
self.wellformed = True
self.name = nametokens
self.cssValue = valuetokens
self.priority = prioritytokens
# also invalid values are set!
self.validate()
@ -161,7 +158,7 @@ class Property(cssutils.util.Base):
def _setName(self, name):
"""
:exceptions:
:exceptions:
- :exc:`~xml.dom.SyntaxErr`:
Raised if the specified name has a syntax error and is
unparsable.
@ -223,7 +220,7 @@ class Property(cssutils.util.Base):
name = property(lambda self: self._name, _setName,
doc="Name of this property.")
literalname = property(lambda self: self._literalname,
doc="Readonly literal (not normalized) name "
"of this property")
@ -246,14 +243,14 @@ class Property(cssutils.util.Base):
if self._mediaQuery and not cssText:
self.seqs[1] = CSSValue(parent=self)
else:
#if not self.seqs[1]:
# self.seqs[1] = CSSValue(parent=self)
self.seqs[1] = CSSValue(parent=self)
oldvalue = self.seqs[1].cssText
try:
self.seqs[1].cssText = cssText
except:
self.seqs[1].cssText = oldvalue
raise
self.seqs[1].cssText = cssText
self.wellformed = self.wellformed and self.seqs[1].wellformed
# self.valid = self.valid and self.cssValue.valid
cssValue = property(_getCSSValue, _setCSSValue,
doc="(cssutils) CSSValue object of this property")
@ -368,11 +365,11 @@ class Property(cssutils.util.Base):
def validate(self):
"""Validate value against `profiles` which are checked dynamically.
properties in e.g. @font-face rules are checked against
``cssutils.profile.CSS3_FONT_FACE`` only.
properties in e.g. @font-face rules are checked against
``cssutils.profile.CSS3_FONT_FACE`` only.
For each of the following cases a message is reported:
- INVALID (so the property is known but not valid)
``ERROR Property: Invalid value for "{PROFILE-1[/PROFILE-2...]"
property: ...``
@ -385,46 +382,46 @@ class Property(cssutils.util.Base):
``DEBUG Found valid "{PROFILE-1[/PROFILE-2...]" property...``
- UNKNOWN property
``WARNING Unknown Property name...`` is issued
``WARNING Unknown Property name...`` is issued
so for example::
cssutils.log.setLevel(logging.DEBUG)
parser = cssutils.CSSParser()
s = parser.parseString('''body {
s = parser.parseString('''body {
unknown-property: x;
color: 4;
color: rgba(1,2,3,4);
color: red
color: red
}''')
# Log output:
WARNING Property: Unknown Property name. [2:9: unknown-property]
ERROR Property: Invalid value for "CSS Color Module Level 3/CSS Level 2.1" property: 4 [3:9: color]
DEBUG Property: Found valid "CSS Color Module Level 3" value: rgba(1, 2, 3, 4) [4:9: color]
DEBUG Property: Found valid "CSS Level 2.1" value: red [5:9: color]
and when setting an explicit default profile::
cssutils.profile.defaultProfiles = cssutils.profile.CSS_LEVEL_2
s = parser.parseString('''body {
s = parser.parseString('''body {
unknown-property: x;
color: 4;
color: rgba(1,2,3,4);
color: red
color: red
}''')
# Log output:
WARNING Property: Unknown Property name. [2:9: unknown-property]
ERROR Property: Invalid value for "CSS Color Module Level 3/CSS Level 2.1" property: 4 [3:9: color]
WARNING Property: Not valid for profile "CSS Level 2.1" but valid "CSS Color Module Level 3" value: rgba(1, 2, 3, 4) [4:9: color]
DEBUG Property: Found valid "CSS Level 2.1" value: red [5:9: color]
DEBUG Property: Found valid "CSS Level 2.1" value: red [5:9: color]
"""
valid = False
profiles = None
try:
# if @font-face use that profile
@ -434,7 +431,7 @@ class Property(cssutils.util.Base):
#TODO: same for @page
except AttributeError:
pass
if self.name and self.value:
if self.name in cssutils.profile.knownNames:
@ -443,14 +440,14 @@ class Property(cssutils.util.Base):
cssutils.profile.validateWithProfile(self.name,
self.value,
profiles)
if not valid:
self._log.error(u'Property: Invalid value for '
u'"%s" property: %s'
% (u'/'.join(validprofiles), self.value),
token=self.__nametoken,
neverraise=True)
# TODO: remove logic to profiles!
elif valid and not matching:#(profiles and profiles not in validprofiles):
if not profiles:
@ -464,13 +461,13 @@ class Property(cssutils.util.Base):
token = self.__nametoken,
neverraise=True)
valid = False
elif valid:
self._log.debug(u'Property: Found valid "%s" value: %s'
% (u'/'.join(validprofiles), self.value),
token = self.__nametoken,
neverraise=True)
if self._priority not in (u'', u'important'):
valid = False

428
src/cssutils/sac.py Normal file
View File

@ -0,0 +1,428 @@
#!/usr/bin/env python
"""A validating CSSParser"""
# Public API of this module. Every name listed here must be defined below,
# otherwise ``from ... import *`` fails with an AttributeError.
__all__ = ['DocumentHandler', 'EchoHandler', 'ErrorHandler', 'Parser']
__docformat__ = 'restructuredtext'
__version__ = '$Id: parse.py 1754 2009-05-30 14:50:13Z cthedot $'
import helper
import codecs
import errorhandler
import os
import tokenize2
import urllib
import sys
class ErrorHandler(object):
    """Default sink for problems reported by the parser.

    All three severities are forwarded to an ``errorhandler.ErrorHandler``
    instance created in the constructor. Errors and warnings are forwarded
    with ``neverraise=True``; fatal errors are forwarded without that flag.

    An application that needs customized error handling extends this class
    and registers an instance with the parser through the parser's
    ``setErrorHandler`` method. The parser then reports all errors and
    warnings through this interface instead of raising by itself, so it is up
    to the application whether a given kind of problem becomes an exception.
    There is no requirement that the parser keeps providing useful
    information after a fatal error has been reported.
    """

    def __init__(self):
        # the logger every report below is delegated to
        self._log = errorhandler.ErrorHandler()

    def error(self, exception, token=None):
        "Report a recoverable error, optionally with the offending `token`."
        self._log.error(exception, token, neverraise=True)

    def fatal(self, exception, token=None):
        "Report a fatal error; forwarded without ``neverraise``."
        self._log.fatal(exception, token)

    def warn(self, exception, token=None):
        "Report a warning, optionally with the offending `token`."
        self._log.warn(exception, token, neverraise=True)
class DocumentHandler(object):
    """Receiver for the events emitted while a style sheet is parsed.

    This default implementation only reports each event: every callback
    writes a single ``INFO``-prefixed line to ``sys.stderr``. Subclasses
    override the callbacks they are interested in.

    Callbacks of the SAC ``DocumentHandler`` interface which are listed for
    reference but have no counterpart in this class::

        void endFontFace()
            Receive notification of the end of a font face statement.
        void endMedia(SACMediaList media)
            Receive notification of the end of a media statement.
        void endPage(java.lang.String name, java.lang.String pseudo_page)
            Receive notification of the end of a page statement.
        void importStyle(java.lang.String uri, SACMediaList media,
                         java.lang.String defaultNamespaceURI)
            Receive notification of a import statement in the style sheet.
        void startFontFace()
            Receive notification of the beginning of a font face statement.
        void startMedia(SACMediaList media)
            Receive notification of the beginning of a media statement.
        void startPage(java.lang.String name, java.lang.String pseudo_page)
            Receive notification of the beginning of a page statement.
    """

    def __init__(self):
        def write_info(msg):
            # stderr is looked up on every call so redirection is honoured
            sys.stderr.write('INFO\t%s\n' % msg)

        self._log = write_info

    def comment(self, text, line=None, col=None):
        "Receive notification of a comment."
        message = "comment %r at [%s, %s]" % (text, line, col)
        self._log(message)

    def startDocument(self, encoding):
        "Receive notification of the beginning of a style sheet."
        # source
        message = "startDocument encoding=%s" % encoding
        self._log(message)

    def endDocument(self, source=None, line=None, col=None):
        "Receive notification of the end of a document."
        self._log("endDocument EOF")

    def importStyle(self, uri, media, name, line=None, col=None):
        "Receive notification of an @import statement in the style sheet."
        # defaultNamespaceURI???
        position = (line, col)
        self._log("importStyle at [%s, %s]" % position)

    def namespaceDeclaration(self, prefix, uri, line=None, col=None):
        "Receive notification of an @namespace declaration."
        # prefix might be None!
        position = (line, col)
        self._log("namespaceDeclaration at [%s, %s]" % position)

    def startSelector(self, selectors=None, line=None, col=None):
        "Receive notification of the beginning of a rule statement."
        # TODO selectorList!
        position = (line, col)
        self._log("startSelector at [%s, %s]" % position)

    def endSelector(self, selectors=None, line=None, col=None):
        "Receive notification of the end of a rule statement."
        position = (line, col)
        self._log("endSelector at [%s, %s]" % position)

    def property(self, name, value='TODO', important=False, line=None, col=None):
        "Receive notification of a declaration."
        # TODO: value is LexicalValue?
        message = "property %r at [%s, %s]" % (name, line, col)
        self._log(message)

    def ignorableAtRule(self, atRule, line=None, col=None):
        "Receive notification of an at-rule not supported by this parser."
        message = "ignorableAtRule %r at [%s, %s]" % (atRule, line, col)
        self._log(message)
class EchoHandler(DocumentHandler):
    """Document handler which re-serializes the events it receives.

    Each overridden callback appends a CSS text fragment to an internal
    buffer; the concatenated buffer is available through the read-only
    property `out`. All overrides except `endSelector` also delegate to
    :class:`DocumentHandler`, which logs the event.

    Comment events are not echoed: `comment` is not overridden, so comments
    are only logged by the base class.
    """

    def __init__(self):
        super(EchoHandler, self).__init__()
        # list of unicode fragments, joined on access of `out`
        self._out = []

    # NOTE: keep this above the ``property`` method defined further down,
    # which shadows the builtin for the rest of the class body.
    out = property(lambda self: u''.join(self._out))

    def startDocument(self, encoding):
        "Echo an ``@charset`` rule unless `encoding` is ``utf-8``."
        super(EchoHandler, self).startDocument(encoding)
        if u'utf-8' != encoding:
            self._out.append(u'@charset "%s";\n' % encoding)

    def importStyle(self, uri, media, name, line=None, col=None):
        "Echo an ``@import`` rule; `media` and `name` are optional."
        # defaultNamespaceURI???
        super(EchoHandler, self).importStyle(uri, media, name, line, col)
        # The separating space goes *before* each optional part so that the
        # result is ``@import "uri" media name;`` and not ``"uri"media name ;``
        self._out.append(u'@import %s%s%s;\n' % (
            helper.string(uri),
            u' %s' % media if media else u'',
            u' %s' % name if name else u''))

    def namespaceDeclaration(self, prefix, uri, line=None, col=None):
        "Echo an ``@namespace`` rule; `prefix` may be ``None``."
        super(EchoHandler, self).namespaceDeclaration(prefix, uri, line, col)
        self._out.append(u'@namespace %s%s;\n' % (
            u'%s ' % prefix if prefix else u'',
            helper.string(uri)))

    def startSelector(self, selectors=None, line=None, col=None):
        "Echo the comma separated `selectors` and the opening brace."
        super(EchoHandler, self).startSelector(selectors, line, col)
        if selectors:
            self._out.append(u', '.join(selectors))
        self._out.append(u' {\n')

    def endSelector(self, selectors=None, line=None, col=None):
        "Echo the closing brace (this event is not passed to the base class)."
        self._out.append(u'    }')

    def property(self, name, value, important=False, line=None, col=None):
        "Echo a single ``name: value [!important];`` declaration."
        # `important` must be forwarded explicitly: passing only
        # ``(name, value, line, col)`` would bind `line` to the base class'
        # `important` parameter and `col` to its `line` parameter.
        super(EchoHandler, self).property(name, value, important, line, col)
        self._out.append(u'    %s: %s%s;\n' % (
            name, value, u' !important' if important else u''))
class Parser(object):
    """Event-driven CSS parser modelled on the SAC ``Parser`` interface.

    The parser tokenizes a style sheet and reports what it finds to a
    document handler (see `setDocumentHandler`); problems are reported to an
    error handler (see `setErrorHandler`).

    Methods of the SAC ``Parser`` interface, listed for reference. Of
    these only `setDocumentHandler` and `setErrorHandler` have a
    counterpart in this class::

        java.lang.String getParserVersion()
            Returns a string about which CSS language is supported by this
            parser.
        boolean parsePriority(InputSource source)
            Parse a CSS priority value (e.g. "!important").
        LexicalUnit parsePropertyValue(InputSource source)
            Parse a CSS property value.
        void parseRule(InputSource source)
            Parse a CSS rule.
        SelectorList parseSelectors(InputSource source)
            Parse a comma separated list of selectors.
        void parseStyleDeclaration(InputSource source)
            Parse a CSS style declaration (without '{' and '}').
        void parseStyleSheet(InputSource source)
            Parse a CSS document.
        void parseStyleSheet(java.lang.String uri)
            Parse a CSS document from a URI.
        void setConditionFactory(ConditionFactory conditionFactory)
        void setDocumentHandler(DocumentHandler handler)
            Allow an application to register a document event handler.
        void setErrorHandler(ErrorHandler handler)
            Allow an application to register an error event handler.
        void setLocale(java.util.Locale locale)
            Allow an application to request a locale for errors and warnings.
        void setSelectorFactory(SelectorFactory selectorFactory)
    """

    def __init__(self, documentHandler=None, errorHandler=None):
        """
        :param documentHandler:
            receiver of parse events; a new :class:`DocumentHandler` is used
            if none (or a false value) is given
        :param errorHandler:
            receiver of errors and warnings; a new :class:`ErrorHandler` is
            used if none (or a false value) is given
        """
        self._tokenizer = tokenize2.Tokenizer()
        self.setDocumentHandler(documentHandler or DocumentHandler())
        self.setErrorHandler(errorHandler or ErrorHandler())

    def parseString(self, cssText, encoding=None):
        """Parse `cssText` and report its content to the registered handlers.

        :param cssText:
            the style sheet; a byte string is decoded with the ``css`` codec
            first, anything else is handed to the tokenizer as it is
        :param encoding:
            passed to the ``css`` codec when `cssText` is a byte string

        The document handler receives ``startDocument`` first, followed by
        ``comment``, ``ignorableAtRule``, ``importStyle``,
        ``namespaceDeclaration``, ``startSelector``, ``property`` and
        ``endSelector`` events, and ``endDocument`` once the EOF token is
        seen. For @import rules only the URI is extracted; media and name
        are always reported as ``None``.
        """
        # ``bytes`` is an alias of ``str`` on Python 2.6+
        if isinstance(cssText, bytes):
            cssText = codecs.getdecoder('css')(cssText, encoding=encoding)[0]

        tokens = self._tokenizer.tokenize(cssText, fullsheet=True)

        def COMMENT(val, line, col):
            # strip the leading ``/*`` and the trailing ``*/``
            self._handler.comment(val[2:-2], line, col)

        def EOF(val, line, col):
            self._handler.endDocument(val, line, col)

        # tokens which are handled the same way wherever they appear
        simplehandlers = {'COMMENT': COMMENT,
                          'S': lambda val, line, col: None,
                          'EOF': EOF}

        def simple(t):
            "Handle comment, whitespace and EOF tokens; True if `t` was one."
            type_, val, line, col = t
            if type_ in simplehandlers:
                simplehandlers[type_](val, line, col)
                return True
            return False

        # START PARSING
        t = next(tokens)
        type_, val, line, col = t

        encoding = 'utf-8'
        if 'CHARSET_SYM' == type_:
            # @charset "encoding";
            encodingtoken = next(tokens)
            semicolontoken = next(tokens)
            # NOTE(review): the third token is only checked for being
            # present, not for actually being ``;`` - confirm if intended
            if 'STRING' == encodingtoken[0] and semicolontoken:
                encoding = helper.stringvalue(encodingtoken[1])
            else:
                self._errorHandler.fatal(u'Invalid @charset')

            t = next(tokens)
            type_, val, line, col = t

        self._handler.startDocument(encoding)

        while True:
            # position of the first token of the current rule
            start = (line, col)
            try:
                if simple(t):
                    pass

                elif 'ATKEYWORD' == type_ or type_ in ('PAGE_SYM',
                                                       'MEDIA_SYM',
                                                       'FONT_FACE_SYM'):
                    atRule = [val]
                    braces = 0
                    while True:
                        # read till the ending ; or the matching }
                        t = next(tokens)
                        type_, val, line, col = t
                        atRule.append(val)
                        if u';' == val and not braces:
                            break
                        elif u'{' == val:
                            braces += 1
                        elif u'}' == val:
                            braces -= 1
                            if braces == 0:
                                break

                    self._handler.ignorableAtRule(u''.join(atRule), *start)

                elif 'IMPORT_SYM' == type_:
                    # import URI or STRING media? name?
                    uri, media, name = None, None, None
                    while True:
                        t = next(tokens)
                        type_, val, line, col = t
                        if 'STRING' == type_:
                            uri = helper.stringvalue(val)
                        elif 'URI' == type_:
                            uri = helper.urivalue(val)
                        elif u';' == val:
                            break

                    if uri:
                        # report the position like the other at-rules do
                        self._handler.importStyle(uri, media, name, *start)
                    else:
                        self._errorHandler.error(u'Invalid @import'
                                                 u' declaration at %r'
                                                 % (start,))

                elif 'NAMESPACE_SYM' == type_:
                    prefix, uri = None, None
                    while True:
                        t = next(tokens)
                        type_, val, line, col = t
                        if 'IDENT' == type_:
                            prefix = val
                        elif 'STRING' == type_:
                            uri = helper.stringvalue(val)
                        elif 'URI' == type_:
                            uri = helper.urivalue(val)
                        elif u';' == val:
                            break

                    if uri:
                        self._handler.namespaceDeclaration(prefix, uri, *start)
                    else:
                        self._errorHandler.error(u'Invalid @namespace'
                                                 u' declaration at %r'
                                                 % (start,))

                else:
                    # CSSSTYLERULE
                    selector = []
                    selectors = []
                    while True:
                        # selectors[, selector]* {
                        if 'S' == type_:
                            selector.append(u' ')
                        elif simple(t):
                            pass
                        elif u',' == val:
                            selectors.append(u''.join(selector).strip())
                            selector = []
                        elif u'{' == val:
                            selectors.append(u''.join(selector).strip())
                            self._handler.startSelector(selectors, *start)
                            break
                        else:
                            selector.append(val)

                        t = next(tokens)
                        type_, val, line, col = t

                    # `end` is the token value which terminated the last
                    # part read: u':' never, but u'!', u';' or u'}'
                    end = None
                    while True:
                        # name: value [!important][;name: value [!important]]*;?
                        name, value, important = None, [], False

                        while True:
                            # name:
                            t = next(tokens)
                            type_, val, line, col = t
                            if 'S' == type_:
                                pass
                            elif simple(t):
                                pass
                            elif 'IDENT' == type_:
                                if name:
                                    self._errorHandler.error('more than one property name', t)
                                else:
                                    name = val
                            elif u':' == val:
                                if not name:
                                    self._errorHandler.error('no property name', t)
                                break
                            elif u';' == val:
                                self._errorHandler.error('premature end of property', t)
                                end = val
                                break
                            elif u'}' == val:
                                if name:
                                    self._errorHandler.error('premature end of property', t)
                                end = val
                                break
                            else:
                                self._errorHandler.error('unexpected property name token %r' % val, t)

                        while not u';' == end and not u'}' == end:
                            # value !;}
                            t = next(tokens)
                            type_, val, line, col = t

                            if 'S' == type_:
                                value.append(u' ')
                            elif simple(t):
                                pass
                            elif u'!' == val or u';' == val or u'}' == val:
                                value = ''.join(value).strip()
                                if not value:
                                    self._errorHandler.error('premature end of property (no value)', t)
                                end = val
                                break
                            else:
                                value.append(val)

                        while u'!' == end:
                            # !important
                            t = next(tokens)
                            type_, val, line, col = t

                            if simple(t):
                                pass
                            elif u'IDENT' == type_ and not important:
                                important = True
                            elif u';' == val or u'}' == val:
                                end = val
                                break
                            else:
                                # pass the token like all other error reports
                                self._errorHandler.error('unexpected priority token %r' % val, t)

                        if name and value:
                            self._handler.property(name, value, important)

                        if u'}' == end:
                            self._handler.endSelector(selectors, line=line, col=col)
                            break
                        else:
                            # reset
                            end = None

                t = next(tokens)
                type_, val, line, col = t

            except StopIteration:
                # tokenizer is exhausted
                break

    def setDocumentHandler(self, handler):
        "Allow an application to register a document event `handler`."
        self._handler = handler

    def setErrorHandler(self, handler):
        "Allow an application to register an error event `handler`."
        self._errorHandler = handler

View File

@ -307,7 +307,7 @@ class CSSCapture(object):
sf.write(sheet.cssText)
sf.close()
def csscombine(path=None, url=None,
def csscombine(path=None, url=None,
sourceencoding=None, targetencoding=None,
minify=True):
"""Combine sheets referred to by @import rules in given CSS proxy sheet
@ -318,6 +318,8 @@ def csscombine(path=None, url=None,
`path` or `url`
path or URL to a CSSStyleSheet which imports other sheets which
are then combined into one sheet
`sourceencoding`
explicit encoding of the source proxysheet, default 'utf-8'
`targetencoding`
encoding of the combined stylesheet, default 'utf-8'
`minify`
@ -348,6 +350,5 @@ def csscombine(path=None, url=None,
cssutils.setSerializer(oldser)
else:
cssText = result.cssText
return cssText
return cssText

View File

@ -3,7 +3,7 @@
"""cssutils serializer"""
__all__ = ['CSSSerializer', 'Preferences']
__docformat__ = 'restructuredtext'
__version__ = '$Id: serialize.py 1872 2009-10-17 21:00:40Z cthedot $'
__version__ = '$Id: serialize.py 1898 2009-12-19 12:17:04Z cthedot $'
import codecs
import cssutils
@ -191,7 +191,6 @@ class Out(object):
add ``*spacer`` except ``space=False``
"""
prefspace = self.ser.prefs.spacer
if val or typ in ('STRING', 'URI'):
# PRE
if 'COMMENT' == typ:
@ -230,7 +229,10 @@ class Out(object):
if indent:
self.out.append(self.ser._indentblock(val, self.ser._level+1))
else:
if val.endswith(u' '):
self._remove_last_if_S()
self.out.append(val)
# POST
if lineSeparator:
# Property , ...
@ -238,6 +240,9 @@ class Out(object):
elif val in u'+>~': # enclose selector combinator
self.out.insert(-1, self.ser.prefs.selectorCombinatorSpacer)
self.out.append(self.ser.prefs.selectorCombinatorSpacer)
elif u')' == val and not keepS: # CHAR funcend
# TODO: pref?
self.out.append(u' ')
elif u',' == val: # list
self.out.append(self.ser.prefs.listItemSpacer)
elif u':' == val: # prop