IGN:Initial framework for html2epub

This commit is contained in:
Kovid Goyal 2008-08-21 15:48:32 -07:00
parent 105a809feb
commit 6b6b18e771
48 changed files with 11670 additions and 5 deletions

View File

@ -15,6 +15,7 @@ class ConversionError(Exception):
class UnknownFormatError(Exception):
    '''
    Plain :class:`Exception` subclass with no behaviour of its own; it exists
    so callers can catch this failure by name.
    '''
# Known book file extensions (lower case, without the leading dot).
# Each extension appears exactly once.
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
                   'html', 'xhtml', 'epub', 'pdf', 'prc', 'mobi', 'azw',
                   'fb2', 'djvu', 'lrx', 'cbr', 'cbz']

View File

@ -6,3 +6,48 @@ __docformat__ = 'restructuredtext en'
'''
Conversion to EPUB.
'''
import sys
from calibre.utils.config import Config, StringConfig
def config(defaults=None):
    '''
    Build the option set used for conversion to EPUB.

    :param defaults: When `None` the options are held in ``Config('epub', ...)``;
                     otherwise ``defaults`` is handed to ``StringConfig``.
    :return: The populated option container.
    '''
    description = _('Options to control the conversion to EPUB')
    if defaults is not None:
        opts = StringConfig(defaults, description)
    else:
        opts = Config('epub', description)

    # Ungrouped options
    opts.add_opt('output', ['-o', '--output'], default=None,
                 help=_('The output EPUB file. If not specified, it is derived from the input file name.'))
    opts.add_opt('encoding', ['--encoding'], default=None,
                 help=_('Character encoding for HTML files. Default is to auto detect.'))

    # Each add_group() call returns a callable that adds options to that group
    add_metadata = opts.add_group('metadata', _('Set metadata of the generated ebook'))
    add_metadata('title', ['-t', '--title'], default=None,
                 help=_('Set the title. Default is to autodetect.'))
    add_metadata('authors', ['-a', '--authors'], default=_('Unknown'),
                 help=_('The author(s) of the ebook, as a comma separated list.'))

    add_traversal = opts.add_group('traversal', _('Control the following of links in HTML files.'))
    add_traversal('breadth_first', ['--breadth-first'], default=False,
                  help=_('Traverse links in HTML files breadth first. Normally, they are traversed depth first'))
    add_traversal('max_levels', ['--max-levels'], default=sys.getrecursionlimit(), group='traversal',
                  help=_('Maximum levels of recursion when following links in HTML files. Must be non-negative. 0 implies that no links in the root HTML file are followed.'))

    add_structure = opts.add_group('structure detection', _('Control auto-detection of document structure.'))
    chapter_expression = "//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]"
    # The help text is written over several lines and flattened to one line
    chapter_help = _('''\
An XPath expression to detect chapter titles. The default is to consider <h1> or
<h2> tags that contain the text "chapter" or "book" or "section" as chapter titles. This
is achieved by the expression: "//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]"
The expression used must evaluate to a list of elements. To disable chapter detection,
use the expression "/".
''').replace('\n', ' ')
    add_structure('chapter', ['--chapter'], default=chapter_expression,
                  help=chapter_help)
    add_structure('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
                  help=_('Don\'t add detected chapters to the Table of Contents'))
    add_structure('no_links_in_toc', ['--no-links-in-toc'], default=False,
                  help=_('Don\'t add links in the root HTML file to the Table of Contents'))

    add_debug = opts.add_group('debug', _('Options useful for debugging'))
    add_debug('verbose', ['-v', '--verbose'], default=0, action='count',
              help=_('Be more verbose while processing. Can be specified multiple times to increase verbosity.'))

    return opts

View File

@ -0,0 +1,212 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
import os, sys, logging, re, shutil
from lxml import html
from lxml.etree import XPath
get_text = XPath("//text()")
from calibre import LoggingInterface
from calibre.ebooks.html import PreProcessor
from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.traverse import traverse, opf_traverse
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.metadata.opf import OPFReader
from calibre.ptempfile import PersistentTemporaryDirectory
class HTMLProcessor(PreProcessor, LoggingInterface):
    '''
    Process a single HTML file for inclusion in an EPUB.

    Constructing an instance parses the file into an lxml tree (``self.root``),
    rewrites its links so that linked files are copied into the resource
    directory, gathers all CSS into ``self.raw_css`` and collects font
    statistics.
    '''

    # Patterns matching encoding declarations, which are stripped before parsing
    ENCODING_PATS = [re.compile(r'<[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE),
                     re.compile(r'<meta.*?content=[\'"].*?charset=([^\s\'"]+).*?[\'"].*?>', re.IGNORECASE)]

    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles):
        '''
        :param htmlfile: Object describing the file; its ``path``, ``encoding``
                         and ``resolve()`` members are used.
        :param opts: Conversion options; ``verbose`` and ``chapter`` (a callable
                     applied to the parsed tree) are used.
        :param tdir: Directory in which the converted content is assembled.
        :param resource_map: Mapping of original resource path to its rewritten
                             name. Shared between processors and updated in place.
        :param htmlfiles: Collection of paths of all HTML files being converted.
        '''
        LoggingInterface.__init__(self, logging.getLogger('html2epub'))
        self.htmlfile = htmlfile
        self.opts = opts
        self.tdir = tdir
        self.resource_map = resource_map
        self.resource_dir = os.path.join(tdir, 'resources')
        self.htmlfiles = htmlfiles
        self.parse_html()
        # lxml calls self.rewrite_links once per link found in the document
        self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
        self.extract_css()
        self.collect_font_statistics()
        self.split()

    def parse_html(self):
        ''' Create lxml ElementTree from HTML '''
        stream = open(self.htmlfile.path, 'rb')
        try:
            raw = stream.read()
        finally:
            stream.close()
        src = raw.decode(self.htmlfile.encoding, 'replace')
        src = self.preprocess(src)
        # lxml chokes on unicode input when it contains encoding declarations
        for pat in self.ENCODING_PATS:
            src = pat.sub('', src)
        try:
            self.root = html.document_fromstring(src)
        except Exception:
            if self.opts.verbose:
                self.log_exception('lxml based parsing failed')
            # Fall back to the more forgiving BeautifulSoup based parser.
            # The submodule has to be imported explicitly.
            from lxml.html import soupparser
            self.root = soupparser.fromstring(src)
        self.head = self.body = None
        head = self.root.xpath('//head')
        if head:
            self.head = head[0]
        body = self.root.xpath('//body')
        if body:
            self.body = body[0]
        self.detected_chapters = self.opts.chapter(self.root)

    def rewrite_links(self, olink):
        '''
        Return the replacement for the link ``olink``.

        Links that do not resolve to an existing file are returned unchanged.
        Links to other HTML files being converted become the bare file name.
        Any other file is copied into the resource directory (once) and the
        link becomes ``resources/<name>``.
        '''
        link = self.htmlfile.resolve(olink)
        if not link.path or not os.path.isfile(link.path):
            return olink
        if link.path in self.htmlfiles:
            return os.path.basename(link.path)
        if link.path in self.resource_map:
            return self.resource_map[link.path]
        name, ext = os.path.splitext(os.path.basename(link.path))
        # The running count keeps names unique when base names collide
        name += ('_%d'%len(self.resource_map)) + ext
        shutil.copyfile(link.path, os.path.join(self.resource_dir, name))
        name = 'resources/'+name
        self.resource_map[link.path] = name
        return name

    def extract_css(self):
        '''
        Move all CSS out of the tree and into ``self.raw_css``.

        Sources are linked stylesheets, <style> elements and inline ``style``
        attributes. A page break rule is added for every detected chapter.
        '''
        css = []
        for link in self.root.xpath('//link'):
            if 'css' in link.get('type', 'text/css').lower():
                # Links have already been rewritten, so the href of a copied
                # stylesheet is relative to self.tdir
                path = os.path.join(self.tdir, link.get('href', ''))
                if os.path.isfile(path):
                    stream = open(path, 'rb')
                    try:
                        css.append(stream.read().decode('utf-8'))
                    finally:
                        stream.close()
                link.getparent().remove(link)

        for style in self.root.xpath('//style'):
            if 'css' in style.get('type', 'text/css').lower():
                # Relative XPath: only the text inside this <style> element
                css.append('\n'.join(style.xpath('.//text()')))
                style.getparent().remove(style)

        css_counter = 1
        for elem in self.root.xpath('//*[@style]'):
            if 'id' not in elem.keys():
                elem.set('id', 'calibre_css_id_%d'%css_counter)
                css_counter += 1
            css.append('#%s {%s}'%(elem.get('id'), elem.get('style')))
            elem.attrib.pop('style')

        chapter_counter = 1
        for chapter in self.detected_chapters:
            if chapter.tag.lower() == 'a' and 'name' in chapter.keys():
                # Named anchors use their name as the id
                chapter_id = chapter.get('name')
                chapter.set('id', chapter_id)
            elif 'id' in chapter.keys():
                chapter_id = chapter.get('id')
            else:
                chapter_id = 'calibre_detected_chapter_%d'%chapter_counter
                chapter_counter += 1
                chapter.set('id', chapter_id)
            css.append('#%s {%s}'%(chapter_id, 'page-break-before:always'))

        self.raw_css = '\n\n'.join(css)
        # TODO: Figure out what to do about CSS imports from linked stylesheets

    def collect_font_statistics(self):
        '''
        Collect font statistics to figure out the base font size used in this
        HTML document.
        '''
        self.font_statistics = {} #: A mapping of font size (in pts) to number of characters rendered at that font size
        container = self.body if self.body is not None else self.root
        for text in container.xpath('.//text()'):
            # Not used yet, kept for the TODO below
            length, parent = len(re.sub(r'\s+', '', text)), text.getparent()
            #TODO: Use cssutils on self.raw_css to figure out the font size
            # of this piece text and update statistics accordingly

    def split(self):
        ''' Split into individual flows to accommodate Adobe's incompetence '''
        # TODO: Split on page breaks, keeping track of anchors (a.name and id)
        # and preserving tree structure so that CSS continues to apply
        pass
def config():
    ''' Return the html2epub option set: the common EPUB options, unchanged. '''
    return common_config()
def option_parser():
    ''' Return a command line parser built from :func:`config`. '''
    options = config()
    usage = _('''\
%prog [options] file.html
Convert a HTML file to an EPUB ebook. Follows links in the HTML file.
''')
    return options.option_parser(usage=usage)
def search_for_opf(dir):
    '''
    Look in the directory ``dir`` for a file whose name ends in ``.opf``
    (case insensitive).

    :return: An :class:`OPFReader` for the first such file that
             ``os.listdir`` reports, or `None` if there is none.
    '''
    for entry in os.listdir(dir):
        if not entry.lower().endswith('.opf'):
            continue
        return OPFReader(open(os.path.join(dir, entry), 'rb'), dir)
    return None
def parse_content(filelist, opts):
    '''
    Run an :class:`HTMLProcessor` over every file in ``filelist``.

    The output goes into the ``content`` subdirectory of a new persistent
    temporary directory. One resource map is shared by all processors so
    each linked resource is copied only once.

    :param filelist: Sequence of HTML file objects, each with a ``path``
                     attribute.
    :param opts: Conversion options, passed through to the processors.
    '''
    tdir = PersistentTemporaryDirectory('_html2epub')
    content_dir = os.path.join(tdir, 'content')
    os.makedirs(os.path.join(content_dir, 'resources'))
    resource_map = {}
    # The processor needs the paths of all HTML files so that it can tell
    # links between them apart from links to resources
    htmlfiles = set([f.path for f in filelist])
    for htmlfile in filelist:
        HTMLProcessor(htmlfile, opts, content_dir, resource_map, htmlfiles)
def convert(htmlfile, opts, notification=None):
    '''
    Convert the HTML file at path ``htmlfile`` according to ``opts``.

    Determines the output path and the metadata, compiles the chapter
    detection expression, builds the list of files to convert and passes
    that list to :func:`parse_content`.

    :param htmlfile: Path to the root HTML file.
    :param opts: Parsed options. ``output``, ``authors`` and ``chapter`` are
                 modified in place.
    :param notification: Currently unused.
    '''
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
    opts.output = os.path.abspath(opts.output)

    # dirname() of a bare file name is '', which os.listdir() rejects,
    # so make the path absolute first
    opf = search_for_opf(os.path.dirname(os.path.abspath(htmlfile)))
    if opf:
        mi = MetaInformation(opf)
    else:
        stream = open(htmlfile, 'rb')
        try:
            mi = get_metadata(stream, 'html')
        finally:
            stream.close()

    # Options given on the command line override detected metadata
    if opts.title:
        mi.title = opts.title
    if opts.authors != _('Unknown'):
        opts.authors = opts.authors.split(',')
        opts.authors = [a.strip() for a in opts.authors]
        mi.authors = opts.authors
    if not mi.title:
        mi.title = os.path.splitext(os.path.basename(htmlfile))[0]
    if not mi.authors:
        mi.authors = [_('Unknown')]

    # The re: prefix in the expression refers to the EXSLT regular
    # expression functions
    opts.chapter = XPath(opts.chapter,
                         namespaces={'re':'http://exslt.org/regular-expressions'})

    filelist = None
    print('Building file list...')
    if opf is not None:
        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
    if not filelist:
        # traverse() returns the pair (breadth_first, depth_first)
        filelist = traverse(htmlfile, verbose=opts.verbose, encoding=opts.encoding)\
                    [0 if opts.breadth_first else 1]
    if opts.verbose:
        print('\tFound files...')
        for f in filelist:
            print('\t\t %s' % (f,))

    parse_content(filelist, opts)
def main(args=sys.argv):
    '''
    Command line entry point.

    :param args: Argument list, with the program name first.
    :return: 0 after converting ``args[1]``; 1 (after printing the help)
             when no input file was given.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) >= 2:
        convert(args[1], opts)
        return 0
    parser.print_help()
    print(_('You must specify an input HTML file'))
    return 1
if __name__ == '__main__':
sys.exit(main())

View File

@ -123,9 +123,12 @@ class HTMLFile(object):
url = match.group(i)
if url:
break
link = Link(url, self.base)
link = self.resolve(url)
if link not in self.links:
self.links.append(link)
def resolve(self, url):
    ''' Return a :class:`Link` built from ``url`` and this file's ``base``. '''
    return Link(url, self.base)
def depth_first(root, flat, visited=set([])):
@ -152,7 +155,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
Recursively traverse all links in the HTML file.
:param max_levels: Maximum levels of recursion. Must be non-negative. 0
implies that no links in hte root HTML file are followed.
implies that no links in the root HTML file are followed.
:param encoding: Specify character encoding of HTML files. If `None` it is
auto-detected.
:return: A pair of lists (breadth_first, depth_first). Each list contains
@ -186,7 +189,23 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None)
return flat, list(depth_first(flat[0], flat))
def opf_traverse(opf_reader, verbose=0, encoding=None):
    '''
    Return a list of :class:`HTMLFile` objects in the order specified by the
    `<spine>` element of the OPF.

    Every file appears once, at the position of its first occurrence in the
    spine.

    :param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance.
    :param verbose: Passed on to each :class:`HTMLFile`.
    :param encoding: Specify character encoding of HTML files. If `None` it is
                     auto-detected.
    :raises ValueError: If the OPF has no spine.
    '''
    if not opf_reader.spine:
        raise ValueError('OPF does not have a spine')
    flat = []
    seen = set()
    for path in opf_reader.spine.items():
        # Compare absolute paths, since that is what gets stored
        path = os.path.abspath(path)
        if path not in seen:
            seen.add(path)
            flat.append(path)
    return [HTMLFile(path, 0, encoding, verbose) for path in flat]

View File

@ -0,0 +1,74 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
import re
class PreProcessor(object):
    '''
    Clean up HTML source text before it is parsed.

    :meth:`preprocess` detects which tool produced the markup and applies
    the matching list of rules. Every rule is a pair
    ``(compiled pattern, replacement function)``.
    '''

    # Rules that are applied to every document, before the specific ones
    PREPROCESS = []

    # Fix Baen markup
    BAEN = [
        (re.compile(r'page-break-before:\s*\w+([\s;\}])', re.IGNORECASE),
         lambda match: match.group(1)),
        (re.compile(r'<p>\s*(<a id.*?>\s*</a>)\s*</p>', re.IGNORECASE),
         lambda match: match.group(1)),
        (re.compile(r'<\s*a\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*</a>', re.IGNORECASE),
         lambda match: ''),
        ]

    # Fix pdftohtml markup
    PDFTOHTML = [
        # Remove <hr> tags
        (re.compile(r'<hr.*?>', re.IGNORECASE), lambda match: '<span style="page-break-after:always"> </span>'),
        # Remove page numbers
        (re.compile(r'\d+<br>', re.IGNORECASE), lambda match: ''),
        # Remove <br> and replace <br><br> with <p>
        (re.compile(r'<br.*?>\s*<br.*?>', re.IGNORECASE), lambda match: '<p>'),
        # A <br> is kept when the line starts with a tag or is shorter than
        # 40 characters, otherwise it is dropped
        (re.compile(r'(.*)<br.*?>', re.IGNORECASE),
         lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40
                       else match.group(1)),
        # Remove hyphenation
        (re.compile(r'-\n\r?'), lambda match: ''),
        ]

    # Fix Book Designer markup
    BOOK_DESIGNER = [
        # HR
        (re.compile('<hr>', re.IGNORECASE),
         lambda match : '<span style="page-break-after:always"> </span>'),
        # Create header tags. Group 1 is the (optional, unquoted) value of the
        # align attribute, group 2 is the text of the heading.
        (re.compile(r'<h2[^><]*?id=BookTitle(?:[^><]*?\salign=(\w+))?[^><]*?>([^><]*?)</h2>', re.IGNORECASE),
         lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(1) if match.group(1) else 'center', match.group(2))),
        (re.compile(r'<h2[^><]*?id=BookAuthor(?:[^><]*?\salign=(\w+))?[^><]*?>([^><]*?)</h2>', re.IGNORECASE),
         lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(1) if match.group(1) else 'center', match.group(2))),
        (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
         lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
        (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
         lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
        ]

    def is_baen(self, src):
        ''' True if ``src`` has a Publisher <meta> tag whose content mentions Baen. '''
        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
                          re.IGNORECASE).search(src) is not None

    def is_book_designer(self, raw):
        ''' True if ``raw`` contains an <H2> tag with ``id=BookTitle``. '''
        return re.search('<H2[^><]*id=BookTitle', raw) is not None

    def is_pdftohtml(self, src):
        ''' True if ``src`` starts with the marker comment of calibre's pdftohtml. '''
        return src.startswith('<!-- created by calibre\'s pdftohtml -->')

    def preprocess(self, html):
        '''
        Apply :attr:`PREPROCESS` and then the rules for the detected producer
        (checked in the order Baen, Book Designer, pdftohtml) to ``html``.

        :return: The processed text.
        '''
        if self.is_baen(html):
            rules = self.BAEN
        elif self.is_book_designer(html):
            rules = self.BOOK_DESIGNER
        elif self.is_pdftohtml(html):
            rules = self.PDFTOHTML
        else:
            rules = []
        for pattern, replacement in self.PREPROCESS + rules:
            html = pattern.sub(replacement, html)
        return html

View File

@ -25,6 +25,7 @@ entry_points = {
'epub-meta = calibre.ebooks.metadata.epub:main',
'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main',
'html2lrf = calibre.ebooks.lrf.html.convert_from:main',
'html2epub = calibre.ebooks.epub.from_html:main',
'markdown-calibre = calibre.ebooks.markdown.markdown:main',
'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main',
'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main',

View File

@ -13,7 +13,8 @@ from gettext import GNUTranslations
import __builtin__
__builtin__.__dict__['_'] = lambda s: s
from calibre.constants import iswindows, isosx, islinux, isfrozen
from calibre.constants import iswindows, isosx, islinux, isfrozen,\
preferred_encoding
from calibre.translations.msgfmt import make
_run_once = False
@ -146,4 +147,8 @@ if not _run_once:
sys.argv[1:] = winutil.argv()[1-len(sys.argv):]
################################################################################
# Convert command line arguments to unicode
for i in range(1, len(sys.argv)):
if not isinstance(sys.argv[i], unicode):
sys.argv[i] = sys.argv[i].decode(preferred_encoding, 'replace')

View File

@ -8,6 +8,7 @@ Manage application-wide preferences.
'''
import os, re, cPickle, textwrap
from copy import deepcopy
from functools import partial
from optparse import OptionParser as _OptionParser
from optparse import IndentedHelpFormatter
from PyQt4.QtCore import QString
@ -200,6 +201,7 @@ class OptionSet(object):
raise ValueError('A group by the name %s already exists in this set'%name)
self.groups[name] = description
self.group_list.append(name)
return partial(self.add_opt, group=name)
def add_opt(self, name, switches=[], help=None, type=None, choices=None,
group=None, default=None, action=None, metavar=None):
@ -234,7 +236,7 @@ class OptionSet(object):
parser = OptionParser(usage, gui_mode=gui_mode)
groups = defaultdict(lambda : parser)
for group, desc in self.groups.items():
groups[group] = parser.add_group(group, desc)
groups[group] = parser.add_option_group(group.upper(), desc)
for pref in self.preferences:
if not pref.switches:

View File

@ -0,0 +1,254 @@
#!/usr/bin/env python
"""cssutils - CSS Cascading Style Sheets library for Python
Copyright (C) 2004-2008 Christof Hoeke
cssutils is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
A Python package to parse and build CSS Cascading Style Sheets. DOM only, not any rendering facilities!
Based upon and partly implementing the following specifications :
`CSS 2.1 <http://www.w3.org/TR/CSS21/>`__
General CSS rules and properties are defined here
`CSS 2.1 Errata <http://www.w3.org/Style/css2-updates/CR-CSS21-20070719-errata.html>`__
A few errata, mainly the definition of CHARSET_SYM tokens
`CSS3 Module: Syntax <http://www.w3.org/TR/css3-syntax/>`__
Used in parts since cssutils 0.9.4. cssutils tries to use the features from CSS 2.1 and CSS 3 with preference to CSS3 but as this is not final yet some parts are from CSS 2.1
`MediaQueries <http://www.w3.org/TR/css3-mediaqueries/>`__
MediaQueries are part of ``stylesheets.MediaList`` since v0.9.4, used in @import and @media rules.
`Namespaces <http://dev.w3.org/csswg/css3-namespace/>`__
Added in v0.9.1, updated to definition in CSSOM in v0.9.4, updated in 0.9.5 for dev version
`Selectors <http://www.w3.org/TR/css3-selectors/>`__
The selector syntax defined here (and not in CSS 2.1) should be parsable with cssutils (*should* mind though ;) )
`DOM Level 2 Style CSS <http://www.w3.org/TR/DOM-Level-2-Style/css.html>`__
DOM for package css
`DOM Level 2 Style Stylesheets <http://www.w3.org/TR/DOM-Level-2-Style/stylesheets.html>`__
DOM for package stylesheets
`CSSOM <http://dev.w3.org/csswg/cssom/>`__
A few details (mainly the NamespaceRule DOM) is taken from here. Plan is to move implementation to the stuff defined here which is newer but still no REC so might change anytime...
The cssutils tokenizer is a customized implementation of `CSS3 Module: Syntax (W3C Working Draft 13 August 2003) <http://www.w3.org/TR/css3-syntax/>`__ which itself is based on the CSS 2.1 tokenizer. It tries to be as compliant as possible but uses some (helpful) parts of the CSS 2.1 tokenizer.
I guess cssutils is neither CSS 2.1 nor CSS 3 compliant but tries to at least be able to parse both grammars including some more real world cases (some CSS hacks are actually parsed and serialized). Both official grammars are not final nor bugfree but still feasible. cssutils aim is not to be fully compliant to any CSS specification (the specifications seem to be in a constant flow anyway) but cssutils *should* be able to read and write as many as possible CSS stylesheets "in the wild" while at the same time implement the official APIs which are well documented. Some minor extensions are provided as well.
Please visit http://cthedot.de/cssutils/ for more details.
Tested with Python 2.5 on Windows Vista mainly.
This library may be used ``from cssutils import *`` which
import subpackages ``css`` and ``stylesheets``, CSSParser and
CSSSerializer classes only.
Usage may be::
>>> from cssutils import *
>>> parser = CSSParser()
>>> sheet = parser.parseString(u'a { color: red}')
>>> print sheet.cssText
a {
color: red
}
"""
__all__ = ['css', 'stylesheets', 'CSSParser', 'CSSSerializer']
__docformat__ = 'restructuredtext'
__author__ = 'Christof Hoeke with contributions by Walter Doerwald'
__date__ = '$LastChangedDate:: 2008-08-11 20:11:23 +0200 #$:'
VERSION = '0.9.5.1'
__version__ = '%s $Id: __init__.py 1426 2008-08-11 18:11:23Z cthedot $' % VERSION
import codec
import xml.dom
# order of imports is important (partly circular)
from helper import Deprecated
import errorhandler
log = errorhandler.ErrorHandler()
import util
import css
import stylesheets
from parse import CSSParser
from serialize import CSSSerializer
ser = CSSSerializer()
# used by Selector defining namespace prefix '*'
_ANYNS = -1
class DOMImplementationCSS(object):
    """
    DOM implementation object for CSS: lets a DOM user create a
    CSSStyleSheet outside the context of a document. (DOM Level 2 offers no
    way to associate such a sheet with a document afterwards.)

    The class acts as its *own factory*: it is handed to
    xml.dom.registerDOMImplementation, which calls it and thereby receives
    an instance.
    """

    # (feature, version) pairs answered positively by hasFeature()
    _features = [
        ('css', '1.0'),
        ('css', '2.0'),
        ('stylesheets', '1.0'),
        ('stylesheets', '2.0')
    ]

    def createCSSStyleSheet(self, title, media):
        """
        Create and return a new CSSStyleSheet.

        title of type DOMString
            The advisory title. See also the Style Sheet Interfaces
            section.
        media of type DOMString
            The comma-separated list of media associated with the new
            style sheet. See also the Style Sheet Interfaces section.

        TODO: DOMException
            SYNTAX_ERR: Raised if the specified media string value has a
            syntax error and is unparsable.
        """
        sheet = css.CSSStyleSheet(title=title, media=media)
        return sheet

    def createDocument(self, *args):
        # not needed for HTML, probably not for CSS either
        raise NotImplementedError

    def createDocumentType(self, *args):
        # not needed for HTML, probably not for CSS either
        raise NotImplementedError

    def hasFeature(self, feature, version):
        # feature names compare case insensitively, versions as text
        wanted = (feature.lower(), unicode(version))
        return wanted in self._features
xml.dom.registerDOMImplementation('cssutils', DOMImplementationCSS)
def parseString(*args, **kwargs):
    # Shortcut: forwards all arguments to a freshly created CSSParser
    parser = CSSParser()
    return parser.parseString(*args, **kwargs)
parseString.__doc__ = CSSParser.parseString.__doc__
def parseFile(*args, **kwargs):
    # Shortcut: forwards all arguments to a freshly created CSSParser
    parser = CSSParser()
    return parser.parseFile(*args, **kwargs)
parseFile.__doc__ = CSSParser.parseFile.__doc__
def parseUrl(*args, **kwargs):
    # Shortcut: forwards all arguments to a freshly created CSSParser
    parser = CSSParser()
    return parser.parseUrl(*args, **kwargs)
parseUrl.__doc__ = CSSParser.parseUrl.__doc__
@Deprecated('Use cssutils.parseFile() instead.')
def parse(*args, **kwargs):
    # Old name, kept for compatibility; simply calls parseFile()
    result = parseFile(*args, **kwargs)
    return result
parse.__doc__ = CSSParser.parse.__doc__
# set "ser", default serializer
def setSerializer(serializer):
    """
    Rebind the module-level name ``ser`` (the serializer shared by the
    classes in cssutils) to ``serializer``.
    """
    globals()['ser'] = serializer
def getUrls(sheet):
    """
    Generate every ``url(urlstring)`` value found in the given
    CSSStyleSheet ``sheet``.

    The ``href`` of each ``CSSImportRule`` comes first, followed by the URI
    values of the properties of all ``CSSStyleDeclaration`` objects. The
    values exclude ``url(`` and ``)`` and surrounding single or double
    quotes.
    """
    for rule in sheet:
        if rule.type == rule.IMPORT_RULE:
            yield rule.href

    def uri_of(value):
        "the string of a primitive URI value, None for anything else"
        if value.CSS_PRIMITIVE_VALUE != value.cssValueType:
            return None
        if value.CSS_URI != value.primitiveType:
            return None
        return value.getStringValue()

    def declarations(node):
        "recursively generate the style declarations below ``node``"
        if hasattr(node, 'cssRules'):
            for child in node.cssRules:
                for found in declarations(child):
                    yield found
        elif hasattr(node, 'style'):
            yield node.style

    for declaration in declarations(sheet):
        for prop in declaration.getProperties(all=True):
            value = prop.cssValue
            if value.CSS_VALUE_LIST == value.cssValueType:
                candidates = value
            elif value.CSS_PRIMITIVE_VALUE == value.cssValueType:
                candidates = (value,)
            else:
                continue
            for candidate in candidates:
                url = uri_of(candidate)
                if url is not None:
                    yield url
def replaceUrls(sheet, replacer):
    """
    Replace every ``url(urlstring)`` value in the given CSSStyleSheet
    ``sheet``: the ``href`` of each ``CSSImportRule`` and the URI values of
    the properties of all ``CSSStyleDeclaration`` objects.

    ``replacer`` is called with one argument, the current ``urlstring``
    (excluding ``url(`` and ``)`` and surrounding single or double quotes),
    and its result becomes the new value.
    """
    for rule in sheet:
        if rule.type == rule.IMPORT_RULE:
            rule.href = replacer(rule.href)

    def rewrite(value):
        "replace the string of a primitive URI value, ignore anything else"
        if value.CSS_PRIMITIVE_VALUE != value.cssValueType:
            return
        if value.CSS_URI != value.primitiveType:
            return
        value.setStringValue(value.CSS_URI,
                             replacer(value.getStringValue()))

    def declarations(node):
        "recursively generate the style declarations below ``node``"
        if hasattr(node, 'cssRules'):
            for child in node.cssRules:
                for found in declarations(child):
                    yield found
        elif hasattr(node, 'style'):
            yield node.style

    for declaration in declarations(sheet):
        for prop in declaration.getProperties(all=True):
            value = prop.cssValue
            if value.CSS_VALUE_LIST == value.cssValueType:
                for item in value:
                    rewrite(item)
            elif value.CSS_PRIMITIVE_VALUE == value.cssValueType:
                rewrite(value)
if __name__ == '__main__':
print __doc__

View File

@ -0,0 +1,581 @@
#!/usr/bin/env python
"""Python codec for CSS."""
__docformat__ = 'restructuredtext'
__author__ = 'Walter Doerwald'
__version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $'
import codecs, marshal
# We're using bits to store all possible candidate encodings (or variants, i.e.
# we have two bits for the variants of UTF-16 and two for the
# variants of UTF-32).
#
# Prefixes for various CSS encodings
# UTF-8-SIG xEF xBB xBF
# UTF-16 (LE) xFF xFE ~x00|~x00
# UTF-16 (BE) xFE xFF
# UTF-16-LE @ x00 @ x00
# UTF-16-BE x00 @
# UTF-32 (LE) xFF xFE x00 x00
# UTF-32 (BE) x00 x00 xFE xFF
# UTF-32-LE @ x00 x00 x00
# UTF-32-BE x00 x00 x00 @
# CHARSET @ c h a ...
def detectencoding_str(input, final=False):
    """
    Detect the encoding of the byte string ``input``, which contains the
    beginning of a CSS file. This function returns the detected encoding (or
    ``None`` if it hasn't got enough data), and a flag that indicates whether
    the encoding has been detected explicitly or implicitly. To detect the
    encoding the first few bytes are used (or if ``input`` is ASCII compatible
    and starts with a charset rule the encoding name from the rule). "Explicit"
    detection means that the bytes start with a BOM or a charset rule.

    If the encoding can't be detected yet, ``None`` is returned as the encoding.
    ``final`` specifies whether more data is available in later calls or not.
    If ``final`` is true, ``detectencoding_str()`` will never return ``None``
    as the encoding.
    """

    # A bit for every candidate
    CANDIDATE_UTF_8_SIG    =   1
    CANDIDATE_UTF_16_AS_LE =   2
    CANDIDATE_UTF_16_AS_BE =   4
    CANDIDATE_UTF_16_LE    =   8
    CANDIDATE_UTF_16_BE    =  16
    CANDIDATE_UTF_32_AS_LE =  32
    CANDIDATE_UTF_32_AS_BE =  64
    CANDIDATE_UTF_32_LE    = 128
    CANDIDATE_UTF_32_BE    = 256
    CANDIDATE_CHARSET      = 512

    candidates = 1023 # all candidates

    # Each of the first four bytes (as far as they are available) clears the
    # bits of all candidates whose prefix has a different byte at that position
    li = len(input)
    if li>=1:
        # Check first byte
        c = input[0]
        if c != "\xef":
            candidates &= ~CANDIDATE_UTF_8_SIG
        if c != "\xff":
            candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_16_AS_LE)
        if c != "\xfe":
            candidates &= ~CANDIDATE_UTF_16_AS_BE
        if c != "@":
            candidates &= ~(CANDIDATE_UTF_32_LE|CANDIDATE_UTF_16_LE|CANDIDATE_CHARSET)
        if c != "\x00":
            candidates &= ~(CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_BE|CANDIDATE_UTF_16_BE)
        if li>=2:
            # Check second byte
            c = input[1]
            if c != "\xbb":
                candidates &= ~CANDIDATE_UTF_8_SIG
            if c != "\xfe":
                candidates &= ~(CANDIDATE_UTF_16_AS_LE|CANDIDATE_UTF_32_AS_LE)
            if c != "\xff":
                candidates &= ~CANDIDATE_UTF_16_AS_BE
            if c != "\x00":
                candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE)
            if c != "@":
                candidates &= ~CANDIDATE_UTF_16_BE
            if c != "c":
                candidates &= ~CANDIDATE_CHARSET
            if li>=3:
                # Check third byte
                c = input[2]
                if c != "\xbf":
                    candidates &= ~CANDIDATE_UTF_8_SIG
                if c != "c":
                    candidates &= ~CANDIDATE_UTF_16_LE
                if c != "\x00":
                    candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE)
                if c != "\xfe":
                    candidates &= ~CANDIDATE_UTF_32_AS_BE
                if c != "h":
                    candidates &= ~CANDIDATE_CHARSET
                if li>=4:
                    # Check fourth byte
                    c = input[3]
                    # Two zero bytes at positions 2 and 3 rule out the
                    # UTF-16 little endian BOM candidate
                    if input[2:4] == "\x00\x00":
                        candidates &= ~CANDIDATE_UTF_16_AS_LE
                    if c != "\x00":
                        candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE)
                    if c != "\xff":
                        candidates &= ~CANDIDATE_UTF_32_AS_BE
                    if c != "@":
                        candidates &= ~CANDIDATE_UTF_32_BE
                    if c != "a":
                        candidates &= ~CANDIDATE_CHARSET
    # No candidate left: implicit default
    if candidates == 0:
        return ("utf-8", False)
    # x & (x-1) clears the lowest set bit, so the result is zero exactly
    # when a single bit (i.e. a single candidate) is left
    if not (candidates & (candidates-1)): # only one candidate remaining
        # A candidate is only accepted once enough bytes have been seen
        if candidates == CANDIDATE_UTF_8_SIG and li >= 3:
            return ("utf-8-sig", True)
        elif candidates == CANDIDATE_UTF_16_AS_LE and li >= 2:
            return ("utf-16", True)
        elif candidates == CANDIDATE_UTF_16_AS_BE and li >= 2:
            return ("utf-16", True)
        elif candidates == CANDIDATE_UTF_16_LE and li >= 4:
            return ("utf-16-le", False)
        elif candidates == CANDIDATE_UTF_16_BE and li >= 2:
            return ("utf-16-be", False)
        elif candidates == CANDIDATE_UTF_32_AS_LE and li >= 4:
            return ("utf-32", True)
        elif candidates == CANDIDATE_UTF_32_AS_BE and li >= 4:
            return ("utf-32", True)
        elif candidates == CANDIDATE_UTF_32_LE and li >= 4:
            return ("utf-32-le", False)
        elif candidates == CANDIDATE_UTF_32_BE and li >= 4:
            return ("utf-32-be", False)
        elif candidates == CANDIDATE_CHARSET and li >= 4:
            # The encoding name is the text between the quotes of the
            # charset rule, if the closing quote is already available
            prefix = '@charset "'
            if input[:len(prefix)] == prefix:
                pos = input.find('"', len(prefix))
                if pos >= 0:
                    return (input[len(prefix):pos], True)
    # if this is the last call, and we haven't determined an encoding yet,
    # we default to UTF-8
    if final:
        return ("utf-8", False)
    return (None, False) # don't know yet
def detectencoding_unicode(input, final=False):
    """
    Detect the encoding of the unicode string ``input``, which contains the
    beginning of a CSS file. The encoding is detected from the charset rule
    at the beginning of ``input``. If there is no charset rule, ``"utf-8"``
    will be returned.

    The result is a pair ``(encoding, explicit)``; ``explicit`` is true only
    if the encoding was taken from a charset rule.

    If the encoding can't be detected yet, ``None`` is returned as the
    encoding. ``final`` specifies whether more data will be available in
    later calls or not. If ``final`` is true, ``detectencoding_unicode()``
    will never return ``None`` as the encoding.
    """
    prefix = u'@charset "'
    if input.startswith(prefix):
        pos = input.find(u'"', len(prefix))
        if pos >= 0:
            return (input[len(prefix):pos], True)
        # The charset rule has started but its closing quote is missing.
        # If no more data will follow, fall back to the default instead of
        # reporting "don't know".
        if final:
            return ("utf-8", False)
    elif final or not prefix.startswith(input):
        # if this is the last call, and we haven't determined an encoding yet,
        # (or the string definitely doesn't start with prefix) we default to UTF-8
        return ("utf-8", False)
    return (None, False) # don't know yet
def _fixencoding(input, encoding, final=False):
"""
Replace the name of the encoding in the charset rule at the beginning of
``input`` with ``encoding``. If ``input`` doesn't starts with a charset
rule, ``input`` will be returned unmodified.
If the encoding can't be found yet, ``None`` is returned. ``final``
specifies whether more data will be available in later calls or not.
If ``final`` is true, ``_fixencoding()`` will never return ``None``.
"""
prefix = u'@charset "'
if len(input) > len(prefix):
if input.startswith(prefix):
pos = input.find(u'"', len(prefix))
if pos >= 0:
if encoding.replace("_", "-").lower() == "utf-8-sig":
encoding = u"utf-8"
return prefix + encoding + input[pos:]
# we haven't seen the end of the encoding name yet => fall through
else:
return input # doesn't start with prefix, so nothing to fix
elif not prefix.startswith(input) or final:
# can't turn out to be a @charset rule later (or there is no "later")
return input
if final:
return input
return None # don't know yet
def decode(input, errors="strict", encoding=None, force=True):
    # Decode the complete byte string ``input`` and return the pair
    # (text, number of bytes consumed); a leading charset rule in the text
    # is rewritten to name the encoding that was used.
    #
    # The encoding is detected from ``input`` when none is given or when
    # ``force`` is false. The detected one is used if none was given, or if
    # it was found explicitly and ``force`` is false.
    must_detect = encoding is None or not force
    if must_detect:
        detected, explicit = detectencoding_str(input, True)
        if detected == "css":
            raise ValueError("css not allowed as encoding name")
        if encoding is None or (explicit and not force):
            encoding = detected
    decoder = codecs.getdecoder(encoding)
    text, consumed = decoder(input, errors)
    return (_fixencoding(text, unicode(encoding), True), consumed)
def encode(input, errors="strict", encoding=None):
    # Encode the unicode string ``input`` and return the pair
    # (bytes, number of characters consumed).
    #
    # A given ``encoding`` is written into a leading charset rule. Without
    # one the encoding is detected from ``input``, and the charset rule is
    # only touched to turn utf-8-sig into utf-8.
    length = len(input)
    if encoding is not None:
        input = _fixencoding(input, unicode(encoding), True)
    else:
        encoding = detectencoding_unicode(input, True)[0]
        if encoding.replace("_", "-").lower() == "utf-8-sig":
            input = _fixencoding(input, u"utf-8", True)
    if encoding == "css":
        raise ValueError("css not allowed as encoding name")
    encoded = codecs.getencoder(encoding)(input, errors)
    return (encoded[0], length)
def _bytes2int(bytes):
# Helper: convert an 8 bit string into an ``int``.
i = 0
for byte in bytes:
i = (i<<8) + ord(byte)
return i
def _int2bytes(i):
# Helper: convert an ``int`` into an 8-bit string.
v = []
while i:
v.insert(0, chr(i&0xff))
i >>= 8
return "".join(v)
if hasattr(codecs, "IncrementalDecoder"):
    class IncrementalDecoder(codecs.IncrementalDecoder):
        """Incremental decoder of the ``css`` codec.

        Input is buffered until the encoding could be determined; then a
        real incremental decoder for that encoding is created.  Decoded
        output is held back until the ``@charset`` rule at the start could
        be rewritten by ``_fixencoding()`` (or ruled out).

        encoding
            encoding to use, ``None`` to detect it from the input
        force
            if true (default) a given ``encoding`` always wins; if false an
            encoding found explicitly in the input overrides it
        """

        def __init__(self, errors="strict", encoding=None, force=True):
            # ``decoder`` is assigned before the base constructor runs as
            # ``errors`` is a property whose setter reads ``self.decoder``
            self.decoder = None
            self.encoding = encoding
            self.force = force
            codecs.IncrementalDecoder.__init__(self, errors)
            # Store ``errors`` somewhere else,
            # because we have to hide it in a property
            self._errors = errors
            self.buffer = ""
            self.headerfixed = False

        def iterdecode(self, input):
            """Decode the chunks of the iterable ``input``, yielding every
            non-empty result; a final empty chunk flushes the decoder."""
            for part in input:
                result = self.decode(part, False)
                if result:
                    yield result
            result = self.decode("", True)
            if result:
                yield result

        def decode(self, input, final=False):
            """Decode ``input`` and return the ``unicode`` text that is
            available so far (``u""`` while encoding or header are still
            undecided).  Raises ``ValueError`` if the detected encoding is
            ``css``."""
            # We're doing basically the same as a ``BufferedIncrementalDecoder``,
            # but since the buffer is only relevant until the encoding has been
            # detected (in which case the buffer of the underlying codec might
            # kick in), we're implementing buffering ourselves to avoid some
            # overhead.
            if self.decoder is None:
                input = self.buffer + input
                # Do we have to detect the encoding from the input?
                if self.encoding is None or not self.force:
                    (encoding, explicit) = detectencoding_str(input, final)
                    if encoding is None: # no encoding determined yet
                        self.buffer = input # retry the complete input on the next call
                        return u"" # no encoding determined yet, so no output
                    elif encoding == "css":
                        raise ValueError("css not allowed as encoding name")
                    if (explicit and not self.force) or self.encoding is None: # Take the encoding from the input
                        self.encoding = encoding
                self.buffer = "" # drop buffer, as the decoder might keep its own
                decoder = codecs.getincrementaldecoder(self.encoding)
                self.decoder = decoder(self._errors)
            if self.headerfixed:
                return self.decoder.decode(input, final)
            # If we haven't fixed the header yet,
            # the content of ``self.buffer`` is a ``unicode`` object
            output = self.buffer + self.decoder.decode(input, final)
            encoding = self.encoding
            if encoding.replace("_", "-").lower() == "utf-8-sig":
                encoding = "utf-8"
            newoutput = _fixencoding(output, unicode(encoding), final)
            if newoutput is None:
                # retry fixing the @charset rule (but keep the decoded stuff)
                self.buffer = output
                return u""
            self.headerfixed = True
            return newoutput

        def reset(self):
            """Drop the real decoder and all buffered data (``encoding`` is
            left untouched)."""
            codecs.IncrementalDecoder.reset(self)
            self.decoder = None
            self.buffer = ""
            self.headerfixed = False

        def _geterrors(self):
            return self._errors

        def _seterrors(self, errors):
            # Setting ``errors`` must be done on the real decoder too
            if self.decoder is not None:
                self.decoder.errors = errors
            self._errors = errors
        errors = property(_geterrors, _seterrors)

        def getstate(self):
            """Return ``("", n)`` where the integer ``n`` packs the
            marshalled tuple ``(encoding, buffer, headerfixed,
            has_decoder, decoder_state)``."""
            if self.decoder is not None:
                state = (self.encoding, self.buffer, self.headerfixed, True, self.decoder.getstate())
            else:
                state = (self.encoding, self.buffer, self.headerfixed, False, None)
            return ("", _bytes2int(marshal.dumps(state)))

        def setstate(self, state):
            """Restore a state previously returned by ``getstate()``."""
            # Reverse ``getstate()``: integer -> marshalled bytes -> tuple.
            # ``state[0]`` (buffered input) is always "" and is ignored.
            state = marshal.loads(_int2bytes(state[1]))
            self.encoding = state[0]
            self.buffer = state[1]
            self.headerfixed = state[2]
            # state[3] is the boolean "a real decoder existed" flag
            if state[3]:
                self.decoder = codecs.getincrementaldecoder(self.encoding)(self._errors)
                self.decoder.setstate(state[4])
            else:
                self.decoder = None
if hasattr(codecs, "IncrementalEncoder"):
    class IncrementalEncoder(codecs.IncrementalEncoder):
        """Incremental encoder of the ``css`` codec.

        If ``encoding`` is given, the ``@charset`` rule of the input is
        rewritten to it; otherwise the encoding is taken from the input.
        Input is buffered until that decision could be made, afterwards
        everything is passed to a real incremental encoder.
        """

        def __init__(self, errors="strict", encoding=None):
            # ``encoder`` is assigned before the base constructor runs as
            # ``errors`` is a property whose setter reads ``self.encoder``
            self.encoder = None
            self.encoding = encoding
            codecs.IncrementalEncoder.__init__(self, errors)
            # Store ``errors`` somewhere else,
            # because we have to hide it in a property
            self._errors = errors
            self.buffer = u""

        def iterencode(self, input):
            """Encode the chunks of the iterable ``input``, yielding every
            non-empty result; a final empty chunk flushes the encoder."""
            for part in input:
                result = self.encode(part, False)
                if result:
                    yield result
            result = self.encode(u"", True)
            if result:
                yield result

        def encode(self, input, final=False):
            """Encode ``input`` and return the bytes available so far
            (``""`` while the encoding is still undecided).  Raises
            ``ValueError`` if the encoding turns out to be ``css``."""
            if self.encoder is None:
                input = self.buffer + input
                if self.encoding is not None:
                    # Replace encoding in the @charset rule with the specified one
                    encoding = self.encoding
                    if encoding.replace("_", "-").lower() == "utf-8-sig":
                        encoding = "utf-8"
                    newinput = _fixencoding(input, unicode(encoding), final)
                    if newinput is None: # @charset rule incomplete => Retry next time
                        self.buffer = input
                        return ""
                    input = newinput
                else:
                    # Use encoding from the @charset declaration
                    self.encoding = detectencoding_unicode(input, final)[0]
                if self.encoding is not None:
                    if self.encoding == "css":
                        raise ValueError("css not allowed as encoding name")
                    info = codecs.lookup(self.encoding)
                    if self.encoding.replace("_", "-").lower() == "utf-8-sig":
                        input = _fixencoding(input, u"utf-8", True)
                    self.encoder = info.incrementalencoder(self._errors)
                    self.buffer = u""
                else:
                    self.buffer = input
                    return ""
            return self.encoder.encode(input, final)

        def reset(self):
            """Drop the real encoder and the buffered input (``encoding``
            is left untouched)."""
            codecs.IncrementalEncoder.reset(self)
            self.encoder = None
            self.buffer = u""

        def _geterrors(self):
            return self._errors

        def _seterrors(self, errors):
            # Setting ``errors`` must be done on the real encoder too
            if self.encoder is not None:
                self.encoder.errors = errors
            self._errors = errors
        errors = property(_geterrors, _seterrors)

        def getstate(self):
            """Return an integer packing the marshalled tuple
            ``(encoding, buffer, has_encoder, encoder_state)``."""
            if self.encoder is not None:
                state = (self.encoding, self.buffer, True, self.encoder.getstate())
            else:
                state = (self.encoding, self.buffer, False, None)
            return _bytes2int(marshal.dumps(state))

        def setstate(self, state):
            """Restore a state previously returned by ``getstate()``."""
            # Reverse ``getstate()``: integer -> marshalled bytes -> tuple
            state = marshal.loads(_int2bytes(state))
            self.encoding = state[0]
            self.buffer = state[1]
            # state[2] is the boolean "a real encoder existed" flag,
            # state[3] the state of that encoder
            if state[2]:
                self.encoder = codecs.getincrementalencoder(self.encoding)(self._errors)
                self.encoder.setstate(state[3])
            else:
                self.encoder = None
class StreamWriter(codecs.StreamWriter):
    """Stream writer of the ``css`` codec.

    Text is buffered until the target encoding is known (either passed in
    or read from the ``@charset`` rule of the text); from then on a real
    stream writer for that encoding does the work.
    """

    def __init__(self, stream, errors="strict", encoding=None, header=False):
        # ``header`` is accepted but not used here
        codecs.StreamWriter.__init__(self, stream, errors)
        self.streamwriter = None
        self.encoding = encoding
        self._errors = errors
        self.buffer = u""

    def encode(self, input, errors='strict'):
        """Return ``(bytes, consumed)`` for ``input``; ``("", 0)`` is
        returned while the encoding cannot be decided yet."""
        if self.streamwriter is not None:
            # encoding already settled: plain delegation
            return (self.streamwriter.encode(input, errors)[0], len(input))

        text = self.buffer + input
        length = len(text)
        if self.encoding is None:
            # Use encoding from the @charset declaration
            self.encoding = detectencoding_unicode(text, False)[0]
        else:
            # Replace encoding in the @charset rule with the specified one
            name = self.encoding
            if name.replace("_", "-").lower() == "utf-8-sig":
                name = "utf-8"
            fixed = _fixencoding(text, unicode(name), False)
            if fixed is None:
                # @charset rule incomplete => Retry next time
                self.buffer = text
                return ("", 0)
            text = fixed

        if self.encoding is None:
            # still undecided, keep everything for the next call
            self.buffer = text
            return ("", 0)
        if self.encoding == "css":
            raise ValueError("css not allowed as encoding name")
        self.streamwriter = codecs.getwriter(self.encoding)(self.stream, self._errors)
        if self.encoding.replace("_", "-").lower() == "utf-8-sig":
            text = _fixencoding(text, u"utf-8", True)
        self.buffer = u""
        return (self.streamwriter.encode(text, errors)[0], length)

    def _geterrors(self):
        return self._errors

    def _seterrors(self, errors):
        # the real stream writer has to follow a change of ``errors``
        if self.streamwriter is not None:
            self.streamwriter.errors = errors
        self._errors = errors
    errors = property(_geterrors, _seterrors)
class StreamReader(codecs.StreamReader):
    """Stream reader of the ``css`` codec.

    A real stream reader is created once the encoding is known and the
    ``@charset`` rule of the first decoded chunk could be rewritten; until
    then ``decode()`` reports that nothing was consumed.
    """

    def __init__(self, stream, errors="strict", encoding=None, force=True):
        codecs.StreamReader.__init__(self, stream, errors)
        self.streamreader = None
        self.encoding = encoding
        self.force = force
        self._errors = errors

    def decode(self, input, errors='strict'):
        """Return ``(unicode_text, consumed)`` for ``input``; ``(u"", 0)``
        while encoding or ``@charset`` rule are still undecided."""
        if self.streamreader is not None:
            return self.streamreader.decode(input, errors)

        if self.encoding is None or not self.force:
            detected, explicit = detectencoding_str(input, False)
            if detected is None:
                # no encoding determined yet, so no output
                return (u"", 0)
            if detected == "css":
                raise ValueError("css not allowed as encoding name")
            # Take the encoding from the input
            if self.encoding is None or (explicit and not self.force):
                self.encoding = detected

        candidate = codecs.getreader(self.encoding)(self.stream, self._errors)
        output, consumed = candidate.decode(input, errors)
        name = self.encoding
        if name.replace("_", "-").lower() == "utf-8-sig":
            name = "utf-8"
        fixed = _fixencoding(output, unicode(name), False)
        if fixed is None:
            # we will create a new streamreader on the next call
            return (u"", 0)
        self.streamreader = candidate
        return (fixed, consumed)

    def _geterrors(self):
        return self._errors

    def _seterrors(self, errors):
        # the real stream reader has to follow a change of ``errors``
        if self.streamreader is not None:
            self.streamreader.errors = errors
        self._errors = errors
    errors = property(_geterrors, _seterrors)
if hasattr(codecs, "CodecInfo"):
    # ``codecs.CodecInfo`` exists: Python 2.5 or better
    def search_function(name):
        """Codec search function: knows the ``css`` codec only."""
        if name != "css":
            return None
        return codecs.CodecInfo(
            name="css",
            encode=encode,
            decode=decode,
            incrementalencoder=IncrementalEncoder,
            incrementaldecoder=IncrementalDecoder,
            streamwriter=StreamWriter,
            streamreader=StreamReader,
        )
else:
    # Python 2.4: the ``utf-8-sig`` codec has to be supplied here as well
    def utf8sig_encode(input, errors='strict'):
        """Encode to UTF-8 with a leading BOM."""
        payload = codecs.utf_8_encode(input, errors)[0]
        return (codecs.BOM_UTF8 + payload, len(input))

    def utf8sig_decode(input, errors='strict'):
        """Decode UTF-8, skipping a leading BOM (which still counts as
        consumed)."""
        skipped = 0
        if input[:3] == codecs.BOM_UTF8:
            skipped = 3
            input = input[3:]
        text, used = codecs.utf_8_decode(input, errors, True)
        return (text, used + skipped)

    class UTF8SigStreamWriter(codecs.StreamWriter):
        """Writes the BOM with the first chunk only."""

        def reset(self):
            codecs.StreamWriter.reset(self)
            # remove the instance level shortcut installed by ``encode()``
            try:
                del self.encode
            except AttributeError:
                pass

        def encode(self, input, errors='strict'):
            # all following calls go to the plain UTF-8 encoder directly
            self.encode = codecs.utf_8_encode
            return utf8sig_encode(input, errors)

    class UTF8SigStreamReader(codecs.StreamReader):
        """Skips a BOM at the start of the stream."""

        def reset(self):
            codecs.StreamReader.reset(self)
            # remove the instance level shortcut installed by ``decode()``
            try:
                del self.decode
            except AttributeError:
                pass

        def decode(self, input, errors='strict'):
            too_short = len(input) < 3
            if too_short and codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
            # all following calls go to the plain UTF-8 decoder directly
            self.decode = codecs.utf_8_decode
            return utf8sig_decode(input, errors)

    def search_function(name):
        """Codec search function: knows ``css`` and ``utf_8_sig``."""
        import encodings
        name = encodings.normalize_encoding(name)
        if name == "css":
            return (encode, decode, StreamReader, StreamWriter)
        elif name == "utf_8_sig":
            return (utf8sig_encode, utf8sig_decode, UTF8SigStreamReader, UTF8SigStreamWriter)
codecs.register(search_function)
# Error handler for CSS escaping
def cssescape(exc):
    """Codec error handler replacing unencodable characters by CSS escapes.

    Each character of the failing range becomes a backslash followed by
    its code point as (at least) six hex digits.  Returns the
    ``(replacement, resume_position)`` tuple expected of error handlers.
    Raises ``TypeError`` for anything but a ``UnicodeEncodeError``.
    """
    if not isinstance(exc, UnicodeEncodeError):
        raise TypeError("don't know how to handle %r" % exc)
    failing = exc.object[exc.start:exc.end]
    escapes = [u"\\%06x" % ord(char) for char in failing]
    return (u"".join(escapes), exc.end)
codecs.register_error("cssescape", cssescape)

View File

@ -0,0 +1,63 @@
"""
Document Object Model Level 2 CSS
http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/css.html
currently implemented
- CSSStyleSheet
- CSSRuleList
- CSSRule
- CSSComment (cssutils addon)
- CSSCharsetRule
- CSSFontFaceRule
- CSSImportRule
- CSSMediaRule
- CSSNamespaceRule (WD)
- CSSPageRule
- CSSStyleRule
- CSSUnknownRule
- Selector and SelectorList
- CSSStyleDeclaration
- CSS2Properties
- CSSValue
- CSSPrimitiveValue
- CSSValueList
todo
- RGBColor, Rect, Counter
"""
# Public names re-exported by ``from cssutils.css import *``.
# NOTE: every entry needs its trailing comma; adjacent string literals are
# silently concatenated into one (wrong) name otherwise.
__all__ = [
    'CSSStyleSheet',
    'CSSRuleList',
    'CSSRule',
    'CSSComment',
    'CSSCharsetRule',
    'CSSFontFaceRule',
    'CSSImportRule',
    'CSSMediaRule',
    'CSSNamespaceRule',
    'CSSPageRule',
    'CSSStyleRule',
    'CSSUnknownRule',
    'Selector', 'SelectorList',
    'CSSStyleDeclaration', 'Property',
    'CSSValue', 'CSSPrimitiveValue', 'CSSValueList',
]
__docformat__ = 'restructuredtext'
__version__ = '$Id: __init__.py 1116 2008-03-05 13:52:23Z cthedot $'
from cssstylesheet import *
from cssrulelist import *
from cssrule import *
from csscomment import *
from csscharsetrule import *
from cssfontfacerule import *
from cssimportrule import *
from cssmediarule import *
from cssnamespacerule import *
from csspagerule import *
from cssstylerule import *
from cssunknownrule import *
from selector import *
from selectorlist import *
from cssstyledeclaration import *
from cssvalue import *

View File

@ -0,0 +1,165 @@
"""CSSCharsetRule implements DOM Level 2 CSS CSSCharsetRule.
TODO:
- check encoding syntax and not codecs.lookup?
"""
__all__ = ['CSSCharsetRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: csscharsetrule.py 1170 2008-03-20 17:42:07Z cthedot $'
import codecs
import xml.dom
import cssrule
import cssutils
class CSSCharsetRule(cssrule.CSSRule):
    """
    Represents an ``@charset`` rule of a CSS style sheet.

    The value of ``encoding`` does not influence how text is held in the
    DOM objects.  After loading a sheet it is the value found in its
    @charset rule; without such a rule no CSSCharsetRule is created at
    all.  It may be used as a hint for the encoding to use when the sheet
    is serialized.

    The encoding named by the rule need not be the one the document really
    arrived in: information from e.g. an HTTP header has priority (see CSS
    document representation) but is not reflected here.

    Properties
    ==========
    cssText: of type DOMString
        The parsable textual representation of this rule
    encoding: of type DOMString
        The encoding information used in this @charset rule.

    Inherits properties from CSSRule

    Format
    ======
    charsetrule:
        CHARSET_SYM S* STRING S* ';'

    BUT: Only valid format is:
        @charset "ENCODING";
    """
    type = property(lambda self: cssrule.CSSRule.CHARSET_RULE)

    def __init__(self, encoding=None, parentRule=None,
                 parentStyleSheet=None, readonly=False):
        """
        encoding
            name of a valid character encoding
        readonly
            defaults to False, not used yet;
            a readonly rule accepts values in the constructor only
        """
        super(CSSCharsetRule, self).__init__(
            parentRule=parentRule, parentStyleSheet=parentStyleSheet)
        self._atkeyword = '@charset'
        self._encoding = None
        if encoding:
            # goes through the validating property setter
            self.encoding = encoding
        # set last, the setter above checks the readonly state
        self._readonly = readonly

    def _getCssText(self):
        """Return the rule as serialized by the cssutils serializer."""
        return cssutils.ser.do_CSSCharsetRule(self)

    def _setCssText(self, cssText):
        """
        Parse ``cssText`` and take over its encoding if it is a wellformed
        ``@charset "ENCODING";`` rule.

        DOMException on setting

        - SYNTAX_ERR: (self)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - INVALID_MODIFICATION_ERR: (self)
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - HIERARCHY_REQUEST_ERR: (CSSStylesheet)
          Raised if the rule cannot be inserted at this point in the
          style sheet.
        - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
          Raised if the rule is readonly.
        """
        super(CSSCharsetRule, self)._setCssText(cssText)
        ok = True
        tokens = self._tokenize2(cssText)

        # 1. the @charset symbol
        if self._type(self._nexttoken(tokens)) != self._prods.CHARSET_SYM:
            ok = False
            self._log.error(u'CSSCharsetRule must start with "@charset "',
                            error=xml.dom.InvalidModificationErr)

        # 2. the encoding given as a string
        nametoken = self._nexttoken(tokens)
        nametype = self._type(nametoken)
        name = self._stringtokenvalue(nametoken)
        if self._prods.STRING != nametype or not name:
            ok = False
            self._log.error(u'CSSCharsetRule: no encoding found; %r.' %
                            self._valuestr(cssText))

        # 3. a semicolon and nothing else
        terminator = self._tokenvalue(self._nexttoken(tokens))
        resttype = self._type(self._nexttoken(tokens))
        if u';' != terminator or resttype not in ('EOF', None):
            ok = False
            self._log.error(u'CSSCharsetRule: Syntax Error: %r.' %
                            self._valuestr(cssText))

        if ok:
            self.encoding = name

    cssText = property(fget=_getCssText, fset=_setCssText,
        doc="(DOM) The parsable textual representation.")

    def _setEncoding(self, encoding):
        """
        Set the encoding; it has to be a single identifier token and a
        name known to ``codecs.lookup``.  It is stored in lower case.

        DOMException on setting

        - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
          Raised if this encoding rule is readonly.
        - SYNTAX_ERR: (self)
          Raised if the specified encoding value has a syntax error and
          is unparsable.
          Currently only valid Python encodings are allowed.
        """
        self._checkReadonly()
        tokens = self._tokenize2(encoding)
        nametoken = self._nexttoken(tokens)
        surplus = self._nexttoken(tokens)

        is_single_ident = (nametoken and not surplus and
                           self._prods.IDENT == self._type(nametoken))
        if not is_single_ident:
            self._log.error(
                'CSSCharsetRule: Syntax Error in encoding value %r.' %
                encoding)
            return
        try:
            codecs.lookup(encoding)
        except LookupError:
            self._log.error('CSSCharsetRule: Unknown (Python) encoding %r.' %
                            encoding)
        else:
            self._encoding = encoding.lower()

    encoding = property(lambda self: self._encoding, _setEncoding,
        doc="(DOM)The encoding information used in this @charset rule.")

    # a charset rule is wellformed as soon as it has an encoding
    wellformed = property(lambda self: bool(self.encoding))

    def __repr__(self):
        return "cssutils.css.%s(encoding=%r)" % (
            self.__class__.__name__, self.encoding)

    def __str__(self):
        return "<cssutils.css.%s object encoding=%r at 0x%x>" % (
            self.__class__.__name__, self.encoding, id(self))

View File

@ -0,0 +1,92 @@
"""CSSComment is not defined in DOM Level 2 at all but a cssutils defined
class only.
Implements CSSRule which is also extended for a CSSComment rule type
"""
__all__ = ['CSSComment']
__docformat__ = 'restructuredtext'
__version__ = '$Id: csscomment.py 1170 2008-03-20 17:42:07Z cthedot $'
import xml.dom
import cssrule
import cssutils
class CSSComment(cssrule.CSSRule):
    """
    (cssutils) A CSS comment, handled like a rule.

    Properties
    ==========
    cssText: of type DOMString
        The comment text including comment delimiters

    Inherits properties from CSSRule

    Format
    ======
    ::

        /*...*/
    """
    type = property(lambda self: cssrule.CSSRule.COMMENT) # value = -1

    # a comment object is always wellformed, but the attribute is needed
    wellformed = True

    def __init__(self, cssText=None, parentRule=None,
                 parentStyleSheet=None, readonly=False):
        """
        cssText
            optional initial comment text including its delimiters
        readonly
            stored last, so ``cssText`` can still be set here
        """
        super(CSSComment, self).__init__(
            parentRule=parentRule, parentStyleSheet=parentStyleSheet)
        self._cssText = None
        if cssText:
            self._setCssText(cssText)
        self._readonly = readonly

    def _getCssText(self):
        """Return the comment as serialized by the cssutils serializer."""
        return cssutils.ser.do_CSSComment(self)

    def _setCssText(self, cssText):
        """
        Accept ``cssText`` if it tokenizes to exactly one COMMENT token.

        cssText
            text to set, or a token list which is not tokenized again;
            may also be a single token for this rule

        DOMException on setting

        - SYNTAX_ERR: (self)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - INVALID_MODIFICATION_ERR: (self)
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
          Raised if the rule is readonly.
        """
        super(CSSComment, self)._setCssText(cssText)
        tokens = self._tokenize2(cssText)
        first = self._nexttoken(tokens)
        surplus = self._nexttoken(tokens)

        is_single_comment = (first and
                             self._type(first) == self._prods.COMMENT and
                             not surplus)
        if is_single_comment:
            self._cssText = self._tokenvalue(first)
        else:
            self._log.error(u'CSSComment: Not a CSSComment: %r' %
                            self._valuestr(cssText),
                            error=xml.dom.InvalidModificationErr)

    cssText = property(_getCssText, _setCssText,
        doc=u"(cssutils) Textual representation of this comment")

    def __repr__(self):
        return "cssutils.css.%s(cssText=%r)" % (
            self.__class__.__name__, self.cssText)

    def __str__(self):
        return "<cssutils.css.%s object cssText=%r at 0x%x>" % (
            self.__class__.__name__, self.cssText, id(self))

View File

@ -0,0 +1,163 @@
"""CSSFontFaceRule implements DOM Level 2 CSS CSSFontFaceRule.
"""
__all__ = ['CSSFontFaceRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssfontfacerule.py 1284 2008-06-05 16:29:17Z cthedot $'
import xml.dom
import cssrule
import cssutils
from cssstyledeclaration import CSSStyleDeclaration
class CSSFontFaceRule(cssrule.CSSRule):
    """
    The CSSFontFaceRule interface represents a @font-face rule in a CSS
    style sheet. The @font-face rule is used to hold a set of font
    descriptions.

    Properties
    ==========
    atkeyword (cssutils only)
        the literal keyword used
    cssText: of type DOMString
        The parsable textual representation of this rule
    style: of type CSSStyleDeclaration
        The declaration-block of this rule.

    Inherits properties from CSSRule

    Format
    ======
    ::

        font_face
          : FONT_FACE_SYM S*
            '{' S* declaration [ ';' S* declaration ]* '}' S*
          ;
    """
    type = property(lambda self: cssrule.CSSRule.FONT_FACE_RULE)

    # constant but needed:
    wellformed = True

    def __init__(self, style=None, parentRule=None,
                 parentStyleSheet=None, readonly=False):
        """
        if readonly allows setting of properties in constructor only

        style
            CSSStyleDeclaration (or its cssText as a string) for this
            rule
        """
        super(CSSFontFaceRule, self).__init__(parentRule=parentRule,
                                              parentStyleSheet=parentStyleSheet)
        self._atkeyword = u'@font-face'
        # Always start with an own, empty declaration block: _setStyle()
        # copies the sequence of a given CSSStyleDeclaration *into*
        # self._style, which therefore has to exist before the setter runs.
        self._style = CSSStyleDeclaration(parentRule=self)
        if style:
            self.style = style
        # set last, the setter above checks the readonly state
        self._readonly = readonly

    def _getCssText(self):
        """
        returns serialized property cssText
        """
        return cssutils.ser.do_CSSFontFaceRule(self)

    def _setCssText(self, cssText):
        """
        Parse ``cssText``; style and sequence of this rule are replaced
        only if the complete text is wellformed.

        DOMException on setting

        - SYNTAX_ERR: (self, StyleDeclaration)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - INVALID_MODIFICATION_ERR: (self)
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - HIERARCHY_REQUEST_ERR: (CSSStylesheet)
          Raised if the rule cannot be inserted at this point in the
          style sheet.
        - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
          Raised if the rule is readonly.
        """
        super(CSSFontFaceRule, self)._setCssText(cssText)

        tokenizer = self._tokenize2(cssText)
        attoken = self._nexttoken(tokenizer, None)
        if self._type(attoken) != self._prods.FONT_FACE_SYM:
            self._log.error(u'CSSFontFaceRule: No CSSFontFaceRule found: %s' %
                            self._valuestr(cssText),
                            error=xml.dom.InvalidModificationErr)
        else:
            wellformed = True
            # everything up to (and separately) the opening brace
            beforetokens, brace = self._tokensupto2(tokenizer,
                                                    blockstartonly=True,
                                                    separateEnd=True)
            if self._tokenvalue(brace) != u'{':
                wellformed = False
                self._log.error(
                    u'CSSFontFaceRule: No start { of style declaration found: %r' %
                    self._valuestr(cssText), brace)

            # parse stuff before { which should be comments and S only
            newseq = self._tempSeq()
            beforewellformed, expected = self._parse(expected=':',
                seq=newseq, tokenizer=self._tokenize2(beforetokens),
                productions={})
            wellformed = wellformed and beforewellformed

            # the declaration block up to (and separately) "}" or EOF
            styletokens, braceorEOFtoken = self._tokensupto2(tokenizer,
                                                             blockendonly=True,
                                                             separateEnd=True)
            val, typ = self._tokenvalue(braceorEOFtoken), self._type(braceorEOFtoken)
            if val != u'}' and typ != 'EOF':
                wellformed = False
                self._log.error(
                    u'CSSFontFaceRule: No "}" after style declaration found: %r' %
                    self._valuestr(cssText))

            # nothing may follow the block
            nonetoken = self._nexttoken(tokenizer)
            if nonetoken:
                wellformed = False
                self._log.error(u'CSSFontFaceRule: Trailing content found.',
                                token=nonetoken)

            newstyle = CSSStyleDeclaration()
            if 'EOF' == typ:
                # add again as style needs it
                styletokens.append(braceorEOFtoken)
            newstyle.cssText = styletokens

            if wellformed:
                self.style = newstyle
                self._setSeq(newseq) # contains (probably comments) upto { only

    cssText = property(_getCssText, _setCssText,
        doc="(DOM) The parsable textual representation of the rule.")

    def _getStyle(self):
        return self._style

    def _setStyle(self, style):
        """
        style
            StyleDeclaration or string

        A string is parsed into a new declaration block owned by this
        rule; of a StyleDeclaration only the sequence is taken over into
        the existing block.
        """
        self._checkReadonly()
        if isinstance(style, basestring):
            self._style = CSSStyleDeclaration(parentRule=self, cssText=style)
        else:
            self._style._seq = style.seq

    style = property(_getStyle, _setStyle,
        doc="(DOM) The declaration-block of this rule set.")

    def __repr__(self):
        return "cssutils.css.%s(style=%r)" % (
            self.__class__.__name__, self.style.cssText)

    def __str__(self):
        return "<cssutils.css.%s object style=%r at 0x%x>" % (
            self.__class__.__name__, self.style.cssText, id(self))

View File

@ -0,0 +1,399 @@
"""CSSImportRule implements DOM Level 2 CSS CSSImportRule.
plus:
``name`` property
http://www.w3.org/TR/css3-cascade/#cascading
"""
__all__ = ['CSSImportRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssimportrule.py 1401 2008-07-29 21:07:54Z cthedot $'
import os
import urllib
import urlparse
import xml.dom
import cssrule
import cssutils
class CSSImportRule(cssrule.CSSRule):
"""
Represents an @import rule within a CSS style sheet. The @import rule
is used to import style rules from other style sheets.
Properties
==========
atkeyword: (cssutils only)
the literal keyword used
cssText: of type DOMString
The parsable textual representation of this rule
href: of type DOMString, (DOM readonly, cssutils also writable)
The location of the style sheet to be imported. The attribute will
not contain the url(...) specifier around the URI.
hreftype: 'uri' (serializer default) or 'string' (cssutils only)
The original type of href, not really relevant as it may be
reconfigured in the serializer but it is kept anyway
media: of type stylesheets::MediaList (DOM readonly)
A list of media types for this rule of type MediaList.
name:
An optional name used for cascading
styleSheet: of type CSSStyleSheet (DOM readonly)
The style sheet referred to by this rule. The value of this
attribute is None if the style sheet has not yet been loaded or if
it will not be loaded (e.g. if the stylesheet is for a media type
not supported by the user agent).
Inherits properties from CSSRule
Format
======
import
: IMPORT_SYM S*
[STRING|URI] S* [ medium [ COMMA S* medium]* ]? S* STRING? S* ';' S*
;
"""
type = property(lambda self: cssrule.CSSRule.IMPORT_RULE)
def __init__(self, href=None, mediaText=u'all', name=None,
             parentRule=None, parentStyleSheet=None, readonly=False):
    """
    A readonly rule accepts values in this constructor only.

    Use keyword arguments only, never positional ones!

    href
        location of the style sheet to be imported.
    mediaText
        A list of media types for which this style sheet may be used
        as a string
    name
        optional name of the imported sheet
    """
    super(CSSImportRule, self).__init__(
        parentRule=parentRule, parentStyleSheet=parentStyleSheet)
    self._atkeyword = u'@import'
    self.hreftype = None
    # both have to exist before the ``href`` property is assigned,
    # its setter reads ``styleSheet``
    self._styleSheet = None
    self._href = None
    self.href = href

    self._media = cssutils.stylesheets.MediaList()
    if mediaText:
        self._media.mediaText = mediaText
    self._name = name

    # initial sequence: href, media, name
    parts = self._tempSeq()
    for value, kind in ((self.href, 'href'),
                        (self.media, 'media'),
                        (self.name, 'name')):
        parts.append(value, kind)
    self._setSeq(parts)

    self._readonly = readonly

_usemedia = property(lambda self: self.media.mediaText not in (u'', u'all'),
                     doc="if self._media is used (or simply empty)")
def _getCssText(self):
    """Return this rule as serialized by ``cssutils.ser``."""
    serializer = cssutils.ser
    return serializer.do_CSSImportRule(self)
def _setCssText(self, cssText):
    """
    Parse ``cssText``; href, media, name and sequence of this rule are
    replaced only if the complete text is a wellformed @import rule.

    DOMException on setting

    - HIERARCHY_REQUEST_ERR: (CSSStylesheet)
      Raised if the rule cannot be inserted at this point in the
      style sheet.
    - INVALID_MODIFICATION_ERR: (self)
      Raised if the specified CSS string value represents a different
      type of rule than the current one.
    - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
      Raised if the rule is readonly.
    - SYNTAX_ERR: (self)
      Raised if the specified CSS string value has a syntax error and
      is unparsable.
    """
    super(CSSImportRule, self)._setCssText(cssText)
    tokenizer = self._tokenize2(cssText)
    attoken = self._nexttoken(tokenizer, None)
    if self._type(attoken) != self._prods.IMPORT_SYM:
        self._log.error(u'CSSImportRule: No CSSImportRule found: %s' %
                        self._valuestr(cssText),
                        error=xml.dom.InvalidModificationErr)
        return

    # results of the production callbacks below; a mutable object is
    # needed as the closures cannot rebind local names
    found = {'keyword': self._tokenvalue(attoken),
             'href': None,
             'hreftype': None,
             'media': None,
             'name': None,
             'wellformed': True}

    def take_name(seq, token):
        # shared by on_string and on_ident: token is the name
        found['name'] = self._stringtokenvalue(token)
        seq.append(found['name'], 'name')
        return ';'

    def on_string(expected, seq, token, tokenizer=None):
        if 'href' == expected:
            # a string in first position is the href
            found['href'] = self._stringtokenvalue(token)
            found['hreftype'] = 'string'
            seq.append(found['href'], 'href')
            return 'media name ;'
        if 'name' in expected:
            return take_name(seq, token)
        found['wellformed'] = False
        self._log.error(
            u'CSSImportRule: Unexpected string.', token)
        return expected

    def on_uri(expected, seq, token, tokenizer=None):
        if 'href' != expected:
            found['wellformed'] = False
            self._log.error(
                u'CSSImportRule: Unexpected URI.', token)
            return expected
        # url(...) in first position is the href
        uri = self._uritokenvalue(token)
        found['hreftype'] = 'uri'
        found['href'] = uri
        seq.append(found['href'], 'href')
        return 'media name ;'

    def on_ident(expected, seq, token, tokenizer=None):
        if not expected.startswith('media'):
            found['wellformed'] = False
            self._log.error(
                u'CSSImportRule: Unexpected ident.', token)
            return expected

        # an ident starts the media list; its end (";", a name string
        # or EOF) is checked here too
        mediatokens = self._tokensupto2(
            tokenizer, importmediaqueryendonly=True)
        mediatokens.insert(0, token) # push found token
        last = mediatokens.pop() # retrieve ;
        lastval, lasttyp = self._tokenvalue(last), self._type(last)
        if lastval != u';' and lasttyp not in ('EOF', self._prods.STRING):
            found['wellformed'] = False
            self._log.error(u'CSSImportRule: No ";" found: %s' %
                            self._valuestr(cssText), token=token)

        media = cssutils.stylesheets.MediaList()
        media.mediaText = mediatokens
        if media.wellformed:
            found['media'] = media
            seq.append(media, 'media')
        else:
            found['wellformed'] = False
            self._log.error(u'CSSImportRule: Invalid MediaList: %s' %
                            self._valuestr(cssText), token=token)

        if lasttyp == self._prods.STRING:
            # the media list was ended by the name
            return take_name(seq, last)
        return 'EOF' # ';' is token "last"

    def on_char(expected, seq, token, tokenizer=None):
        # only the final ; is allowed
        val = self._tokenvalue(token)
        if expected.endswith(';') and u';' == val:
            return 'EOF'
        found['wellformed'] = False
        self._log.error(
            u'CSSImportRule: Unexpected char.', token)
        return expected

    # import : IMPORT_SYM S* [STRING|URI]
    #            S* [ medium [ ',' S* medium]* ]? ';' S*
    #         STRING? # see http://www.w3.org/TR/css3-cascade/#cascading
    #        ;
    newseq = self._tempSeq()
    wellformed, expected = self._parse(expected='href',
        seq=newseq, tokenizer=tokenizer,
        productions={'STRING': on_string,
                     'URI': on_uri,
                     'IDENT': on_ident,
                     'CHAR': on_char},
        new=found)

    # result of the parser combined with the one of the callbacks
    wellformed = wellformed and found['wellformed']

    # post conditions
    if not found['href']:
        wellformed = False
        self._log.error(u'CSSImportRule: No href found: %s' %
                        self._valuestr(cssText))
    if expected != 'EOF':
        wellformed = False
        self._log.error(u'CSSImportRule: No ";" found: %s' %
                        self._valuestr(cssText))

    if not wellformed:
        return

    # take over everything found
    self.atkeyword = found['keyword']
    self.hreftype = found['hreftype']
    if found['media']:
        # keep the media object of this rule, update its text only
        self.media.mediaText = found['media'].mediaText
        # and make newseq refer to that object too
        for index, x in enumerate(newseq):
            if x.type == 'media':
                newseq.replace(index, self.media,
                               x.type, x.line, x.col)
                break
    else:
        # no media given: reset
        self.media.mediaText = u'all'
        newseq.append(self.media, 'media')
    self.name = found['name']
    self._setSeq(newseq)
    self.href = found['href']

    if self.styleSheet:
        # title is set by href
        self.styleSheet._parentStyleSheet = self.parentStyleSheet

cssText = property(fget=_getCssText, fset=_setCssText,
    doc="(DOM attribute) The parsable textual representation.")
def _setHref(self, href):
    """Set the location of the imported sheet.

    The ``href`` item of the sequence of this rule is updated; if there
    is none yet, the sequence is replaced by one holding the new href
    only.  The referred style sheet is read only if none has been set
    before.
    """
    # update seq
    for i, item in enumerate(self.seq):
        if 'href' == item.type:
            self._seq[i] = (href, item.type, item.line, item.col)
            break
    else:
        # no href item yet: store the NEW value (``self.href`` still
        # holds the old one at this point)
        seq = self._tempSeq()
        seq.append(href, 'href')
        self._setSeq(seq)
    # set new href
    self._href = href
    if not self.styleSheet:
        # set only if not set before
        self.__setStyleSheet()

href = property(lambda self: self._href, _setHref,
                doc="Location of the style sheet to be imported.")

media = property(lambda self: self._media,
                 doc=u"(DOM readonly) A list of media types for this rule"
                 " of type MediaList")
def _setName(self, name):
    """Set the optional name of the imported sheet.

    ``name`` has to be a string or ``None``, anything else is reported
    as an error to the log.  An empty string is stored as ``None``.
    The ``name`` item of the sequence and the title of an already
    loaded style sheet are kept in sync.
    """
    if not (isinstance(name, basestring) or name is None):
        self._log.error(u'CSSImportRule: Not a valid name: %s' % name)
        return

    # "" or ''
    if not name:
        name = None

    # replace an existing name item in place ...
    for i, item in enumerate(self.seq):
        if 'name' == item.type:
            self._seq[i] = (name, item.type, item.line, item.col)
            break
    else:
        # ... or append one to a copy of the current sequence
        extended = self._tempSeq()
        for old in self.seq:
            extended.append(old.value, old.type, old.line, old.col)
        extended.append(name, 'name')
        self._setSeq(extended)

    self._name = name
    # set title of referred sheet
    if self.styleSheet:
        self.styleSheet.title = name

name = property(lambda self: self._name, _setName,
                doc=u"An optional name for the imported sheet")
def __setStyleSheet(self):
    """Read new CSSStyleSheet cssText from href using parentStyleSheet.href

    Indirectly called if setting ``href``. Nothing happens unless both a
    parent style sheet and an ``href`` are present. The import location is
    resolved against the parent sheet's href (or against the current
    working directory if the parent has none) and fetched through the
    parent sheet's ``_resolveImport``.

    On success the parsed sheet is stored in ``_styleSheet``. An
    ``OSError``, ``IOError`` or ``ValueError`` raised while fetching or
    parsing is logged as a warning and ``_styleSheet`` is left untouched.
    NOTE(review): the original docstring says the sheet is set to ``None``
    on error; this method never assigns ``None`` itself, so that presumably
    relies on the initial value -- confirm against ``__init__``.
    """
    # should simply fail so all errors are caught!
    if self.parentStyleSheet and self.href:
        # relative href
        parentHref = self.parentStyleSheet.href
        if parentHref is None:
            # use cwd instead
            parentHref = u'file:' + urllib.pathname2url(os.getcwd()) + '/'
        href = urlparse.urljoin(parentHref, self.href)

        # only OSError, IOError and ValueError are swallowed (and logged)
        try:
            usedEncoding, enctype, cssText = self.parentStyleSheet._resolveImport(href)
            if cssText is None:
                # caught by the except clause below!
                raise IOError('Cannot read Stylesheet.')
            styleSheet = cssutils.css.CSSStyleSheet(href=href,
                                                    media=self.media,
                                                    ownerRule=self,
                                                    title=self.name)
            # inherit fetcher for @imports in styleSheet
            styleSheet._setFetcher(self.parentStyleSheet._fetcher)
            # contentEncoding with parentStyleSheet.overrideEncoding,
            # HTTP or parent
            encodingOverride, encoding = None, None
            if enctype == 0:
                # enctype 0: the reported encoding is applied as an override
                encodingOverride = usedEncoding
            elif 5 > enctype > 0:
                # enctype 1..4: the reported encoding is passed as plain
                # ``encoding``; any other enctype passes neither
                encoding = usedEncoding

            styleSheet._setCssTextWithEncodingOverride(cssText,
                                             encodingOverride=encodingOverride,
                                             encoding=encoding)

        except (OSError, IOError, ValueError), e:
            self._log.warn(u'CSSImportRule: While processing imported style sheet href=%r: %r'
                           % (self.href, e), neverraise=True)
        else:
            # publish the sheet only after it was read and parsed completely
            self._styleSheet = styleSheet

styleSheet = property(lambda self: self._styleSheet,
                      doc="(readonly) The style sheet referred to by this rule.")
def _getWellformed(self):
    """Return whether this rule is wellformed.

    An ``href`` is always required; the media list only has to be
    wellformed when media is actually used by this rule.
    """
    if not self._usemedia:
        return bool(self.href)
    return bool(self.href and self.media.wellformed)

wellformed = property(_getWellformed)
def __repr__(self):
    """Return a constructor-like representation of this rule.

    ``mediaText`` is reported as ``None`` when the rule does not use media,
    exactly as ``__str__`` does.
    """
    if self._usemedia:
        mediaText = self.media.mediaText
    else:
        mediaText = None
    # use the computed value; formatting self.media.mediaText here would
    # ignore _usemedia
    return "cssutils.css.%s(href=%r, mediaText=%r, name=%r)" % (
        self.__class__.__name__,
        self.href, mediaText, self.name)
def __str__(self):
    """Return a short description including href, media text, name and id."""
    # media text is only meaningful if the rule uses media at all
    mediaText = self.media.mediaText if self._usemedia else None
    details = (self.__class__.__name__, self.href, mediaText, self.name,
               id(self))
    return "<cssutils.css.%s object href=%r mediaText=%r name=%r at 0x%x>" % details

View File

@ -0,0 +1,349 @@
"""CSSMediaRule implements DOM Level 2 CSS CSSMediaRule.
"""
__all__ = ['CSSMediaRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssmediarule.py 1370 2008-07-14 20:15:03Z cthedot $'
import xml.dom
import cssrule
import cssutils
class CSSMediaRule(cssrule.CSSRule):
"""
Objects implementing the CSSMediaRule interface can be identified by the
MEDIA_RULE constant. On these objects the type attribute must return the
value of that constant.
Properties
==========
atkeyword: (cssutils only)
the literal keyword used
cssRules: A css::CSSRuleList of all CSS rules contained within the
media block.
cssText: of type DOMString
The parsable textual representation of this rule
media: of type stylesheets::MediaList, (DOM readonly)
A list of media types for this rule of type MediaList.
name:
An optional name used for cascading
Format
======
media
: MEDIA_SYM S* medium [ COMMA S* medium ]*
STRING? # the name
LBRACE S* ruleset* '}' S*;
"""
# CONSTANT
type = property(lambda self: cssrule.CSSRule.MEDIA_RULE)
def __init__(self, mediaText='all', name=None,
             parentRule=None, parentStyleSheet=None, readonly=False):
    """Create an ``@media`` rule without any contained rules.

    :param mediaText: initial text of the rule's media list
    :param name: optional name of the rule (string or ``None``)
    :param parentRule: passed on to the base class constructor
    :param parentStyleSheet: passed on to the base class constructor
    :param readonly: stored as ``_readonly`` once construction is done and
        also handed to the media list
    """
    super(CSSMediaRule, self).__init__(parentRule=parentRule,
                                       parentStyleSheet=parentStyleSheet)
    self._atkeyword = u'@media'
    self._media = cssutils.stylesheets.MediaList(mediaText,
                                                 readonly=readonly)
    self.name = name
    self.cssRules = cssutils.css.cssrulelist.CSSRuleList()
    # route mutation of the rule list through the checking methods of
    # this rule
    self.cssRules.append = self.insertRule
    self.cssRules.extend = self.insertRule
    # was ``==`` (a comparison without any effect); an assignment is meant.
    # Only explicit ``cssRules.__delitem__(i)`` calls are redirected: the
    # ``del`` statement looks the method up on the type, so ``deleteRule``
    # itself can still use ``del self.cssRules[index]``.
    self.cssRules.__delitem__ = self.deleteRule

    self._readonly = readonly
def __iter__(self):
    """Generator yielding the rules contained in ``cssRules`` in order."""
    for contained in self.cssRules:
        yield contained
def _getCssText(self):
    """Return the serialized rule as produced by the cssutils serializer."""
    return cssutils.ser.do_CSSMediaRule(self)

def _setCssText(self, cssText):
    """
    Parse ``cssText`` as a complete ``@media`` rule.

    The rule is only modified if the media list and the contained rules
    parsed wellformed; then the media text, the name, the name sequence and
    the contained rules are replaced.

    :param cssText:
        a parseable string or a tuple of (cssText, dict-of-namespaces)
    :Exceptions:
        - `NAMESPACE_ERR`: (Selector)
          Raised if a specified selector uses an unknown namespace
          prefix.
        - `SYNTAX_ERR`: (self, StyleDeclaration, etc)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - `INVALID_MODIFICATION_ERR`: (self)
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - `HIERARCHY_REQUEST_ERR`: (CSSStylesheet)
          Raised if the rule cannot be inserted at this point in the
          style sheet.
        - `NO_MODIFICATION_ALLOWED_ERR`: (CSSRule)
          Raised if the rule is readonly.
    """
    super(CSSMediaRule, self)._setCssText(cssText)

    # might be (cssText, namespaces)
    cssText, namespaces = self._splitNamespacesOff(cssText)
    try:
        # use parent style sheet ones if available
        namespaces = self.parentStyleSheet.namespaces
    except AttributeError:
        # no parent sheet (or it has no namespaces): keep the given ones
        pass

    tokenizer = self._tokenize2(cssText)
    attoken = self._nexttoken(tokenizer, None)
    if self._type(attoken) != self._prods.MEDIA_SYM:
        self._log.error(u'CSSMediaRule: No CSSMediaRule found: %s' %
            self._valuestr(cssText),
            error=xml.dom.InvalidModificationErr)
    else:
        # media "name"? { cssRules }

        # media
        wellformed = True
        mediatokens, end = self._tokensupto2(tokenizer,
                                             mediaqueryendonly=True,
                                             separateEnd=True)
        # newmedia is only bound if the media query ended in "{" or a
        # STRING; every other ending returns at the "{" check below before
        # newmedia is read
        if u'{' == self._tokenvalue(end) or self._prods.STRING == self._type(end):
            newmedia = cssutils.stylesheets.MediaList()
            newmedia.mediaText = mediatokens

        # name (optional)
        name = None
        nameseq = self._tempSeq()
        if self._prods.STRING == self._type(end):
            name = self._stringtokenvalue(end)
            # TODO: for now comments are lost after name
            nametokens, end = self._tokensupto2(tokenizer,
                                                blockstartonly=True,
                                                separateEnd=True)
            wellformed, expected = self._parse(None, nameseq, nametokens, {})
            if not wellformed:
                self._log.error(u'CSSMediaRule: Syntax Error: %s' %
                                self._valuestr(cssText))

        # check for {
        if u'{' != self._tokenvalue(end):
            self._log.error(u'CSSMediaRule: No "{" found: %s' %
                            self._valuestr(cssText))
            return

        # cssRules
        cssrulestokens, braceOrEOF = self._tokensupto2(tokenizer,
                                                       mediaendonly=True,
                                                       separateEnd=True)
        # anything left in the tokenizer now is trailing content
        nonetoken = self._nexttoken(tokenizer, None)
        if (u'}' != self._tokenvalue(braceOrEOF) and
            'EOF' != self._type(braceOrEOF)):
            self._log.error(u'CSSMediaRule: No "}" found.',
                            token=braceOrEOF)
        elif nonetoken:
            self._log.error(u'CSSMediaRule: Trailing content found.',
                            token=nonetoken)
        else:
            # for closures: must be a mutable
            newcssrules = [] #cssutils.css.CSSRuleList()
            new = {'wellformed': True }

            def ruleset(expected, seq, token, tokenizer):
                # default production: everything else is parsed as a
                # style rule; only wellformed rules are collected
                rule = cssutils.css.CSSStyleRule(parentRule=self)
                rule.cssText = (self._tokensupto2(tokenizer, token),
                                namespaces)
                if rule.wellformed:
                    rule._parentStyleSheet=self.parentStyleSheet
                    seq.append(rule)
                return expected

            def atrule(expected, seq, token, tokenizer):
                # known at-rules are rejected inside @media, any other
                # at-keyword is kept as an unknown rule
                # TODO: get complete rule!
                tokens = self._tokensupto2(tokenizer, token)
                atval = self._tokenvalue(token)
                if atval in ('@charset ', '@font-face', '@import', '@namespace',
                             '@page', '@media'):
                    self._log.error(
                        u'CSSMediaRule: This rule is not allowed in CSSMediaRule - ignored: %s.'
                            % self._valuestr(tokens),
                            token = token,
                            error=xml.dom.HierarchyRequestErr)
                else:
                    rule = cssutils.css.CSSUnknownRule(parentRule=self,
                                       parentStyleSheet=self.parentStyleSheet)
                    rule.cssText = tokens
                    if rule.wellformed:
                        seq.append(rule)
                return expected

            def COMMENT(expected, seq, token, tokenizer=None):
                # comments between rules are kept as CSSComment objects
                seq.append(cssutils.css.CSSComment([token]))
                return expected

            # re-tokenize only the tokens between "{" and "}"
            tokenizer = (t for t in cssrulestokens) # TODO: not elegant!
            wellformed, expected = self._parse(braceOrEOF,
                                               newcssrules,
                                               tokenizer, {
                                                 'COMMENT': COMMENT,
                                                 'CHARSET_SYM': atrule,
                                                 'FONT_FACE_SYM': atrule,
                                                 'IMPORT_SYM': atrule,
                                                 'NAMESPACE_SYM': atrule,
                                                 'PAGE_SYM': atrule,
                                                 'MEDIA_SYM': atrule,
                                                 'ATKEYWORD': atrule
                                               },
                                               default=ruleset,
                                               new=new)

            # no post condition
            if newmedia.wellformed and wellformed:
                # keep reference: update the existing MediaList object
                # instead of replacing it
                self._media.mediaText = newmedia.mediaText
                self.name = name
                self._setSeq(nameseq)
                del self.cssRules[:]
                for r in newcssrules:
                    # ``append`` of this list was rebound in ``__init__``
                    self.cssRules.append(r)

cssText = property(_getCssText, _setCssText,
    doc="(DOM attribute) The parsable textual representation.")
def _setName(self, name):
    """Set the optional name of this media rule.

    ``name`` must be a string or ``None``; an empty string is stored as
    ``None``. Any other value is reported through the log and the current
    name is kept.
    """
    if isinstance(name, basestring) or name is None:
        # "" or ''
        if not name:
            name = None
        self._name = name
    else:
        # report the class this setter belongs to (the message used to
        # name CSSImportRule)
        self._log.error(u'CSSMediaRule: Not a valid name: %s' % name)

name = property(lambda self: self._name, _setName,
                doc=u"An optional name for the media rules")

# implicit string concatenation instead of a backslash continuation inside
# the literal, which made the doc text depend on the source indentation
media = property(lambda self: self._media,
                 doc=u"(DOM readonly) A list of media types for this rule"
                     u" of type MediaList")

wellformed = property(lambda self: self.media.wellformed)
def deleteRule(self, index):
    """
    Delete the rule at ``index`` from the media block.

    index
        position of the rule to remove within the media block's rule
        collection

    DOMExceptions

    - INDEX_SIZE_ERR: (self)
      Raised if the specified index does not correspond to a rule in
      the media rule list.
    - NO_MODIFICATION_ALLOWED_ERR: (self)
      Raised if this media rule is readonly.
    """
    self._checkReadonly()

    rules = self.cssRules
    try:
        # detach the rule first, then remove it from @media
        rules[index]._parentRule = None
        del rules[index]
    except IndexError:
        message = u'CSSMediaRule: %s is not a valid index in the rulelist of length %i' % (
            index, rules.length)
        raise xml.dom.IndexSizeErr(message)
def add(self, rule):
    """Add rule to end of this mediarule. Same as ``.insertRule(rule)``.

    Returns whatever ``insertRule`` returns, i.e. the index of the newly
    inserted rule or ``None`` if the rule was rejected.
    """
    return self.insertRule(rule, index=None)
def insertRule(self, rule, index=None):
    """
    Insert a new rule into the media block.

    rule
        The parsable text representing the rule. For rule sets this
        contains both the selector and the style declaration. For
        at-rules, this specifies both the at-identifier and the rule
        content.

        cssutils also allows rule to be a valid **CSSRule** object

    index
        position within the media block's rule collection before which
        the rule is inserted. An index equal to the length of the
        collection adds the rule to the end of the media block.
        If index is not given or None the rule is appended.

    DOMException on setting

    - HIERARCHY_REQUEST_ERR:
      Raised if the rule is of a type which is not allowed inside a
      media block (@charset, @font-face, @import, @namespace, @page,
      @media).
    - INDEX_SIZE_ERR: (self)
      Raised if the specified index is not a valid insertion point.
    - NO_MODIFICATION_ALLOWED_ERR: (self)
      Raised if this media rule is readonly.
    - SYNTAX_ERR: (CSSStyleRule)
      Raised if the specified rule has a syntax error and is
      unparsable.

    returns the index within the media block's rule collection of the
    newly inserted rule (None if the rule was rejected).
    """
    self._checkReadonly()

    # check position
    if index is None:
        index = len(self.cssRules)
    elif not (0 <= index <= self.cssRules.length):
        raise xml.dom.IndexSizeErr(
            u'CSSMediaRule: Invalid index %s for CSSRuleList with a length of %s.' % (
                index, self.cssRules.length))

    # parse
    if not isinstance(rule, basestring):
        if not isinstance(rule, cssutils.css.CSSRule):
            self._log.error(u'CSSMediaRule: Not a CSSRule: %s' % rule)
            return
    else:
        # text is parsed in a throwaway sheet and must give exactly one rule
        tempsheet = cssutils.css.CSSStyleSheet()
        tempsheet.cssText = rule
        parsed = tempsheet.cssRules
        if len(parsed) != 1 or (parsed and
           not isinstance(parsed[0], cssutils.css.CSSRule)):
            self._log.error(u'CSSMediaRule: Invalid Rule: %s' % rule)
            return
        rule = parsed[0]

    # CHECK HIERARCHY
    # @charset @font-face @import @namespace @page @media
    css = cssutils.css
    forbidden = (css.CSSCharsetRule, css.CSSFontFaceRule, css.CSSImportRule,
                 css.CSSNamespaceRule, css.CSSPageRule, CSSMediaRule)
    if isinstance(rule, forbidden):
        self._log.error(u'CSSMediaRule: This type of rule is not allowed here: %s' %
                        rule.cssText,
                        error=xml.dom.HierarchyRequestErr)
        return

    self.cssRules.insert(index, rule)
    # attach the rule to this media rule and its sheet
    rule._parentRule = self
    rule._parentStyleSheet = self.parentStyleSheet
    return index
def __repr__(self):
    """Return a constructor-like representation showing the media text."""
    clsname = self.__class__.__name__
    return "cssutils.css.%s(mediaText=%r)" % (clsname, self.media.mediaText)
def __str__(self):
    """Return a short description including the media text and the id."""
    details = (self.__class__.__name__, self.media.mediaText, id(self))
    return "<cssutils.css.%s object mediaText=%r at 0x%x>" % details

View File

@ -0,0 +1,306 @@
"""CSSNamespaceRule currently implements
http://dev.w3.org/csswg/css3-namespace/
(until 0.9.5a2: http://www.w3.org/TR/2006/WD-css3-namespace-20060828/)
"""
__all__ = ['CSSNamespaceRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssnamespacerule.py 1305 2008-06-22 18:42:51Z cthedot $'
import xml.dom
import cssrule
import cssutils
from cssutils.helper import Deprecated
class CSSNamespaceRule(cssrule.CSSRule):
"""
Represents an @namespace rule within a CSS style sheet.
The @namespace at-rule declares a namespace prefix and associates
it with a given namespace (a string). This namespace prefix can then be
used in namespace-qualified names such as those described in the
Selectors Module [SELECT] or the Values and Units module [CSS3VAL].
Properties
==========
atkeyword (cssutils only)
the literal keyword used
cssText: of type DOMString
The parsable textual representation of this rule
namespaceURI: of type DOMString
The namespace URI (a simple string!) which is bound to the given
prefix. If no prefix is set (``CSSNamespaceRule.prefix==''``)
the namespace defined by ``namespaceURI`` is set as the default
namespace.
prefix: of type DOMString
The prefix used in the stylesheet for the given
``CSSNamespaceRule.nsuri``. If prefix is empty namespaceURI sets a
default namespace for the stylesheet.
Inherits properties from CSSRule
Format
======
namespace
: NAMESPACE_SYM S* [namespace_prefix S*]? [STRING|URI] S* ';' S*
;
namespace_prefix
: IDENT
;
"""
type = property(lambda self: cssrule.CSSRule.NAMESPACE_RULE)
def __init__(self, namespaceURI=None, prefix=None, cssText=None,
             parentRule=None, parentStyleSheet=None, readonly=False):
    """
    Create an ``@namespace`` rule from a namespace URI (plus optional
    prefix) or, if no URI is given, from ``cssText``.

    :Parameters:
        namespaceURI
            The namespace URI (a simple string!) which is bound to the
            given prefix. Without a prefix the namespace is the default
            namespace.
        prefix
            The prefix used in the stylesheet for ``namespaceURI``;
            only used when ``namespaceURI`` is given.
        cssText
            parsed only if no ``namespaceURI`` is given
        parentRule, parentStyleSheet
            passed on to the base class constructor
        readonly
            stored as ``_readonly`` at the very end, so the properties
            above can still be set during construction

    Do not use as positional but as keyword parameters only!

    format namespace::

        namespace
          : NAMESPACE_SYM S* [namespace_prefix S*]? [STRING|URI] S* ';' S*
          ;
        namespace_prefix
          : IDENT
          ;
    """
    super(CSSNamespaceRule, self).__init__(parentRule=parentRule,
                                           parentStyleSheet=parentStyleSheet)
    self._atkeyword = u'@namespace'
    self._prefix = u''
    self._namespaceURI = None

    if namespaceURI:
        # setter order matters: URI first, then the prefix
        self.namespaceURI = namespaceURI
        self.prefix = prefix
        # finally install a clean sequence: prefix followed by URI
        seq = self._tempSeq()
        for value, kind in ((self.prefix, 'prefix'),
                            (self.namespaceURI, 'namespaceURI')):
            seq.append(value, kind)
        self._setSeq(seq)
    elif cssText is not None:
        self.cssText = cssText

    if parentStyleSheet:
        self._parentStyleSheet = parentStyleSheet

    self._readonly = readonly
def _getCssText(self):
    """
    returns serialized property cssText
    """
    return cssutils.ser.do_CSSNamespaceRule(self)

def _setCssText(self, cssText):
    """
    Parse ``cssText`` as a complete ``@namespace`` rule; keyword, prefix,
    namespace URI and sequence are only taken over if the text parsed
    wellformed.

    DOMException on setting

    :param cssText: initial value for this rules cssText which is parsed
    :Exceptions:
        - `HIERARCHY_REQUEST_ERR`: (CSSStylesheet)
          Raised if the rule cannot be inserted at this point in the
          style sheet.
        - `INVALID_MODIFICATION_ERR`: (self)
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - `NO_MODIFICATION_ALLOWED_ERR`: (CSSRule)
          Raised if the rule is readonly.
        - `SYNTAX_ERR`: (self)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
    """
    super(CSSNamespaceRule, self)._setCssText(cssText)
    tokenizer = self._tokenize2(cssText)
    attoken = self._nexttoken(tokenizer, None)
    if self._type(attoken) != self._prods.NAMESPACE_SYM:
        self._log.error(u'CSSNamespaceRule: No CSSNamespaceRule found: %s' %
            self._valuestr(cssText),
            error=xml.dom.InvalidModificationErr)
    else:
        # for closures: must be a mutable
        new = {'keyword': self._tokenvalue(attoken),
               'prefix': u'',
               'uri': None,
               'wellformed': True
               }

        # The productions below return the next expected state:
        # 'prefix or uri' -> 'uri' -> ';' -> 'EOF'

        def _ident(expected, seq, token, tokenizer=None):
            # the namespace prefix, optional
            if 'prefix or uri' == expected:
                new['prefix'] = self._tokenvalue(token)
                seq.append(new['prefix'], 'prefix')
                return 'uri'
            else:
                new['wellformed'] = False
                self._log.error(
                    u'CSSNamespaceRule: Unexpected ident.', token)
                return expected

        def _string(expected, seq, token, tokenizer=None):
            # the namespace URI as a STRING
            # (matches both 'prefix or uri' and 'uri')
            if expected.endswith('uri'):
                new['uri'] = self._stringtokenvalue(token)
                seq.append(new['uri'], 'namespaceURI')
                return ';'
            else:
                new['wellformed'] = False
                self._log.error(
                    u'CSSNamespaceRule: Unexpected string.', token)
                return expected

        def _uri(expected, seq, token, tokenizer=None):
            # the namespace URI as URI which is DEPRECATED
            if expected.endswith('uri'):
                uri = self._uritokenvalue(token)
                new['uri'] = uri
                seq.append(new['uri'], 'namespaceURI')
                return ';'
            else:
                new['wellformed'] = False
                self._log.error(
                    u'CSSNamespaceRule: Unexpected URI.', token)
                return expected

        def _char(expected, seq, token, tokenizer=None):
            # final ;
            val = self._tokenvalue(token)
            if ';' == expected and u';' == val:
                return 'EOF'
            else:
                new['wellformed'] = False
                self._log.error(
                    u'CSSNamespaceRule: Unexpected char.', token)
                return expected

        # "NAMESPACE_SYM S* [namespace_prefix S*]? [STRING|URI] S* ';' S*"
        newseq = self._tempSeq()
        wellformed, expected = self._parse(expected='prefix or uri',
            seq=newseq, tokenizer=tokenizer,
            productions={'IDENT': _ident,
                         'STRING': _string,
                         'URI': _uri,
                         'CHAR': _char},
            new=new)

        # wellformed set by parse
        wellformed = wellformed and new['wellformed']

        # post conditions
        if new['uri'] is None:
            wellformed = False
            self._log.error(u'CSSNamespaceRule: No namespace URI found: %s' %
                self._valuestr(cssText))

        if expected != 'EOF':
            wellformed = False
            self._log.error(u'CSSNamespaceRule: No ";" found: %s' %
                self._valuestr(cssText))

        # set all
        if wellformed:
            self.atkeyword = new['keyword']
            # the prefix is written directly, the URI goes through its
            # setter (which installs a sequence of its own on first use);
            # the parsed sequence is installed last and so replaces it
            self._prefix = new['prefix']
            self.namespaceURI = new['uri']
            self._setSeq(newseq)

cssText = property(fget=_getCssText, fset=_setCssText,
    doc="(DOM attribute) The parsable textual representation.")
def _setNamespaceURI(self, namespaceURI):
    """
    Set the namespace URI once; later attempts to change it are errors.

    DOMException on setting

    :param namespaceURI: the initial value for this rules namespaceURI
    :Exceptions:
        - `NO_MODIFICATION_ALLOWED_ERR`:
          (CSSRule) Raised if this rule is readonly or a namespaceURI is
          already set in this rule.
    """
    self._checkReadonly()
    current = self._namespaceURI
    if current:
        # already set: only the very same value is accepted silently
        if current != namespaceURI:
            self._log.error(u'CSSNamespaceRule: namespaceURI is readonly.',
                            error=xml.dom.NoModificationAllowedErr)
        return

    # initial setting
    self._namespaceURI = namespaceURI
    seq = self._tempSeq()
    seq.append(namespaceURI, 'namespaceURI')
    self._setSeq(seq) # makes seq readonly!

namespaceURI = property(lambda self: self._namespaceURI, _setNamespaceURI,
    doc="URI (string!) of the defined namespace.")
def _setPrefix(self, prefix=None):
    """
    Set the prefix used for the defined namespace.

    An empty or missing prefix is stored as ``u''``. Any other value must
    start with an IDENT token whose value becomes the new prefix. The
    ``prefix`` item of the rule's sequence is replaced in place; if there
    is none yet, a new sequence starting with the prefix and followed by
    all current items is installed.

    DOMException on setting

    :param prefix: the new prefix
    :Exceptions:
        - `SYNTAX_ERR`: (TODO)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - `NO_MODIFICATION_ALLOWED_ERR`: CSSRule)
          Raised if this rule is readonly.
    """
    self._checkReadonly()
    if not prefix:
        prefix = u''
    else:
        tokenizer = self._tokenize2(prefix)
        prefixtoken = self._nexttoken(tokenizer, None)
        if not prefixtoken or self._type(prefixtoken) != self._prods.IDENT:
            self._log.error(u'CSSNamespaceRule: No valid prefix "%s".' %
                self._valuestr(prefix),
                error=xml.dom.SyntaxErr)
            return
        else:
            prefix = self._tokenvalue(prefixtoken)

    # update seq
    for i, x in enumerate(self._seq):
        # identify the item by its type; comparing the item object itself
        # with the old prefix string does not find it
        if x.type == 'prefix':
            self._seq[i] = (prefix, 'prefix', None, None)
            break
    else:
        # put prefix at the beginning! All existing items (e.g. the
        # namespaceURI) are kept behind it instead of overwriting slot 0.
        seq = self._tempSeq()
        seq.append(prefix, 'prefix')
        for x in self._seq:
            seq.append(x.value, x.type, x.line, x.col)
        self._setSeq(seq)

    # set new prefix
    self._prefix = prefix

prefix = property(lambda self: self._prefix, _setPrefix,
    doc="Prefix used for the defined namespace.")
# def _setParentStyleSheet(self, parentStyleSheet):
# self._parentStyleSheet = parentStyleSheet
#
# parentStyleSheet = property(lambda self: self._parentStyleSheet,
# _setParentStyleSheet,
# doc=u"Containing CSSStyleSheet.")
wellformed = property(lambda self: self.namespaceURI is not None)
def __repr__(self):
    """Return a constructor-like representation of this rule."""
    parts = (self.__class__.__name__, self.namespaceURI, self.prefix)
    return "cssutils.css.%s(namespaceURI=%r, prefix=%r)" % parts
def __str__(self):
    """Return a short description including URI, prefix and the id."""
    parts = (self.__class__.__name__, self.namespaceURI, self.prefix,
             id(self))
    return "<cssutils.css.%s object namespaceURI=%r prefix=%r at 0x%x>" % parts

View File

@ -0,0 +1,286 @@
"""CSSPageRule implements DOM Level 2 CSS CSSPageRule.
"""
__all__ = ['CSSPageRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: csspagerule.py 1284 2008-06-05 16:29:17Z cthedot $'
import xml.dom
import cssrule
import cssutils
from selectorlist import SelectorList
from cssstyledeclaration import CSSStyleDeclaration
class CSSPageRule(cssrule.CSSRule):
"""
The CSSPageRule interface represents a @page rule within a CSS style
sheet. The @page rule is used to specify the dimensions, orientation,
margins, etc. of a page box for paged media.
Properties
==========
atkeyword (cssutils only)
the literal keyword used
cssText: of type DOMString
The parsable textual representation of this rule
selectorText: of type DOMString
The parsable textual representation of the page selector for the rule.
style: of type CSSStyleDeclaration
The declaration-block of this rule.
Inherits properties from CSSRule
Format
======
::
page
: PAGE_SYM S* pseudo_page? S*
LBRACE S* declaration [ ';' S* declaration ]* '}' S*
;
pseudo_page
: ':' IDENT # :first, :left, :right in CSS 2.1
;
"""
type = property(lambda self: cssrule.CSSRule.PAGE_RULE)
# constant but needed:
wellformed = True
def __init__(self, selectorText=None, style=None, parentRule=None,
             parentStyleSheet=None, readonly=False):
    """
    Create an ``@page`` rule.

    if readonly allows setting of properties in constructor only

    selectorText
        type string
    style
        CSSStyleDeclaration (or declaration text) for this rule
    parentRule, parentStyleSheet
        passed on to the base class constructor
    readonly
        stored as ``_readonly`` at the very end
    """
    super(CSSPageRule, self).__init__(parentRule=parentRule,
                                      parentStyleSheet=parentStyleSheet)
    self._atkeyword = u'@page'
    # Defaults first: the ``style`` setter writes into the existing
    # ``_style`` object and ``selectorText`` is read back below even if
    # the given selector could not be parsed, so both attributes have to
    # exist before the setters run.
    self._selectorText = u''
    self._style = CSSStyleDeclaration(parentRule=self)

    tempseq = self._tempSeq()
    if selectorText:
        self.selectorText = selectorText
        tempseq.append(self.selectorText, 'selectorText')
    if style:
        self.style = style
        tempseq.append(self.style, 'style')
    self._setSeq(tempseq)

    self._readonly = readonly
def __parseSelectorText(self, selectorText):
    """
    parses selectorText which may also be a list of tokens
    and returns (selectorText, seq)

    The returned selector text is ``None`` if no ``:IDENT`` pseudo page
    was found; the returned seq holds the selector and any comments.

    see _setSelectorText for details
    """
    # for closures: must be a mutable
    new = {'selector': None, 'wellformed': True}

    def _char(expected, seq, token, tokenizer=None):
        # pseudo_page, :left, :right or :first
        val = self._tokenvalue(token)
        if ':' == expected and u':' == val:
            try:
                # the IDENT must follow the colon directly
                identtoken = tokenizer.next()
            except StopIteration:
                self._log.error(
                    u'CSSPageRule selectorText: No IDENT found.', token)
            else:
                ival, ityp = self._tokenvalue(identtoken), self._type(identtoken)
                if self._prods.IDENT != ityp:
                    self._log.error(
                        u'CSSPageRule selectorText: Expected IDENT but found: %r' %
                        ival, token)
                else:
                    new['selector'] = val + ival
                    seq.append(new['selector'], 'selector')
                    return 'EOF'
            # NOTE(review): the two error paths above do not clear
            # new['wellformed'] -- confirm whether that is intended
            return expected
        else:
            new['wellformed'] = False
            self._log.error(
                u'CSSPageRule selectorText: Unexpected CHAR: %r' % val, token)
            return expected

    def S(expected, seq, token, tokenizer=None):
        "Does not raise if EOF is found."
        # whitespace is dropped
        return expected

    def COMMENT(expected, seq, token, tokenizer=None):
        "Does not raise if EOF is found."
        seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
        return expected

    newseq = self._tempSeq()
    wellformed, expected = self._parse(expected=':',
        seq=newseq, tokenizer=self._tokenize2(selectorText),
        productions={'CHAR': _char,
                     'COMMENT': COMMENT,
                     'S': S},
        new=new)
    # NOTE(review): ``wellformed`` is computed here but neither returned
    # nor read again in this method
    wellformed = wellformed and new['wellformed']
    newselector = new['selector']

    # post conditions
    # NOTE(review): none of the productions above returns 'ident' (only
    # ':' and 'EOF' are used), so this check looks unreachable unless
    # ``_parse`` itself can produce that state -- confirm
    if expected == 'ident':
        self._log.error(
            u'CSSPageRule selectorText: No valid selector: %r' %
                self._valuestr(selectorText))

    # unknown pseudo pages only give a warning, they are still returned
    if not newselector in (None, u':first', u':left', u':right'):
        self._log.warn(u'CSSPageRule: Unknown CSS 2.1 @page selector: %r' %
                 newselector, neverraise=True)

    return newselector, newseq
def _getCssText(self):
    """
    returns serialized property cssText
    """
    return cssutils.ser.do_CSSPageRule(self)

def _setCssText(self, cssText):
    """
    Parse ``cssText`` as a complete ``@page`` rule; selector, style and
    sequence are only replaced if the text parsed wellformed.

    DOMException on setting

    - SYNTAX_ERR: (self, StyleDeclaration)
      Raised if the specified CSS string value has a syntax error and
      is unparsable.
    - INVALID_MODIFICATION_ERR: (self)
      Raised if the specified CSS string value represents a different
      type of rule than the current one.
    - HIERARCHY_REQUEST_ERR: (CSSStylesheet)
      Raised if the rule cannot be inserted at this point in the
      style sheet.
    - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
      Raised if the rule is readonly.
    """
    super(CSSPageRule, self)._setCssText(cssText)

    tokenizer = self._tokenize2(cssText)
    if self._type(self._nexttoken(tokenizer)) != self._prods.PAGE_SYM:
        self._log.error(u'CSSPageRule: No CSSPageRule found: %s' %
                        self._valuestr(cssText),
                        error=xml.dom.InvalidModificationErr)
    else:
        wellformed = True
        # split into: selector tokens, "{", declaration tokens, "}" or EOF
        selectortokens, startbrace = self._tokensupto2(tokenizer,
                                                       blockstartonly=True,
                                                       separateEnd=True)
        styletokens, braceorEOFtoken = self._tokensupto2(tokenizer,
                                                    blockendonly=True,
                                                    separateEnd=True)
        # anything still left in the tokenizer is trailing content
        nonetoken = self._nexttoken(tokenizer)
        if self._tokenvalue(startbrace) != u'{':
            wellformed = False
            self._log.error(
                u'CSSPageRule: No start { of style declaration found: %r' %
                self._valuestr(cssText), startbrace)
        elif nonetoken:
            wellformed = False
            self._log.error(
                u'CSSPageRule: Trailing content found.', token=nonetoken)

        # parsing continues even after the errors above; the results are
        # only used if ``wellformed`` is still set at the end
        newselector, newselectorseq = self.__parseSelectorText(selectortokens)

        newstyle = CSSStyleDeclaration()
        val, typ = self._tokenvalue(braceorEOFtoken), self._type(braceorEOFtoken)
        if val != u'}' and typ != 'EOF':
            wellformed = False
            self._log.error(
                u'CSSPageRule: No "}" after style declaration found: %r' %
                self._valuestr(cssText))
        else:
            if 'EOF' == typ:
                # add again as style needs it
                styletokens.append(braceorEOFtoken)
            newstyle.cssText = styletokens

        if wellformed:
            self._selectorText = newselector # already parsed
            self.style = newstyle
            self._setSeq(newselectorseq) # contains upto style only

cssText = property(_getCssText, _setCssText,
    doc="(DOM) The parsable textual representation of the rule.")
def _getSelectorText(self):
    """Return the page selector text of this rule as currently stored."""
    return self._selectorText

def _setSelectorText(self, selectorText):
    """
    Set the page selector of this rule.

    selector: DOM String
        in CSS 2.1 one of
        - :first
        - :left
        - :right
        - empty

    If WS or Comments are included they are ignored here! Only
    way to add a comment is via setting ``cssText``

    The value is only taken over if a selector could be parsed from it;
    in that case an existing selector item of the rule's sequence is
    replaced as well.

    DOMException on setting

    - SYNTAX_ERR:
      Raised if the specified CSS string value has a syntax error
      and is unparsable.
    - NO_MODIFICATION_ALLOWED_ERR: (self)
      Raised if this rule is readonly.
    """
    self._checkReadonly()

    # may raise SYNTAX_ERR
    newselectortext, newseq = self.__parseSelectorText(selectorText)

    if newselectortext:
        for i, x in enumerate(self.seq):
            # The selector item is found by its type ('selectorText' when
            # added by the constructor, 'selector' when added by the
            # parser); comparing the item object with the old selector
            # string does not find it. It is replaced by a complete item
            # tuple which keeps type and position.
            if x.type in ('selectorText', 'selector'):
                self._seq[i] = (newselectortext, x.type, x.line, x.col)
                break
        self._selectorText = newselectortext

selectorText = property(_getSelectorText, _setSelectorText,
    doc="""(DOM) The parsable textual representation of the page selector for the rule.""")
def _getStyle(self):
    """Return the declaration block object of this rule."""
    declaration = self._style
    return declaration

def _setStyle(self, style):
    """
    Replace the content of this rule's declaration block.

    style
        StyleDeclaration or string
    """
    self._checkReadonly()

    if not isinstance(style, basestring):
        # cssText would be serialized with optional preferences
        # so use seq!
        self._style._seq = style.seq
        return
    # text is parsed by the existing declaration object
    self._style.cssText = style

style = property(_getStyle, _setStyle,
                 doc="(DOM) The declaration-block of this rule set.")
def __repr__(self):
    """Return a constructor-like representation of this rule."""
    parts = (self.__class__.__name__, self.selectorText, self.style.cssText)
    return "cssutils.css.%s(selectorText=%r, style=%r)" % parts
def __str__(self):
    """Return a short description including selector, style and the id."""
    parts = (self.__class__.__name__, self.selectorText, self.style.cssText,
             id(self))
    return "<cssutils.css.%s object selectorText=%r style=%r at 0x%x>" % parts

View File

@ -0,0 +1,349 @@
"""CSS2Properties (partly!) implements DOM Level 2 CSS CSS2Properties used
by CSSStyleDeclaration
TODO: CSS2Properties
If an implementation does implement this interface, it is expected to
understand the specific syntax of the shorthand properties, and apply
their semantics; when the margin property is set, for example, the
marginTop, marginRight, marginBottom and marginLeft properties are
actually being set by the underlying implementation.
When dealing with CSS "shorthand" properties, the shorthand properties
should be decomposed into their component longhand properties as
appropriate, and when querying for their value, the form returned
should be the shortest form exactly equivalent to the declarations made
in the ruleset. However, if there is no shorthand declaration that
could be added to the ruleset without changing in any way the rules
already declared in the ruleset (i.e., by adding longhand rules that
were previously not declared in the ruleset), then the empty string
should be returned for the shorthand property.
For example, querying for the font property should not return
"normal normal normal 14pt/normal Arial, sans-serif", when
"14pt Arial, sans-serif" suffices. (The normals are initial values, and
are implied by use of the longhand property.)
If the values for all the longhand properties that compose a particular
string are the initial values, then a string consisting of all the
initial values should be returned (e.g. a border-width value of
"medium" should be returned as such, not as "").
For some shorthand properties that take missing values from other
sides, such as the margin, padding, and border-[width|style|color]
properties, the minimum number of sides possible should be used; i.e.,
"0px 10px" will be returned instead of "0px 10px 0px 10px".
If the value of a shorthand property can not be decomposed into its
component longhand properties, as is the case for the font property
with a value of "menu", querying for the values of the component
longhand properties should return the empty string.
TODO: CSS2Properties DOMImplementation
The interface found within this section are not mandatory. A DOM
application can use the hasFeature method of the DOMImplementation
interface to determine whether it is supported or not. The feature
string for this extended interface listed in this section is "CSS2"
and the version is "2.0".
cssvalues
=========
contributed by Kevin D. Smith, thanks!
"cssvalues" is used as a property validator.
It is an importable object that contains a dictionary of compiled regular
expressions. The keys of this dictionary are all of the valid CSS property
names. The values are compiled regular expressions that can be used to
validate the values for that property. (Actually, the values are references
to the 'match' method of a compiled regular expression, so that they are
simply called like functions.)
"""
__all__ = ['CSS2Properties', 'cssvalues']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssproperties.py 1116 2008-03-05 13:52:23Z cthedot $'
import re
"""
Define some regular expression fragments that will be used as
macros within the CSS property value regular expressions.
"""
# Regular expression fragments, keyed by macro name.  A fragment may refer to
# another fragment as ``{name}``; the references are resolved when the
# property patterns are expanded.  Quantifiers such as ``{3}`` or ``{1,6}``
# are not macro references because macro names must start with a letter.
MACROS = {
    'ident': r'[-]?{nmstart}{nmchar}*',
    'name': r'{nmchar}+',
    'nmstart': r'[_a-z]|{nonascii}|{escape}',
    'nonascii': r'[^\0-\177]',
    'unicode': r'\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?',
    'escape': r'{unicode}|\\[ -~\200-\777]',
    # 'escape': r'{unicode}|\\[ -~\200-\4177777]',
    'int': r'[-]?\d+',
    'nmchar': r'[\w-]|{nonascii}|{escape}',
    'num': r'[-]?\d+|[-]?\d*\.\d+',
    'number': r'{num}',
    'string': r'{string1}|{string2}',
    'string1': r'"(\\\"|[^\"])*"',
    'string2': r"'(\\\'|[^\'])*'",
    'nl': r'\n|\r\n|\r|\f',
    'w': r'\s*',

    'integer': r'{int}',
    'length': r'0|{num}(em|ex|px|in|cm|mm|pt|pc)',
    'angle': r'0|{num}(deg|grad|rad)',
    'time': r'0|{num}m?s',
    'frequency': r'0|{num}k?Hz',
    'color': r'(maroon|red|orange|yellow|olive|purple|fuchsia|white|lime|green|navy|blue|aqua|teal|black|silver|gray|ActiveBorder|ActiveCaption|AppWorkspace|Background|ButtonFace|ButtonHighlight|ButtonShadow|ButtonText|CaptionText|GrayText|Highlight|HighlightText|InactiveBorder|InactiveCaption|InactiveCaptionText|InfoBackground|InfoText|Menu|MenuText|Scrollbar|ThreeDDarkShadow|ThreeDFace|ThreeDHighlight|ThreeDLightShadow|ThreeDShadow|Window|WindowFrame|WindowText)|#[0-9a-f]{3}|#[0-9a-f]{6}|rgb\({w}{int}{w},{w}{int}{w},{w}{int}{w}\)|rgb\({w}{num}%{w},{w}{num}%{w},{w}{num}%{w}\)',
    'uri': r'url\({w}({string}|(\\\)|[^\)])+){w}\)',
    'percentage': r'{num}%',
    'border-style': 'none|hidden|dotted|dashed|solid|double|groove|ridge|inset|outset',
    'border-color': '{color}',
    'border-width': '{length}|thin|medium|thick',

    'background-color': r'{color}|transparent|inherit',
    'background-image': r'{uri}|none|inherit',
    'background-position': r'({percentage}|{length})(\s*({percentage}|{length}))?|((top|center|bottom)\s*(left|center|right))|((left|center|right)\s*(top|center|bottom))|inherit',
    'background-repeat': r'repeat|repeat-x|repeat-y|no-repeat|inherit',
    'background-attachment': r'scroll|fixed|inherit',

    # The first three offsets each end in a comma, the fourth does not.
    # (A stray "}" after the first "auto" used to make "auto" unmatchable
    # for the first three offsets.)
    'shape': r'rect\(({w}({length}|auto){w},){3}{w}({length}|auto){w}\)',
    'counter': r'counter\({w}{identifier}{w}(?:,{w}{list-style-type}{w})?\)',
    'identifier': r'{ident}',
    'family-name': r'{string}|{identifier}',
    'generic-family': r'serif|sans-serif|cursive|fantasy|monospace',
    'absolute-size': r'(x?x-)?(small|large)|medium',
    'relative-size': r'smaller|larger',
    'font-family': r'(({family-name}|{generic-family}){w},{w})*({family-name}|{generic-family})|inherit',
    'font-size': r'{absolute-size}|{relative-size}|{length}|{percentage}|inherit',
    'font-style': r'normal|italic|oblique|inherit',
    'font-variant': r'normal|small-caps|inherit',
    'font-weight': r'normal|bold|bolder|lighter|[1-9]00|inherit',
    'line-height': r'normal|{number}|{length}|{percentage}|inherit',
    'list-style-image': r'{uri}|none|inherit',
    'list-style-position': r'inside|outside|inherit',
    'list-style-type': r'disc|circle|square|decimal|decimal-leading-zero|lower-roman|upper-roman|lower-greek|lower-(latin|alpha)|upper-(latin|alpha)|armenian|georgian|none|inherit',
    'margin-width': r'{length}|{percentage}|auto',
    'outline-color': r'{color}|invert|inherit',
    'outline-style': r'{border-style}|inherit',
    'outline-width': r'{border-width}|inherit',
    'padding-width': r'{length}|{percentage}',
    'specific-voice': r'{identifier}',
    'generic-voice': r'male|female|child',
    'content': r'{string}|{uri}|{counter}|attr\({w}{identifier}{w}\)|open-quote|close-quote|no-open-quote|no-close-quote',
    'border-attrs': r'{border-width}|{border-style}|{border-color}',
    'background-attrs': r'{background-color}|{background-image}|{background-repeat}|{background-attachment}|{background-position}',
    'list-attrs': r'{list-style-type}|{list-style-position}|{list-style-image}',
    'font-attrs': r'{font-style}|{font-variant}|{font-weight}',
    'outline-attrs': r'{outline-color}|{outline-style}|{outline-width}',
    'text-attrs': r'underline|overline|line-through|blink',
}
"""
Define the regular expressions for validating all CSS values
"""
# Validation pattern for every known CSS property, keyed by CSS property
# name.  The values are written with ``{macro}`` references into MACROS; at
# import time they are expanded and then replaced by the ``match`` method of
# the compiled, anchored, case-insensitive regular expression.
cssvalues = {
    'azimuth': r'{angle}|(behind\s+)?(left-side|far-left|left|center-left|center|center-right|right|far-right|right-side)(\s+behind)?|behind|leftwards|rightwards|inherit',
    'background-attachment': r'{background-attachment}',
    'background-color': r'{background-color}',
    'background-image': r'{background-image}',
    'background-position': r'{background-position}',
    'background-repeat': r'{background-repeat}',
    # Each piece should only be allowed one time
    'background': r'{background-attrs}(\s+{background-attrs})*|inherit',
    'border-collapse': r'collapse|separate|inherit',
    'border-color': r'({border-color}|transparent)(\s+({border-color}|transparent)){0,3}|inherit',
    'border-spacing': r'{length}(\s+{length})?|inherit',
    'border-style': r'{border-style}(\s+{border-style}){0,3}|inherit',
    'border-top': r'{border-attrs}(\s+{border-attrs})*|inherit',
    'border-right': r'{border-attrs}(\s+{border-attrs})*|inherit',
    'border-bottom': r'{border-attrs}(\s+{border-attrs})*|inherit',
    'border-left': r'{border-attrs}(\s+{border-attrs})*|inherit',
    'border-top-color': r'{border-color}|transparent|inherit',
    'border-right-color': r'{border-color}|transparent|inherit',
    'border-bottom-color': r'{border-color}|transparent|inherit',
    'border-left-color': r'{border-color}|transparent|inherit',
    'border-top-style': r'{border-style}|inherit',
    'border-right-style': r'{border-style}|inherit',
    'border-bottom-style': r'{border-style}|inherit',
    'border-left-style': r'{border-style}|inherit',
    'border-top-width': r'{border-width}|inherit',
    'border-right-width': r'{border-width}|inherit',
    'border-bottom-width': r'{border-width}|inherit',
    # was a second 'border-right-width' entry, which left the left side
    # without any validator
    'border-left-width': r'{border-width}|inherit',
    'border-width': r'{border-width}(\s+{border-width}){0,3}|inherit',
    'border': r'{border-attrs}(\s+{border-attrs})*|inherit',
    'bottom': r'{length}|{percentage}|auto|inherit',
    'caption-side': r'top|bottom|inherit',
    'clear': r'none|left|right|both|inherit',
    'clip': r'{shape}|auto|inherit',
    'color': r'{color}|inherit',
    'content': r'normal|{content}(\s+{content})*|inherit',
    # the integer is optional for every counter, not only for the first one
    'counter-increment': r'({identifier}(\s+{integer})?)(\s+({identifier}(\s+{integer})?))*|none|inherit',
    'counter-reset': r'({identifier}(\s+{integer})?)(\s+({identifier}(\s+{integer})?))*|none|inherit',
    'cue-after': r'{uri}|none|inherit',
    'cue-before': r'{uri}|none|inherit',
    # two values must be separated by whitespace
    'cue': r'({uri}|none|inherit)(\s+({uri}|none|inherit))?|inherit',
    'cursor': r'((({uri}{w},{w})*)?(auto|crosshair|default|pointer|move|(e|ne|nw|n|se|sw|s|w)-resize|text|wait|help|progress))|inherit',
    'direction': r'ltr|rtl|inherit',
    'display': r'inline|block|list-item|run-in|inline-block|table|inline-table|table-row-group|table-header-group|table-footer-group|table-row|table-column-group|table-column|table-cell|table-caption|none|inherit',
    'elevation': r'{angle}|below|level|above|higher|lower|inherit',
    'empty-cells': r'show|hide|inherit',
    'float': r'left|right|none|inherit',
    'font-family': r'{font-family}',
    'font-size': r'{font-size}',
    'font-style': r'{font-style}',
    'font-variant': r'{font-variant}',
    'font-weight': r'{font-weight}',
    'font': r'({font-attrs}\s+)*{font-size}({w}/{w}{line-height})?\s+{font-family}|caption|icon|menu|message-box|small-caption|status-bar|inherit',
    'height': r'{length}|{percentage}|auto|inherit',
    'left': r'{length}|{percentage}|auto|inherit',
    'letter-spacing': r'normal|{length}|inherit',
    'line-height': r'{line-height}',
    'list-style-image': r'{list-style-image}',
    'list-style-position': r'{list-style-position}',
    'list-style-type': r'{list-style-type}',
    'list-style': r'{list-attrs}(\s+{list-attrs})*|inherit',
    'margin-right': r'{margin-width}|inherit',
    'margin-left': r'{margin-width}|inherit',
    'margin-top': r'{margin-width}|inherit',
    'margin-bottom': r'{margin-width}|inherit',
    'margin': r'{margin-width}(\s+{margin-width}){0,3}|inherit',
    'max-height': r'{length}|{percentage}|none|inherit',
    'max-width': r'{length}|{percentage}|none|inherit',
    'min-height': r'{length}|{percentage}|none|inherit',
    'min-width': r'{length}|{percentage}|none|inherit',
    'orphans': r'{integer}|inherit',
    'outline-color': r'{outline-color}',
    'outline-style': r'{outline-style}',
    'outline-width': r'{outline-width}',
    'outline': r'{outline-attrs}(\s+{outline-attrs})*|inherit',
    'overflow': r'visible|hidden|scroll|auto|inherit',
    'padding-top': r'{padding-width}|inherit',
    'padding-right': r'{padding-width}|inherit',
    'padding-bottom': r'{padding-width}|inherit',
    'padding-left': r'{padding-width}|inherit',
    'padding': r'{padding-width}(\s+{padding-width}){0,3}|inherit',
    'page-break-after': r'auto|always|avoid|left|right|inherit',
    'page-break-before': r'auto|always|avoid|left|right|inherit',
    'page-break-inside': r'avoid|auto|inherit',
    'pause-after': r'{time}|{percentage}|inherit',
    'pause-before': r'{time}|{percentage}|inherit',
    # two values must be separated by whitespace
    'pause': r'({time}|{percentage})(\s+({time}|{percentage}))?|inherit',
    'pitch-range': r'{number}|inherit',
    'pitch': r'{frequency}|x-low|low|medium|high|x-high|inherit',
    'play-during': r'{uri}(\s+(mix|repeat))*|auto|none|inherit',
    'position': r'static|relative|absolute|fixed|inherit',
    'quotes': r'({string}\s+{string})(\s+{string}\s+{string})*|none|inherit',
    'richness': r'{number}|inherit',
    'right': r'{length}|{percentage}|auto|inherit',
    'speak-header': r'once|always|inherit',
    'speak-numeral': r'digits|continuous|inherit',
    'speak-punctuation': r'code|none|inherit',
    'speak': r'normal|none|spell-out|inherit',
    'speech-rate': r'{number}|x-slow|slow|medium|fast|x-fast|faster|slower|inherit',
    'stress': r'{number}|inherit',
    'table-layout': r'auto|fixed|inherit',
    'text-align': r'left|right|center|justify|inherit',
    'text-decoration': r'none|{text-attrs}(\s+{text-attrs})*|inherit',
    'text-indent': r'{length}|{percentage}|inherit',
    'text-transform': r'capitalize|uppercase|lowercase|none|inherit',
    'top': r'{length}|{percentage}|auto|inherit',
    'unicode-bidi': r'normal|embed|bidi-override|inherit',
    'vertical-align': r'baseline|sub|super|top|text-top|middle|bottom|text-bottom|{percentage}|{length}|inherit',
    'visibility': r'visible|hidden|collapse|inherit',
    # the alternation is grouped so that the comma applies to specific
    # voices as well as to generic ones (same shape as 'font-family')
    'voice-family': r'(({specific-voice}|{generic-voice}){w},{w})*({specific-voice}|{generic-voice})|inherit',
    'volume': r'{number}|{percentage}|silent|x-soft|soft|medium|loud|x-loud|inherit',
    'white-space': r'normal|pre|nowrap|pre-wrap|pre-line|inherit',
    'widows': r'{integer}|inherit',
    'width': r'{length}|{percentage}|auto|inherit',
    'word-spacing': r'normal|{length}|inherit',
    'z-index': r'auto|{integer}|inherit',
}
def _expand_macros(tokdict):
""" Expand macros in token dictionary """
def macro_value(m):
return '(?:%s)' % MACROS[m.groupdict()['macro']]
for key, value in tokdict.items():
while re.search(r'{[a-z][a-z0-9-]*}', value):
value = re.sub(r'{(?P<macro>[a-z][a-z0-9-]*)}',
macro_value, value)
tokdict[key] = value
return tokdict
def _compile_regexes(tokdict):
""" Compile all regular expressions into callable objects """
for key, value in tokdict.items():
tokdict[key] = re.compile('^(?:%s)$' % value, re.I).match
return tokdict
# Turn ``cssvalues`` (in place) from macro patterns into validators: first
# all macros are expanded, then every pattern is replaced by the ``match``
# method of its compiled regular expression.
_compile_regexes(_expand_macros(cssvalues))
# functions to convert between CSS and DOM name
_reCSStoDOMname = re.compile('-[a-z]', re.I)
def _toDOMname(CSSname):
"""
returns DOMname for given CSSname e.g. for CSSname 'font-style' returns
'fontStyle'
"""
def _doCSStoDOMname2(m): return m.group(0)[1].capitalize()
return _reCSStoDOMname.sub(_doCSStoDOMname2, CSSname)
_reDOMtoCSSname = re.compile('([A-Z])[a-z]+')
def _toCSSname(DOMname):
"""
returns CSSname for given DOMname e.g. for DOMname 'fontStyle' returns
'font-style'
"""
def _doDOMtoCSSname2(m): return '-' + m.group(0).lower()
return _reDOMtoCSSname.sub(_doDOMtoCSSname2, DOMname)
class CSS2Properties(object):
    """
    Convenience interface for reading and writing the properties of a
    CSSStyleDeclaration as plain attributes.

    There is one attribute for every property specified in CSS2. Reading
    an attribute is equivalent to calling ``getPropertyValue`` of the
    CSSStyleDeclaration interface, assigning to it is equivalent to calling
    ``setProperty``.

    cssutils additionally supports ``del`` on these attributes to remove a
    CSS property from a CSSStyleDeclaration.

    This class is abstract, an inheriting class needs to provide

    - ``_getP``
    - ``_setP``
    - ``_delP``
    """
    # the attributes themselves are attached after the class definition!

    def _getP(self, CSSname):
        """Return the value of property ``CSSname`` (no-op in this class)."""
        pass

    def _setP(self, CSSname, value):
        """Set property ``CSSname`` to ``value`` (no-op in this class)."""
        pass

    def _delP(self, CSSname):
        """Remove property ``CSSname`` (no-op in this class)."""
        pass
# Attach the list of DOM names (e.g. 'fontStyle') of all properties that have
# a validator in ``cssvalues`` to CSS2Properties.
# The list is used by CSSStyleDeclaration to check whether an attribute name
# is an allowed property.
# The information duplicates the keys of ``cssvalues``; any better way?
CSS2Properties._properties = [_toDOMname(p) for p in cssvalues.keys()]
# add CSS2Properties to CSSStyleDeclaration:
def __named_property_def(DOMname):
    """
    Build the accessor functions for the property with the given DOM name.

    The DOM name is converted to the CSS name once, here; the returned
    closures remember it and hand the CSS name to ``_getP``, ``_setP`` and
    ``_delP`` of the object they are used on.

    Returns a tuple ``(getter, setter, deleter)`` suitable for ``property``.
    """
    CSSname = _toCSSname(DOMname)

    def getter(self):
        return self._getP(CSSname)

    def setter(self, value):
        self._setP(CSSname, value)

    def deleter(self):
        self._delP(CSSname)

    return getter, setter, deleter
# add all CSS2Properties to CSSStyleDeclaration
# one ``property`` per DOM name, built from the accessor closures
for DOMname in CSS2Properties._properties:
    setattr(CSS2Properties, DOMname, property(*__named_property_def(DOMname)))

View File

@ -0,0 +1,134 @@
"""CSSRule implements DOM Level 2 CSS CSSRule."""
__all__ = ['CSSRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssrule.py 1177 2008-03-20 17:47:23Z cthedot $'
import xml.dom
import cssutils
class CSSRule(cssutils.util.Base2):
    """
    Abstract base interface for every kind of CSS statement, i.e. rule sets
    as well as at-rules. All rules of a style sheet are expected to be
    preserved, including rules the parser does not recognize; those are
    represented by the CSSUnknownRule interface.

    Properties
    ==========
    cssText: of type DOMString
        The parsable textual representation of the rule, reflecting its
        current state and not its initial value.
    parentRule: of type CSSRule, readonly
        The containing rule if this rule is nested inside another rule
        (e.g. a style rule inside an @media block), else None.
    parentStyleSheet: of type CSSStyleSheet, readonly
        The style sheet that contains this rule.
    type: of type unsigned short, readonly
        The type of the rule, one of the type constants of this class.

    cssutils only
    -------------
    seq (READONLY):
        sequence of the parts of the rule including comments but
        excluding @KEYWORD and braces
    typeString: string
        The name of the type of this rule, e.g. 'STYLE_RULE'. Mainly
        useful for debugging.
    wellformed:
        if a rule is valid
    """

    # CSSRule type constants: an integer indicating which type of rule
    # this is.
    COMMENT = -1  # cssutils only
    UNKNOWN_RULE = 0
    STYLE_RULE = 1
    CHARSET_RULE = 2
    IMPORT_RULE = 3
    MEDIA_RULE = 4
    FONT_FACE_RULE = 5
    PAGE_RULE = 6
    NAMESPACE_RULE = 7  # CSSOM

    # Type names indexed by type constant. 'COMMENT' is the last entry so
    # that index -1 (the COMMENT constant) finds it.
    _typestrings = [
        'UNKNOWN_RULE',
        'STYLE_RULE',
        'CHARSET_RULE',
        'IMPORT_RULE',
        'MEDIA_RULE',
        'FONT_FACE_RULE',
        'PAGE_RULE',
        'NAMESPACE_RULE',
        'COMMENT',
    ]

    # The type of this rule, one of the type constants above.
    # Overwritten in derived classes.
    type = UNKNOWN_RULE

    def __init__(self, parentRule=None, parentStyleSheet=None, readonly=False):
        """
        Set the attributes common to all rules.

        ``readonly`` is accepted but not applied here: the rule starts out
        writable and the flag must be set once the initialization of the
        inheriting rule is done.
        """
        super(CSSRule, self).__init__()
        self._parentRule = parentRule
        self._parentStyleSheet = parentStyleSheet
        self._setSeq(self._tempSeq())
        self._readonly = False

    def _setCssText(self, cssText):
        """
        Only checks that the rule is not readonly; the actual parsing has to
        be done in the subclass.

        DOMException on setting

        - SYNTAX_ERR:
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - INVALID_MODIFICATION_ERR:
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - HIERARCHY_REQUEST_ERR:
          Raised if the rule cannot be inserted at this point in the
          style sheet.
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if the rule is readonly.
        """
        self._checkReadonly()

    cssText = property(
        lambda self: u'',
        _setCssText,
        doc=("(DOM) The parsable textual representation of the rule. This\n"
             "reflects the current state of the rule and not its initial value.\n"
             "The initial value is saved, but this may be removed in a future\n"
             "version!\n"
             "MUST BE OVERWRITTEN IN SUBCLASS TO WORK!"))

    def _setAtkeyword(self, akw):
        """
        Accept ``akw`` if no keyword is set yet or if it normalizes to the
        same value as the current keyword, else log an error.
        """
        differs = self.atkeyword and (
            self._normalize(akw) != self._normalize(self.atkeyword))
        if differs:
            self._log.error(u'%s: Invalid atkeyword for this rule: %r' %
                            (self._normalize(self.atkeyword), akw),
                            error=xml.dom.InvalidModificationErr)
            return
        self._atkeyword = akw

    atkeyword = property(
        lambda self: self._atkeyword,
        _setAtkeyword,
        doc=u"@keyword for @rules")

    parentRule = property(
        lambda self: self._parentRule,
        doc=u"READONLY")

    parentStyleSheet = property(
        lambda self: self._parentStyleSheet,
        doc=u"READONLY")

    wellformed = property(
        lambda self: False,
        doc=u"READONLY")

    typeString = property(
        lambda self: CSSRule._typestrings[self.type],
        doc="Name of this rules type.")

View File

@ -0,0 +1,60 @@
"""
CSSRuleList implements DOM Level 2 CSS CSSRuleList.
Partly also
* http://dev.w3.org/csswg/cssom/#the-cssrulelist
"""
__all__ = ['CSSRuleList']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssrulelist.py 1116 2008-03-05 13:52:23Z cthedot $'
class CSSRuleList(list):
    """
    An (ordered) list of statements. The items are accessible via an
    integral index starting from 0.

    This is a subclass of the standard Python list, so theoretically all
    standard list methods are available. The setting methods ``append``,
    ``extend``, ``__setitem__`` and ``__setslice__`` are blocked in this
    class though and ``__init__`` ignores its arguments; working versions
    are added later on instances of this class if so desired.

    E.g. CSSStyleSheet adds ``append`` which is not available in a simple
    instance of this class!

    Properties
    ==========
    length: of type unsigned long, readonly
        The number of CSSRules in the list. The range of valid child rule
        indices is 0 to length-1 inclusive.
    """

    def __init__(self, *ignored):
        "Sets nothing, all arguments are ignored; filling is defined later."

    def __notimplemented(self, *ignored):
        "Stand-in for all blocked setting methods, always raises."
        raise NotImplementedError(
            'Must be implemented by class using an instance of this class.')

    # no direct setting possible
    append = __notimplemented
    extend = __notimplemented
    __setitem__ = __notimplemented
    __setslice__ = __notimplemented

    def item(self, index):
        """
        (DOM)
        Return the CSSRule at position ``index`` or None if the list lookup
        raises an IndexError for it. The order in this collection
        represents the order of the rules in the CSS style sheet.
        """
        try:
            rule = self[index]
        except IndexError:
            rule = None
        return rule

    length = property(
        lambda self: len(self),
        doc="(DOM) The number of CSSRules in the list.")

View File

@ -0,0 +1,651 @@
"""CSSStyleDeclaration implements DOM Level 2 CSS CSSStyleDeclaration and
extends CSS2Properties
see
http://www.w3.org/TR/1998/REC-CSS2-19980512/syndata.html#parsing-errors
Unknown properties
------------------
User agents must ignore a declaration with an unknown property.
For example, if the style sheet is::
H1 { color: red; rotation: 70minutes }
the user agent will treat this as if the style sheet had been::
H1 { color: red }
Cssutils gives a message about any unknown properties but
keeps any property (if syntactically correct).
Illegal values
--------------
User agents must ignore a declaration with an illegal value. For example::
IMG { float: left } /* correct CSS2 */
IMG { float: left here } /* "here" is not a value of 'float' */
IMG { background: "red" } /* keywords cannot be quoted in CSS2 */
IMG { border-width: 3 } /* a unit must be specified for length values */
A CSS2 parser would honor the first rule and ignore the rest, as if the
style sheet had been::
IMG { float: left }
IMG { }
IMG { }
IMG { }
Cssutils again will issue a message (WARNING in this case) about invalid
CSS2 property values.
TODO:
This interface is also used to provide a read-only access to the
computed values of an element. See also the ViewCSS interface.
- return computed values and not literal values
- simplify unit pairs/triples/quadruples
2px 2px 2px 2px -> 2px for border/padding...
- normalize compound properties like:
background: no-repeat left url() #fff
-> background: #fff url() no-repeat left
"""
__all__ = ['CSSStyleDeclaration', 'Property']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssstyledeclaration.py 1284 2008-06-05 16:29:17Z cthedot $'
import xml.dom
import cssutils
from cssproperties import CSS2Properties
from property import Property
class CSSStyleDeclaration(CSS2Properties, cssutils.util.Base2):
"""
The CSSStyleDeclaration class represents a single CSS declaration
block. This class may be used to determine the style properties
currently set in a block or to set style properties explicitly
within the block.
While an implementation may not recognize all CSS properties within
a CSS declaration block, it is expected to provide access to all
specified properties in the style sheet through the
CSSStyleDeclaration interface.
Furthermore, implementations that support a specific level of CSS
should correctly handle CSS shorthand properties for that level. For
a further discussion of shorthand properties, see the CSS2Properties
interface.
Additionally the CSS2Properties interface is implemented.
Properties
==========
cssText
The parsable textual representation of the declaration block
(excluding the surrounding curly braces). Setting this attribute
will result in the parsing of the new value and resetting of the
properties in the declaration block. It also allows the insertion
of additional properties and their values into the block.
length: of type unsigned long, readonly
The number of properties that have been explicitly set in this
declaration block. The range of valid indices is 0 to length-1
inclusive.
parentRule: of type CSSRule, readonly
The CSS rule that contains this declaration block or None if this
CSSStyleDeclaration is not attached to a CSSRule.
seq: a list (cssutils)
All parts of this style declaration including CSSComments
$css2propertyname
All properties defined in the CSS2Properties class are available
as direct properties of CSSStyleDeclaration with their respective
DOM name, so e.g. ``fontStyle`` for property 'font-style'.
These may be used as::
>>> style = CSSStyleDeclaration(cssText='color: red')
>>> style.color = 'green'
>>> print style.color
green
>>> del style.color
>>> print style.color # print empty string
Format
======
[Property: Value Priority?;]* [Property: Value Priority?]?
"""
def __init__(self, cssText=u'', parentRule=None, readonly=False):
    """
    cssText
        Shortcut, the initial value of ``CSSStyleDeclaration.cssText``
    parentRule
        The CSS rule that contains this declaration block or
        None if this CSSStyleDeclaration is not attached to a CSSRule.
    readonly
        defaults to False
    """
    super(CSSStyleDeclaration, self).__init__()
    self._parentRule = parentRule
    # ``cssText`` is assigned before ``_readonly``: its setter starts with
    # a readonly check
    self.cssText = cssText
    self._readonly = readonly
def __contains__(self, nameOrProperty):
    """
    Check if a property with the given name is set in this declaration.

    nameOrProperty
        a Property (its ``name`` is used) or a string (which is normalized
        first); the literalname is never used
    """
    if isinstance(nameOrProperty, Property):
        wanted = nameOrProperty.name
    else:
        wanted = self._normalize(nameOrProperty)
    return wanted in self.__nnames()
def __iter__(self):
"""
iterator of set Property objects with different normalized names.
"""
def properties():
for name in self.__nnames():
yield self.getProperty(name)
return properties()
def __setattr__(self, n, v):
    """
    Only allow setting of internal attributes and of the properties known
    to CSS2Properties; any other name raises an AttributeError. Unknown
    properties MUST be set with an explicit call of
    ``CSSStyleDeclaration.setProperty``.

    TODO:
        implementation of known is not really nice, any alternative?
    """
    internal = ('_tokenizer', '_log', '_ttypes',
                '_seq', 'seq', 'parentRule', '_parentRule', 'cssText',
                'valid', 'wellformed',
                '_readonly')
    if n not in internal and n not in CSS2Properties._properties:
        raise AttributeError(
            'Unknown CSS Property, ``CSSStyleDeclaration.setProperty("%s", ...)`` MUST be used.'
            % n)
    super(CSSStyleDeclaration, self).__setattr__(n, v)
def __nnames(self):
    """
    Return an iterator over the different names of all Property items in
    ``self.seq``.

    The sequence is walked backwards and each name is collected only once,
    the result is then reversed again, so a name that is set several times
    is listed at the position of its last occurrence.
    """
    collected = []
    for entry in reversed(self.seq):
        candidate = entry.value
        if not isinstance(candidate, Property):
            continue
        if candidate.name in collected:
            continue
        collected.append(candidate.name)
    return reversed(collected)
def __getitem__(self, CSSName):
"""Retrieve the value of property ``CSSName`` from this declaration.
``CSSName`` will be always normalized.
"""
return self.getPropertyValue(CSSName)
def __setitem__(self, CSSName, value):
"""Set value of property ``CSSName``. ``value`` may also be a tuple of
(value, priority), e.g. style['color'] = ('red', 'important')
``CSSName`` will be always normalized.
"""
priority = None
if type(value) == tuple:
value, priority = value
return self.setProperty(CSSName, value, priority)
def __delitem__(self, CSSName):
"""Delete property ``CSSName`` from this declaration.
If property is not in this declaration return u'' just like
removeProperty.
``CSSName`` will be always normalized.
"""
return self.removeProperty(CSSName)
# overwritten accessor functions for CSS2Properties' properties
def _getP(self, CSSName):
"""
(DOM CSS2Properties)
Overwritten here and effectively the same as
``self.getPropertyValue(CSSname)``.
Parameter is in CSSname format ('font-style'), see CSS2Properties.
Example::
>>> style = CSSStyleDeclaration(cssText='font-style:italic;')
>>> print style.fontStyle
italic
"""
return self.getPropertyValue(CSSName)
def _setP(self, CSSName, value):
"""
(DOM CSS2Properties)
Overwritten here and effectively the same as
``self.setProperty(CSSname, value)``.
Only known CSS2Properties may be set this way, otherwise an
AttributeError is raised.
For these unknown properties ``setPropertyValue(CSSname, value)``
has to be called explicitly.
Also setting the priority of properties needs to be done with a
call like ``setPropertyValue(CSSname, value, priority)``.
Example::
>>> style = CSSStyleDeclaration()
>>> style.fontStyle = 'italic'
>>> # or
>>> style.setProperty('font-style', 'italic', '!important')
"""
self.setProperty(CSSName, value)
# TODO: Shorthand ones
def _delP(self, CSSName):
"""
(cssutils only)
Overwritten here and effectively the same as
``self.removeProperty(CSSname)``.
Example::
>>> style = CSSStyleDeclaration(cssText='font-style:italic;')
>>> del style.fontStyle
>>> print style.fontStyle # prints u''
"""
self.removeProperty(CSSName)
def _getCssText(self):
    """
    Return this declaration serialized by
    ``cssutils.ser.do_css_CSSStyleDeclaration``.
    """
    serializer = cssutils.ser
    return serializer.do_css_CSSStyleDeclaration(self)
def _setCssText(self, cssText):
    """
    Parse ``cssText`` and replace the complete content of this declaration
    block with the result, so properties may be removed or added.

    DOMException on setting

    - NO_MODIFICATION_ALLOWED_ERR: (self)
      Raised if this declaration is readonly or a property is readonly.
    - SYNTAX_ERR: (self)
      Raised if the specified CSS string value has a syntax error and
      is unparsable.
    """
    self._checkReadonly()
    tokenizer = self._tokenize2(cssText)

    def ident(expected, seq, token, tokenizer=None):
        # an IDENT starts a property; a trailing ";" is not part of it
        tokens = self._tokensupto2(tokenizer, starttoken=token,
                                   semicolon=True)
        if self._tokenvalue(tokens[-1]) == u';':
            tokens.pop()
        prop = Property()
        prop.cssText = tokens
        if not prop.wellformed:
            self._log.error(u'CSSStyleDeclaration: Syntax Error in Property: %s'
                            % self._valuestr(tokens))
        else:
            seq.append(prop, 'Property')
        # does not matter in this case
        return expected

    def unexpected(expected, seq, token, tokenizer=None):
        # error: skip the offending token and what follows it
        # (presumably up to the next ";" or "}" - the skipping itself is
        # done by _tokensupto2, TODO confirm)
        start = self._tokenvalue(token)
        rest = self._valuestr(
            self._tokensupto2(tokenizer, propertyvalueendonly=True))
        ignored = start + rest
        self._log.error(u'CSSStyleDeclaration: Unexpected token, ignoring upto %r.' %
                        ignored, token)
        # does not matter in this case
        return expected

    # [Property: Value;]* Property: Value?
    newseq = self._tempSeq()
    wellformed, expected = self._parse(expected=None,
                                       seq=newseq,
                                       tokenizer=tokenizer,
                                       productions={'IDENT': ident},
                                       default=unexpected)
    # wellformed is deliberately not checked: anything invalid has not
    # been added to newseq anyway
    self._setSeq(newseq)
# read: serialized declaration (_getCssText)
# write: parse the given text and replace all properties (_setCssText)
cssText = property(_getCssText, _setCssText,
doc="(DOM) A parsable textual representation of the declaration\
block excluding the surrounding curly braces.")
def getCssText(self, separator=None):
    """
    Return this declaration serialized like the ``cssText`` property but
    with the properties separated by the given ``separator``, which may
    e.g. be u'' to be able to use the result directly in an HTML style
    attribute. ";" is always part of each property (except the last one)
    and can **not** be set with separator!
    """
    serializer = cssutils.ser
    return serializer.do_css_CSSStyleDeclaration(self, separator)
def _getParentRule(self):
return self._parentRule
def _setParentRule(self, parentRule):
self._parentRule = parentRule
# readable and writable; both accessors work on ``self._parentRule``
parentRule = property(_getParentRule, _setParentRule,
doc="(DOM) The CSS rule that contains this declaration block or\
None if this CSSStyleDeclaration is not attached to a CSSRule.")
def getProperties(self, name=None, all=False):
    """
    Return a list of Property objects set in this declaration.

    name
        optional name of the requested properties (a filter). The name is
        **always normalized** before it is compared.
    all=False
        if False (DEFAULT) only the effective properties are returned:
        with a ``name`` a list holding the single effective property of
        that name (or an empty list), without a ``name`` the effective
        property for every different name.

        if True every Property in the sequence is returned in its original
        order, including properties set multiple times; with a ``name``
        only the ones having this normalized name.
    """
    if not all:
        if name:
            # single property, but still as a list
            effective = self.getProperty(name)
            if effective:
                return [effective]
            return []
        # effective properties in name order
        return [self.getProperty(nname) for nname in self.__nnames()]

    # all properties or all with this name
    nname = self._normalize(name)
    unfiltered = not nname
    result = []
    for entry in self.seq:
        candidate = entry.value
        if not isinstance(candidate, Property):
            continue
        if unfiltered or candidate.name == nname:
            result.append(candidate)
    return result
def getProperty(self, name, normalize=True):
    """
    Return the effective Property object for ``name`` or None if no such
    property is set.

    The sequence is searched from the end: the first matching property
    that has a priority wins, otherwise the first match found (i.e. the
    one set last) is returned.

    name
        of the CSS property, always lowercase (even if not normalized)
    normalize
        if True (DEFAULT) the normalized name is compared as well, so
        "color", "COLOR" or "C\\olor" will all be equivalent

        If False only the literal name is compared, so the result may
        **NOT** be the effective property but the effective one for the
        unnormalized name.
    """
    nname = self._normalize(name)
    fallback = None
    for entry in reversed(self.seq):
        candidate = entry.value
        if not isinstance(candidate, Property):
            continue
        matches = ((normalize and nname == candidate.name)
                   or name == candidate.literalname)
        if not matches:
            continue
        if candidate.priority:
            return candidate
        if not fallback:
            fallback = candidate
    return fallback
def getPropertyCSSValue(self, name, normalize=True):
    """
    Return the ``cssValue`` of the effective property, or None if the
    property has not been set in this declaration block.

    name
        of the CSS property, always lowercase (even if not normalized)
    normalize
        passed on to ``getProperty``; if True (DEFAULT) "color", "COLOR"
        or "C\\olor" are all equivalent.

    (DOM)
    The DOM says None should be returned for shorthand properties, whose
    values are only accessible as strings via getPropertyValue and
    setProperty.

        **cssutils currently always returns a CSSValue if the property
        is set**; for shorthand names an info message is logged instead.

    for more on shorthand properties see
        http://www.dustindiaz.com/css-shorthand/
    """
    normalized = self._normalize(name)
    if normalized in self._SHORTHANDPROPERTIES:
        message = u'CSSValue for shorthand property "%s" should be None, this may be implemented later.' % normalized
        self._log.info(message, neverraise=True)

    effective = self.getProperty(name, normalize)
    if not effective:
        return None
    return effective.cssValue
def getPropertyValue(self, name, normalize=True):
    """
    Return the ``value`` of the effective property, or the empty string
    if the property has not been set in this declaration block.

    name
        of the CSS property, always lowercase (even if not normalized)
    normalize
        passed on to ``getProperty``; if True (DEFAULT) "color", "COLOR"
        or "C\\olor" are all equivalent.
    """
    effective = self.getProperty(name, normalize)
    if not effective:
        return u''
    return effective.value
def getPropertyPriority(self, name, normalize=True):
    """
    Return the ``priority`` of the effective property (e.g. the
    "important" qualifier), or the empty string if the property has not
    been set in this declaration block.

    name
        of the CSS property, always lowercase (even if not normalized)
    normalize
        passed on to ``getProperty``; if True (DEFAULT) "color", "COLOR"
        or "C\\olor" are all equivalent.
    """
    effective = self.getProperty(name, normalize)
    if not effective:
        return u''
    return effective.priority
def removeProperty(self, name, normalize=True):
    """
    (DOM)
    Remove a CSS property from this declaration block and return the
    value it had (as given by ``getPropertyValue``), or the empty string
    if it was not set.

    name
        of the CSS property
    normalize
        if True (DEFAULT) *all* Properties whose ``Property.name``
        equals the normalized ``name`` are removed, so "color", "COLOR"
        or "C\\olor" are all equivalent.

        If False only Properties whose ``literalname`` equals ``name``
        are removed, and the returned value is the one effective for
        that unnormalized name.

    raises DOMException

    - NO_MODIFICATION_ALLOWED_ERR: (self)
      Raised by ``_checkReadonly`` if this declaration is readonly.
    """
    self._checkReadonly()
    # read the value first; it is what gets returned after removal
    previous = self.getPropertyValue(name, normalize=normalize)

    kept = self._tempSeq()
    if normalize:
        target, attribute = self._normalize(name), 'name'
    else:
        target, attribute = name, 'literalname'

    for entry in self.seq:
        doomed = (isinstance(entry.value, Property)
                  and getattr(entry.value, attribute) == target)
        if not doomed:
            kept.appendItem(entry)

    self._setSeq(kept)
    return previous
def setProperty(self, name, value=None, priority=u'', normalize=True):
    """
    (DOM)
    Set a property value and priority within this declaration block.

    name
        of the CSS property to set (in W3C DOM the parameter is called
        "propertyName"), always lowercase (even if not normalized)

        If a property with this name is present it will be reset.

        cssutils also allows ``name`` to be a Property object; all other
        parameters are ignored in this case.
    value
        the new value of the property, omit if name is already a Property
    priority
        the optional priority of the property (e.g. "important")
    normalize
        if True (DEFAULT) name will be normalized (lowercase, no simple
        escapes) so "color", "COLOR" or "C\\olor" will all be equivalent

    A property that is not wellformed is not set; a warning is logged
    instead.

    DOMException on setting

    - SYNTAX_ERR: (self)
      Raised if the specified value has a syntax error and is
      unparsable.
    - NO_MODIFICATION_ALLOWED_ERR: (self)
      Raised if this declaration is readonly or the property is
      readonly.
    """
    self._checkReadonly()

    if isinstance(name, Property):
        newp = name
        name = newp.literalname
    else:
        newp = Property(name, value, priority)

    if not newp.wellformed:
        self._log.warn(u'Invalid Property: %s: %s %s'
                       % (name, value, priority))
        return

    nname = self._normalize(name)
    candidates = self.getProperties(name, all=(not normalize))
    for existing in reversed(candidates):
        if (normalize and existing.name == nname) or \
                existing.literalname == name:
            # reset the last matching property in place
            existing.cssValue = newp.cssValue.cssText
            existing.priority = newp.priority
            break
    else:
        # nothing to reset: append, lifting the sequence's readonly flag
        # only for the append and restoring it even if append raises
        self.seq._readonly = False
        try:
            self.seq.append(newp, 'Property')
        finally:
            self.seq._readonly = True
def item(self, index):
    """
    (DOM)
    Return the name of the property at ordinal position ``index``, or the
    empty string if there is no property at that position.

    index
        position in the list of names produced by ``__nnames()``;
        negative values behave like negative indexes on Python lists,
        so -1 is the last element.

    ATTENTION:
    ``item()`` and ``length`` both work on the names returned by
    ``__nnames()``, so a name that is set several times is only counted
    once.
    """
    ordered = list(self.__nnames())
    try:
        found = ordered[index]
    except IndexError:
        found = u''
    return found
# The doc text is built by implicit string concatenation so that no source
# indentation leaks into it; it also gains the previously missing word "set".
length = property(lambda self: len(self.__nnames()),
    doc="(DOM) The number of distinct properties that have been explicitly "
        "set in this declaration block. The range of valid indices is 0 to "
        "length-1 inclusive. These are properties with a different ``name`` "
        "only. ``item()`` and ``length`` work on the same set here.")
def __repr__(self):
    """Return a constructor-like representation built from the cssText
    serialized with a single space as separator."""
    class_name = self.__class__.__name__
    text = self.getCssText(separator=u' ')
    return "cssutils.css.%s(cssText=%r)" % (class_name, text)
def __str__(self):
    """Return a short description showing ``length``, the count of all
    properties (duplicates included) and the object id."""
    class_name = self.__class__.__name__
    effective_count = self.length
    total_count = len(self.getProperties(all=True))
    return "<cssutils.css.%s object length=%r (all: %r) at 0x%x>" % (
        class_name, effective_count, total_count, id(self))

View File

@ -0,0 +1,242 @@
"""CSSStyleRule implements DOM Level 2 CSS CSSStyleRule.
"""
__all__ = ['CSSStyleRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssstylerule.py 1284 2008-06-05 16:29:17Z cthedot $'
import xml.dom
import cssrule
import cssutils
from selectorlist import SelectorList
from cssstyledeclaration import CSSStyleDeclaration
class CSSStyleRule(cssrule.CSSRule):
"""
The CSSStyleRule object represents a ruleset specified (if any) in a CSS
style sheet. It provides access to a declaration block as well as to the
associated group of selectors.
Properties
==========
selectorList: of type SelectorList (cssutils only)
A list of all Selector elements for the rule set.
selectorText: of type DOMString
The textual representation of the selector for the rule set. The
implementation may have stripped out insignificant whitespace while
parsing the selector.
style: of type CSSStyleDeclaration, (DOM)
The declaration-block of this rule set.
type
the type of this rule, constant cssutils.CSSRule.STYLE_RULE
inherited properties:
- cssText
- parentRule
- parentStyleSheet
Format
======
ruleset::
: selector [ COMMA S* selector ]*
LBRACE S* declaration [ ';' S* declaration ]* '}' S*
;
"""
type = property(lambda self: cssrule.CSSRule.STYLE_RULE)
def __init__(self, selectorText=None, style=None, parentRule=None,
             parentStyleSheet=None, readonly=False):
    """
    :Parameters:
        selectorText
            string parsed into selectorList
        style
            string parsed into CSSStyleDeclaration for this CSSStyleRule
        parentRule, parentStyleSheet
            handed unchanged to the CSSRule base initializer
        readonly
            if True allows setting of properties in constructor only
    """
    super(CSSStyleRule, self).__init__(parentRule=parentRule,
                                       parentStyleSheet=parentStyleSheet)

    # start with empty containers owned by this rule
    self._selectorList = SelectorList(parentRule=self)
    self._style = CSSStyleDeclaration(parentRule=self)

    # go through the public properties so the given values get parsed
    for attribute, given in (('selectorText', selectorText),
                             ('style', style)):
        if given:
            setattr(self, attribute, given)

    # set last so the assignments above are still permitted
    self._readonly = readonly
def _getCssText(self):
    """
    Return this rule serialized by the global serializer
    (``cssutils.ser.do_CSSStyleRule``).
    """
    serializer = cssutils.ser
    return serializer.do_CSSStyleRule(self)
def _setCssText(self, cssText):
    """
    Parse ``cssText`` into a selector list and a style declaration and,
    only if everything is wellformed, replace the current ones.

    :param cssText:
        a parseable string or a tuple of (cssText, dict-of-namespaces)
    :Exceptions:
        - `NAMESPACE_ERR`: (Selector)
          Raised if the specified selector uses an unknown namespace
          prefix.
        - `SYNTAX_ERR`: (self, StyleDeclaration, etc)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - `INVALID_MODIFICATION_ERR`: (self)
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - `HIERARCHY_REQUEST_ERR`: (CSSStylesheet)
          Raised if the rule cannot be inserted at this point in the
          style sheet.
        - `NO_MODIFICATION_ALLOWED_ERR`: (CSSRule)
          Raised if the rule is readonly.
    """
    super(CSSStyleRule, self)._setCssText(cssText)

    # might be (cssText, namespaces)
    cssText, namespaces = self._splitNamespacesOff(cssText)
    try:
        # use parent style sheet ones if available
        namespaces = self.parentStyleSheet.namespaces
    except AttributeError:
        # no usable parent sheet: keep the namespaces split off above
        pass

    # split the token stream into "selector ... {" and "declarations ... }"
    tokenizer = self._tokenize2(cssText)
    selectortokens = self._tokensupto2(tokenizer, blockstartonly=True)
    styletokens = self._tokensupto2(tokenizer, blockendonly=True)
    # anything left after the block end is an error
    trail = self._nexttoken(tokenizer)
    if trail:
        self._log.error(u'CSSStyleRule: Trailing content: %s' %
                        self._valuestr(cssText), token=trail)
    elif not selectortokens:
        self._log.error(u'CSSStyleRule: No selector found: %r' %
                        self._valuestr(cssText))
    elif self._tokenvalue(selectortokens[0]).startswith(u'@'):
        # an @rule is not a style rule
        self._log.error(u'CSSStyleRule: No style rule: %r' %
                        self._valuestr(cssText),
                        error=xml.dom.InvalidModificationErr)
    else:
        wellformed = True

        # the last selector token must be the opening brace
        bracetoken = selectortokens.pop()
        if self._tokenvalue(bracetoken) != u'{':
            wellformed = False
            self._log.error(
                u'CSSStyleRule: No start { of style declaration found: %r' %
                self._valuestr(cssText), bracetoken)
        elif not selectortokens:
            wellformed = False
            self._log.error(u'CSSStyleRule: No selector found: %r.' %
                        self._valuestr(cssText), bracetoken)
        # built even if not wellformed; only assigned to self further down
        newselectorlist = SelectorList(selectorText=(selectortokens,
                                                     namespaces),
                                       parentRule=self)

        newstyle = CSSStyleDeclaration()
        if not styletokens:
            wellformed = False
            self._log.error(
                u'CSSStyleRule: No style declaration or "}" found: %r' %
                self._valuestr(cssText))
        else:
            # the declaration block must end with "}" or at EOF
            braceorEOFtoken = styletokens.pop()
            val, typ = self._tokenvalue(braceorEOFtoken), self._type(braceorEOFtoken)
            if val != u'}' and typ != 'EOF':
                wellformed = False
                self._log.error(
                    u'CSSStyleRule: No "}" after style declaration found: %r' %
                    self._valuestr(cssText))
            else:
                if 'EOF' == typ:
                    # add again as style needs it
                    styletokens.append(braceorEOFtoken)
                newstyle.cssText = styletokens

        # commit only when both parts were parsed without error
        if wellformed:
            self._selectorList = newselectorlist
            self.style = newstyle
cssText = property(_getCssText, _setCssText,
doc="(DOM) The parsable textual representation of the rule.")
def __getNamespaces(self):
    """Return the parent sheet's namespaces; if reading them raises
    AttributeError fall back to the namespaces of the selector list."""
    try:
        active = self.parentStyleSheet.namespaces
    except AttributeError:
        active = self.selectorList._namespaces
    return active
_namespaces = property(__getNamespaces, doc=u"""if this Rule is
attached to a CSSStyleSheet the namespaces of that sheet are mirrored
here. While the Rule is not attached the namespaces of selectorList
are used.""")
def _setSelectorList(self, selectorList):
    """
    Copy the selector text of ``selectorList`` into this rule.

    :param selectorList: only its ``selectorText`` is used, the object
        itself is not kept
    """
    self._checkReadonly()
    copied_text = selectorList.selectorText
    self.selectorText = copied_text
selectorList = property(lambda self: self._selectorList, _setSelectorList,
doc="The SelectorList of this rule.")
def _setSelectorText(self, selectorText):
    """
    Set the selector text on the wrapped SelectorList object.

    :param selectorText: of type string, might also be a comma separated
        list of selectors
    :Exceptions:
        - `NAMESPACE_ERR`: (Selector)
          Raised if the specified selector uses an unknown namespace
          prefix.
        - `SYNTAX_ERR`: (SelectorList, Selector)
          Raised if the specified CSS string value has a syntax error
          and is unparsable.
        - `NO_MODIFICATION_ALLOWED_ERR`: (self)
          Raised if this rule is readonly.
    """
    self._checkReadonly()
    wrapped = self._selectorList
    wrapped.selectorText = selectorText
selectorText = property(lambda self: self._selectorList.selectorText,
_setSelectorText,
doc="""(DOM) The textual representation of the selector for the
rule set.""")
def _setStyle(self, style):
    """
    Replace the content of this rule's declaration block.

    :param style: a string, which is parsed via ``cssText``, or a
        declaration object whose ``_seq`` is taken over; the given object
        itself is not kept
    """
    self._checkReadonly()
    declaration = self._style
    if not isinstance(style, basestring):
        # cssText would be serialized with optional preferences
        # so the sequence itself is used
        declaration._seq = style._seq
        return
    declaration.cssText = style
style = property(lambda self: self._style, _setStyle,
doc="(DOM) The declaration-block of this rule set.")
wellformed = property(lambda self: self.selectorList.wellformed)
def __repr__(self):
    """Return a constructor-like representation; the selector text is
    paired with the namespaces when there are any."""
    selector_part = self.selectorText
    if self._namespaces:
        selector_part = (selector_part, self._namespaces)
    class_name = self.__class__.__name__
    return "cssutils.css.%s(selectorText=%r, style=%r)" % (
        class_name, selector_part, self.style.cssText)
def __str__(self):
    """Return a short description with selector, style text, namespaces
    and object id."""
    details = (self.__class__.__name__, self.selectorText,
               self.style.cssText, self._namespaces, id(self))
    return "<cssutils.css.%s object selector=%r style=%r _namespaces=%r at 0x%x>" % details

View File

@ -0,0 +1,674 @@
"""
CSSStyleSheet implements DOM Level 2 CSS CSSStyleSheet.
Partly also:
- http://dev.w3.org/csswg/cssom/#the-cssstylesheet
- http://www.w3.org/TR/2006/WD-css3-namespace-20060828/
TODO:
- ownerRule and ownerNode
"""
__all__ = ['CSSStyleSheet']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssstylesheet.py 1429 2008-08-11 19:01:52Z cthedot $'
import xml.dom
import cssutils.stylesheets
from cssutils.util import _Namespaces, _SimpleNamespaces, _readUrl
from cssutils.helper import Deprecated
class CSSStyleSheet(cssutils.stylesheets.StyleSheet):
"""
The CSSStyleSheet interface represents a CSS style sheet.
Properties
==========
CSSOM
-----
cssRules
of type CSSRuleList, (DOM readonly)
encoding
reflects the encoding of an @charset rule or 'utf-8' (default)
if set to ``None``
ownerRule
of type CSSRule, readonly. If this sheet is imported this is a ref
to the @import rule that imports it.
Inherits properties from stylesheet.StyleSheet
cssutils
--------
cssText: string
a textual representation of the stylesheet
namespaces
reflects set @namespace rules of this rule.
A dict of {prefix: namespaceURI} mapping.
Format
======
stylesheet
: [ CHARSET_SYM S* STRING S* ';' ]?
[S|CDO|CDC]* [ import [S|CDO|CDC]* ]*
[ namespace [S|CDO|CDC]* ]* # according to @namespace WD
[ [ ruleset | media | page ] [S|CDO|CDC]* ]*
"""
def __init__(self, href=None, media=None, title=u'', disabled=None,
             ownerNode=None, parentStyleSheet=None, readonly=False,
             ownerRule=None):
    """
    init parameters are the same as for stylesheets.StyleSheet,
    the sheet type is fixed to 'text/css'
    """
    super(CSSStyleSheet, self).__init__(
        'text/css', href, media, title, disabled,
        ownerNode, parentStyleSheet)

    self._ownerRule = ownerRule

    # both append and extend on the rule list are routed to insertRule
    self.cssRules = cssutils.css.CSSRuleList()
    for hook in ('append', 'extend'):
        setattr(self.cssRules, hook, self.insertRule)

    self._namespaces = _Namespaces(parentStyleSheet=self, log=self._log)
    self._readonly = readonly

    # used only during setting cssText by parse*()
    self.__encodingOverride = None
    self._fetcher = None
def __iter__(self):
    """Generator yielding each rule of ``cssRules`` in order."""
    for each in self.cssRules:
        yield each
def _cleanNamespaces(self):
    """Delete every namespace rule whose (prefix, namespaceURI) pair is
    not among the items of ``self.namespaces``."""
    rules = self.cssRules
    valid_pairs = self.namespaces.items()
    position = 0
    while position < len(rules):
        candidate = rules[position]
        stale = (candidate.type == candidate.NAMESPACE_RULE and
                 (candidate.prefix, candidate.namespaceURI) not in valid_pairs)
        if stale:
            # do not advance: the next rule moves into this position
            self.deleteRule(position)
        else:
            position += 1
def _getUsedURIs(self):
    """Return the set of URIs used by the selector lists of all style
    rules, including style rules nested directly in media rules."""
    collected = set()
    for rule in self:
        if rule.STYLE_RULE == rule.type:
            stylerules = [rule]
        elif rule.MEDIA_RULE == rule.type:
            stylerules = [inner for inner in rule
                          if inner.type == inner.STYLE_RULE]
        else:
            stylerules = []
        for stylerule in stylerules:
            collected.update(stylerule.selectorList._getUsedUris())
    return collected
def _getCssText(self):
    """Return this sheet serialized by the global serializer
    (``cssutils.ser.do_CSSStyleSheet``)."""
    serializer = cssutils.ser
    return serializer.do_CSSStyleSheet(self)
def _setCssText(self, cssText):
    """
    (cssutils)
    Parses ``cssText`` and overwrites the whole stylesheet.

    The text is tokenized and handed to ``self._parse`` together with one
    handler per token type. Each handler builds a rule, appends it to a
    temporary list if it is wellformed and allowed at that point, and
    returns the new parser state. Only if parsing as a whole is
    wellformed are the existing rules replaced.

    :param cssText:
        a parseable string or a tuple of (cssText, dict-of-namespaces)
    :Exceptions:
        - `NAMESPACE_ERR`:
          If a namespace prefix is found which is not declared.
        - `NO_MODIFICATION_ALLOWED_ERR`: (self)
          Raised if the rule is readonly.
        - `SYNTAX_ERR`:
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
    """
    self._checkReadonly()

    cssText, namespaces = self._splitNamespacesOff(cssText)
    if not namespaces:
        namespaces = _SimpleNamespaces(log=self._log)

    tokenizer = self._tokenize2(cssText)
    newseq = [] #cssutils.css.CSSRuleList()

    # for closures: must be a mutable
    new = {'encoding': None, # needed for setting encoding of @import rules
           'namespaces': namespaces}

    # Handler states ("expected"): 0 = absolute start, 1 = after @charset
    # or @import, 2 = after @namespace, 3 = after any other rule.

    def S(expected, seq, token, tokenizer=None):
        # @charset must be at absolute beginning of style sheet
        if expected == 0:
            return 1
        else:
            return expected

    def COMMENT(expected, seq, token, tokenizer=None):
        "special: sets parent*"
        # NOTE(review): uses self.parentStyleSheet while the other
        # handlers use self - confirm this is intended
        comment = cssutils.css.CSSComment([token],
                            parentStyleSheet=self.parentStyleSheet)
        seq.append(comment)
        return expected

    def charsetrule(expected, seq, token, tokenizer):
        rule = cssutils.css.CSSCharsetRule(parentStyleSheet=self)
        rule.cssText = self._tokensupto2(tokenizer, token)
        if expected > 0 or len(seq) > 0:
            self._log.error(
                u'CSSStylesheet: CSSCharsetRule only allowed at beginning of stylesheet.',
                token, xml.dom.HierarchyRequestErr)
        else:
            if rule.wellformed:
                seq.append(rule)
                new['encoding'] = rule.encoding
        return 1

    def importrule(expected, seq, token, tokenizer):
        if new['encoding']:
            # set temporarily as used by _resolveImport
            # save newEncoding which have been set by resolveImport
            self.__newEncoding = new['encoding']

        rule = cssutils.css.CSSImportRule(parentStyleSheet=self)
        rule.cssText = self._tokensupto2(tokenizer, token)
        if expected > 1:
            self._log.error(
                u'CSSStylesheet: CSSImportRule not allowed here.',
                token, xml.dom.HierarchyRequestErr)
        else:
            if rule.wellformed:
                #del rule._parentEncoding # remove as later it is read from this sheet!
                seq.append(rule)

        try:
            # remove as only used temporarily but may not be set at all
            del self.__newEncoding
        except AttributeError:
            pass

        return 1

    def namespacerule(expected, seq, token, tokenizer):
        rule = cssutils.css.CSSNamespaceRule(
                            cssText=self._tokensupto2(tokenizer, token),
                            parentStyleSheet=self)
        if expected > 2:
            self._log.error(
                u'CSSStylesheet: CSSNamespaceRule not allowed here.',
                token, xml.dom.HierarchyRequestErr)
        else:
            if rule.wellformed:
                seq.append(rule)
                # temporary namespaces given to CSSStyleRule and @media
                new['namespaces'][rule.prefix] = rule.namespaceURI
        return 2

    def fontfacerule(expected, seq, token, tokenizer):
        rule = cssutils.css.CSSFontFaceRule(parentStyleSheet=self)
        rule.cssText = self._tokensupto2(tokenizer, token)
        if rule.wellformed:
            seq.append(rule)
        return 3

    def mediarule(expected, seq, token, tokenizer):
        rule = cssutils.css.CSSMediaRule()
        rule.cssText = (self._tokensupto2(tokenizer, token),
                        new['namespaces'])
        if rule.wellformed:
            # attach the rule and its children only after parsing
            rule._parentStyleSheet=self
            for r in rule:
                r._parentStyleSheet=self
            seq.append(rule)
        return 3

    def pagerule(expected, seq, token, tokenizer):
        rule = cssutils.css.CSSPageRule(parentStyleSheet=self)
        rule.cssText = self._tokensupto2(tokenizer, token)
        if rule.wellformed:
            seq.append(rule)
        return 3

    def unknownrule(expected, seq, token, tokenizer):
        self._log.warn(
                u'CSSStylesheet: Unknown @rule found.',
                token, neverraise=True)
        rule = cssutils.css.CSSUnknownRule(parentStyleSheet=self)
        rule.cssText = self._tokensupto2(tokenizer, token)
        if rule.wellformed:
            seq.append(rule)
        return expected

    def ruleset(expected, seq, token, tokenizer):
        rule = cssutils.css.CSSStyleRule()
        rule.cssText = (self._tokensupto2(tokenizer, token),
                        new['namespaces'])
        if rule.wellformed:
            rule._parentStyleSheet=self
            seq.append(rule)
        return 3

    # expected:
    # ['CHARSET', 'IMPORT', 'NAMESPACE', ('PAGE', 'MEDIA', ruleset)]
    wellformed, expected = self._parse(0, newseq, tokenizer,
        {'S': S,
         'COMMENT': COMMENT,
         'CDO': lambda *ignored: None,
         'CDC': lambda *ignored: None,
         'CHARSET_SYM': charsetrule,
         'FONT_FACE_SYM': fontfacerule,
         'IMPORT_SYM': importrule,
         'NAMESPACE_SYM': namespacerule,
         'PAGE_SYM': pagerule,
         'MEDIA_SYM': mediarule,
         'ATKEYWORD': unknownrule
         },
         default=ruleset)

    if wellformed:
        # replace the old rules; namespaces are cleaned once at the end
        del self.cssRules[:]
        for rule in newseq:
            self.insertRule(rule, _clean=False)
        self._cleanNamespaces()
cssText = property(_getCssText, _setCssText,
"(cssutils) a textual representation of the stylesheet")
def _resolveImport(self, url):
    """Read (encoding, enctype, decodedContent) from ``url`` for @import
    sheets.

    The parent encoding handed to ``_readUrl`` is, in this order, the
    temporary encoding set while a complete sheet is parsed, the encoding
    of the first rule (an explicit @charset), or None.
    """
    try:
        # only available during parse of a complete sheet
        parent_encoding = self.__newEncoding
    except AttributeError:
        try:
            # explicit @charset
            parent_encoding = self.cssRules[0].encoding
        except (IndexError, AttributeError):
            # default not UTF-8 but None!
            parent_encoding = None

    read_options = {
        'fetcher': self._fetcher,
        'overrideEncoding': self.__encodingOverride,
        'parentEncoding': parent_encoding,
    }
    return _readUrl(url, **read_options)
def _setCssTextWithEncodingOverride(self, cssText, encodingOverride=None,
                                    encoding=None):
    """Set cssText but use ``encodingOverride`` to overwrite the detected
    encoding. This is used by parse and @import during setting of cssText.

    If ``encoding`` is given it is used as the sheet encoding afterwards,
    but it is not saved as encodingOverride."""
    overriding = bool(encodingOverride)
    if overriding:
        # encoding during resolving of @import
        self.__encodingOverride = encodingOverride

    self.__newEncoding = encoding # save for nested @import
    self.cssText = cssText

    if not overriding:
        if encoding:
            # may e.g. be httpEncoding
            self.encoding = encoding
        return

    # set encodingOverride explicit again!
    self.encoding = self.__encodingOverride
    # remove?
    self.__encodingOverride = None
def _setFetcher(self, fetcher=None):
    """sets @import URL loader, if None the default is used

    The value is only stored in ``self._fetcher`` here.
    """
    self._fetcher = fetcher
def _setEncoding(self, encoding):
    """
    Set the encoding of the charset rule if one is present at index 0,
    otherwise insert a new charset rule with the given encoding. If
    ``encoding`` is empty or None an existing charset rule is removed.
    """
    try:
        first = self.cssRules[0]
    except IndexError:
        first = None

    if first and first.CHARSET_RULE == first.type:
        if not encoding:
            self.deleteRule(0)
        else:
            first.encoding = encoding
        return

    if encoding:
        self.insertRule(cssutils.css.CSSCharsetRule(encoding=encoding), 0)
def _getEncoding(self):
    """Return the ``encoding`` of the first rule, or 'utf-8' if there is
    no first rule or it has no ``encoding`` attribute."""
    try:
        declared = self.cssRules[0].encoding
    except (IndexError, AttributeError):
        declared = 'utf-8'
    return declared
encoding = property(_getEncoding, _setEncoding,
"(cssutils) reflects the encoding of an @charset rule or 'UTF-8' (default) if set to ``None``")
namespaces = property(lambda self: self._namespaces,
doc="Namespaces used in this CSSStyleSheet.")
def add(self, rule):
    """
    Add ``rule`` to the stylesheet at an appropriate position and return
    what ``insertRule`` returns.

    Same as ``sheet.insertRule(rule, inOrder=True)``.
    """
    position = self.insertRule(rule, inOrder=True, index=None)
    return position
def deleteRule(self, index):
    """
    Used to delete a rule from the style sheet.

    :param index:
        of the rule to remove in the StyleSheet's rule list. For an
        index < 0 **no** INDEX_SIZE_ERR is raised but rules for
        normal Python lists are used. E.g. ``deleteRule(-1)`` removes
        the last rule in cssRules.
    :Exceptions:
        - `INDEX_SIZE_ERR`: (self)
          Raised if the specified index does not correspond to a rule in
          the style sheet's rule list.
        - `NO_MODIFICATION_ALLOWED_ERR`: (self)
          Raised if this style sheet is readonly, or if the rule is the
          only @namespace rule declaring a namespaceURI that is still
          used in the sheet.
    """
    self._checkReadonly()

    try:
        rule = self.cssRules[index]
    except IndexError:
        raise xml.dom.IndexSizeErr(
            u'CSSStyleSheet: %s is not a valid index in the rulelist of length %i' % (
            index, self.cssRules.length))

    if rule.type == rule.NAMESPACE_RULE:
        # a namespace rule may only go if its URI is unused or is also
        # declared by another namespace rule
        uris = [r.namespaceURI for r in self if r.type == r.NAMESPACE_RULE]
        useduris = self._getUsedURIs()
        if rule.namespaceURI in useduris and\
           uris.count(rule.namespaceURI) == 1:
            raise xml.dom.NoModificationAllowedErr(
                u'CSSStyleSheet: NamespaceURI defined in this rule is used, cannot remove.')

    rule._parentStyleSheet = None # detach
    del self.cssRules[index] # delete from StyleSheet
def insertRule(self, rule, index=None, inOrder=False, _clean=True):
    """
    Used to insert a new rule into the style sheet. The new rule now
    becomes part of the cascade.

    :Parameters:
        rule
            a parsable DOMString, in cssutils also a CSSRule or a
            CSSRuleList
        index
            of the rule before the new rule will be inserted.
            If the specified index is equal to the length of the
            StyleSheet's rule collection, the rule will be added to the end
            of the style sheet.
            If index is not given or None rule will be appended to rule
            list.
        inOrder
            if True the rule will be put to a proper location while
            ignoring index but without raising HIERARCHY_REQUEST_ERR.
            The resulting index is returned nevertheless
        _clean
            internal; if True namespace rules are cleaned up after a
            namespace rule has been inserted
    :returns: the index within the stylesheet's rule collection, or None
        if the rule was rejected with a logged error
    :Exceptions:
        - `HIERARCHY_REQUEST_ERR`: (self)
          Raised if the rule cannot be inserted at the specified index
          e.g. if an @import rule is inserted after a standard rule set
          or other at-rule.
        - `INDEX_SIZE_ERR`: (self)
          Raised if the specified index is not a valid insertion point.
        - `NO_MODIFICATION_ALLOWED_ERR`: (self)
          Raised if this style sheet is readonly.
        - `SYNTAX_ERR`: (rule)
          Raised if the specified rule has a syntax error and is
          unparsable.
    """
    self._checkReadonly()

    # check position
    if index is None:
        index = len(self.cssRules)
    elif index < 0 or index > self.cssRules.length:
        raise xml.dom.IndexSizeErr(
            u'CSSStyleSheet: Invalid index %s for CSSRuleList with a length of %s.' % (
                index, self.cssRules.length))

    if isinstance(rule, basestring):
        # init a temp sheet which has the same properties as self
        tempsheet = CSSStyleSheet(href=self.href,
                                  media=self.media,
                                  title=self.title,
                                  parentStyleSheet=self.parentStyleSheet,
                                  ownerRule=self.ownerRule)
        tempsheet._ownerNode = self.ownerNode
        tempsheet._fetcher = self._fetcher

        # prepend encoding if in this sheet to be able to use it in
        # @import rules encoding resolution
        # do not add if new rule startswith "@charset" (which is exact!)
        if not rule.startswith(u'@charset') and (self.cssRules and
            self.cssRules[0].type == self.cssRules[0].CHARSET_RULE):
            # rule 0 is @charset!
            newrulescount, newruleindex = 2, 1
            rule = self.cssRules[0].cssText + rule
        else:
            newrulescount, newruleindex = 1, 0

        # parse the new rule(s)
        tempsheet.cssText = (rule, self._namespaces)

        if len(tempsheet.cssRules) != newrulescount or (not isinstance(
           tempsheet.cssRules[newruleindex], cssutils.css.CSSRule)):
            self._log.error(u'CSSStyleSheet: Not a CSSRule: %s' % rule)
            return
        rule = tempsheet.cssRules[newruleindex]
        rule._parentStyleSheet = None # done later?

        # TODO:
        #tempsheet._namespaces = self._namespaces

    elif isinstance(rule, cssutils.css.CSSRuleList):
        # insert all rules
        for i, r in enumerate(rule):
            self.insertRule(r, index + i)
        return index

    if not rule.wellformed:
        self._log.error(u'CSSStyleSheet: Invalid rules cannot be added.')
        return

    # CHECK HIERARCHY
    # @charset
    if rule.type == rule.CHARSET_RULE:
        if inOrder:
            index = 0
            # always first and only
            if (self.cssRules and self.cssRules[0].type == rule.CHARSET_RULE):
                self.cssRules[0].encoding = rule.encoding
            else:
                self.cssRules.insert(0, rule)
        elif index != 0 or (self.cssRules and
                            self.cssRules[0].type == rule.CHARSET_RULE):
            self._log.error(
                u'CSSStylesheet: @charset only allowed once at the beginning of a stylesheet.',
                error=xml.dom.HierarchyRequestErr)
            return
        else:
            self.cssRules.insert(index, rule)

    # @unknown or comment
    elif rule.type in (rule.UNKNOWN_RULE, rule.COMMENT) and not inOrder:
        if index == 0 and self.cssRules and\
           self.cssRules[0].type == rule.CHARSET_RULE:
            self._log.error(
                u'CSSStylesheet: @charset must be the first rule.',
                error=xml.dom.HierarchyRequestErr)
            return
        else:
            self.cssRules.insert(index, rule)

    # @import
    elif rule.type == rule.IMPORT_RULE:
        if inOrder:
            # automatic order
            if rule.type in (r.type for r in self):
                # find last of this type
                for i, r in enumerate(reversed(self.cssRules)):
                    if r.type == rule.type:
                        index = len(self.cssRules) - i
                        break
            else:
                # find first point to insert
                if self.cssRules and self.cssRules[0].type in (rule.CHARSET_RULE,
                                                               rule.COMMENT):
                    index = 1
                else:
                    index = 0
        else:
            # after @charset
            if index == 0 and self.cssRules and\
               self.cssRules[0].type == rule.CHARSET_RULE:
                self._log.error(
                    u'CSSStylesheet: Found @charset at index 0.',
                    error=xml.dom.HierarchyRequestErr)
                return
            # before @namespace, @page, @font-face, @media and stylerule
            for r in self.cssRules[:index]:
                if r.type in (r.NAMESPACE_RULE, r.MEDIA_RULE, r.PAGE_RULE,
                              r.STYLE_RULE, r.FONT_FACE_RULE):
                    self._log.error(
                        u'CSSStylesheet: Cannot insert @import here, found @namespace, @media, @page or CSSStyleRule before index %s.' %
                        index,
                        error=xml.dom.HierarchyRequestErr)
                    return
        self.cssRules.insert(index, rule)

    # @namespace
    elif rule.type == rule.NAMESPACE_RULE:
        if inOrder:
            if rule.type in (r.type for r in self):
                # find last of this type
                for i, r in enumerate(reversed(self.cssRules)):
                    if r.type == rule.type:
                        index = len(self.cssRules) - i
                        break
            else:
                # find first point to insert
                for i, r in enumerate(self.cssRules):
                    if r.type in (r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE,
                                  r.FONT_FACE_RULE, r.UNKNOWN_RULE, r.COMMENT):
                        index = i # before these
                        break
        else:
            # after @charset and @import
            for r in self.cssRules[index:]:
                if r.type in (r.CHARSET_RULE, r.IMPORT_RULE):
                    self._log.error(
                        u'CSSStylesheet: Cannot insert @namespace here, found @charset or @import after index %s.' %
                        index,
                        error=xml.dom.HierarchyRequestErr)
                    return
            # before @media and stylerule
            for r in self.cssRules[:index]:
                if r.type in (r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE,
                              r.FONT_FACE_RULE):
                    self._log.error(
                        u'CSSStylesheet: Cannot insert @namespace here, found @media, @page or CSSStyleRule before index %s.' %
                        index,
                        error=xml.dom.HierarchyRequestErr)
                    return

        if not (rule.prefix in self.namespaces and
           self.namespaces[rule.prefix] == rule.namespaceURI):
            # no doublettes
            self.cssRules.insert(index, rule)
            if _clean:
                self._cleanNamespaces()

    # all other where order is not important
    else:
        if inOrder:
            # simply add to end as no specific order
            self.cssRules.append(rule)
            index = len(self.cssRules) - 1
        else:
            # the rules from index on are scanned, so the message says
            # "after index"
            for r in self.cssRules[index:]:
                if r.type in (r.CHARSET_RULE, r.IMPORT_RULE, r.NAMESPACE_RULE):
                    self._log.error(
                        u'CSSStylesheet: Cannot insert rule here, found @charset, @import or @namespace after index %s.' %
                        index,
                        error=xml.dom.HierarchyRequestErr)
                    return
            self.cssRules.insert(index, rule)

    # post settings, TODO: for other rules which contain @rules
    rule._parentStyleSheet = self
    if rule.MEDIA_RULE == rule.type:
        for r in rule:
            r._parentStyleSheet = self
    # ?
    elif rule.IMPORT_RULE == rule.type:
        rule.href = rule.href # try to reload stylesheet

    return index
ownerRule = property(lambda self: self._ownerRule,
doc="(DOM attribute) NOT IMPLEMENTED YET")
@Deprecated('Use cssutils.replaceUrls(sheet, replacer) instead.')
def replaceUrls(self, replacer):
    """
    **EXPERIMENTAL**

    Utility method to replace all ``url(urlstring)`` values in
    ``CSSImportRules`` and ``CSSStyleDeclaration`` objects (properties).

    ``replacer`` must be a function which is called with a single
    argument ``urlstring`` which is the current value of url()
    excluding ``url(`` and ``)``. It still may have surrounding
    single or double quotes though.

    Deprecated: this only delegates to ``cssutils.replaceUrls``.
    """
    cssutils.replaceUrls(self, replacer)
def setSerializer(self, cssserializer):
    """
    Install ``cssserializer`` as the global serializer ``cssutils.ser``.

    The serializer is replaced module wide, so the output of all
    stylesheets is affected, not only the output of this one.

    Raises ValueError if ``cssserializer`` is not an instance of
    ``cssutils.CSSSerializer``.
    """
    if not isinstance(cssserializer, cssutils.CSSSerializer):
        raise ValueError(u'Serializer must be an instance of cssutils.CSSSerializer.')
    cssutils.ser = cssserializer
def setSerializerPref(self, pref, value):
    """
    Set preference ``pref`` of the global serializer to ``value``.

    The value is stored on ``cssutils.ser.prefs``, i.e. on the preferences
    object of the module wide serializer, so the change is not limited to
    this stylesheet. See cssutils.serialize.Preferences for possible
    preferences to be set.

    pref
        name of the preference attribute to set
    value
        new value of that preference
    """
    # the builtin is the idiomatic spelling and does not depend on the
    # preferences object exposing ``__setattr__`` as an instance attribute
    setattr(cssutils.ser.prefs, pref, value)
def __repr__(self):
    """
    Return a constructor-like text with href, media and title.

    ``media`` is shown as its ``mediaText``, or as None if ``self.media``
    is not truthy.
    """
    media = None
    if self.media:
        media = self.media.mediaText
    details = (self.__class__.__name__, self.href, media, self.title)
    return "cssutils.css.%s(href=%r, media=%r, title=%r)" % details
def __str__(self):
    """
    Return a short description with encoding, href, media, title,
    namespaces and the ``id()`` of this object.

    ``media`` is shown as its ``mediaText``, or as None if ``self.media``
    is not truthy.
    """
    media = None
    if self.media:
        media = self.media.mediaText
    details = (self.__class__.__name__, self.encoding, self.href,
               media, self.title, self.namespaces.namespaces,
               id(self))
    return ("<cssutils.css.%s object encoding=%r href=%r "
            "media=%r title=%r namespaces=%r at 0x%x>" % details)

View File

@ -0,0 +1,208 @@
"""CSSUnknownRule implements DOM Level 2 CSS CSSUnknownRule.
"""
__all__ = ['CSSUnknownRule']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssunknownrule.py 1170 2008-03-20 17:42:07Z cthedot $'
import xml.dom
import cssrule
import cssutils
class CSSUnknownRule(cssrule.CSSRule):
    """
    An at-rule which is not supported by this user agent.

    Properties
    ==========
    inherited from CSSRule
        - cssText
        - type

    cssutils only
    -------------
    atkeyword
        the literal keyword used
    seq
        All parts of this rule excluding @KEYWORD but including CSSComments
    wellformed
        True if an atkeyword is set at all

    Format
    ======
    unknownrule:
        @xxx until ';' or block {...}
    """
    type = property(lambda self: cssrule.CSSRule.UNKNOWN_RULE)

    def __init__(self, cssText=u'', parentRule=None,
                 parentStyleSheet=None, readonly=False):
        """
        cssText
            of type string, parsed only if it is not empty
        parentRule, parentStyleSheet
            handed on to the CSSRule initializer
        readonly
            stored after the initial cssText has been set
        """
        super(CSSUnknownRule, self).__init__(parentRule=parentRule,
                                             parentStyleSheet=parentStyleSheet)
        self._atkeyword = None
        if cssText:
            self.cssText = cssText
        self._readonly = readonly

    def _getCssText(self):
        """Return the serialized cssText of this rule."""
        return cssutils.ser.do_CSSUnknownRule(self)

    def _setCssText(self, cssText):
        """
        Parse ``cssText`` and, if it is wellformed, take over its atkeyword
        and sequence.

        DOMException on setting

        - SYNTAX_ERR:
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        - INVALID_MODIFICATION_ERR:
          Raised if the specified CSS string value represents a different
          type of rule than the current one.
        - HIERARCHY_REQUEST_ERR: (never raised)
          Raised if the rule cannot be inserted at this point in the
          style sheet.
        - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
          Raised if the rule is readonly.
        """
        super(CSSUnknownRule, self)._setCssText(cssText)
        tokenizer = self._tokenize2(cssText)
        attoken = self._nexttoken(tokenizer, None)
        if not attoken or self._type(attoken) != self._prods.ATKEYWORD:
            # text does not start with an @keyword at all
            self._log.error(u'CSSUnknownRule: No CSSUnknownRule found: %s' %
                            self._valuestr(cssText),
                            error=xml.dom.InvalidModificationErr)
            return

        # shared with the closures below, therefore a mutable
        state = {'nesting': [],  # stack of open {, [ or (
                 'wellformed': True}
        openers = {u'}': u'{', u']': u'[', u')': u'('}
        closers = {u'{': u'}', u'[': u']', u'(': u')'}

        def _after_end(expected, token):
            # anything following the end of the rule makes it invalid
            state['wellformed'] = False
            self._log.error(u'CSSUnknownRule: Expected end of rule.',
                            token=token)
            return expected

        def _keep(expected, seq, token, val):
            # append val with type and position of token unless rule ended
            type_, _, line, col = token
            if expected == 'EOF':
                return _after_end(expected, token)
            seq.append(val, type_, line=line, col=col)
            return expected

        def _char(expected, seq, token, tokenizer=None):
            type_, val, line, col = token
            if expected == 'EOF':
                return _after_end(expected, token)
            stack = state['nesting']
            if val in u'{[(':
                stack.append(val)
            elif val in u'}])':
                opening = openers[val]
                if stack and stack[-1] == opening:
                    stack.pop()
                else:
                    # closing bracket without matching opening one
                    state['wellformed'] = False
                    self._log.error(u'CSSUnknownRule: Wrong nesting of {, [ or (.',
                                    token=token)
            if val in u'};' and not stack:
                # ";" or "}" outside of any nesting ends the rule
                expected = 'EOF'
            seq.append(val, type_, line=line, col=col)
            return expected

        def _eof(expected, seq, token, tokenizer=None):
            # close all blocks which are still open and return 'EOF'
            for opened in reversed(state['nesting']):
                closing = closers[opened]
                seq.append(closing, closing)
            state['nesting'] = []
            return 'EOF'

        def _invalid(expected, seq, token, tokenizer=None):
            # makes rule invalid
            self._log.error(u'CSSUnknownRule: Bad syntax.',
                            token=token, error=xml.dom.SyntaxErr)
            state['wellformed'] = False
            return expected

        def _string(expected, seq, token, tokenizer=None):
            return _keep(expected, seq, token, self._stringtokenvalue(token))

        def _uri(expected, seq, token, tokenizer=None):
            return _keep(expected, seq, token, self._uritokenvalue(token))

        def _other(expected, seq, token, tokenizer=None):
            return _keep(expected, seq, token, token[1])

        # unknown : ATKEYWORD S* ... ; | }
        newseq = self._tempSeq()
        productions = {'CHAR': _char,
                       'EOF': _eof,
                       'INVALID': _invalid,
                       'STRING': _string,
                       'URI': _uri,
                       'S': _other  # overwrite default default!
                       }
        wellformed, expected = self._parse(expected=None,
                                           seq=newseq, tokenizer=tokenizer,
                                           productions=productions,
                                           default=_other,
                                           new=state)

        # wellformed set by parse
        wellformed = wellformed and state['wellformed']

        # post conditions
        if expected != 'EOF':
            wellformed = False
            self._log.error(
                u'CSSUnknownRule: No ending ";" or "}" found: %r' %
                self._valuestr(cssText))
        elif state['nesting']:
            wellformed = False
            self._log.error(
                u'CSSUnknownRule: Unclosed "{", "[" or "(": %r' %
                self._valuestr(cssText))

        # set all
        if wellformed:
            self.atkeyword = self._tokenvalue(attoken)
            self._setSeq(newseq)

    cssText = property(fget=_getCssText, fset=_setCssText,
                       doc="(DOM) The parsable textual representation.")

    wellformed = property(lambda self: bool(self.atkeyword))

    def __repr__(self):
        details = (self.__class__.__name__, self.cssText)
        return "cssutils.css.%s(cssText=%r)" % details

    def __str__(self):
        details = (self.__class__.__name__, self.cssText, id(self))
        return "<cssutils.css.%s object cssText=%r at 0x%x>" % details

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,414 @@
"""Property is a single CSS property in a CSSStyleDeclaration
Internal use only, may be removed in the future!
"""
__all__ = ['Property']
__docformat__ = 'restructuredtext'
__version__ = '$Id: property.py 1305 2008-06-22 18:42:51Z cthedot $'
import xml.dom
import cssutils
import cssproperties
from cssvalue import CSSValue
from cssutils.helper import Deprecated
class Property(cssutils.util.Base):
    r"""
    (cssutils) a CSS property in a StyleDeclaration of a CSSStyleRule

    Properties
    ==========
    cssText
        a parsable textual representation of this property
    name
        normalized name of the property, e.g. "color" when name is "c\olor"
        (since 0.9.5)
    literalname (since 0.9.5)
        original name of the property in the source CSS which is not
        normalized e.g. "C\OLor"
    cssValue
        the relevant CSSValue instance for this property
    value
        the string value of the property, same as cssValue.cssText
    priority
        of the property (currently only u"important" or None)
    literalpriority
        original priority of the property in the source CSS which is not
        normalized e.g. "IM\portant"
    seqs
        combination of a list for seq of name, a CSSValue object, and
        a list for seq of priority (empty or [!important] currently)
    valid
        if this Property is valid
    wellformed
        if this Property is syntactically ok

    DEPRECATED normalname (since 0.9.5)
        normalized name of the property, e.g. "color" when name is "c\olor"

    Format
    ======
    ::

        property = name
          : IDENT S*
          ;

        expr = value
          : term [ operator term ]*
          ;
        term
          : unary_operator?
            [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* |
              TIME S* | FREQ S* | function ]
          | STRING S* | IDENT S* | URI S* | hexcolor
          ;
        function
          : FUNCTION S* expr ')' S*
          ;
        /*
         * There is a constraint on the color that it must
         * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F])
         * after the "#"; e.g., "#000" is OK, but "#abcd" is not.
         */
        hexcolor
          : HASH S*
          ;

        prio
          : IMPORTANT_SYM S*
          ;
    """
    def __init__(self, name=None, value=None, priority=u'', _mediaQuery=False):
        """
        inits property

        name
            a property name string (will be normalized)
        value
            a property value string
        priority
            an optional priority string which currently must be u'',
            u'!important' or u'important'
        _mediaQuery boolean
            if True value is optional as used by MediaQuery objects
        """
        super(Property, self).__init__()

        # [seq of name, CSSValue, seq of priority]
        self.seqs = [[], None, []]
        self.valid = False
        self.wellformed = False
        self._mediaQuery = _mediaQuery

        if name:
            self.name = name
        else:
            self._name = u''
            self._literalname = u''
            self.__normalname = u''  # DEPRECATED

        if value:
            self.cssValue = value
        else:
            self.seqs[1] = CSSValue()

        if priority:
            self.priority = priority
        else:
            self._priority = u''
            self._literalpriority = u''

    def _getCssText(self):
        """
        returns serialized property cssText
        """
        return cssutils.ser.do_Property(self)

    def _setCssText(self, cssText):
        """
        Split ``cssText`` into name, value and priority tokens and set all
        three if the text is wellformed.

        DOMException on setting

        - NO_MODIFICATION_ALLOWED_ERR: (CSSRule)
          Raised if the rule is readonly.
        - SYNTAX_ERR: (self)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        """
        # check and prepare tokenlists for setting
        tokenizer = self._tokenize2(cssText)
        nametokens = self._tokensupto2(tokenizer, propertynameendonly=True)
        if nametokens:
            wellformed = True

            valuetokens = self._tokensupto2(tokenizer,
                                            propertyvalueendonly=True)
            prioritytokens = self._tokensupto2(tokenizer,
                                               propertypriorityendonly=True)

            if self._mediaQuery and not valuetokens:
                # MediaQuery may consist of name only
                self.name = nametokens
                self.cssValue = None
                self.priority = None
                return

            # remove colon from nametokens
            colontoken = nametokens.pop()
            if self._tokenvalue(colontoken) != u':':
                wellformed = False
                self._log.error(u'Property: No ":" after name found: %r' %
                                self._valuestr(cssText), colontoken)
            elif not nametokens:
                wellformed = False
                self._log.error(u'Property: No property name found: %r.' %
                                self._valuestr(cssText), colontoken)

            if valuetokens:
                if self._tokenvalue(valuetokens[-1]) == u'!':
                    # priority given, move "!" to prioritytokens
                    prioritytokens.insert(0, valuetokens.pop(-1))
            else:
                wellformed = False
                self._log.error(u'Property: No property value found: %r.' %
                                self._valuestr(cssText), colontoken)

            if wellformed:
                self.wellformed = True
                self.name = nametokens
                self.cssValue = valuetokens
                self.priority = prioritytokens

        else:
            self._log.error(u'Property: No property name found: %r.' %
                            self._valuestr(cssText))

    cssText = property(fget=_getCssText, fset=_setCssText,
                       doc="A parsable textual representation.")

    def _setName(self, name):
        """
        Parse ``name`` (a string or a list of tokens) and set the literal
        and the normalized name. ``valid`` is set depending on the name
        being a key of ``cssproperties.cssvalues``.

        DOMException on setting

        - SYNTAX_ERR: (self)
          Raised if the specified name has a syntax error and is
          unparsable.
        """
        # for closures: must be a mutable
        new = {'literalname': None,
               'wellformed': True}

        def _ident(expected, seq, token, tokenizer=None):
            # name
            if 'name' == expected:
                new['literalname'] = self._tokenvalue(token).lower()
                seq.append(new['literalname'])
                return 'EOF'
            else:
                new['wellformed'] = False
                self._log.error(u'Property: Unexpected ident.', token)
                return expected

        newseq = []
        wellformed, expected = self._parse(expected='name',
                                           seq=newseq,
                                           tokenizer=self._tokenize2(name),
                                           productions={'IDENT': _ident})
        wellformed = wellformed and new['wellformed']

        # post conditions
        # define a token for error logging; an empty token list has no
        # first token, fall back to None instead of raising IndexError
        if isinstance(name, list) and name:
            token = name[0]
        else:
            token = None

        if not new['literalname']:
            wellformed = False
            self._log.error(u'Property: No name found: %r' %
                            self._valuestr(name), token=token)

        if wellformed:
            self.wellformed = True
            self._literalname = new['literalname']
            self._name = self._normalize(self._literalname)
            self.__normalname = self._name  # DEPRECATED
            self.seqs[0] = newseq

            # validate
            if self._name not in cssproperties.cssvalues:
                self.valid = False
                self._log.info(u'Property: No CSS2 Property: %r.' %
                               new['literalname'], token=token,
                               neverraise=True)
            else:
                self.valid = True
                if self.cssValue:
                    # value validity depends on the property it is set for
                    self.cssValue._propertyName = self._name
                    self.valid = self.cssValue.valid
        else:
            self.wellformed = False

    name = property(lambda self: self._name, _setName,
                    doc="Name of this property")

    literalname = property(lambda self: self._literalname,
        doc="Readonly literal (not normalized) name of this property")

    def _getCSSValue(self):
        """Return the CSSValue object kept in ``seqs[1]``."""
        return self.seqs[1]

    def _setCSSValue(self, cssText):
        """
        see css.CSSValue

        For a MediaQuery property an empty ``cssText`` resets the value to
        a new empty CSSValue. Otherwise ``cssText`` is set on the current
        CSSValue and ``valid`` and ``wellformed`` of this property are
        combined with the ones of the value.

        DOMException on setting?

        - SYNTAX_ERR: (self)
          Raised if the specified CSS string value has a syntax error
          (according to the attached property) or is unparsable.
        - TODO: INVALID_MODIFICATION_ERR:
          Raised if the specified CSS string value represents a different
          type of values than the values allowed by the CSS property.
        """
        if self._mediaQuery and not cssText:
            self.seqs[1] = CSSValue()
        else:
            if not self.seqs[1]:
                self.seqs[1] = CSSValue()

            cssvalue = self.seqs[1]
            cssvalue._propertyName = self.name
            cssvalue.cssText = cssText
            # NOTE(review): cssvalue already is self.seqs[1], so this
            # assignment stores the same object again
            if cssvalue._value and cssvalue.wellformed:
                self.seqs[1] = cssvalue
            self.valid = self.valid and cssvalue.valid
            self.wellformed = self.wellformed and cssvalue.wellformed

    cssValue = property(_getCSSValue, _setCSSValue,
        doc="(cssutils) CSSValue object of this property")

    def _getValue(self):
        """Return ``_value`` of the cssValue or u'' if there is none."""
        if self.cssValue:
            return self.cssValue._value
        else:
            return u''

    def _setValue(self, value):
        """
        Set ``value`` as cssText of the cssValue and combine ``valid`` and
        ``wellformed`` of this property with the ones of the value.
        """
        self.cssValue.cssText = value
        self.valid = self.valid and self.cssValue.valid
        self.wellformed = self.wellformed and self.cssValue.wellformed

    value = property(_getValue, _setValue,
                     doc="The textual value of this Properties cssValue.")

    def _setPriority(self, priority):
        """
        priority
            a string, currently either u'', u'!important' or u'important'

        Format
        ======
        ::

            prio
              : IMPORTANT_SYM S*
              ;

            "!"{w}"important"   {return IMPORTANT_SYM;}

        DOMException on setting

        - SYNTAX_ERR: (self)
          Raised if the specified priority has a syntax error and is
          unparsable.
          In this case a priority not equal to None, "" or "!{w}important".
          As CSSOM defines CSSStyleDeclaration.getPropertyPriority resulting
          in u'important' this value is also allowed to set a Properties
          priority
        """
        if self._mediaQuery:
            # a MediaQuery property never has a priority
            self._priority = u''
            self._literalpriority = u''
            if priority:
                self._log.error(u'Property: No priority in a MediaQuery - ignored.')
            return

        if isinstance(priority, basestring) and\
           u'important' == self._normalize(priority):
            # plain "important" is accepted too, add the missing "!"
            priority = u'!%s' % priority

        # for closures: must be a mutable
        new = {'literalpriority': u'',
               'wellformed': True}

        def _char(expected, seq, token, tokenizer=None):
            # "!"
            val = self._tokenvalue(token)
            if u'!' == expected == val:
                seq.append(val)
                return 'important'
            else:
                new['wellformed'] = False
                self._log.error(u'Property: Unexpected char.', token)
                return expected

        def _ident(expected, seq, token, tokenizer=None):
            # "important"
            val = self._tokenvalue(token)
            normalval = self._tokenvalue(token, normalize=True)
            if 'important' == expected == normalval:
                new['literalpriority'] = val
                seq.append(val)
                return 'EOF'
            else:
                new['wellformed'] = False
                self._log.error(u'Property: Unexpected ident.', token)
                return expected

        newseq = []
        wellformed, expected = self._parse(expected='!',
                                           seq=newseq,
                                           tokenizer=self._tokenize2(priority),
                                           productions={'CHAR': _char,
                                                        'IDENT': _ident})
        wellformed = wellformed and new['wellformed']

        # post conditions
        if priority and not new['literalpriority']:
            wellformed = False
            self._log.info(u'Property: Invalid priority: %r.' %
                           self._valuestr(priority))

        if wellformed:
            self.wellformed = self.wellformed and wellformed
            self._literalpriority = new['literalpriority']
            self._priority = self._normalize(self.literalpriority)
            self.seqs[2] = newseq

            # validate
            if self._priority not in (u'', u'important'):
                self.valid = False
                self._log.info(u'Property: No CSS2 priority value: %r.' %
                               self._priority, neverraise=True)

    priority = property(lambda self: self._priority, _setPriority,
        doc="(cssutils) Priority of this property")

    literalpriority = property(lambda self: self._literalpriority,
        doc="Readonly literal (not normalized) priority of this property")

    def __repr__(self):
        return "cssutils.css.%s(name=%r, value=%r, priority=%r)" % (
                self.__class__.__name__,
                self.literalname, self.cssValue.cssText, self.priority)

    def __str__(self):
        return "<%s.%s object name=%r value=%r priority=%r at 0x%x>" % (
                self.__class__.__module__, self.__class__.__name__,
                self.name, self.cssValue.cssText, self.priority, id(self))

    @Deprecated(u'Use property ``name`` instead (since cssutils 0.9.5).')
    def _getNormalname(self):
        return self.__normalname
    normalname = property(_getNormalname,
                          doc="DEPRECATED since 0.9.5, use name instead")

View File

@ -0,0 +1,800 @@
"""Selector is a single Selector of a CSSStyleRule SelectorList.
Partly implements
http://www.w3.org/TR/css3-selectors/
TODO
- .contains(selector)
- .isSubselector(selector)
"""
__all__ = ['Selector']
__docformat__ = 'restructuredtext'
__version__ = '$Id: selector.py 1429 2008-08-11 19:01:52Z cthedot $'
import xml.dom
import cssutils
from cssutils.util import _SimpleNamespaces
class Selector(cssutils.util.Base2):
"""
(cssutils) a single selector in a SelectorList of a CSSStyleRule
Properties
==========
element
Effective element target of this selector
parentList: of type SelectorList, readonly
The SelectorList that contains this selector or None if this
Selector is not attached to a SelectorList.
selectorText
textual representation of this Selector
seq
sequence of Selector parts including comments
specificity (READONLY)
tuple of (a, b, c, d) where:
a
presence of style in document, always 0 if not used on a document
b
number of ID selectors
c
number of .class selectors
d
number of Element (type) selectors
wellformed
if this selector is wellformed regarding the Selector spec
Format
======
::
# implemented in SelectorList
selectors_group
: selector [ COMMA S* selector ]*
;
selector
: simple_selector_sequence [ combinator simple_selector_sequence ]*
;
combinator
/* combinators can be surrounded by white space */
: PLUS S* | GREATER S* | TILDE S* | S+
;
simple_selector_sequence
: [ type_selector | universal ]
[ HASH | class | attrib | pseudo | negation ]*
| [ HASH | class | attrib | pseudo | negation ]+
;
type_selector
: [ namespace_prefix ]? element_name
;
namespace_prefix
: [ IDENT | '*' ]? '|'
;
element_name
: IDENT
;
universal
: [ namespace_prefix ]? '*'
;
class
: '.' IDENT
;
attrib
: '[' S* [ namespace_prefix ]? IDENT S*
[ [ PREFIXMATCH |
SUFFIXMATCH |
SUBSTRINGMATCH |
'=' |
INCLUDES |
DASHMATCH ] S* [ IDENT | STRING ] S*
]? ']'
;
pseudo
/* '::' starts a pseudo-element, ':' a pseudo-class */
/* Exceptions: :first-line, :first-letter, :before and :after. */
/* Note that pseudo-elements are restricted to one per selector and */
/* occur only in the last simple_selector_sequence. */
: ':' ':'? [ IDENT | functional_pseudo ]
;
functional_pseudo
: FUNCTION S* expression ')'
;
expression
/* In CSS3, the expressions are identifiers, strings, */
/* or of the form "an+b" */
: [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
;
negation
: NOT S* negation_arg S* ')'
;
negation_arg
: type_selector | universal | HASH | class | attrib | pseudo
;
"""
def __init__(self, selectorText=None, parentList=None,
             readonly=False):
    """
    :Parameters:
        selectorText
            initial value of this selector, only set if it is not empty
        parentList
            the SelectorList this selector is part of
        readonly
            defaults to False
    """
    super(Selector, self).__init__()

    self._parent = parentList
    self._element = None
    self._specificity = (0, 0, 0, 0)
    # own namespaces, used by ``_namespaces`` while no sheet is reachable
    self.__namespaces = _SimpleNamespaces(log=self._log)

    if selectorText:
        self.selectorText = selectorText

    # stored only after the initial selectorText has been set
    self._readonly = readonly
def __getNamespaces(self):
    """
    Return the namespaces of the stylesheet reached via
    ``_parent.parentRule.parentStyleSheet``. If any link of that chain is
    missing (AttributeError) the own namespaces of this Selector are
    returned instead.
    """
    try:
        sheet = self._parent.parentRule.parentStyleSheet
        return sheet.namespaces
    except AttributeError:
        return self.__namespaces

_namespaces = property(__getNamespaces, doc="""if this Selector is attached
to a CSSStyleSheet the namespaces of that sheet are mirrored here.
While the Selector (or parent SelectorList or parentRule(s) of that are
not attached a own dict of {prefix: namespaceURI} is used.""")
# Read-only property returning the private ``_element`` attribute
# (initialized to None in ``__init__``).
element = property(lambda self: self._element,
doc=u"Effective element target of this selector.")
# Read-only property returning the private ``_parent`` attribute, which
# ``__init__`` sets from its ``parentList`` argument.
parentList = property(lambda self: self._parent,
doc="(DOM) The SelectorList that contains this Selector or\
None if this Selector is not attached to a SelectorList.")
def _getSelectorText(self):
"""
Return the serialized format of this Selector as produced by
``do_css_Selector`` of the global serializer ``cssutils.ser``.
"""
return cssutils.ser.do_css_Selector(self)
def _setSelectorText(self, selectorText):
"""
:param selectorText:
parsable string or a tuple of (selectorText, dict-of-namespaces).
Given namespaces are ignored if this object is attached to a
CSSStyleSheet!
:Exceptions:
- `NAMESPACE_ERR`: (self)
Raised if the specified selector uses an unknown namespace
prefix.
- `SYNTAX_ERR`: (self)
Raised if the specified CSS string value has a syntax error
and is unparsable.
- `NO_MODIFICATION_ALLOWED_ERR`: (self)
Raised if this rule is readonly.
"""
self._checkReadonly()
# might be (selectorText, namespaces)
selectorText, namespaces = self._splitNamespacesOff(selectorText)
try:
# uses parent stylesheets namespaces if available, otherwise given ones
namespaces = self.parentList.parentRule.parentStyleSheet.namespaces
except AttributeError:
pass
tokenizer = self._tokenize2(selectorText)
if not tokenizer:
self._log.error(u'Selector: No selectorText given.')
else:
# prepare tokenlist:
# "*" -> type "universal"
# "*"|IDENT + "|" -> combined to "namespace_prefix"
# "|" -> type "namespace_prefix"
# "." + IDENT -> combined to "class"
# ":" + IDENT, ":" + FUNCTION -> pseudo-class
# FUNCTION "not(" -> negation
# "::" + IDENT, "::" + FUNCTION -> pseudo-element
tokens = []
for t in tokenizer:
typ, val, lin, col = t
if val == u':' and tokens and\
self._tokenvalue(tokens[-1]) == ':':
# combine ":" and ":"
tokens[-1] = (typ, u'::', lin, col)
elif typ == 'IDENT' and tokens\
and self._tokenvalue(tokens[-1]) == u'.':
# class: combine to .IDENT
tokens[-1] = ('class', u'.'+val, lin, col)
elif typ == 'IDENT' and tokens and \
self._tokenvalue(tokens[-1]).startswith(u':') and\
not self._tokenvalue(tokens[-1]).endswith(u'('):
# pseudo-X: combine to :IDENT or ::IDENT but not ":a(" + "b"
if self._tokenvalue(tokens[-1]).startswith(u'::'):
t = 'pseudo-element'
else:
t = 'pseudo-class'
tokens[-1] = (t, self._tokenvalue(tokens[-1])+val, lin, col)
elif typ == 'FUNCTION' and val == u'not(' and tokens and \
u':' == self._tokenvalue(tokens[-1]):
tokens[-1] = ('negation', u':' + val, lin, tokens[-1][3])
elif typ == 'FUNCTION' and tokens\
and self._tokenvalue(tokens[-1]).startswith(u':'):
# pseudo-X: combine to :FUNCTION( or ::FUNCTION(
if self._tokenvalue(tokens[-1]).startswith(u'::'):
t = 'pseudo-element'
else:
t = 'pseudo-class'
tokens[-1] = (t, self._tokenvalue(tokens[-1])+val, lin, col)
elif val == u'*' and tokens and\
self._type(tokens[-1]) == 'namespace_prefix' and\
self._tokenvalue(tokens[-1]).endswith(u'|'):
# combine prefix|*
tokens[-1] = ('universal', self._tokenvalue(tokens[-1])+val,
lin, col)
elif val == u'*':
# universal: "*"
tokens.append(('universal', val, lin, col))
elif val == u'|' and tokens and\
self._type(tokens[-1]) in (self._prods.IDENT, 'universal') and\
self._tokenvalue(tokens[-1]).find(u'|') == -1:
# namespace_prefix: "IDENT|" or "*|"
tokens[-1] = ('namespace_prefix',
self._tokenvalue(tokens[-1])+u'|', lin, col)
elif val == u'|':
# namespace_prefix: "|"
tokens.append(('namespace_prefix', val, lin, col))
else:
tokens.append(t)
# TODO: back to generator but not elegant at all!
tokenizer = (t for t in tokens)
# for closures: must be a mutable
new = {'context': [''], # stack of: 'attrib', 'negation', 'pseudo'
'element': None,
'_PREFIX': None,
'specificity': [0, 0, 0, 0], # mutable, finally a tuple!
'wellformed': True
}
# used for equality checks and setting of a space combinator
S = u' '
def append(seq, val, typ=None, token=None):
"""
appends to seq
namespace_prefix, IDENT will be combined to a tuple
(prefix, name) where prefix might be None, the empty string
or a prefix.
Saved are also:
- specificity definition: style, id, class/att, type
- element: the element this Selector is for
"""
context = new['context'][-1]
if token:
line, col = token[2], token[3]
else:
line, col = None, None
if typ == '_PREFIX':
# SPECIAL TYPE: save prefix for combination with next
new['_PREFIX'] = val[:-1]
# handle next time
return
if new['_PREFIX'] is not None:
# as saved from before and reset to None
prefix, new['_PREFIX'] = new['_PREFIX'], None
elif typ == 'universal' and '|' in val:
# val == *|* or prefix|*
prefix, val = val.split('|')
else:
prefix = None
# namespace
if (typ.endswith('-selector') or typ == 'universal') and not (
'attribute-selector' == typ and not prefix):
# att **IS NOT** in default ns
if prefix == u'*':
# *|name: in ANY_NS
namespaceURI = cssutils._ANYNS
elif prefix is None:
# e or *: default namespace with prefix u'' or local-name()
namespaceURI = namespaces.get(u'', None)
elif prefix == u'':
# |name or |*: in no (or the empty) namespace
namespaceURI = u''
else:
# explicit namespace prefix
# does not raise KeyError, see _SimpleNamespaces
namespaceURI = namespaces[prefix]
if namespaceURI is None:
new['wellformed'] = False
self._log.error(
u'Selector: No namespaceURI found for prefix %r' %
prefix, token=token, error=xml.dom.NamespaceErr)
return
# val is now (namespaceprefix, name) tuple
val = (namespaceURI, val)
# specificity
if not context or context == 'negation':
if 'id' == typ:
new['specificity'][1] += 1
elif 'class' == typ or '[' == val:
new['specificity'][2] += 1
elif typ in ('type-selector', 'negation-type-selector',
'pseudo-element'):
new['specificity'][3] += 1
if not context and typ in ('type-selector', 'universal'):
# define element
new['element'] = val
seq.append(val, typ, line=line, col=col)
# expected constants
simple_selector_sequence = 'type_selector universal HASH class attrib pseudo negation '
simple_selector_sequence2 = 'HASH class attrib pseudo negation '
element_name = 'element_name'
negation_arg = 'type_selector universal HASH class attrib pseudo'
negationend = ')'
attname = 'prefix attribute'
attname2 = 'attribute'
attcombinator = 'combinator ]' # optional
attvalue = 'value' # optional
attend = ']'
expressionstart = 'PLUS - DIMENSION NUMBER STRING IDENT'
expression = expressionstart + ' )'
combinator = ' combinator'
def _COMMENT(expected, seq, token, tokenizer=None):
"special implementation for comment token"
append(seq, cssutils.css.CSSComment([token]), 'COMMENT',
token=token)
return expected
def _S(expected, seq, token, tokenizer=None):
# S
context = new['context'][-1]
if context.startswith('pseudo-'):
if seq and seq[-1].value not in u'+-':
# e.g. x:func(a + b)
append(seq, S, 'S', token=token)
return expected
elif context != 'attrib' and 'combinator' in expected:
append(seq, S, 'descendant', token=token)
return simple_selector_sequence + combinator
else:
return expected
def _universal(expected, seq, token, tokenizer=None):
# *|* or prefix|*
context = new['context'][-1]
val = self._tokenvalue(token)
if 'universal' in expected:
append(seq, val, 'universal', token=token)
if 'negation' == context:
return negationend
else:
return simple_selector_sequence2 + combinator
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected universal.', token=token)
return expected
def _namespace_prefix(expected, seq, token, tokenizer=None):
# prefix| => element_name
# or prefix| => attribute_name if attrib
context = new['context'][-1]
val = self._tokenvalue(token)
if 'attrib' == context and 'prefix' in expected:
# [PREFIX|att]
append(seq, val, '_PREFIX', token=token)
return attname2
elif 'type_selector' in expected:
# PREFIX|*
append(seq, val, '_PREFIX', token=token)
return element_name
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected namespace prefix.', token=token)
return expected
def _pseudo(expected, seq, token, tokenizer=None):
# pseudo-class or pseudo-element :a ::a :a( ::a(
"""
/* '::' starts a pseudo-element, ':' a pseudo-class */
/* Exceptions: :first-line, :first-letter, :before and :after. */
/* Note that pseudo-elements are restricted to one per selector and */
/* occur only in the last simple_selector_sequence. */
"""
context = new['context'][-1]
val, typ = self._tokenvalue(token, normalize=True), self._type(token)
if 'pseudo' in expected:
if val in (':first-line', ':first-letter', ':before', ':after'):
# always pseudo-element ???
typ = 'pseudo-element'
append(seq, val, typ, token=token)
if val.endswith(u'('):
# function
new['context'].append(typ) # "pseudo-" "class" or "element"
return expressionstart
elif 'negation' == context:
return negationend
elif 'pseudo-element' == typ:
# only one per element, check at ) also!
return combinator
else:
return simple_selector_sequence2 + combinator
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected start of pseudo.', token=token)
return expected
def _expression(expected, seq, token, tokenizer=None):
# [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
context = new['context'][-1]
val, typ = self._tokenvalue(token), self._type(token)
if context.startswith('pseudo-'):
append(seq, val, typ, token=token)
return expression
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected %s.' % typ, token=token)
return expected
def _attcombinator(expected, seq, token, tokenizer=None):
# context: attrib
# PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES |
# DASHMATCH
context = new['context'][-1]
val, typ = self._tokenvalue(token), self._type(token)
if 'attrib' == context and 'combinator' in expected:
# combinator in attrib
append(seq, val, typ.lower(), token=token)
return attvalue
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected %s.' % typ, token=token)
return expected
def _string(expected, seq, token, tokenizer=None):
# identifier
context = new['context'][-1]
typ, val = self._type(token), self._stringtokenvalue(token)
# context: attrib
if 'attrib' == context and 'value' in expected:
# attrib: [...=VALUE]
append(seq, val, typ, token=token)
return attend
# context: pseudo
elif context.startswith('pseudo-'):
# :func(...)
append(seq, val, typ, token=token)
return expression
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected STRING.', token=token)
return expected
def _ident(expected, seq, token, tokenizer=None):
# identifier
context = new['context'][-1]
val, typ = self._tokenvalue(token), self._type(token)
# context: attrib
if 'attrib' == context and 'attribute' in expected:
# attrib: [...|ATT...]
append(seq, val, 'attribute-selector', token=token)
return attcombinator
elif 'attrib' == context and 'value' in expected:
# attrib: [...=VALUE]
append(seq, val, 'attribute-value', token=token)
return attend
# context: negation
elif 'negation' == context:
# negation: (prefix|IDENT)
append(seq, val, 'negation-type-selector', token=token)
return negationend
# context: pseudo
elif context.startswith('pseudo-'):
# :func(...)
append(seq, val, typ, token=token)
return expression
elif 'type_selector' in expected or element_name == expected:
# element name after ns or complete type_selector
append(seq, val, 'type-selector', token=token)
return simple_selector_sequence2 + combinator
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected IDENT.',
token=token)
return expected
def _class(expected, seq, token, tokenizer=None):
# .IDENT
context = new['context'][-1]
val = self._tokenvalue(token)
if 'class' in expected:
append(seq, val, 'class', token=token)
if 'negation' == context:
return negationend
else:
return simple_selector_sequence2 + combinator
else:
new['wellformed'] = False
self._log.error(
u'Selector: Unexpected class.', token=token)
return expected
def _hash(expected, seq, token, tokenizer=None):
    """Production for a HASH token (``#IDENT``).

    Appends an 'id' item when a HASH is allowed here, otherwise marks the
    selector as not wellformed.  Returns the states expected next.
    """
    current = new['context'][-1]
    name = self._tokenvalue(token)
    if 'HASH' not in expected:
        new['wellformed'] = False
        self._log.error(
            u'Selector: Unexpected HASH.', token=token)
        return expected
    append(seq, name, 'id', token=token)
    if 'negation' == current:
        # inside not(...) only the closing parenthesis may follow
        return negationend
    return simple_selector_sequence2 + combinator
def _char(expected, seq, token, tokenizer=None):
    """Production for a single CHAR token: one of ``+ > ~ ) [ ] = - ,``.

    The meaning of the character depends on the innermost entry of
    ``new['context']`` (attrib, negation, pseudo-* or the root context)
    and on what is currently ``expected``.  The branches are tried in
    order, so the context specific ones win over the generic ones.
    Returns the states expected next; on an unexpected character the
    selector is flagged as not wellformed and ``expected`` is returned
    unchanged.
    """
    # + > ~ ) [ ] + -
    context = new['context'][-1]
    val = self._tokenvalue(token)
    # context: attrib
    if u']' == val and 'attrib' == context and ']' in expected:
        # end of attrib
        append(seq, val, 'attribute-end', token=token)
        context = new['context'].pop() # attrib is done
        # continue with whatever context enclosed the attribute selector
        context = new['context'][-1]
        if 'negation' == context:
            return negationend
        else:
            return simple_selector_sequence2 + combinator
    elif u'=' == val and 'attrib' == context and 'combinator' in expected:
        # combinator in attrib
        append(seq, val, 'equals', token=token)
        return attvalue
    # context: negation
    elif u')' == val and 'negation' == context and u')' in expected:
        # not(negation_arg)
        append(seq, val, 'negation-end', token=token)
        new['context'].pop() # negation is done
        context = new['context'][-1]
        return simple_selector_sequence + combinator
    # context: pseudo (at least one expression)
    elif val in u'+-' and context.startswith('pseudo-'):
        # :func(+ -)
        _names = {'+': 'plus', '-': 'minus'}
        # a "+" directly after an item whose value equals S replaces that
        # item instead of being appended
        # NOTE(review): S is defined outside this view, presumably the
        # whitespace value -- confirm
        if val == u'+' and seq and seq[-1].value == S:
            seq.replace(-1, val, _names[val])
        else:
            append(seq, val, _names[val],
                   token=token)
        return expression
    elif u')' == val and context.startswith('pseudo-') and\
         expression == expected:
        # :func(expression)
        append(seq, val, 'function-end', token=token)
        new['context'].pop() # pseudo is done
        # ``context`` still holds the value from before the pop
        if 'pseudo-element' == context:
            return combinator
        else:
            return simple_selector_sequence + combinator
    # context: ROOT
    elif u'[' == val and 'attrib' in expected:
        # start of [attrib]
        append(seq, val, 'attribute-start', token=token)
        new['context'].append('attrib')
        return attname
    elif val in u'+>~' and 'combinator' in expected:
        # no other combinator except S may be following
        _names = {
            '>': 'child',
            '+': 'adjacent-sibling',
            '~': 'following-sibling'}
        # an explicit combinator replaces a directly preceding item whose
        # value equals S
        if seq and seq[-1].value == S:
            seq.replace(-1, val, _names[val])
        else:
            append(seq, val, _names[val], token=token)
        return simple_selector_sequence
    elif u',' == val:
        # not a selectorlist
        new['wellformed'] = False
        self._log.error(
            u'Selector: Single selector only.',
            error=xml.dom.InvalidModificationErr,
            token=token)
        return expected
    else:
        new['wellformed'] = False
        self._log.error(
            u'Selector: Unexpected CHAR.', token=token)
        return expected
def _negation(expected, seq, token, tokenizer=None):
    """Production for the start of a negation, ``not(``.

    Opens a 'negation' context and appends a 'negation-start' item when
    a negation is allowed here, otherwise marks the selector as not
    wellformed.  Returns the states expected next.
    """
    context = new['context'][-1]
    normalized = self._tokenvalue(token, normalize=True)
    if 'negation' not in expected:
        new['wellformed'] = False
        self._log.error(
            u'Selector: Unexpected negation.', token=token)
        return expected
    # the context is pushed before the item is appended
    new['context'].append('negation')
    append(seq, normalized, 'negation-start', token=token)
    return negation_arg
# parse the tokens as one selector, starting in state simple_selector_sequence
newseq = self._tempSeq()
wellformed, expected = self._parse(expected=simple_selector_sequence,
seq=newseq, tokenizer=tokenizer,
productions={'CHAR': _char,
'class': _class,
'HASH': _hash,
'STRING': _string,
'IDENT': _ident,
'namespace_prefix': _namespace_prefix,
'negation': _negation,
'pseudo-class': _pseudo,
'pseudo-element': _pseudo,
'universal': _universal,
# pseudo
'NUMBER': _expression,
'DIMENSION': _expression,
# attribute
'PREFIXMATCH': _attcombinator,
'SUFFIXMATCH': _attcombinator,
'SUBSTRINGMATCH': _attcombinator,
'DASHMATCH': _attcombinator,
'INCLUDES': _attcombinator,
'S': _S,
'COMMENT': _COMMENT})
wellformed = wellformed and new['wellformed']
# post condition
if len(new['context']) > 1 or not newseq:
wellformed = False
self._log.error(u'Selector: Invalid or incomplete selector: %s' %
self._valuestr(selectorText))
if expected == 'element_name':
wellformed = False
self._log.error(u'Selector: No element name found: %s' %
self._valuestr(selectorText))
if expected == simple_selector_sequence and newseq:
wellformed = False
self._log.error(u'Selector: Cannot end with combinator: %s' %
self._valuestr(selectorText))
if newseq and hasattr(newseq[-1].value, 'strip') and \
newseq[-1].value.strip() == u'':
del newseq[-1]
# set
if wellformed:
self.__namespaces = namespaces
self._element = new['element']
self._specificity = tuple(new['specificity'])
self._setSeq(newseq)
# filter that only used ones are kept
self.__namespaces = self._getUsedNamespaces()
# serialized selector text; assigning goes through _setSelectorText
selectorText = property(_getSelectorText, _setSelectorText,
    doc="(DOM) The parsable textual representation of the selector.")
# read-only view of self._specificity
specificity = property(lambda self: self._specificity,
    doc="Specificity of this selector (READONLY).")
# True as soon as self.seq contains at least one item
wellformed = property(lambda self: bool(len(self.seq)))
def __repr__(self):
    """Return a constructor-like representation.

    The namespaces actually used are included (as a tuple together with
    the selector text) only if any namespaces are known at all.
    """
    has_namespaces = self.__getNamespaces()
    text = self.selectorText
    if has_namespaces:
        shown = (text, self._getUsedNamespaces())
    else:
        shown = text
    return u"cssutils.css.%s(selectorText=%r)" % (
        self.__class__.__name__, shown)
def __str__(self):
return u"<cssutils.css.%s object selectorText=%r specificity=%r _namespaces=%r at 0x%x>" % (
self.__class__.__name__, self.selectorText, self.specificity,
self._getUsedNamespaces(), id(self))
def _getUsedUris(self):
"returns list of actually used URIs in this Selector"
uris = set()
for item in self.seq:
type_, val = item.type, item.value
if type_.endswith(u'-selector') or type_ == u'universal' and \
type(val) == tuple and val[0] not in (None, u'*'):
uris.add(val[0])
return uris
def _getUsedNamespaces(self):
    """Return a new namespaces object holding only those prefix/URI
    pairs of ``self._namespaces`` whose URI is actually used."""
    used = self._getUsedUris()
    result = _SimpleNamespaces(log=self._log)
    for prefix, namespaceuri in self._namespaces.items():
        if namespaceuri not in used:
            continue
        result[prefix] = namespaceuri
    return result

View File

@ -0,0 +1,249 @@
"""SelectorList is a list of CSS Selector objects.
TODO
- remove duplicate Selectors. -> CSSOM canonicalize
- ??? CSS2 gives a special meaning to the comma (,) in selectors.
However, since it is not known if the comma may acquire other
meanings in future versions of CSS, the whole statement should be
ignored if there is an error anywhere in the selector, even though
the rest of the selector may look reasonable in CSS2.
Illegal example(s):
For example, since the "&" is not a valid token in a CSS2 selector,
a CSS2 user agent must ignore the whole second line, and not set
the color of H3 to red:
"""
__all__ = ['SelectorList']
__docformat__ = 'restructuredtext'
__version__ = '$Id: selectorlist.py 1174 2008-03-20 17:43:07Z cthedot $'
import xml.dom
import cssutils
from selector import Selector
class SelectorList(cssutils.util.Base, cssutils.util.ListSeq):
    """
    (cssutils) a list of Selectors of a CSSStyleRule

    Properties
    ==========
    length: of type unsigned long, readonly
        The number of Selector elements in the list.
    parentRule: of type CSSRule, readonly
        The CSS rule that contains this selector list or None if this
        list is not attached to a CSSRule.
    selectorText: of type DOMString
        The textual representation of the selector for the rule set. The
        implementation may have stripped out insignificant whitespace while
        parsing the selector.
    seq: (internal use!)
        A list of Selector objects
    wellformed
        if this selectorlist is wellformed regarding the Selector spec
    """
    def __init__(self, selectorText=None, parentRule=None,
                 readonly=False):
        """
        initializes SelectorList with optional selectorText

        :Parameters:
            selectorText
                parsable list of Selectors
            parentRule
                the parent CSSRule if available
            readonly
                stored as ``self._readonly`` after ``selectorText`` has
                been set, so the initial text can always be assigned
        """
        super(SelectorList, self).__init__()

        self._parentRule = parentRule

        if selectorText:
            self.selectorText = selectorText

        self._readonly = readonly

    def __prepareset(self, newSelector, namespaces=None):
        """
        used by appendSelector and __setitem__

        Converts ``newSelector`` to a Selector if it is not one already.
        Returns the Selector if it is wellformed, else None.
        """
        if not namespaces:
            namespaces = {}
        self._checkReadonly()
        if not isinstance(newSelector, Selector):
            newSelector = Selector((newSelector, namespaces),
                                   parentList=self)
        if newSelector.wellformed:
            newSelector._parent = self # maybe set twice but must be!
            return newSelector

    def __setitem__(self, index, newSelector):
        """
        overwrites ListSeq.__setitem__

        Any duplicate Selectors are **not** removed.
        A Selector which is not wellformed is silently ignored.
        """
        newSelector = self.__prepareset(newSelector)
        if newSelector:
            self.seq[index] = newSelector

    def append(self, newSelector):
        "same as appendSelector(newSelector)"
        self.appendSelector(newSelector)

    length = property(lambda self: len(self),
        doc="The number of Selector elements in the list.")

    def __getNamespaces(self):
        "uses children namespaces if not attached to a sheet, else the sheet's ones"
        try:
            return self.parentRule.parentStyleSheet.namespaces
        except AttributeError:
            # not attached: merge the namespaces of all child Selectors
            namespaces = {}
            for selector in self.seq:
                namespaces.update(selector._namespaces)
            return namespaces

    _namespaces = property(__getNamespaces, doc="""if this SelectorList is
        attached to a CSSStyleSheet the namespaces of that sheet are mirrored
        here. While the SelectorList (or parentRule(s) are
        not attached the namespaces of all children Selectors are used.""")

    parentRule = property(lambda self: self._parentRule,
        doc="(DOM) The CSS rule that contains this SelectorList or\
        None if this SelectorList is not attached to a CSSRule.")

    def _getSelectorText(self):
        "returns serialized format"
        return cssutils.ser.do_css_SelectorList(self)

    def _setSelectorText(self, selectorText):
        """
        :param selectorText:
            comma-separated list of selectors or a tuple of
            (selectorText, dict-of-namespaces)
        :Exceptions:
            - `NAMESPACE_ERR`: (Selector)
              Raised if the specified selector uses an unknown namespace
              prefix.
            - `SYNTAX_ERR`: (self)
              Raised if the specified CSS string value has a syntax error
              and is unparsable.
            - `NO_MODIFICATION_ALLOWED_ERR`: (self)
              Raised if this rule is readonly.
        """
        self._checkReadonly()

        # might be (selectorText, namespaces)
        selectorText, namespaces = self._splitNamespacesOff(selectorText)
        try:
            # use parent's only if available
            namespaces = self.parentRule.parentStyleSheet.namespaces
        except AttributeError:
            pass

        wellformed = True
        tokenizer = self._tokenize2(selectorText)
        newseq = []

        # True: nothing parsed yet, u',': a selector must follow,
        # None: the list may end here
        expected = True
        while True:
            # find all upto and including next ",", EOF or nothing
            selectortokens = self._tokensupto2(tokenizer, listseponly=True)
            if selectortokens:
                if self._tokenvalue(selectortokens[-1]) == ',':
                    # keep the token *value*, not the token itself, so the
                    # post condition below can recognize a trailing comma
                    expected = self._tokenvalue(selectortokens.pop())
                else:
                    expected = None

                selector = Selector((selectortokens, namespaces),
                                    parentList=self)
                if selector.wellformed:
                    newseq.append(selector)
                else:
                    wellformed = False
                    self._log.error(u'SelectorList: Invalid Selector: %s' %
                                    self._valuestr(selectortokens))
            else:
                break

        # post condition
        if u',' == expected:
            wellformed = False
            self._log.error(u'SelectorList: Cannot end with ",": %r' %
                            self._valuestr(selectorText))
        elif expected:
            # no tokens were found at all
            wellformed = False
            self._log.error(u'SelectorList: Unknown Syntax: %r' %
                            self._valuestr(selectorText))
        if wellformed:
            self.seq = newseq

    selectorText = property(_getSelectorText, _setSelectorText,
        doc="""(cssutils) The textual representation of the selector for
            a rule set.""")

    wellformed = property(lambda self: bool(len(self.seq)))

    def appendSelector(self, newSelector):
        """
        Append newSelector (a string will be converted to a new
        Selector).  An already present Selector with the same
        selectorText is removed first, so the new one ends up last.

        :param newSelector:
            comma-separated list of selectors or a tuple of
            (selectorText, dict-of-namespaces)
        :returns: New Selector or None if newSelector is not wellformed.
        :Exceptions:
            - `NAMESPACE_ERR`: (self)
              Raised if the specified selector uses an unknown namespace
              prefix.
            - `SYNTAX_ERR`: (self)
              Raised if the specified CSS string value has a syntax error
              and is unparsable.
            - `NO_MODIFICATION_ALLOWED_ERR`: (self)
              Raised if this rule is readonly.
        """
        self._checkReadonly()

        # might be (selectorText, namespaces)
        newSelector, namespaces = self._splitNamespacesOff(newSelector)
        try:
            # use parent's only if available
            namespaces = self.parentRule.parentStyleSheet.namespaces
        except AttributeError:
            # use already present namespaces plus new given ones
            _namespaces = self._namespaces
            _namespaces.update(namespaces)
            namespaces = _namespaces

        newSelector = self.__prepareset(newSelector, namespaces)
        if newSelector:
            # rebuild self.seq in place without equal Selectors
            seq = self.seq[:]
            del self.seq[:]
            for s in seq:
                if s.selectorText != newSelector.selectorText:
                    self.seq.append(s)
            self.seq.append(newSelector)
            return newSelector

    def __repr__(self):
        if self._namespaces:
            st = (self.selectorText, self._namespaces)
        else:
            st = self.selectorText
        return "cssutils.css.%s(selectorText=%r)" % (
                self.__class__.__name__, st)

    def __str__(self):
        return "<cssutils.css.%s object selectorText=%r _namespaces=%r at 0x%x>" % (
                self.__class__.__name__, self.selectorText, self._namespaces,
                id(self))

    def _getUsedUris(self):
        "used by CSSStyleSheet to check if @namespace rules are needed"
        uris = set()
        for s in self:
            uris.update(s._getUsedUris())
        return uris

View File

@ -0,0 +1,131 @@
"""productions for CSS 2.1
CSS2_1_MACROS and CSS2_1_PRODUCTIONS are from both
http://www.w3.org/TR/CSS21/grammar.html and
http://www.w3.org/TR/css3-syntax/#grammar0
"""
__all__ = ['CSSProductions', 'MACROS', 'PRODUCTIONS']
__docformat__ = 'restructuredtext'
__version__ = '$Id: css2productions.py 1394 2008-07-27 13:29:22Z cthedot $'
# option case-insensitive
# Named regular expression fragments of the CSS 2.1 grammar.  A ``{name}``
# inside a value refers to another entry of this dict.  The single letter
# entries (A, C, D, ...) match that letter written plainly or as a CSS
# escape.
MACROS = {
    'h': r'[0-9a-f]',
    #'nonascii': r'[\200-\377]',
    'nonascii': r'[^\0-\177]', # CSS3
    'unicode': r'\\{h}{1,6}(\r\n|[ \t\r\n\f])?',
    'escape': r'{unicode}|\\[^\r\n\f0-9a-f]',
    'nmstart': r'[_a-zA-Z]|{nonascii}|{escape}',
    'nmchar': r'[_a-zA-Z0-9-]|{nonascii}|{escape}',
    'string1': r'\"([^\n\r\f\\"]|\\{nl}|{escape})*\"',
    'string2': r"\'([^\n\r\f\\']|\\{nl}|{escape})*\'",
    'invalid1': r'\"([^\n\r\f\\"]|\\{nl}|{escape})*',
    'invalid2': r"\'([^\n\r\f\\']|\\{nl}|{escape})*",
    'comment': r'\/\*[^*]*\*+([^/*][^*]*\*+)*\/',
    # CSS list 080725 19:43
    # \/\*([^*\\]|{escape})*\*+(([^/*\\]|{escape})[^*]*\*+)*\/
    'ident': r'[-]?{nmstart}{nmchar}*',
    'name': r'{nmchar}+',
    # CHANGED TO SPEC: added "-?"
    'num': r'-?[0-9]*\.[0-9]+|[0-9]+',
    'string': r'{string1}|{string2}',
    'invalid': r'{invalid1}|{invalid2}',
    'url': r'([!#$%&*-~]|{nonascii}|{escape})*',
    's': r'[ \t\r\n\f]+',
    'w': r'{s}?',
    'nl': r'\n|\r\n|\r|\f',
    'range': r'\?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))',
    'A': r'a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?',
    'C': r'c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?',
    'D': r'd|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?',
    'E': r'e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?',
    'F': r'f|\\0{0,4}(46|66)(\r\n|[ \t\r\n\f])?',
    'G': r'g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g',
    'H': r'h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h',
    'I': r'i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i',
    'K': r'k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k',
    'M': r'm|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m',
    'N': r'n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n',
    # "O"/"o" are code points 4f/6f (51/71 would be "Q"/"q")
    'O': r'o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o',
    'P': r'p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p',
    'R': r'r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r',
    'S': r's|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s',
    'T': r't|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t',
    'X': r'x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x',
    'Z': r'z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z',
    }
# Ordered list of (token name, regex) pairs of the CSS 2.1 tokenizer; the
# regexes refer to MACROS entries via ``{name}``.  The trailing comments
# quote the corresponding rule of the CSS 2.1 lexical scanner.
PRODUCTIONS = [
    ('URI', r'url\({w}{string}{w}\)'), #"url("{w}{string}{w}")" {return URI;}
    ('URI', r'url\({w}{url}{w}\)'), #"url("{w}{url}{w}")" {return URI;}
    ('FUNCTION', r'{ident}\('), #{ident}"(" {return FUNCTION;}
    ('IMPORT_SYM', r'@{I}{M}{P}{O}{R}{T}'), #"@import" {return IMPORT_SYM;}
    ('PAGE_SYM', r'@{P}{A}{G}{E}'), #"@page" {return PAGE_SYM;}
    ('MEDIA_SYM', r'@{M}{E}{D}{I}{A}'), #"@media" {return MEDIA_SYM;}
    ('FONT_FACE_SYM', r'@{F}{O}{N}{T}\-{F}{A}{C}{E}'), #"@font-face" {return FONT_FACE_SYM;}
    # CHANGED TO SPEC: only @charset
    ('CHARSET_SYM', r'@charset '), #"@charset " {return CHARSET_SYM;}
    ('NAMESPACE_SYM', r'@{N}{A}{M}{E}{S}{P}{A}{C}{E}'), #"@namespace" {return NAMESPACE_SYM;}
    # CHANGED TO SPEC: ATKEYWORD
    ('ATKEYWORD', r'\@{ident}'),
    ('IDENT', r'{ident}'), #{ident} {return IDENT;}
    ('STRING', r'{string}'), #{string} {return STRING;}
    ('INVALID', r'{invalid}'), # {return INVALID; /* unclosed string */}
    ('HASH', r'\#{name}'), #"#"{name} {return HASH;}
    ('PERCENTAGE', r'{num}%'), #{num}% {return PERCENTAGE;}
    ('LENGTH', r'{num}{E}{M}'), #{num}em {return EMS;}
    ('LENGTH', r'{num}{E}{X}'), #{num}ex {return EXS;}
    ('LENGTH', r'{num}{P}{X}'), #{num}px {return LENGTH;}
    ('LENGTH', r'{num}{C}{M}'), #{num}cm {return LENGTH;}
    ('LENGTH', r'{num}{M}{M}'), #{num}mm {return LENGTH;}
    ('LENGTH', r'{num}{I}{N}'), #{num}in {return LENGTH;}
    ('LENGTH', r'{num}{P}{T}'), #{num}pt {return LENGTH;}
    ('LENGTH', r'{num}{P}{C}'), #{num}pc {return LENGTH;}
    ('ANGLE', r'{num}{D}{E}{G}'), #{num}deg {return ANGLE;}
    ('ANGLE', r'{num}{R}{A}{D}'), #{num}rad {return ANGLE;}
    ('ANGLE', r'{num}{G}{R}{A}{D}'), #{num}grad {return ANGLE;}
    ('TIME', r'{num}{M}{S}'), #{num}ms {return TIME;}
    ('TIME', r'{num}{S}'), #{num}s {return TIME;}
    ('FREQ', r'{num}{H}{Z}'), #{num}Hz {return FREQ;}
    ('FREQ', r'{num}{K}{H}{Z}'), #{num}kHz {return FREQ;}
    ('DIMEN', r'{num}{ident}'), #{num}{ident} {return DIMEN;}
    ('NUMBER', r'{num}'), #{num} {return NUMBER;}
    #('UNICODERANGE', r'U\+{range}'), #U\+{range} {return UNICODERANGE;}
    #('UNICODERANGE', r'U\+{h}{1,6}-{h}{1,6}'), #U\+{h}{1,6}-{h}{1,6} {return UNICODERANGE;}
    # --- CSS3 ---
    ('UNICODE-RANGE', r'[0-9A-F?]{1,6}(\-[0-9A-F]{1,6})?'),
    ('CDO', r'\<\!\-\-'), #"<!--" {return CDO;}
    ('CDC', r'\-\-\>'), #"-->" {return CDC;}
    ('S', r'{s}'),# {return S;}
    # \/\*[^*]*\*+([^/*][^*]*\*+)*\/ /* ignore comments */
    # {s}+\/\*[^*]*\*+([^/*][^*]*\*+)*\/ {unput(' '); /*replace by space*/}
    ('INCLUDES', r'\~\='), #"~=" {return INCLUDES;}
    ('DASHMATCH', r'\|\='), #"|=" {return DASHMATCH;}
    ('LBRACE', r'\{'), #{w}"{" {return LBRACE;}
    ('PLUS', r'\+'), #{w}"+" {return PLUS;}
    ('GREATER', r'\>'), #{w}">" {return GREATER;}
    ('COMMA', r'\,'), #{w}"," {return COMMA;}
    ('IMPORTANT_SYM', r'\!({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T}'), #"!{w}important" {return IMPORTANT_SYM;}
    # raw string like every other pattern (the value itself is unchanged)
    ('COMMENT', r'\/\*[^*]*\*+([^/][^*]*\*+)*\/'), # /* ignore comments */
    ('CLASS', r'\.'), #. {return *yytext;}
    # --- CSS3! ---
    ('CHAR', r'[^"\']'),
    ]
class CSSProductions(object):
    "has one attribute per PRODUCTIONS entry, set by the loop below"

# expose each token name as CSSProductions.NAME ("-" becomes "_");
# the value is the unchanged token name
for _production in PRODUCTIONS:
    _tokenname = _production[0]
    setattr(CSSProductions, _tokenname.replace('-', '_'), _tokenname)

View File

@ -0,0 +1,62 @@
"""productions for CSS 3
CSS3_MACROS and CSS3_PRODUCTIONS are from http://www.w3.org/TR/css3-syntax
"""
__all__ = ['CSSProductions', 'MACROS', 'PRODUCTIONS']
__docformat__ = 'restructuredtext'
__version__ = '$Id: css3productions.py 1116 2008-03-05 13:52:23Z cthedot $'
# a complete list of css3 macros
# Named regular expression fragments; ``{name}`` inside a value refers to
# another entry of this dict.
MACROS = {
    'ident': r'[-]?{nmstart}{nmchar}*',
    'name': r'{nmchar}+',
    'nmstart': r'[_a-zA-Z]|{nonascii}|{escape}',
    'nonascii': r'[^\0-\177]',
    # backslash, 1-6 hex digits, optionally one whitespace character
    'unicode': r'\\[0-9a-f]{1,6}{wc}?',
    'escape': r'{unicode}|\\[ -~\200-\777]',
    # 'escape': r'{unicode}|\\[ -~\200-\4177777]',
    'nmchar': r'[-_a-zA-Z0-9]|{nonascii}|{escape}',
    # CHANGED TO SPEC: added "-?"
    'num': r'-?[0-9]*\.[0-9]+|[0-9]+', #r'[-]?\d+|[-]?\d*\.\d+',
    # single quoted (may contain ") or double quoted (may contain ')
    'string': r'''\'({stringchar}|\")*\'|\"({stringchar}|\')*\"''',
    'stringchar': r'{urlchar}| |\\{nl}',
    # NOTE: the range \x27-\x7E includes the single quote (\x27)
    'urlchar': r'[\x09\x21\x23-\x26\x27-\x7E]|{nonascii}|{escape}',
    # what if \r\n, \n matches first?
    'nl': r'\n|\r\n|\r|\f',
    'w': r'{wc}*',
    'wc': r'\t|\r|\n|\f|\x20'
    }
# The following productions are the complete list of tokens in CSS3, the productions are **ordered**:
# (token name, regex) pairs; the regexes refer to MACROS entries via
# ``{name}``.  The order of the entries is significant.
PRODUCTIONS = [
    ('BOM', r'\xFEFF'),
    ('URI', r'url\({w}({string}|{urlchar}*){w}\)'),
    ('FUNCTION', r'{ident}\('),
    ('ATKEYWORD', r'\@{ident}'),
    ('IDENT', r'{ident}'),
    ('STRING', r'{string}'),
    ('HASH', r'\#{name}'),
    ('PERCENTAGE', r'{num}\%'),
    ('DIMENSION', r'{num}{ident}'),
    ('NUMBER', r'{num}'),
    #???
    # plain raw string like all other patterns (was ur'...', which is a
    # syntax error on Python 3)
    ('UNICODE-RANGE', r'[0-9A-F?]{1,6}(\-[0-9A-F]{1,6})?'),
    ('CDO', r'\<\!\-\-'),
    ('CDC', r'\-\-\>'),
    ('S', r'{wc}+'),
    ('INCLUDES', r'\~\='),
    ('DASHMATCH', r'\|\='),
    ('PREFIXMATCH', r'\^\='),
    ('SUFFIXMATCH', r'\$\='),
    ('SUBSTRINGMATCH', r'\*\='),
    ('COMMENT', r'\/\*[^*]*\*+([^/][^*]*\*+)*\/'),
    ('CHAR', r'[^"\']'),
    ]
class CSSProductions(object):
    "has attributes for all PRODUCTIONS"

# expose each token name as CSSProductions.NAME ("-" becomes "_");
# the value is the unchanged token name
for _production in PRODUCTIONS:
    _tokenname = _production[0]
    setattr(CSSProductions, _tokenname.replace('-', '_'), _tokenname)

View File

@ -0,0 +1,121 @@
"""productions for cssutils based on a mix of CSS 2.1 and CSS 3 Syntax
productions
- http://www.w3.org/TR/css3-syntax
- http://www.w3.org/TR/css3-syntax/#grammar0
open issues
- numbers contain "-" if present
- HASH: #aaa is, #000 is not anymore,
CSS2.1: 'nmchar': r'[_a-z0-9-]|{nonascii}|{escape}',
CSS3: 'nmchar': r'[_a-z-]|{nonascii}|{escape}',
"""
__all__ = ['CSSProductions', 'MACROS', 'PRODUCTIONS']
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssproductions.py 1378 2008-07-15 20:02:19Z cthedot $'
# a complete list of css3 macros
# Named regular expression fragments; ``{name}`` inside a value refers to
# another entry of this dict.  The single letter entries (A, C, D, ...)
# match that letter in either case, written plainly or as a CSS escape.
MACROS = {
    'ident': r'[-]?{nmstart}{nmchar}*',
    'name': r'{nmchar}+',
    'nmstart': r'[_a-zA-Z]|{nonascii}|{escape}',
    'nonascii': r'[^\0-\177]',
    # backslash, 1-6 hex digits, optionally one newline or whitespace char
    'unicode': r'\\[0-9a-f]{1,6}(?:{nl}|{wc})?',
    'escape': r'{unicode}|\\[ -~\200-\777]',
    # 'escape': r'{unicode}|\\[ -~\200-\4177777]',
    'nmchar': r'[-_a-zA-Z0-9]|{nonascii}|{escape}',
    'num': r'[0-9]*\.[0-9]+|[0-9]+', #r'[-]?\d+|[-]?\d*\.\d+',
    # single quoted alternative followed by the double quoted one
    'string': r"""\'({stringesc1}|{stringchar}|")*\'""" + "|" + '''\"({stringesc2}|{stringchar}|')*\"''',
    # seems an error in CSS 3 but is allowed in CSS 2.1
    'stringesc1' : r"\\'",
    'stringesc2' : r'\\"',
    'stringchar': r'{urlchar}| |\\{nl}',
    # urlchar ::= [#x9#x21#x23-#x26#x27-#x7E] | nonascii | escape
    # 0x27 is "'" which should not be in here..., should ) be in here???
    'urlchar': r'[\x09\x21\x23-\x26\x28-\x7E]|{nonascii}|{escape}',
    # from CSS2.1
    'invalid': r'{invalid1}|{invalid2}',
    'invalid1': r'\"([^\n\r\f\\"]|\\{nl}|{escape})*',
    'invalid2': r"\'([^\n\r\f\\']|\\{nl}|{escape})*",
    # \r\n should be counted as one char see unicode above
    'nl': r'\n|\r\n|\r|\f',
    'w': r'{wc}*',
    'wc': r'\t|\r|\n|\f|\x20',
    'comment': r'\/\*[^*]*\*+([^/][^*]*\*+)*\/',
    'A': r'A|a|\\0{0,4}(?:41|61)(?:\r\n|[ \t\r\n\f])?',
    'C': r'C|c|\\0{0,4}(?:43|63)(?:\r\n|[ \t\r\n\f])?',
    'D': r'D|d|\\0{0,4}(?:44|64)(?:\r\n|[ \t\r\n\f])?',
    'E': r'E|e|\\0{0,4}(?:45|65)(?:\r\n|[ \t\r\n\f])?',
    'F': r'F|f|\\0{0,4}(?:46|66)(?:\r\n|[ \t\r\n\f])?',
    'G': r'G|g|\\0{0,4}(?:47|67)(?:\r\n|[ \t\r\n\f])?|\\G|\\g',
    'H': r'H|h|\\0{0,4}(?:48|68)(?:\r\n|[ \t\r\n\f])?|\\H|\\h',
    'I': r'I|i|\\0{0,4}(?:49|69)(?:\r\n|[ \t\r\n\f])?|\\I|\\i',
    'K': r'K|k|\\0{0,4}(?:4b|6b)(?:\r\n|[ \t\r\n\f])?|\\K|\\k',
    'L': r'L|l|\\0{0,4}(?:4c|6c)(?:\r\n|[ \t\r\n\f])?|\\L|\\l',
    'M': r'M|m|\\0{0,4}(?:4d|6d)(?:\r\n|[ \t\r\n\f])?|\\M|\\m',
    'N': r'N|n|\\0{0,4}(?:4e|6e)(?:\r\n|[ \t\r\n\f])?|\\N|\\n',
    'O': r'O|o|\\0{0,4}(?:4f|6f)(?:\r\n|[ \t\r\n\f])?|\\O|\\o',
    'P': r'P|p|\\0{0,4}(?:50|70)(?:\r\n|[ \t\r\n\f])?|\\P|\\p',
    'R': r'R|r|\\0{0,4}(?:52|72)(?:\r\n|[ \t\r\n\f])?|\\R|\\r',
    'S': r'S|s|\\0{0,4}(?:53|73)(?:\r\n|[ \t\r\n\f])?|\\S|\\s',
    'T': r'T|t|\\0{0,4}(?:54|74)(?:\r\n|[ \t\r\n\f])?|\\T|\\t',
    'U': r'U|u|\\0{0,4}(?:55|75)(?:\r\n|[ \t\r\n\f])?|\\U|\\u',
    'X': r'X|x|\\0{0,4}(?:58|78)(?:\r\n|[ \t\r\n\f])?|\\X|\\x',
    'Z': r'Z|z|\\0{0,4}(?:5a|7a)(?:\r\n|[ \t\r\n\f])?|\\Z|\\z',
    }
# The following productions are the complete list of tokens
# used by cssutils, a mix of CSS3 and some CSS2.1 productions.
# The productions are **ordered**:
# (token name, regex) pairs; the regexes refer to MACROS entries via
# ``{name}``.  The order of the entries is significant.
PRODUCTIONS = [
    ('BOM', r'\xFEFF'), # will only be checked at beginning of CSS
    ('S', r'{wc}+'), # 1st in list of general productions
    ('URI', r'{U}{R}{L}\({w}({string}|{urlchar}*){w}\)'),
    ('FUNCTION', r'{ident}\('),
    ('IDENT', r'{ident}'),
    ('STRING', r'{string}'),
    ('INVALID', r'{invalid}'), # from CSS2.1
    ('HASH', r'\#{name}'),
    ('PERCENTAGE', r'{num}\%'),
    ('DIMENSION', r'{num}{ident}'),
    ('NUMBER', r'{num}'),
    # valid only at start so not checked every time
    #('CHARSET_SYM', r'@charset '), # from Errata includes ending space!
    ('ATKEYWORD', r'@{ident}'), # other keywords are done in the tokenizer
    #('UNICODE-RANGE', r'[0-9A-F?]{1,6}(\-[0-9A-F]{1,6})?'), #???
    ('CDO', r'\<\!\-\-'),
    ('CDC', r'\-\-\>'),
    # raw string like every other pattern (the value itself is unchanged)
    ('INCLUDES', r'\~\='),
    ('DASHMATCH', r'\|\='),
    ('PREFIXMATCH', r'\^\='),
    ('SUFFIXMATCH', r'\$\='),
    ('SUBSTRINGMATCH', r'\*\='),
    # checked specially if fullsheet is parsed
    ('COMMENT', r'{comment}'), #r'\/\*[^*]*\*+([^/][^*]*\*+)*\/'),
    ('CHAR', r'[^"\']') # MUST always be last
    ]
class CSSProductions(object):
    """
    Token name constants; one attribute per PRODUCTIONS entry is added
    by the loop below.
    """
    EOF = True
    # removed from productions as they simply are ATKEYWORD until
    # tokenizing
    CHARSET_SYM = 'CHARSET_SYM'
    FONT_FACE_SYM = 'FONT_FACE_SYM'
    MEDIA_SYM = 'MEDIA_SYM'
    IMPORT_SYM = 'IMPORT_SYM'
    NAMESPACE_SYM = 'NAMESPACE_SYM'
    PAGE_SYM = 'PAGE_SYM'

# expose each token name as CSSProductions.NAME ("-" becomes "_");
# the value is the unchanged token name
for _production in PRODUCTIONS:
    _tokenname = _production[0]
    setattr(CSSProductions, _tokenname.replace('-', '_'), _tokenname)

View File

@ -0,0 +1,117 @@
#!/usr/bin/env python
"""cssutils ErrorHandler
ErrorHandler
used as log with usual levels (debug, info, warn, error)
if instantiated with ``raiseExceptions=True`` raises exceptions instead
of logging
log
defaults to instance of ErrorHandler for any kind of log message from
lexer, parser etc.
- raiseExceptions = [False, True]
- setloglevel(loglevel)
"""
__all__ = ['ErrorHandler']
__docformat__ = 'restructuredtext'
__version__ = '$Id: errorhandler.py 1361 2008-07-13 18:12:40Z cthedot $'
import logging
import urllib2
import xml.dom
from helper import Deprecated
class _ErrorHandler(object):
    """
    handles all errors and log messages

    The usual log methods (``debug``, ``info``, ``warn``, ``error``,
    ``critical``, ``fatal``) are provided via ``__getattr__``; depending
    on ``raiseExceptions`` a call either raises an exception or writes
    to the wrapped log.
    """
    def __init__(self, log, defaultloglevel=logging.INFO,
                 raiseExceptions=True):
        """
        inits log if none given

        log
            for parse messages, default logs to sys.stderr
        defaultloglevel
            level set on the default log, ``logging.INFO`` if not given;
            ignored when ``log`` is given
        raiseExceptions
            - True: Errors will be raised e.g. during building
            - False: Errors will be written to the log, this is the
              default behaviour when parsing
        """
        if log:
            self._log = log
        else:
            import sys
            self._log = logging.getLogger('CSSUTILS')
            hdlr = logging.StreamHandler(sys.stderr)
            formatter = logging.Formatter('%(levelname)s\t%(message)s')
            hdlr.setFormatter(formatter)
            self._log.addHandler(hdlr)
            self._log.setLevel(defaultloglevel)

        self.raiseExceptions = raiseExceptions

    def __getattr__(self, name):
        """
        use self._log items

        Log calls return a handler bound to the requested level;
        a few management methods are passed through to the log unchanged.
        """
        calls = ('debug', 'info', 'warn', 'error', 'critical', 'fatal')
        other = ('setLevel', 'getEffectiveLevel', 'addHandler', 'removeHandler')

        if name in calls:
            logcall = getattr(self._log, name)
            # still stored for code relying on the attribute, but no longer
            # used to select the level: a saved reference such as
            # ``err = log.error`` must keep logging as error even if
            # another level is looked up before it is called
            self._logcall = logcall

            def handle(msg=u'', token=None, error=xml.dom.SyntaxErr,
                       neverraise=False, args=None):
                return self.__handle(msg, token=token, error=error,
                                     neverraise=neverraise, args=args,
                                     logcall=logcall)
            return handle
        elif name in other:
            return getattr(self._log, name)
        else:
            raise AttributeError(
                '(errorhandler) No Attribute %r found' % name)

    def __handle(self, msg=u'', token=None, error=xml.dom.SyntaxErr,
                 neverraise=False, args=None, logcall=None):
        """
        handles all calls
        logs or raises exception

        msg
            the message, extended by position and value of ``token``
        token
            optional token, a tuple (type, value, line, col) or an
            object with attributes ``line``, ``col`` and ``value``
        error
            exception class to raise (or a urllib2 error instance which
            is re-raised as is)
        neverraise
            if True only log, regardless of ``self.raiseExceptions``
        args
            unused
        logcall
            log method to use, defaults to the one stored last
        """
        if logcall is None:
            logcall = self._logcall
        if token:
            if isinstance(token, tuple):
                msg = u'%s [%s:%s: %s]' % (
                    msg, token[2], token[3], token[1])
            else:
                msg = u'%s [%s:%s: %s]' % (
                    msg, token.line, token.col, token.value)

        if error and self.raiseExceptions and not neverraise:
            if isinstance(error, urllib2.HTTPError) or isinstance(error, urllib2.URLError):
                # already an exception instance
                raise error
            else:
                raise error(msg)
        else:
            logcall(msg)

    def setLog(self, log):
        """set log of errorhandler's log"""
        self._log = log

    @Deprecated('Use setLog() instead.')
    def setlog(self, log):
        self.setLog(log)

    @Deprecated('Use setLevel() instead.')
    def setloglevel(self, level):
        self.setLevel(level)
class ErrorHandler(_ErrorHandler):
    """
    Singleton, see _ErrorHandler

    All instances share the ``__dict__`` of the one ``_ErrorHandler``
    created by the first instantiation; the arguments of later
    instantiations are ignored.
    """
    instance = None

    def __init__(self,
            log=None, defaultloglevel=logging.INFO, raiseExceptions=True):
        shared = ErrorHandler.instance
        if shared is None:
            shared = _ErrorHandler(log=log,
                                   defaultloglevel=defaultloglevel,
                                   raiseExceptions=raiseExceptions)
            ErrorHandler.instance = shared
        # share all state with the single real handler
        self.__dict__ = shared.__dict__

View File

@ -0,0 +1,51 @@
"""cssutils helper
"""
__all__ = ['Deprecated', 'normalize']
__docformat__ = 'restructuredtext'
__version__ = '$Id: errorhandler.py 1234 2008-05-22 20:26:12Z cthedot $'
import re
class Deprecated(object):
    """This is a decorator which can be used to mark functions
    as deprecated. It will result in a warning being emitted
    when the function is used.

    It accepts a single parameter ``msg`` which is shown with the warning.
    It should contain information which function or method to use instead.
    """
    def __init__(self, msg):
        self.msg = msg

    def __call__(self, func):
        """Return a wrapper around ``func`` which emits a
        DeprecationWarning (attributed to the caller) on every call and
        then returns the result of ``func``."""
        import functools
        import warnings

        # wraps() copies __module__, __name__ and __doc__ and updates
        # __dict__, so the wrapper looks like the decorated function
        @functools.wraps(func)
        def newFunc(*args, **kwargs):
            warnings.warn("Call to deprecated method %r. %s" %
                            (func.__name__, self.msg),
                            category=DeprecationWarning,
                            stacklevel=2)
            return func(*args, **kwargs)
        return newFunc
# simple escapes: a backslash followed by any character which is not a
# hex digit (those would start a unicode escape sequence)
_simpleescapes = re.compile(u'(\\\\[^0-9a-fA-F])').sub

def normalize(x):
    r"""
    normalizes x, namely:

    - remove any \ which is followed by a character that is not a hex
      digit (0-9a-fA-F), so for x==r"c\olor" return "color" (unicode
      escape sequences should have been resolved by the tokenizer
      already)
    - lowercase

    A false value (None, empty string) is returned unchanged.
    """
    if x:
        # drop the backslash, keep the escaped character
        x = _simpleescapes(lambda match: match.group(0)[1:], x)
        return x.lower()
    else:
        return x

View File

@ -0,0 +1,183 @@
#!/usr/bin/env python
"""a validating CSSParser
"""
__all__ = ['CSSParser']
__docformat__ = 'restructuredtext'
__version__ = '$Id: parse.py 1418 2008-08-09 19:27:50Z cthedot $'
import codecs
import os
import urllib
from helper import Deprecated
import tokenize2
import cssutils
class CSSParser(object):
"""
parses a CSS StyleSheet string or file and
returns a DOM Level 2 CSS StyleSheet object
Usage::
parser = CSSParser()
# optionally
parser.setFetcher(fetcher)
sheet = parser.parseFile('test1.css', 'ascii')
print sheet.cssText
"""
def __init__(self, log=None, loglevel=None, raiseExceptions=None,
fetcher=None):
"""
log
logging object
loglevel
logging loglevel
raiseExceptions
if log should simply log (default) or raise errors during
parsing. Later while working with the resulting sheets
the setting used in cssutils.log.raiseExeptions is used
fetcher
see ``setFetchUrl(fetcher)``
"""
if log is not None:
cssutils.log.setLog(log)
if loglevel is not None:
cssutils.log.setLevel(loglevel)
# remember global setting
self.__globalRaising = cssutils.log.raiseExceptions
if raiseExceptions:
self.__parseRaising = raiseExceptions
else:
# DEFAULT during parse
self.__parseRaising = False
self.__tokenizer = tokenize2.Tokenizer()
self.setFetcher(fetcher)
def __parseSetting(self, parse):
"""during parse exceptions may be handled differently depending on
init parameter ``raiseExceptions``
"""
if parse:
cssutils.log.raiseExceptions = self.__parseRaising
else:
cssutils.log.raiseExceptions = self.__globalRaising
def parseString(self, cssText, encoding=None, href=None, media=None,
title=None):
"""Return parsed CSSStyleSheet from given string cssText.
Raises errors during retrieving (e.g. UnicodeDecodeError).
cssText
CSS string to parse
encoding
If ``None`` the encoding will be read from BOM or an @charset
rule or defaults to UTF-8.
If given overrides any found encoding including the ones for
imported sheets.
It also will be used to decode ``cssText`` if given as a (byte)
string.
href
The href attribute to assign to the parsed style sheet.
Used to resolve other urls in the parsed sheet like @import hrefs
media
The media attribute to assign to the parsed style sheet
(may be a MediaList, list or a string)
title
The title attribute to assign to the parsed style sheet
"""
self.__parseSetting(True)
if isinstance(cssText, str):
cssText = codecs.getdecoder('css')(cssText, encoding=encoding)[0]
sheet = cssutils.css.CSSStyleSheet(href=href,
media=cssutils.stylesheets.MediaList(media),
title=title)
sheet._setFetcher(self.__fetcher)
# tokenizing this ways closes open constructs and adds EOF
sheet._setCssTextWithEncodingOverride(self.__tokenizer.tokenize(cssText,
fullsheet=True),
encodingOverride=encoding)
self.__parseSetting(False)
return sheet
def parseFile(self, filename, encoding=None,
              href=None, media=None, title=None):
    """Retrieve and return a CSSStyleSheet from given filename.

    Raises errors during retrieving (e.g. IOError).

    filename
        of the CSS file to parse, if no ``href`` is given filename is
        converted to a (file:) URL and set as ``href`` of resulting
        stylesheet.
        If href is given it is set as ``sheet.href``. Either way
        ``sheet.href`` is used to resolve e.g. stylesheet imports via
        @import rules.
    encoding
        Value ``None`` defaults to encoding detection via BOM or an
        @charset rule.
        Other values override detected encoding for the sheet at
        ``filename`` including any imported sheets.

    for other parameters see ``parseString``
    """
    if not href:
        # prepend // for file URL, urllib does not do this?
        href = u'file:' + urllib.pathname2url(os.path.abspath(filename))
    cssfile = open(filename, 'rb')
    try:
        # read returns a (byte) str which parseString decodes
        cssText = cssfile.read()
    finally:
        # close explicitly instead of relying on garbage collection
        cssfile.close()
    return self.parseString(cssText, encoding=encoding,
                            href=href, media=media, title=title)
def parseUrl(self, href, encoding=None, media=None, title=None):
    """Fetch the sheet at URL ``href`` and return it as a CSSStyleSheet.

    Returns ``None`` if reading the URL yielded no text.

    href
        URL of the CSS file to parse, will also be set as ``href`` of
        resulting stylesheet
    encoding
        Value ``None`` defaults to encoding detection via HTTP, BOM or an
        @charset rule.
        A value overrides detected encoding for the sheet at ``href``
        including any imported sheets.

    for other parameters see ``parseString``
    """
    encoding, enctype, text = cssutils.util._readUrl(
        href, overrideEncoding=encoding)
    if text is None:
        return None
    if enctype == 5:
        # encoding is only the UTF-8 default, do not pass it on as override
        encoding = None
    return self.parseString(text, encoding=encoding,
                            href=href, media=media, title=title)
def setFetcher(self, fetcher=None):
    """Install a custom URL fetch function (or reset to the default).

    fetcher
        a callable taking a single parameter ``url`` (the URL to read)
        and returning ``(encoding, content)``. ``encoding`` is the HTTP
        charset normally given via the Content-Type header (which may
        simply omit the charset), ``content`` is the (byte) string
        content.

        The mimetype should be 'text/css' but checking this is up to the
        fetcher itself (the default fetcher emits a warning if
        encountering a different mimetype).

        Passing ``None`` resets cssutils to use its default function.
    """
    self.__fetcher = fetcher
@Deprecated('Use cssutils.CSSParser().parseFile() instead.')
def parse(self, filename, encoding=None,
          href=None, media=None, title=None):
    """Deprecated alias of ``parseFile``, see there for the parameters.

    Returns the CSSStyleSheet produced by ``parseFile``.
    """
    # the result must be handed back, otherwise callers always get None
    return self.parseFile(filename, encoding, href, media, title)

View File

@ -0,0 +1,371 @@
"""classes and functions used by cssutils scripts
"""
__all__ = ['CSSCapture', 'csscombine']
__docformat__ = 'restructuredtext'
__version__ = '$Id: parse.py 1323 2008-07-06 18:13:57Z cthedot $'
import codecs
import errno
import HTMLParser
import logging
import os
import sys
import urllib2
import urlparse
import cssutils
try:
import cssutils.encutils as encutils
except ImportError:
try:
import encutils
except ImportError:
sys.exit("You need encutils from http://cthedot.de/encutils/")
# types of sheets in HTML
LINK = 0 # <link rel="stylesheet" type="text/css" href="..." [@title="..." @media="..."]/>
STYLE = 1 # <style type="text/css" [@title="..."]>...</style>
class CSSCaptureHTMLParser(HTMLParser.HTMLParser):
    """CSSCapture helper: Parse given data for link and style elements.

    Found sheets are collected in ``self.sheets`` as tuples
    ``(LINK, atts)`` or ``(STYLE, [atts, cssText])``.
    """
    curtag = u''

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # per instance list; a class level list would be shared (and grow)
        # across all parser instances
        self.sheets = []  # (type, [atts, cssText])

    def _loweratts(self, atts):
        """Return ``atts`` as dict with lower-cased attribute names.

        Values keep their case as e.g. ``href`` and ``title`` are case
        sensitive. Attributes given without a value (reported as ``None``
        by HTMLParser) are mapped to ``u''``.
        """
        lowered = {}
        for name, value in atts:
            if value is None:
                value = u''
            lowered[name.lower()] = value
        return lowered

    def _iscss(self, atts):
        "True if the ``type`` attribute is text/css (compared ignoring case)"
        return u'text/css' == atts.get(u'type', u'').lower()

    def handle_starttag(self, tag, atts):
        if tag == u'link':
            atts = self._loweratts(atts)
            if self._iscss(atts):
                self.sheets.append((LINK, atts))
        elif tag == u'style':
            # also get content of style
            atts = self._loweratts(atts)
            if self._iscss(atts):
                self.sheets.append((STYLE, [atts, u'']))
                self.curtag = tag
        else:
            # close as only interesting <style> cannot contain any elements
            self.curtag = u''

    def handle_data(self, data):
        if self.curtag == u'style':
            self.sheets[-1][1][1] = data # replace cssText

    def handle_comment(self, data):
        # style might have comment content, treat same as data
        self.handle_data(data)

    def handle_endtag(self, tag):
        # close as style cannot contain any elements
        self.curtag = u''
class CSSCapture(object):
"""
Retrieve all CSS stylesheets including embedded for a given URL.
Optional setting of User-Agent used for retrieval possible
to handle browser sniffing servers.
raises urllib2.HTTPError
"""
def __init__(self, ua=None, log=None, defaultloglevel=logging.INFO):
"""
initialize a new Capture object
ua
init User-Agent to use for requests
log
supply a log object which is used instead of the default
log which writes to sys.stderr
defaultloglevel
constant of logging package which defines the level of the
default log if no explicit log given
"""
self._ua = ua
if log:
self._log = log
else:
self._log = logging.getLogger('CSSCapture')
hdlr = logging.StreamHandler(sys.stderr)
formatter = logging.Formatter('%(message)s')
hdlr.setFormatter(formatter)
self._log.addHandler(hdlr)
self._log.setLevel(defaultloglevel)
self._log.debug(u'Using default log')
self._htmlparser = CSSCaptureHTMLParser()
self._cssparser = cssutils.CSSParser(log = self._log)
def _doRequest(self, url):
"""Do an HTTP request
Return (url, rawcontent)
url might have been changed by server due to redirects etc
"""
self._log.debug(u' CSSCapture._doRequest\n * URL: %s' % url)
req = urllib2.Request(url)
if self._ua:
req.add_header('User-agent', self._ua)
self._log.info(' * Using User-Agent: %s', self._ua)
try:
res = urllib2.urlopen(req)
except urllib2.HTTPError, e:
self._log.critical(' %s\n%s %s\n%s' % (
e.geturl(), e.code, e.msg, e.headers))
return None, None
# get real url
if url != res.geturl():
url = res.geturl()
self._log.info(' URL retrieved: %s', url)
return url, res
def _createStyleSheet(self, href=None,
media=None,
parentStyleSheet=None,
title=u'',
cssText=None,
encoding=None):
"""
Return CSSStyleSheet read from href or if cssText is given use that.
encoding
used if inline style found, same as self.docencoding
"""
if cssText is None:
encoding, enctype, cssText = cssutils.util._readUrl(href, parentEncoding=self.docencoding)
encoding = None # already decoded???
sheet = self._cssparser.parseString(cssText, href=href, media=media, title=title,
encoding=encoding)
if not sheet:
return None
else:
self._log.info(u' %s\n' % sheet)
self._nonparsed[sheet] = cssText
return sheet
def _findStyleSheets(self, docurl, doctext):
"""
parse text for stylesheets
fills stylesheetlist with all found StyleSheets
docurl
to build a full url of found StyleSheets @href
doctext
to parse
"""
# TODO: ownerNode should be set to the <link> node
self._htmlparser.feed(doctext)
for typ, data in self._htmlparser.sheets:
sheet = None
if LINK == typ:
self._log.info(u'+ PROCESSING <link> %r' % data)
atts = data
href = urlparse.urljoin(docurl, atts.get(u'href', None))
sheet = self._createStyleSheet(href=href,
media=atts.get(u'media', None),
title=atts.get(u'title', None))
elif STYLE == typ:
self._log.info(u'+ PROCESSING <style> %r' % data)
atts, cssText = data
sheet = self._createStyleSheet(cssText=cssText,
href = docurl,
media=atts.get(u'media', None),
title=atts.get(u'title', None),
encoding=self.docencoding)
if sheet:
sheet._href = None # inline have no href!
print sheet.cssText
if sheet:
self.stylesheetlist.append(sheet)
self._doImports(sheet, base=docurl)
def _doImports(self, parentStyleSheet, base=None):
"""
handle all @import CSS stylesheet recursively
found CSS stylesheets are appended to stylesheetlist
"""
# TODO: only if not parsed these have to be read extra!
for rule in parentStyleSheet.cssRules:
if rule.type == rule.IMPORT_RULE:
self._log.info(u'+ PROCESSING @import:')
self._log.debug(u' IN: %s\n' % parentStyleSheet.href)
sheet = rule.styleSheet
href = urlparse.urljoin(base, rule.href)
if sheet:
self._log.info(u' %s\n' % sheet)
self.stylesheetlist.append(sheet)
self._doImports(sheet, base=href)
def capture(self, url):
"""
Capture all stylesheets at given URL's HTML document.
Any HTTPError is raised to caller.
url
to capture CSS from
Returns ``cssutils.stylesheets.StyleSheetList``.
"""
self._log.info(u'\nCapturing CSS from URL:\n %s\n', url)
self._nonparsed = {}
self.stylesheetlist = cssutils.stylesheets.StyleSheetList()
# used to save inline styles
scheme, loc, path, query, fragment = urlparse.urlsplit(url)
self._filename = os.path.basename(path)
# get url content
url, res = self._doRequest(url)
if not res:
sys.exit(1)
rawdoc = res.read()
self.docencoding = encutils.getEncodingInfo(
res, rawdoc, log=self._log).encoding
self._log.info(u'\nUsing Encoding: %s\n', self.docencoding)
doctext = rawdoc.decode(self.docencoding)
# fill list of stylesheets and list of raw css
self._findStyleSheets(url, doctext)
return self.stylesheetlist
def saveto(self, dir, saveraw=False, minified=False):
"""
saves css in "dir" in the same layout as on the server
internal stylesheets are saved as "dir/__INLINE_STYLE__.html.css"
dir
directory to save files to
saveparsed
save literal CSS from server or save the parsed CSS
minified
save minified CSS
Both parsed and minified (which is also parsed of course) will
loose information which cssutils is unable to understand or where
it is simple buggy. You might to first save the raw version before
parsing of even minifying it.
"""
msg = 'parsed'
if saveraw:
msg = 'raw'
if minified:
cssutils.ser.prefs.useMinified()
msg = 'minified'
inlines = 0
for i, sheet in enumerate(self.stylesheetlist):
url = sheet.href
if not url:
inlines += 1
url = u'%s_INLINE_%s.css' % (self._filename, inlines)
# build savepath
scheme, loc, path, query, fragment = urlparse.urlsplit(url)
# no absolute path
if path and path.startswith('/'):
path = path[1:]
path = os.path.normpath(path)
path, fn = os.path.split(path)
savepath = os.path.join(dir, path)
savefn = os.path.join(savepath, fn)
try:
os.makedirs(savepath)
except OSError, e:
if e.errno != errno.EEXIST:
raise e
self._log.debug(u'Path "%s" already exists.', savepath)
self._log.info(u'SAVING %s, %s %r' % (i+1, msg, savefn))
sf = open(savefn, 'wb')
if saveraw:
cssText = self._nonparsed[sheet]
uf = codecs.getwriter('css')(sf)
uf.write(cssText)
else:
sf.write(sheet.cssText)
sf.close()
def csscombine(proxypath, sourceencoding=None, targetencoding='utf-8',
               minify=True):
    """Combine sheets referred to by @import rules in given CSS proxy sheet
    into a single new sheet.

    Nested @import rules of imported sheets are not resolved, they are
    logged and copied to the combined sheet as they are.

    :returns: combined cssText, normal or minified
    :Parameters:
        `proxypath`
            url or path to a CSSStyleSheet which imports other sheets which
            are then combined into one sheet
        `sourceencoding`
            encoding of the source sheets including the proxy sheet
        `targetencoding`
            encoding of the combined stylesheet, default 'utf-8'
        `minify`
            defines if the combined sheet should be minified, default True
    """
    log = cssutils.log
    log.info('Combining files in proxy %r' % proxypath, neverraise=True)
    if sourceencoding is not None:
        log.info('Using source encoding %r' % sourceencoding,
                 neverraise=True)
    src = cssutils.parseFile(proxypath, encoding=sourceencoding)
    srcpath = os.path.dirname(proxypath)
    combined = cssutils.css.CSSStyleSheet()
    for rule in src.cssRules:
        if rule.type == rule.IMPORT_RULE:
            fn = os.path.join(srcpath, rule.href)
            log.info('Processing @import %r' % fn,
                     neverraise=True)
            importsheet = cssutils.parseFile(fn, encoding=sourceencoding)
            importsheet.encoding = None # remove @charset
            # leave a trace of the replaced @import rule
            combined.add(cssutils.css.CSSComment(cssText=u'/* %s */' %
                                                 rule.cssText))
            for x in importsheet.cssRules:
                if x.type == x.IMPORT_RULE:
                    log.info('Nested @imports are not combined: %s' % x.cssText,
                             neverraise=True)
                combined.add(x)
        else:
            combined.add(rule)
    log.info('Setting target encoding %r' % targetencoding, neverraise=True)
    combined.encoding = targetencoding
    if minify:
        # save old setting and use own serializer
        oldser = cssutils.ser
        cssutils.setSerializer(cssutils.serialize.CSSSerializer())
        try:
            cssutils.ser.prefs.useMinified()
            cssText = combined.cssText
        finally:
            # restore the global serializer even if serializing failed
            cssutils.setSerializer(oldser)
    else:
        cssText = combined.cssText
    return cssText

View File

@ -0,0 +1,4 @@
from csscombine import csscombine
__all__ = ["csscapture", "csscombine", "cssparse"]

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
"""Retrieve all CSS stylesheets including embedded for a given URL.
Retrieve as StyleSheetList or save to disk - raw, parsed or minified version.
TODO:
- maybe use DOM 3 load/save?
- logger class which handles all cases when no log is given...
- saveto: why does urllib2 hang?
"""
__all__ = ['CSSCapture']
__docformat__ = 'restructuredtext'
__version__ = '$Id: csscapture.py 1332 2008-07-09 13:12:56Z cthedot $'
import logging
import optparse
import sys
from cssutils.script import CSSCapture
def main(args=None):
    """Command line entry point of csscapture.

    args
        list of command line arguments; if ``None`` optparse uses
        ``sys.argv[1:]``

    Captures all stylesheets of the given URL and either saves them or,
    with option ``--notsave``, prints a short summary of each sheet.
    """
    usage = "usage: %prog [options] URL"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-d', '--debug', action='store_true', dest='debug',
        help='show debug messages during capturing')
    parser.add_option('-m', '--minified', action='store_true', dest='minified',
        help='saves minified version of captured files')
    parser.add_option('-n', '--notsave', action='store_true', dest='notsave',
        help='if given files are NOT saved, only log is written')
    parser.add_option('-s', '--saveto', action='store', dest='saveto',
        help='saving retrieved files to "saveto", defaults to "_CSSCapture_SAVED"')
    parser.add_option('-u', '--useragent', action='store', dest='ua',
        help='useragent to use for request of URL, default is urllib2s default')
    # hand on the given args, otherwise the parameter would be ignored
    options, url = parser.parse_args(args)

    # TODO: a -r/--saveraw option is not offered yet, so never save raw
    options.saveraw = False

    if not url:
        parser.error('no URL given')
    else:
        url = url[0]

    if options.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO

    # START
    c = CSSCapture(ua=options.ua, defaultloglevel=level)
    stylesheetlist = c.capture(url)

    if not options.notsave:
        if options.saveto:
            saveto = options.saveto
        else:
            saveto = u'_CSSCapture_SAVED'
        c.saveto(saveto, saveraw=options.saveraw, minified=options.minified)
    else:
        for i, s in enumerate(stylesheetlist):
            print(u'''%s.
encoding: %r
title: %r
href: %r''' % (i + 1, s.encoding, s.title, s.href))
if __name__ == "__main__":
sys.exit(main())

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python
"""Combine sheets referred to by @import rules in a given CSS proxy sheet
into a single new sheet.
- proxy currently is a path (no URI!)
- in @import rules only relative paths do work for now but should be used
anyway
- currently no nested @imports are resolved
- messages are sent to stderr
- output to stdout.
Example::
csscombine sheets\csscombine-proxy.css -m -t ascii -s utf-8
1>combined.css 2>log.txt
results in log.txt::
COMBINING sheets/csscombine-proxy.css
USING SOURCE ENCODING: css
* PROCESSING @import sheets\csscombine-1.css
* PROCESSING @import sheets\csscombine-2.css
INFO Nested @imports are not combined: @import "1.css";
SETTING TARGET ENCODING: ascii
and combined.css::
@charset "ascii";@import"1.css";@namespaces2"uri";s2|sheet-1{top:1px}s2|sheet-2{top:2px}proxy{top:3px}
or without option -m::
@charset "ascii";
@import "1.css";
@namespace s2 "uri";
@namespace other "other";
/* proxy sheet were imported sheets should be combined */
/* non-ascii chars: \F6 \E4 \FC */
/* @import "csscombine-1.css"; */
/* combined sheet 1 */
s2|sheet-1 {
top: 1px
}
/* @import url(csscombine-2.css); */
/* combined sheet 2 */
s2|sheet-2 {
top: 2px
}
proxy {
top: 3px
}
TODO
- URL or file hrefs? URI should be default
- no nested @imports are resolved yet
- maybe add a config file which is used?
"""
__all__ = ['csscombine']
__docformat__ = 'restructuredtext'
__version__ = '$Id: csscombine.py 1332 2008-07-09 13:12:56Z cthedot $'
import optparse
import sys
from cssutils.script import csscombine
def main(args=None):
    """Command line entry point of csscombine.

    args
        list of command line arguments; if ``None`` optparse uses
        ``sys.argv[1:]``

    Prints the combined sheet of the given proxy sheet path to stdout.
    """
    usage = "usage: %prog [options] path"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-s', '--sourceencoding', action='store',
        dest='sourceencoding',
        help='encoding of input, defaulting to "css". If given overwrites other encoding information like @charset declarations')
    parser.add_option('-t', '--targetencoding', action='store',
        dest='targetencoding',
        help='encoding of output, defaulting to "UTF-8"', default='utf-8')
    parser.add_option('-m', '--minify', action='store_true', dest='minify',
        default=False,
        help='saves minified version of combined files, defaults to False')
    # hand on the given args, otherwise the parameter would be ignored
    options, path = parser.parse_args(args)

    if not path:
        parser.error('no path given')
    else:
        path = path[0]

    print(csscombine(path, options.sourceencoding, options.targetencoding,
                     options.minify))
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
"""utility script to parse given filenames or string
"""
__docformat__ = 'restructuredtext'
__version__ = '$Id: cssparse.py 1327 2008-07-08 21:17:12Z cthedot $'
import cssutils
import logging
import optparse
import sys
def main(args=None):
    """
    Parse the given filename(s) or, with option ``--string``, the given
    string (using the optional encoding) and print each parsed style sheet
    to stdout.

    Redirect stdout to save CSS. Redirect stderr to save parser log infos.
    """
    usage = """usage: %prog [options] filename1.css [filename2.css ...]
[>filename_combined.css] [2>parserinfo.log] """
    optparser = optparse.OptionParser(usage=usage)
    optparser.add_option('-e', '--encoding', action='store', dest='encoding',
        help='encoding of the file')
    optparser.add_option('-d', '--debug', action='store_true', dest='debug',
        help='activate debugging output')
    optparser.add_option('-m', '--minify', action='store_true', dest='minify',
        help='minify parsed CSS', default=False)
    optparser.add_option('-s', '--string', action='store_true', dest='string',
        help='parse given string')
    options, params = optparser.parse_args(args)

    if not params:
        optparser.error("no filename given")

    # only pass a loglevel if debugging was asked for
    parserargs = {}
    if options.debug:
        parserargs['loglevel'] = logging.DEBUG
    cssparser = cssutils.CSSParser(**parserargs)

    if options.minify:
        cssutils.ser.prefs.useMinified()

    def emit(sheet):
        "sheet and an empty line to stdout, a newline to stderr"
        print(sheet.cssText)
        print('')
        sys.stderr.write('\n')

    if options.string:
        emit(cssparser.parseString(u''.join(params),
                                   encoding=options.encoding))
        return

    for filename in params:
        sys.stderr.write('=== CSS FILE: "%s" ===\n' % filename)
        emit(cssparser.parseFile(filename, encoding=options.encoding))
if __name__ == "__main__":
sys.exit(main())

View File

@ -0,0 +1,882 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""serializer classes for CSS classes
"""
__all__ = ['CSSSerializer', 'Preferences']
__docformat__ = 'restructuredtext'
__version__ = '$Id: serialize.py 1419 2008-08-09 19:28:06Z cthedot $'
import codecs
import re
import cssutils
def _escapecss(e):
    """
    Codec error handler which escapes characters not allowed in the
    current encoding the CSS way: a backslash followed by the uppercase
    hex code point and a single space.

    E.g. the German umlaut a (U+00E4) is escaped as ``\\E4``

    Returns the tuple (replacement, position to continue encoding at).
    """
    unencodable = e.object[e.start:e.end]
    escaped = [u'\\%X ' % ord(char) for char in unencodable]
    return u''.join(escaped), e.end

codecs.register_error('escapecss', _escapecss)
class Preferences(object):
    """
    controls output of CSSSerializer

    defaultAtKeyword = True
        Should the literal @keyword from src CSS be used or the default
        form, e.g. if ``True``: ``@import`` else: ``@i\\mport``
    defaultPropertyName = True
        Should the normalized propertyname be used or the one given in
        the src file, e.g. if ``True``: ``color`` else: ``c\\olor``

        Only used if ``keepAllProperties==False``.
    defaultPropertyPriority = True
        Should the normalized or literal priority be used, e.g. '!important'
        or u'!Im\\portant'
    importHrefFormat = None
        Uses hreftype if ``None`` or explicit ``'string'`` or ``'uri'``
    indent = 4 * ' '
        Indentation of e.g Properties inside a CSSStyleDeclaration
    indentSpecificities = False
        Indent rules with subset of Selectors and higher Specitivity
    keepAllProperties = True
        If ``True`` all properties set in the original CSSStylesheet
        are kept meaning even properties set twice with the exact
        same name are kept!
    keepComments = True
        If ``False`` removes all CSSComments
    keepEmptyRules = False
        defines if empty rules like e.g. ``a {}`` are kept in the resulting
        serialized sheet
    keepUsedNamespaceRulesOnly = False
        if True only namespace rules which are actually used are kept
    lineNumbers = False
        Only used if a complete CSSStyleSheet is serialized.
    lineSeparator = u'\\n'
        How to end a line. This may be set to e.g. u'' for serializing of
        CSSStyleDeclarations usable in HTML style attribute.
    listItemSpacer = u' '
        string which is used in ``css.SelectorList``, ``css.CSSValue`` and
        ``stylesheets.MediaList`` after the comma
    omitLastSemicolon = True
        If ``True`` omits ; after last property of CSSStyleDeclaration
    paranthesisSpacer = u' '
        string which is used before an opening paranthesis like in a
        ``css.CSSMediaRule`` or ``css.CSSStyleRule``
    propertyNameSpacer = u' '
        string which is used after a Property name colon
    selectorCombinatorSpacer = u' '
        string which is used before and after a Selector combinator like +, > or ~.
        CSSOM defines a single space for this which is also the default in cssutils.
    spacer = u' '
        general spacer, used e.g. by CSSUnknownRule
    validOnly = False **DO NOT CHANGE YET**
        if True only valid (currently Properties) are kept

        A Property is valid if it is a known Property with a valid value.
        Currently CSS 2.1 values as defined in cssproperties.py would be
        valid.
    """
    def __init__(self, **initials):
        """
        Always use named instead of positional parameters

        Any given value except ``None`` overrides the default, so falsy
        values like ``False`` or ``u''`` are honoured too.
        """
        self.useDefaults()
        for key, value in initials.items():
            # a plain truth test would silently drop e.g.
            # ``keepComments=False`` or ``lineSeparator=u''``
            if value is not None:
                setattr(self, key, value)

    def useDefaults(self):
        "reset all preference options to the default value"
        self.defaultAtKeyword = True
        self.defaultPropertyName = True
        self.defaultPropertyPriority = True
        self.importHrefFormat = None
        self.indent = 4 * u' '
        self.indentSpecificities = False
        self.keepAllProperties = True
        self.keepComments = True
        self.keepEmptyRules = False
        self.keepUsedNamespaceRulesOnly = False
        self.lineNumbers = False
        self.lineSeparator = u'\n'
        self.listItemSpacer = u' '
        self.omitLastSemicolon = True
        self.paranthesisSpacer = u' '
        self.propertyNameSpacer = u' '
        self.selectorCombinatorSpacer = u' '
        self.spacer = u' '
        self.validOnly = False # should not be changed currently!!!

    def useMinified(self):
        """
        sets options to achieve a minified stylesheet

        you may want to set preferences with this convenience method
        and set settings you want adjusted afterwards
        """
        self.importHrefFormat = 'string'
        self.indent = u''
        self.keepComments = False
        self.keepEmptyRules = False
        self.keepUsedNamespaceRulesOnly = True
        self.lineNumbers = False
        self.lineSeparator = u''
        self.listItemSpacer = u''
        self.omitLastSemicolon = True
        self.paranthesisSpacer = u''
        self.propertyNameSpacer = u''
        self.selectorCombinatorSpacer = u''
        self.spacer = u''
        self.validOnly = False

    def __repr__(self):
        # sorted so the output does not depend on dict ordering
        return u"cssutils.css.%s(%s)" % (self.__class__.__name__,
            u', '.join(['\n %s=%r' % (p, getattr(self, p))
                        for p in sorted(self.__dict__)]
            ))

    def __str__(self):
        # sorted so the output does not depend on dict ordering
        return u"<cssutils.css.%s object %s at 0x%x>" % (self.__class__.__name__,
            u' '.join(['%s=%r' % (p, getattr(self, p))
                       for p in sorted(self.__dict__)]
            ),
            id(self))
class Out(object):
    """
    collects appended items in ``self.out`` and makes them available as a
    single combined string via ``value()``
    """
    def __init__(self, ser):
        self.ser = ser
        self.out = []

    def _remove_last_if_S(self):
        "drop the last item if it consists of whitespace only"
        if not self.out:
            return
        if self.out[-1].strip():
            return
        del self.out[-1]

    def append(self, val, typ=None, space=True, keepS=False, indent=False,
               lineSeparator=False):
        """Appends val. Adds a single S after each token except as follows:

        - typ COMMENT
            uses cssText depending on self.ser.prefs.keepComments
        - typ "Property", cssutils.css.CSSRule.UNKNOWN_RULE
            uses cssText
        - typ STRING
            escapes ser._string
        - typ S
            ignored except ``keepS=True``
        - typ URI
            calls ser_uri
        - val ``{``
            adds LF after
        - val ``;``
            removes S before and adds LF after
        - val ``, :``
            removes S before
        - val ``+ > ~``
            encloses in prefs.selectorCombinatorSpacer
        - some other vals
            add ``*spacer`` except ``space=False``
        """
        # empty values are ignored, only a STRING may be empty
        if not (val or 'STRING' == typ):
            return

        # PRE: convert val depending on its type
        if 'COMMENT' == typ:
            if not self.ser.prefs.keepComments:
                return
            val = val.cssText
        elif typ in ('Property', cssutils.css.CSSRule.UNKNOWN_RULE):
            val = val.cssText
        elif 'S' == typ and not keepS:
            return
        elif 'STRING' == typ:
            # may be empty but MUST not be None
            if val is None:
                return
            val = self.ser._string(val)
        elif 'URI' == typ:
            val = self.ser._uri(val)
        elif val in u'+>~,:{;)]':
            self._remove_last_if_S()

        # APPEND
        if indent:
            val_to_add = self.ser._indentblock(val, self.ser._level+1)
        else:
            val_to_add = val
        self.out.append(val_to_add)

        # POST: add the spacing which follows (or encloses) val
        if lineSeparator:
            # Property , ...
            return
        if val in u'+>~':
            # enclose selector combinator
            self.out.insert(-1, self.ser.prefs.selectorCombinatorSpacer)
            self.out.append(self.ser.prefs.selectorCombinatorSpacer)
            return
        if u',' == val:
            # list
            self.out.append(self.ser.prefs.listItemSpacer)
            return
        if u':' == val:
            # prop
            self.out.append(self.ser.prefs.propertyNameSpacer)
            return
        if u'{' == val:
            # block start
            self.out.insert(-1, self.ser.prefs.paranthesisSpacer)
            self.out.append(self.ser.prefs.lineSeparator)
            return
        if u';' == val:
            # end or prop or block
            self.out.append(self.ser.prefs.lineSeparator)
            return
        if val not in u'}[]()' and space:
            self.out.append(self.ser.prefs.spacer)

    def value(self, delim=u'', end=None):
        "returns all items joined by delim, optionally followed by end"
        self._remove_last_if_S()
        if end:
            self.out.append(end)
        combined = delim.join(self.out)
        return combined
class CSSSerializer(object):
"""
Methods to serialize a CSSStylesheet and its parts
To use your own serializing method the easiest is to subclass CSS
Serializer and overwrite the methods you like to customize.
"""
# chars not in URI without quotes around
__forbidden_in_uri_matcher = re.compile(ur'''.*?[\)\s\;]''', re.U).match
def __init__(self, prefs=None):
    """
    prefs
        instance of Preferences, if not given a new Preferences object
        with the default settings is used
    """
    self.prefs = prefs or Preferences()
    # current nesting level
    self._level = 0
    # TODO:
    # holds SelectorList
    self._selectors = []
    # current specificity nesting level
    self._selectorlevel = 0
def _atkeyword(self, rule, default):
    "returns ``default`` if prefs.defaultAtKeyword else the rule's own atkeyword"
    if not self.prefs.defaultAtKeyword:
        return rule.atkeyword
    return default
def _indentblock(self, text, level):
    """
    prefix every line of ``text`` with ``level`` times prefs.indent

    ``level`` may be higher than self._level (e.g. for
    CSSStyleDeclaration). Without a prefs.lineSeparator ``text`` is
    returned unchanged.
    """
    separator = self.prefs.lineSeparator
    if not separator:
        return text
    prefix = level * self.prefs.indent
    indented = [u'%s%s' % (prefix, line) for line in text.split(separator)]
    return separator.join(indented)
def _propertyname(self, property, actual):
    """
    used by all styledeclarations to get the propertyname to use

    Returns the normalized ``property.name`` if prefs.defaultPropertyName
    is set and prefs.keepAllProperties is not, else ``actual``.
    """
    prefs = self.prefs
    use_normalized = prefs.defaultPropertyName and not prefs.keepAllProperties
    if use_normalized:
        return property.name
    return actual
def _linenumnbers(self, text):
    """
    Return ``text`` with each line prefixed by its right aligned line
    number and ``': '`` if prefs.lineNumbers is set, else ``text`` as is.

    Lines are the parts of ``text`` separated by prefs.lineSeparator. If
    the separator is empty the complete text is numbered as line 1.
    """
    if not self.prefs.lineNumbers:
        return text
    separator = self.prefs.lineSeparator
    if separator:
        lines = text.split(separator)
    else:
        # splitting on an empty separator raises a ValueError
        lines = [text]
    # width of the highest line number
    pad = len(str(len(lines)))
    numbered = [u'%*i: %s' % (pad, number + 1, line)
                for number, line in enumerate(lines)]
    return separator.join(numbered)
def _string(self, s):
    """
    returns s enclosed between "..." with the delimiter character "
    escaped, line breaks \\n \\r and \\f are escaped as CSS hex escapes
    """
    # \n = 0xa, \r = 0xd, \f = 0xc
    for linebreak, escape in (('\n', '\\a '), ('\r', '\\d '), ('\f', '\\c ')):
        s = s.replace(linebreak, escape)
    s = s.replace('"', u'\\"')
    return u'"%s"' % s
def _uri(self, uri):
    """returns uri enclosed in url(), additionally quoted with "..." if
    it contains characters which are forbidden in an unquoted URI"""
    needs_quotes = CSSSerializer.__forbidden_in_uri_matcher(uri)
    if needs_quotes:
        uri = self._string(uri)
    return 'url(%s)' % uri
def _valid(self, x):
    "True if prefs.validOnly is off, else the value of ``x.valid``"
    if not self.prefs.validOnly:
        return True
    return x.valid
def do_CSSStyleSheet(self, stylesheet):
    """serializes a complete CSSStyleSheet

    Returns the text of all rules joined by prefs.lineSeparator and
    encoded in the encoding of the sheet. Characters which cannot be
    encoded are handled by the ``escapecss`` error handler.
    """
    useduris = stylesheet._getUsedURIs()
    ruletexts = []
    for rule in stylesheet.cssRules:
        # optionally skip namespace rules which are not used in the sheet
        unused_namespace = (self.prefs.keepUsedNamespaceRulesOnly
                            and rule.NAMESPACE_RULE == rule.type
                            and rule.namespaceURI not in useduris
                            and (rule.prefix or None not in useduris))
        if unused_namespace:
            continue
        ruletext = rule.cssText
        if ruletext:
            ruletexts.append(ruletext)
    text = self._linenumnbers(self.prefs.lineSeparator.join(ruletexts))

    # get encoding of sheet, defaults to UTF-8
    try:
        encoding = stylesheet.cssRules[0].encoding
    except (IndexError, AttributeError):
        encoding = 'UTF-8'
    return text.encode(encoding, 'escapecss')
def do_CSSComment(self, rule):
    """
    serializes CSSComment which consists only of commentText

    Returns u'' if the comment is empty or prefs.keepComments is off.
    """
    keep = rule._cssText and self.prefs.keepComments
    if not keep:
        return u''
    return rule._cssText
def do_CSSCharsetRule(self, rule):
    """
    serializes CSSCharsetRule

    encoding: string

    The result always is ``@charset "encoding";``, no comments or other
    things are allowed! A rule which is not wellformed yields u''.
    """
    if not rule.wellformed:
        return u''
    return u'@charset %s;' % self._string(rule.encoding)
def do_CSSFontFaceRule(self, rule):
    """
    serializes CSSFontFaceRule

    style
        CSSStyleDeclaration

    + CSSComments

    A rule with an empty style or which is not wellformed yields u''.
    """
    styleText = self.do_css_CSSStyleDeclaration(rule.style)
    if not (styleText and rule.wellformed):
        return u''

    out = Out(self)
    out.append(self._atkeyword(rule, u'@font-face'))
    # assume only comments before the {
    for item in rule.seq:
        out.append(item.value, item.type)
    out.append(u'{')
    block = u'%s%s}' % (styleText, self.prefs.lineSeparator)
    out.append(block, indent=1)
    return out.value()
def do_CSSImportRule(self, rule):
    """
    serializes CSSImportRule

    href
        string
    media
        optional cssutils.stylesheets.medialist.MediaList
    name
        optional string

    + CSSComments

    A rule which is not wellformed yields u''.
    """
    if not rule.wellformed:
        return u''

    out = Out(self)
    out.append(self._atkeyword(rule, u'@import'))
    for item in rule.seq:
        typ, val = item.type, item.value
        if 'href' == typ:
            # "href" or url(href)
            as_string = (self.prefs.importHrefFormat == 'string' or (
                         self.prefs.importHrefFormat != 'uri' and
                         rule.hreftype == 'string'))
            if as_string:
                out.append(val, 'STRING')
            else:
                # keep at least one space between @import and url()
                if not len(self.prefs.spacer):
                    out.append(u' ')
                out.append(val, 'URI')
        elif 'media' == typ:
            # media, the default u'all' is left out
            mediaText = self.do_stylesheets_medialist(val)
            if mediaText and mediaText != u'all':
                out.append(mediaText)
        elif 'name' == typ:
            out.append(val, 'STRING')
        else:
            out.append(val, typ)
    return out.value(end=u';')
def do_CSSNamespaceRule(self, rule):
    """
    serializes CSSNamespaceRule

    uri
        string
    prefix
        string

    + CSSComments

    A rule which is not wellformed yields u''.
    """
    if not rule.wellformed:
        return u''

    out = Out(self)
    out.append(self._atkeyword(rule, u'@namespace'))
    # keep at least one space after the keyword
    if not len(self.prefs.spacer):
        out.append(u' ')
    for item in rule.seq:
        if 'namespaceURI' == item.type:
            out.append(item.value, 'STRING')
        else:
            out.append(item.value, item.type)
    return out.value(end=u';')
def do_CSSMediaRule(self, rule):
    """
    serializes CSSMediaRule

    + CSSComments

    Yields u'' if the media of the rule are not wellformed or if the rule
    contains no serializable rules and prefs.keepEmptyRules is off.
    """
    # TODO: use Out()?
    prefs = self.prefs

    # mediaquery
    if not rule.media.wellformed:
        return u''

    # @media, for now always followed by at least a space as only webkit
    # supports @mediaall?
    parts = [self._atkeyword(rule, u'@media'), prefs.spacer or u' ']
    parts.append(self.do_stylesheets_medialist(rule.media))

    # name, seq contains content after name only (Comments)
    if rule.name:
        parts.append(prefs.spacer)
        nameout = Out(self)
        nameout.append(self._string(rule.name))
        for item in rule.seq:
            nameout.append(item.value, item.type)
        parts.append(nameout.value())

    # {
    parts.extend([prefs.paranthesisSpacer, u'{', prefs.lineSeparator])

    # rules
    rulesout = []
    for childrule in rule.cssRules:
        childtext = childrule.cssText
        if not childtext:
            continue
        # indent each line of cssText
        rulesout.append(self._indentblock(childtext, self._level + 1))
        rulesout.append(prefs.lineSeparator)
    if not prefs.keepEmptyRules and not u''.join(rulesout).strip():
        return u''
    parts.extend(rulesout)

    # }
    parts.append(u'%s}' % ((self._level + 1) * prefs.indent))
    return u''.join(parts)
def do_CSSPageRule(self, rule):
    """
    serializes CSSPageRule

    selectorText
        string
    style
        CSSStyleDeclaration

    + CSSComments

    A rule with an empty style or which is not wellformed yields u''.
    """
    styleText = self.do_css_CSSStyleDeclaration(rule.style)
    if not (styleText and rule.wellformed):
        return u''

    out = Out(self)
    out.append(self._atkeyword(rule, u'@page'))
    # keep at least one space after the keyword
    if not len(self.prefs.spacer):
        out.append(u' ')
    for item in rule.seq:
        out.append(item.value, item.type)
    out.append(u'{')
    block = u'%s%s}' % (styleText, self.prefs.lineSeparator)
    out.append(block, indent=1)
    return out.value()
def do_CSSUnknownRule(self, rule):
    """
    serializes CSSUnknownRule
    anything until ";" or "{...}"
    + CSSComments

    The literal atkeyword of the rule is followed by the items of
    ``rule.seq``. The content of each ``{...}`` block is collected in an
    ``Out`` of its own which is pushed on a stack at ``{`` and serialized
    and popped again at the matching ``}``.

    A rule which is not wellformed yields u''.
    """
    if rule.wellformed:
        out = Out(self)
        out.append(rule.atkeyword)
        if not len(self.prefs.spacer):
            # keep at least one space after the keyword
            out.append(u' ')

        # one Out for each currently open {...} block, innermost last
        stacks = []
        for item in rule.seq:
            typ, val = item.type, item.value

            # PRE
            if u'}' == val:
                # close last open item on stack
                # NOTE(review): pop() assumes a preceding "{" - presumably
                # guaranteed for wellformed rules, confirm
                stackblock = stacks.pop().value()
                # NOTE(review): min(1, len(stacks)+1) always is 1, so
                # blocks are indented one level regardless of nesting -
                # confirm that this is intended
                if stackblock:
                    val = self._indentblock(
                        stackblock + self.prefs.lineSeparator + val,
                        min(1, len(stacks)+1))
                else:
                    val = self._indentblock(val, min(1, len(stacks)+1))

            # APPEND
            if stacks:
                # inside of a block: add to the innermost open block
                stacks[-1].append(val, typ)
            else:
                out.append(val, typ)

            # POST
            if u'{' == val:
                # new stack level
                stacks.append(Out(self))

        return out.value()
    else:
        return u''
def do_CSSStyleRule(self, rule):
    """
    Serialize a CSSStyleRule: selectorList, style and CSSComments.

    Returns the rule text, or ``u''`` when

    - the selector list serializes to nothing or the rule is not
      wellformed, or
    - the declaration block is empty and ``prefs.keepEmptyRules`` is off.

    An empty declaration block with ``prefs.keepEmptyRules`` on yields
    ``selector {}``.
    """
    # TODO: use Out()

    # prepare for element nested rules
    # TODO: sort selectors!
    if self.prefs.indentSpecificities:
        # subselectorlist?
        elements = set([s.element for s in rule.selectorList])
        specitivities = [s.specificity for s in rule.selectorList]
        for selector in self._selectors:
            lastelements = set([s.element for s in selector])
            if elements.issubset(lastelements):
                # higher specificity?
                lastspecitivities = [s.specificity for s in selector]
                if specitivities > lastspecitivities:
                    self._selectorlevel += 1
                    break
            elif self._selectorlevel > 0:
                self._selectorlevel -= 1
        else:
            # no earlier selector list matched: save new reference
            self._selectors.append(rule.selectorList)
            self._selectorlevel = 0
    # TODO ^ RESOLVE!!!!

    selectorText = self.do_css_SelectorList(rule.selectorList)
    if not selectorText or not rule.wellformed:
        return u''

    # declarations are serialized one level deeper; always restore the level
    self._level += 1
    styleText = u''
    try:
        styleText = self.do_css_CSSStyleDeclaration(rule.style)
    finally:
        self._level -= 1

    if not styleText:
        if self.prefs.keepEmptyRules:
            return u'%s%s{}' % (selectorText,
                                self.prefs.paranthesisSpacer)
        # always hand back a string, never an implicit None
        return u''

    return self._indentblock(
        u'%s%s{%s%s%s%s}' % (
            selectorText,
            self.prefs.paranthesisSpacer,
            self.prefs.lineSeparator,
            self._indentblock(styleText, self._level + 1),
            self.prefs.lineSeparator,
            (self._level + 1) * self.prefs.indent),
        self._selectorlevel)
def do_css_SelectorList(self, selectorlist):
    """
    Serialize a comma-separated list of Selectors.

    Entries are joined with "," followed by ``prefs.listItemSpacer``.
    Returns ``u''`` if the list is not wellformed.
    """
    # does not need Out() as it is too simple
    if not selectorlist.wellformed:
        return u''

    texts = []
    for part in selectorlist.seq:
        if isinstance(part, cssutils.css.Selector):
            texts.append(part.selectorText)
        else:
            # should not happen
            texts.append(part)
    separator = u',%s' % self.prefs.listItemSpacer
    return separator.join(texts)
def do_css_Selector(self, selector):
    """
    Serialize a single Selector including comments.

    An element (or attribute) part has the value
    ``(namespaceURI, name)`` where namespaceURI may be:

    - cssutils._ANYNS => ``*|name``
    - None => ``name``
    - u'' => ``|name``
    - any other value: => ``prefix|name``

    A name in the default namespace is written without prefix.
    Returns ``u''`` if the selector is not wellformed.
    """
    if not selector.wellformed:
        return u''

    out = Out(self)
    default_uri = selector._namespaces.get('', None)
    for item in selector.seq:
        typ, val = item.type, item.value

        if type(val) != tuple:
            # plain part: keep whitespace as given
            out.append(val, typ, space=False, keepS=True)
            continue

        # namespaceURI|name (element or attribute)
        namespaceURI, name = val
        unprefixed = (default_uri == namespaceURI or
                      (not default_uri and namespaceURI is None))
        if unprefixed:
            out.append(name, typ, space=False)
            continue

        if namespaceURI == cssutils._ANYNS:
            prefix = u'*'
        else:
            try:
                prefix = selector._namespaces.prefixForNamespaceURI(
                    namespaceURI)
            except IndexError:
                # no prefix known for this namespace
                prefix = u''
        out.append(u'%s|%s' % (prefix, name), typ, space=False)

    return out.value()
def do_css_CSSStyleDeclaration(self, style, separator=None):
    """
    Serialize the declarations of a CSSStyleDeclaration.

    style
        declaration whose ``seq`` holds Properties, CSSComments and
        possibly CSSUnknownRules
    separator
        text placed between entries, defaults to ``prefs.lineSeparator``

    With ``prefs.keepAllProperties`` off only the properties returned by
    ``style.getProperties()`` are written. Returns ``u''`` for an empty
    ``seq``.
    """
    # TODO: use Out()

    # may be comments only
    if not len(style.seq) > 0:
        return u''

    if separator is None:
        separator = self.prefs.lineSeparator

    if self.prefs.keepAllProperties:
        # all
        entries = style.seq
    else:
        # only effective ones; non-Property entries are always kept
        effective = style.getProperties()
        entries = [entry for entry in style.seq
                   if not isinstance(entry.value, cssutils.css.Property)
                   or entry.value in effective]

    pieces = []
    for position, entry in enumerate(entries):
        value = entry.value
        if isinstance(value, cssutils.css.CSSComment):
            # CSSComment
            if self.prefs.keepComments:
                pieces.extend([value.cssText, separator])
        elif isinstance(value, cssutils.css.Property):
            # PropertySimilarNameList
            pieces.append(self.do_Property(value))
            is_last = position == len(entries) - 1
            if not (self.prefs.omitLastSemicolon and is_last):
                pieces.append(u';')
            pieces.append(separator)
        elif isinstance(value, cssutils.css.CSSUnknownRule):
            # @rule
            pieces.extend([value.cssText, separator])
        else:
            # ?
            pieces.extend([value, separator])

    # no separator after the final entry
    if pieces and pieces[-1] == separator:
        pieces.pop()

    return u''.join(pieces)
def do_Property(self, property):
    """
    Serialize a single Property as ``name: value priority``.

    ``property.seqs`` holds a seq list for the name, a CSSValue and a seq
    list for the priority.

    Returns ``u''`` if the name seq is empty, the property is not
    wellformed or ``self._valid(property)`` is false.
    """
    # TODO: use Out()
    if not (property.seqs[0] and property.wellformed
            and self._valid(property)):
        return u''

    nameseq, cssvalue, priorityseq = property.seqs
    out = []

    # name
    for part in nameseq:
        if hasattr(part, 'cssText'):
            out.append(part.cssText)
        elif property.literalname == part:
            out.append(self._propertyname(property, part))
        else:
            out.append(part)

    # MediaQuery may consist of name only: then no ":" is written
    needs_colon = not property._mediaQuery or cssvalue.cssText
    if out and needs_colon:
        out.append(u':')
        out.append(self.prefs.propertyNameSpacer)

    # value
    out.append(cssvalue.cssText)

    # priority
    if out and priorityseq:
        out.append(u' ')
        for part in priorityseq:
            if hasattr(part, 'cssText'):
                # comments
                out.append(part.cssText)
            elif (part == property.literalpriority
                  and self.prefs.defaultPropertyPriority):
                out.append(property.priority)
            else:
                out.append(part)

    return u''.join(out)
def do_Property_priority(self, priorityseq):
    """
    Serialize a Property's priority: "!" S* "important".

    Parts having a ``cssText`` (comments) are surrounded by single spaces,
    all other parts are written as given; the result is stripped.
    """
    # TODO: use Out()
    out = []
    for part in priorityseq:
        if hasattr(part, 'cssText'):
            # comments
            out.extend([u' ', part.cssText, u' '])
        else:
            out.append(part)
    return u''.join(out).strip()
def do_css_CSSValue(self, cssvalue):
    """
    Serialize a CSSValue.

    Parts of ``cssvalue.seq`` are concatenated: parts with a ``cssText``
    (comments, or CSSValues of a CSSValueList) use it, a "," becomes ","
    plus ``prefs.listItemSpacer`` and quoted strings are re-serialized via
    ``self._string``. The result is stripped; a false ``cssvalue`` gives
    ``u''``.
    """
    # TODO: use Out()
    # TODO: use self._valid(cssvalue)?
    if not cssvalue:
        return u''

    sep = u',%s' % self.prefs.listItemSpacer
    out = []
    for part in cssvalue.seq:
        if hasattr(part, 'cssText'):
            # comments or CSSValue if a CSSValueList
            text = part.cssText
        elif isinstance(part, basestring) and part == u',':
            text = sep
        elif part and part[0] == part[-1] and part[0] in '\'"':
            # TODO: escape func parameter if STRING!
            # string has " " around it in CSSValue!
            text = self._string(part[1:-1])
        else:
            text = part
        out.append(text)
    return (u''.join(out)).strip()
def do_stylesheets_medialist(self, medialist):
    """
    Serialize a comma-separated list of media, default is 'all'.

    An empty list is written as ``all``; otherwise the ``mediaText`` of
    every entry is joined with "," plus ``prefs.listItemSpacer``.

    This method only joins the entries as given; it does not itself drop
    media next to "all".
    """
    if len(medialist) == 0:
        return u'all'
    separator = u',%s' % self.prefs.listItemSpacer
    return separator.join([mq.mediaText for mq in medialist])
def do_stylesheets_mediaquery(self, mediaquery):
    """
    Serialize a single media query as used in a medialist.

    Parts of ``mediaquery.seq`` are joined with single spaces; a Property
    (media feature) is wrapped in parentheses. Returns ``u''`` if the query
    is not wellformed.
    """
    if not mediaquery.wellformed:
        return u''

    def as_text(part):
        if isinstance(part, cssutils.css.Property):
            # Property
            return u'(%s)' % part.cssText
        if hasattr(part, 'cssText'):
            # comments
            return part.cssText
        # TODO: media queries!
        return part

    return u' '.join([as_text(part) for part in mediaquery.seq])

View File

@ -0,0 +1,18 @@
"""
Document Object Model Level 2 Style Sheets
http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/stylesheets.html
currently implemented:
- MediaList
- MediaQuery (http://www.w3.org/TR/css3-mediaqueries/)
- StyleSheet
- StyleSheetList
"""
__all__ = ['MediaList', 'MediaQuery', 'StyleSheet', 'StyleSheetList']
__docformat__ = 'restructuredtext'
__version__ = '$Id: __init__.py 1116 2008-03-05 13:52:23Z cthedot $'
from medialist import *
from mediaquery import *
from stylesheet import *
from stylesheetlist import *

View File

@ -0,0 +1,256 @@
"""
MediaList implements DOM Level 2 Style Sheets MediaList.
TODO:
- delete: maybe if deleting from all, replace *all* with all others?
- is unknown media an exception?
"""
__all__ = ['MediaList']
__docformat__ = 'restructuredtext'
__version__ = '$Id: medialist.py 1423 2008-08-11 12:43:22Z cthedot $'
import xml.dom
import cssutils
from cssutils.css import csscomment
from mediaquery import MediaQuery
class MediaList(cssutils.util.Base, cssutils.util.ListSeq):
    """
    Provides the abstraction of an ordered collection of media,
    without defining or constraining how this collection is
    implemented.

    A media is always an instance of MediaQuery.

    An empty list is the same as a list that contains the medium "all".

    Properties
    ==========
    length:
        The number of MediaQuery objects in the list.
    mediaText: of type DOMString
        The parsable textual representation of this MediaList
    self: a list (cssutils)
        All MediaQueries in this MediaList
    wellformed:
        if this list is wellformed

    Format
    ======
    ::

        medium [ COMMA S* medium ]*

    New::

        <media_query> [, <media_query> ]*
    """
    def __init__(self, mediaText=None, readonly=False):
        """
        mediaText
            unicodestring of parsable comma separated media
            or a list of media
        readonly
            stored as ``_readonly`` after ``mediaText`` has been set, so
            the initial text can always be applied
        """
        super(MediaList, self).__init__()
        self._wellformed = False

        if isinstance(mediaText, list):
            mediaText = u','.join(mediaText)

        if mediaText:
            self.mediaText = mediaText

        self._readonly = readonly

    length = property(lambda self: len(self),
        doc="(DOM readonly) The number of media in the list.")

    def _getMediaText(self):
        """
        returns serialized property mediaText
        """
        return cssutils.ser.do_stylesheets_medialist(self)

    def _setMediaText(self, mediaText):
        """
        mediaText
            simple value or comma-separated list of media

        The list is only replaced if every media query parsed wellformed
        and the text does not end with ",".

        DOMException

        - SYNTAX_ERR: (MediaQuery)
          Raised if the specified string value has a syntax error and is
          unparsable.
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this media list is readonly.
        """
        self._checkReadonly()
        wellformed = True
        tokenizer = self._tokenize2(mediaText)
        newseq = []

        # holds the "," token while another media query is still expected
        expected = None
        while True:
            # find all upto and including next ",", EOF or nothing
            mqtokens = self._tokensupto2(tokenizer, listseponly=True)
            if mqtokens:
                if self._tokenvalue(mqtokens[-1]) == ',':
                    expected = mqtokens.pop()
                else:
                    expected = None

                mq = MediaQuery(mqtokens)
                if mq.wellformed:
                    newseq.append(mq)
                else:
                    wellformed = False
                    self._log.error(u'MediaList: Invalid MediaQuery: %s' %
                                    self._valuestr(mqtokens))
            else:
                break

        # post condition
        if expected:
            wellformed = False
            self._log.error(u'MediaList: Cannot end with ",".')

        if wellformed:
            del self[:]
            for mq in newseq:
                self.appendMedium(mq)
            self._wellformed = True

    mediaText = property(_getMediaText, _setMediaText,
        doc="""(DOM) The parsable textual representation of the media list.
            This is a comma-separated list of media.""")

    wellformed = property(lambda self: self._wellformed)

    def __prepareset(self, newMedium):
        """
        Used by appendMedium and __setitem__: checks readonly, converts
        newMedium to a MediaQuery if needed and returns it if wellformed,
        else returns None.
        """
        self._checkReadonly()

        if not isinstance(newMedium, MediaQuery):
            newMedium = MediaQuery(newMedium)

        if newMedium.wellformed:
            return newMedium
        return None

    def __setitem__(self, index, newMedium):
        """
        overwrites ListSeq.__setitem__

        Any duplicate items are **not** removed.
        """
        newMedium = self.__prepareset(newMedium)
        if newMedium:
            self.seq[index] = newMedium
        # TODO: remove duplicates?

    def appendMedium(self, newMedium):
        """
        (DOM)
        Adds the medium newMedium to the end of the list. If the newMedium
        is already used, it is first removed.

        newMedium
            a string or a MediaQuery object

        returns if newMedium is wellformed

        DOMException

        - INVALID_CHARACTER_ERR: (self)
          If the medium contains characters that are invalid in the
          underlying style language.
        - INVALID_MODIFICATION_ERR (self)
          If mediaText is "all" and a new medium is tried to be added.
          Exception is "handheld" which is set in any case (Opera does handle
          "all, handheld" special, this special case might be removed in the
          future).
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this list is readonly.
        """
        newMedium = self.__prepareset(newMedium)

        if newMedium:
            mts = [self._normalize(mq.mediaType) for mq in self]
            newmt = self._normalize(newMedium.mediaType)

            if newmt in mts:
                # move an already used medium to the end
                self.deleteMedium(newmt)
                self.seq.append(newMedium)
            elif u'all' == newmt:
                # remove all except handheld (Opera)
                h = None
                for mq in self:
                    # compare normalized, as all other media type checks
                    # here do, so e.g. "HANDHELD" is kept too
                    if self._normalize(mq.mediaType) == u'handheld':
                        h = mq
                del self[:]
                self.seq.append(newMedium)
                if h is not None:
                    self.append(h)
            elif u'all' in mts:
                if u'handheld' == newmt:
                    self.seq.append(newMedium)
                    self._log.warn(u'MediaList: Already specified "all" but still setting new medium: %r' %
                                   newMedium, error=xml.dom.InvalidModificationErr, neverraise=True)
                else:
                    self._log.warn(u'MediaList: Ignoring new medium %r as already specified "all" (set ``mediaText`` instead).' %
                                   newMedium, error=xml.dom.InvalidModificationErr)
            else:
                self.seq.append(newMedium)

            return True

        else:
            return False

    def append(self, newMedium):
        "overwrites ListSeq.append"
        self.appendMedium(newMedium)

    def deleteMedium(self, oldMedium):
        """
        (DOM)
        Deletes the medium indicated by oldMedium from the list.

        Only the first entry with a matching (normalized) media type is
        removed.

        DOMException

        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this list is readonly.
        - NOT_FOUND_ERR: (self)
          Raised if oldMedium is not in the list.
        """
        self._checkReadonly()
        oldMedium = self._normalize(oldMedium)

        for i, mq in enumerate(self):
            if self._normalize(mq.mediaType) == oldMedium:
                del self[i]
                break
        else:
            self._log.error(u'"%s" not in this MediaList' % oldMedium,
                            error=xml.dom.NotFoundErr)
#            raise xml.dom.NotFoundErr(
#                u'"%s" not in this MediaList' % oldMedium)

    def item(self, index):
        """
        (DOM)
        Returns the mediaType of the index'th element in the list.
        If index is greater than or equal to the number of media in the
        list, returns None.
        """
        try:
            return self[index].mediaType
        except IndexError:
            return None

    def __repr__(self):
        return "cssutils.stylesheets.%s(mediaText=%r)" % (
                self.__class__.__name__, self.mediaText)

    def __str__(self):
        return "<cssutils.stylesheets.%s object mediaText=%r at 0x%x>" % (
                self.__class__.__name__, self.mediaText, id(self))

View File

@ -0,0 +1,237 @@
"""
MediaQuery, see http://www.w3.org/TR/css3-mediaqueries/
A cssutils own implementation, not defined in official DOM
TODO:
add possibility to
part of a media_query_list: <media_query> [, <media_query> ]*
see stylesheets.MediaList
"""
__all__ = ['MediaQuery']
__docformat__ = 'restructuredtext'
__version__ = '$Id: mediaquery.py 1363 2008-07-13 18:14:26Z cthedot $'
import re
import xml.dom
import cssutils
class MediaQuery(cssutils.util.Base):
    """
    A Media Query consists of a media type and one or more
    expressions involving media features.

    Properties
    ==========
    mediaText: of type DOMString
        The parsable textual representation of this MediaQuery
    mediaType: of type DOMString
        one of MEDIA_TYPES like e.g. 'print'
    seq: a list (cssutils)
        All parts of this MediaQuery including CSSComments
    wellformed:
        if this query is wellformed

    Format
    ======
    ::

        media_query: [[only | not]? <media_type> [ and <expression> ]*]
          | <expression> [ and <expression> ]*
        expression: ( <media_feature> [: <value>]? )
        media_type: all | aural | braille | handheld | print |
          projection | screen | tty | tv | embossed
        media_feature: width | min-width | max-width
          | height | min-height | max-height
          | device-width | min-device-width | max-device-width
          | device-height | min-device-height | max-device-height
          | device-aspect-ratio | min-device-aspect-ratio | max-device-aspect-ratio
          | color | min-color | max-color
          | color-index | min-color-index | max-color-index
          | monochrome | min-monochrome | max-monochrome
          | resolution | min-resolution | max-resolution
          | scan | grid

    """
    MEDIA_TYPES = [u'all', u'aural', u'braille', u'embossed', u'handheld',
                   u'print', u'projection', u'screen', u'tty', u'tv']

    # From the HTML spec (see MediaQuery):
    # "[...] character that isn't a US ASCII letter [a-zA-Z] (Unicode
    # decimal 65-90, 97-122), digit [0-9] (Unicode hex 30-39), or hyphen (45)."
    # so the following is a valid mediaType
    # (the pattern has no backslashes, so a plain u'' literal equals the
    # former ur'' one and also compiles where ur'' is not valid syntax)
    __mediaTypeMatch = re.compile(u'^[-a-zA-Z0-9]+$', re.U).match

    def __init__(self, mediaText=None, readonly=False):
        """
        mediaText
            unicodestring of parsable media
        readonly
            stored as ``_readonly`` after ``mediaText`` has been set
        """
        super(MediaQuery, self).__init__()

        self.seq = []
        self._mediaType = u''
        if mediaText:
            self.mediaText = mediaText # sets self._mediaType too

        self._readonly = readonly

    def _getMediaText(self):
        """
        returns serialized property mediaText
        """
        return cssutils.ser.do_stylesheets_mediaquery(self)

    def _setMediaText(self, mediaText):
        """
        mediaText
            a single media query string, e.g. "print and (min-width: 25cm)"

        DOMException

        - SYNTAX_ERR: (self)
          Raised if the specified string value has a syntax error and is
          unparsable.
        - INVALID_CHARACTER_ERR: (self)
          Raised if the given mediaType is unknown.
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this media query is readonly.
        """
        self._checkReadonly()
        tokenizer = self._tokenize2(mediaText)
        if not tokenizer:
            self._log.error(u'MediaQuery: No MediaText given.')
        else:
            # for closures: must be a mutable
            new = {'mediatype': None,
                   'wellformed': True }

            def _ident_or_dim(expected, seq, token, tokenizer=None):
                # only|not or mediatype or and
                val = self._tokenvalue(token)
                nval = self._normalize(val)
                if expected.endswith('mediatype'):
                    if nval in (u'only', u'not'):
                        # only or not
                        seq.append(val)
                        return 'mediatype'
                    else:
                        # mediatype
                        new['mediatype'] = val
                        seq.append(val)
                        return 'and'
                elif 'and' == nval and expected.startswith('and'):
                    seq.append(u'and')
                    return 'feature'
                else:
                    new['wellformed'] = False
                    self._log.error(
                        u'MediaQuery: Unexpected syntax.', token=token)
                    return expected

            def _char(expected, seq, token, tokenizer=None):
                # starting a feature which basically is a CSS Property
                # but may simply be a property name too
                val = self._tokenvalue(token)
                if val == u'(' and expected == 'feature':
                    proptokens = self._tokensupto2(
                        tokenizer, funcendonly=True)
                    if proptokens and u')' == self._tokenvalue(proptokens[-1]):
                        # drop the closing ")"
                        proptokens.pop()
                    feature = cssutils.css.Property(_mediaQuery=True)
                    feature.cssText = proptokens
                    seq.append(feature)
                    return 'and or EOF'
                else:
                    new['wellformed'] = False
                    self._log.error(
                        u'MediaQuery: Unexpected syntax, expected "and" but found "%s".' %
                        val, token)
                    return expected

            # expected: only|not or mediatype, mediatype, feature, and
            newseq = []
            wellformed, expected = self._parse(expected='only|not or mediatype',
                seq=newseq, tokenizer=tokenizer,
                productions={'IDENT': _ident_or_dim, # e.g. "print"
                             'DIMENSION': _ident_or_dim, # e.g. "3d"
                             'CHAR': _char})
            wellformed = wellformed and new['wellformed']

            # post conditions
            if not new['mediatype']:
                wellformed = False
                self._log.error(u'MediaQuery: No mediatype found: %s' %
                    self._valuestr(mediaText))

            if wellformed:
                # set
                self.mediaType = new['mediatype']
                self.seq = newseq

    mediaText = property(_getMediaText, _setMediaText,
        doc="""(DOM) The parsable textual representation of this media
            query, e.g. "print and (min-width: 25cm)".""")

    def _getMediaType(self):
        """
        returns the media type of this query as last set
        """
        return self._mediaType

    def _setMediaType(self, mediaType):
        """
        mediaType
            one of MEDIA_TYPES

        The value is stored as given (not normalized) and the first string
        in ``seq`` which is not "only" or "not" is replaced by it; if there
        is none it is inserted at the start of ``seq``.

        DOMException

        - SYNTAX_ERR: (self)
          Raised if the specified string value has a syntax error and is
          unparsable.
        - INVALID_CHARACTER_ERR: (self)
          Raised if the given mediaType is unknown.
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this media query is readonly.
        """
        self._checkReadonly()
        nmediaType = self._normalize(mediaType)

        if not MediaQuery.__mediaTypeMatch(nmediaType):
            self._log.error(
                u'MediaQuery: Syntax Error in media type "%s".' % mediaType,
                error=xml.dom.SyntaxErr)
        else:
            if nmediaType not in MediaQuery.MEDIA_TYPES:
                # unknown types are reported and leave the query unchanged
                self._log.warn(
                    u'MediaQuery: Unknown media type "%s".' % mediaType,
                    error=xml.dom.InvalidCharacterErr)
                return

            # set
            self._mediaType = mediaType

            # update seq
            for i, x in enumerate(self.seq):
                if isinstance(x, basestring):
                    if self._normalize(x) in (u'only', u'not'):
                        continue
                    else:
                        self.seq[i] = mediaType
                        break
            else:
                self.seq.insert(0, mediaType)

    mediaType = property(_getMediaType, _setMediaType,
        doc="""(DOM) media type (one of MediaQuery.MEDIA_TYPES) of this MediaQuery.""")

    wellformed = property(lambda self: bool(len(self.seq)))

    def __repr__(self):
        return "cssutils.stylesheets.%s(mediaText=%r)" % (
                self.__class__.__name__, self.mediaText)

    def __str__(self):
        return "<cssutils.stylesheets.%s object mediaText=%r at 0x%x>" % (
                self.__class__.__name__, self.mediaText, id(self))

View File

@ -0,0 +1,101 @@
"""
StyleSheet implements DOM Level 2 Style Sheets StyleSheet.
"""
__all__ = ['StyleSheet']
__docformat__ = 'restructuredtext'
__version__ = '$Id: stylesheet.py 1284 2008-06-05 16:29:17Z cthedot $'
import urlparse
import cssutils
class StyleSheet(cssutils.util.Base2):
    """
    The StyleSheet interface is the abstract base interface
    for any type of style sheet. It represents a single style
    sheet associated with a structured document.

    In HTML, the StyleSheet interface represents either an
    external style sheet, included via the HTML LINK element,
    or an inline STYLE element (-ch: also an @import stylesheet?).

    In XML, this interface represents
    an external style sheet, included via a style sheet
    processing instruction.
    """
    def __init__(self, type='text/css',
                 href=None,
                 media=None,
                 title=u'',
                 disabled=None,
                 ownerNode=None,
                 parentStyleSheet=None):
        """
        type: readonly
            This specifies the style sheet language for this
            style sheet. The style sheet language is specified
            as a content type (e.g. "text/css"). The content
            type is often specified in the ownerNode. Also see
            the type attribute definition for the LINK element
            in HTML 4.0, and the type pseudo-attribute for the
            XML style sheet processing instruction.
        href: readonly
            If the style sheet is a linked style sheet, the value
            of this attribute is its location. For inline style
            sheets, the value of this attribute is None. See the
            href attribute definition for the LINK element in HTML
            4.0, and the href pseudo-attribute for the XML style
            sheet processing instruction.
        media: of type MediaList, readonly
            The intended destination media for style information.
            The media is often specified in the ownerNode. If no
            media has been specified, the MediaList will be empty.
            See the media attribute definition for the LINK element
            in HTML 4.0, and the media pseudo-attribute for the XML
            style sheet processing instruction. Modifying the media
            list may cause a change to the attribute disabled.
        title: readonly
            The advisory title. The title is often specified in
            the ownerNode. See the title attribute definition for
            the LINK element in HTML 4.0, and the title
            pseudo-attribute for the XML style sheet processing
            instruction.
        disabled: False if the style sheet is applied to the
            document. True if it is not. Modifying this attribute
            may cause a new resolution of style for the document.
            A stylesheet only applies if both an appropriate medium
            definition is present and the disabled attribute is False.
            So, if the media doesn't apply to the current user agent,
            the disabled attribute is ignored.
            The given value is stored as ``bool(disabled)``.
        ownerNode: of type Node, readonly
            The node that associates this style sheet with the
            document. For HTML, this may be the corresponding LINK
            or STYLE element. For XML, it may be the linking
            processing instruction. For style sheets that are
            included by other style sheets, the value of this
            attribute is None.
        parentStyleSheet: of type StyleSheet, readonly
            For style sheet languages that support the concept
            of style sheet inclusion, this attribute represents
            the including style sheet, if one exists. If the style
            sheet is a top-level style sheet, or the style sheet
            language does not support inclusion, the value of this
            attribute is None.
        """
        super(StyleSheet, self).__init__()

        # exposed only through the read-only properties below
        self._href = href
        self._ownerNode = ownerNode
        self._parentStyleSheet = parentStyleSheet
        self._type = type

        # plain, writable attributes
        self.disabled = bool(disabled)
        self.media = media
        self.title = title

    href = property(lambda self: self._href)

    ownerNode = property(lambda self: self._ownerNode)

    parentStyleSheet = property(lambda self: self._parentStyleSheet)

    type = property(lambda self: self._type, doc=u'Default: "text/css"')

View File

@ -0,0 +1,35 @@
"""
StyleSheetList implements DOM Level 2 Style Sheets StyleSheetList.
"""
__all__ = ['StyleSheetList']
__docformat__ = 'restructuredtext'
__version__ = '$Id: stylesheetlist.py 1116 2008-03-05 13:52:23Z cthedot $'
class StyleSheetList(list):
    """
    Interface StyleSheetList (introduced in DOM Level 2)

    The StyleSheetList interface provides the abstraction of an ordered
    collection of style sheets.

    The items in the StyleSheetList are accessible via an integral index,
    starting from 0.

    This Python implementation is based on a standard Python list so e.g.
    allows ``examplelist[index]`` usage.
    """
    def item(self, index):
        """
        Used to retrieve a style sheet by ordinal index. If index is
        greater than or equal to the number of style sheets in the list,
        this returns None.

        As this simply indexes the underlying list, a negative index counts
        from the end.
        """
        try:
            sheet = self[index]
        except IndexError:
            return None
        return sheet

    def _getLength(self):
        return len(self)

    length = property(_getLength,
        doc="""The number of StyleSheets in the list. The range of valid
        child stylesheet indices is 0 to length-1 inclusive.""")

View File

@ -0,0 +1,177 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""New CSS Tokenizer (a generator)
"""
__all__ = ['Tokenizer', 'CSSProductions']
__docformat__ = 'restructuredtext'
__version__ = '$Id: tokenize2.py 1420 2008-08-09 19:28:34Z cthedot $'
import re
from helper import normalize
from cssproductions import *
class Tokenizer(object):
    """
    generates a list of Token tuples:
        (Tokenname, value, startline, startcolumn)
    """
    # at-keywords which have a production name of their own
    _atkeywords = {
        u'@font-face': CSSProductions.FONT_FACE_SYM,
        u'@import': CSSProductions.IMPORT_SYM,
        u'@media': CSSProductions.MEDIA_SYM,
        u'@namespace': CSSProductions.NAMESPACE_SYM,
        u'@page': CSSProductions.PAGE_SYM
        }
    # used to count lines and to find the column after a line break
    _linesep = u'\n'

    def __init__(self, macros=None, productions=None):
        """
        inits tokenizer with given macros and productions which default to
        cssutils own macros and productions
        """
        if not macros:
            macros = MACROS
        if not productions:
            productions = PRODUCTIONS
        self.tokenmatches = self._compile_productions(
                self._expand_macros(macros,
                                    productions))
        self.commentmatcher = [x[1] for x in self.tokenmatches if x[0] == 'COMMENT'][0]
        self.urimatcher = [x[1] for x in self.tokenmatches if x[0] == 'URI'][0]
        # a unicode escape: backslash, 1-6 hex digits and one optional
        # terminating whitespace. The character class lists the whitespace
        # characters only; a "|" inside it would be taken literally, be
        # swallowed as part of the escape and break the hex conversion in
        # ``tokenize``.
        self.unicodesub = re.compile(r'\\[0-9a-fA-F]{1,6}(?:\r\n|[\t\r\n\f\x20])?').sub

    def _expand_macros(self, macros, productions):
        """returns macro expanded productions, order of productions is kept"""
        def macro_value(m):
            return '(?:%s)' % macros[m.groupdict()['macro']]
        expanded = []
        for key, value in productions:
            # macros may contain macros: expand until none is left
            while re.search(r'{[a-zA-Z][a-zA-Z0-9-]*}', value):
                value = re.sub(r'{(?P<macro>[a-zA-Z][a-zA-Z0-9-]*)}',
                               macro_value, value)
            expanded.append((key, value))
        return expanded

    def _compile_productions(self, expanded_productions):
        """compile productions into callable match objects, order is kept"""
        compiled = []
        for key, value in expanded_productions:
            compiled.append((key, re.compile('^(?:%s)' % value, re.U).match))
        return compiled

    def tokenize(self, text, fullsheet=False):
        """Generator: Tokenize text and yield tokens, each token is a tuple
        of::

            (nname, value, line, col)

        The token value will contain a normal string, meaning CSS unicode
        escapes have been resolved to normal characters. The serializer
        escapes needed characters back to unicode escapes depending on
        the stylesheet target encoding.

        text
            to be tokenized
        fullsheet
            if ``True`` appends EOF token as last one and completes incomplete
            COMMENT or INVALID (to STRING) tokens
        """
        def _repl(m):
            "used by unicodesub"
            # int() ignores the optional trailing whitespace of the escape
            num = int(m.group(0)[1:], 16)
            if num < 0x10000:
                return unichr(num)
            else:
                # keep escapes above U+FFFF as they are
                return m.group(0)

        def _normalize(value):
            "normalize and do unicodesub"
            return normalize(self.unicodesub(_repl, value))

        line = col = 1

        # check for BOM first as it should only be max one at the start
        (BOM, matcher), productions = self.tokenmatches[0], self.tokenmatches[1:]
        match = matcher(text)
        if match:
            found = match.group(0)
            yield (BOM, found, line, col)
            text = text[len(found):]

        # check for @charset which is valid only at start of CSS
        if text.startswith('@charset '):
            found = '@charset ' # production has trailing S!
            yield (CSSProductions.CHARSET_SYM, found, line, col)
            text = text[len(found):]
            col += len(found)

        while text:
            # speed test for most used CHARs
            c = text[0]
            if c in '{}:;,':
                yield ('CHAR', c, line, col)
                col += 1
                text = text[1:]

            else:
                # check all other productions, at least CHAR must match
                for name, matcher in productions:
                    if fullsheet and name == 'CHAR' and text.startswith(u'/*'):
                        # before CHAR production test for incomplete comment
                        possiblecomment = u'%s*/' % text
                        match = self.commentmatcher(possiblecomment)
                        if match:
                            yield ('COMMENT', possiblecomment, line, col)
                            text = None # eats all remaining text
                            break

                    match = matcher(text) # if no match try next production
                    if match:
                        found = match.group(0) # needed later for line/col
                        if fullsheet:
                            # check if found may be completed into a full token
                            if 'INVALID' == name and text == found:
                                # complete INVALID to STRING with start char " or '
                                name, found = 'STRING', '%s%s' % (found, found[0])

                            elif 'FUNCTION' == name and\
                                 u'url(' == _normalize(found):
                                # FUNCTION url( is fixed to URI if fullsheet
                                # FUNCTION production MUST BE after URI production!
                                for end in (u"')", u'")', u')'):
                                    possibleuri = '%s%s' % (text, end)
                                    match = self.urimatcher(possibleuri)
                                    if match:
                                        name, found = 'URI', match.group(0)
                                        break

                        if name in ('DIMENSION', 'IDENT', 'STRING', 'URI',
                                    'HASH', 'COMMENT', 'FUNCTION', 'INVALID'):
                            # may contain unicode escape, replace with normal char
                            # but do not _normalize (?)
                            value = self.unicodesub(_repl, found)

                        else:
                            if 'ATKEYWORD' == name:
                                # get actual ATKEYWORD SYM
                                if '@charset' == found and ' ' == text[len(found):len(found)+1]:
                                    # only this syntax!
                                    name = CSSProductions.CHARSET_SYM
                                    found += ' '
                                else:
                                    name = self._atkeywords.get(_normalize(found), 'ATKEYWORD')

                            value = found # should not contain unicode escape (?)

                        yield (name, value, line, col)
                        text = text[len(found):]
                        # advance position: after a line break the column
                        # restarts behind the last break in found
                        nls = found.count(self._linesep)
                        line += nls
                        if nls:
                            col = len(found[found.rfind(self._linesep):])
                        else:
                            col += len(found)
                        break

        if fullsheet:
            yield ('EOF', u'', line, col)

View File

@ -0,0 +1,817 @@
"""base classes and helper functions for css and stylesheets packages
"""
__all__ = []
__docformat__ = 'restructuredtext'
__version__ = '$Id: util.py 1429 2008-08-11 19:01:52Z cthedot $'
import codecs
from itertools import ifilter
import types
import urllib2
import xml.dom
from helper import normalize
import tokenize2
import cssutils
class Base(object):
"""
Base class for most CSS and StyleSheets classes
**Superceded by Base2 which is used for new seq handling class.**
See cssutils.util.Base2
Contains helper methods for inheriting classes helping parsing
``_normalize`` is static as used by Preferences.
"""
__tokenizer2 = tokenize2.Tokenizer()
_log = cssutils.log
_prods = tokenize2.CSSProductions
# for more on shorthand properties see
# http://www.dustindiaz.com/css-shorthand/
# format: shorthand: [(propname, mandatorycheck?)*]
_SHORTHANDPROPERTIES = {
u'background': [],
u'background-position': [],
u'border': [],
u'border-left': [],
u'border-right': [],
u'border-top': [],
u'border-bottom': [],
#u'border-color': [], # list or single but same values
#u'border-style': [], # list or single but same values
#u'border-width': [], # list or single but same values
u'cue': [],
u'font': [],
u'list-style': [],
#u'margin': [], # list or single but same values
u'outline': [],
#u'padding': [], # list or single but same values
u'pause': []
}
@staticmethod
def _normalize(x):
    r"""
    Return ``x`` normalized by the ``normalize`` helper, namely:

    - any \ before non unicode sequences (0-9a-zA-Z) is removed, so for
      ``x == "c\olor\"`` the result is ``"color"`` (unicode escape
      sequences should have been resolved by the tokenizer already)
    - lowercased

    Static as it is used without an instance too (by Preferences).
    """
    return normalize(x)
def _checkReadonly(self):
"raises xml.dom.NoModificationAllowedErr if rule/... is readonly"
if hasattr(self, '_readonly') and self._readonly:
raise xml.dom.NoModificationAllowedErr(
u'%s is readonly.' % self.__class__)
return True
return False
def _splitNamespacesOff(self, text_namespaces_tuple):
    """
    Split an optional namespaces part off the given value.

    text_namespaces_tuple
        either a tuple ``(text, dict-of-namespaces)`` or the text alone

    Returns ``(text, namespaces)`` where namespaces is always a
    ``_SimpleNamespaces`` object, built from the given dict or empty if
    only text was given.

    used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
    CSSStyleSheet
    """
    if not isinstance(text_namespaces_tuple, tuple):
        # text only
        return text_namespaces_tuple, _SimpleNamespaces(log=self._log)

    text = text_namespaces_tuple[0]
    namespaces = _SimpleNamespaces(self._log, text_namespaces_tuple[1])
    return text, namespaces
def _tokenize2(self, textortokens):
"""
returns tokens of textortokens which may already be tokens in which
case simply returns input
"""
if not textortokens:
return None
elif isinstance(textortokens, basestring):
# needs to be tokenized
return self.__tokenizer2.tokenize(
textortokens)
elif types.GeneratorType == type(textortokens):
# already tokenized
return textortokens
elif isinstance(textortokens, tuple):
# a single token (like a comment)
return [textortokens]
else:
# already tokenized but return generator
return (x for x in textortokens)
def _nexttoken(self, tokenizer, default=None):
"returns next token in generator tokenizer or the default value"
try:
return tokenizer.next()
except (StopIteration, AttributeError):
return default
def _type(self, token):
"returns type of Tokenizer token"
if token:
return token[0]
else:
return None
def _tokenvalue(self, token, normalize=False):
"returns value of Tokenizer token"
if token and normalize:
return Base._normalize(token[1])
elif token:
return token[1]
else:
return None
def _stringtokenvalue(self, token):
"""
for STRING returns the actual content without surrounding "" or ''
and without respective escapes, e.g.::
"with \" char" => with " char
"""
if token:
value = token[1]
return value.replace('\\'+value[0], value[0])[1:-1]
else:
return None
def _uritokenvalue(self, token):
"""
for URI returns the actual content without surrounding url()
or url(""), url('') and without respective escapes, e.g.::
url("\"") => "
"""
if token:
value = token[1][4:-1].strip()
if (value[0] in '\'"') and (value[0] == value[-1]):
# a string "..." or '...'
value = value.replace('\\'+value[0], value[0])[1:-1]
return value
else:
return None
def _tokensupto2(self,
                 tokenizer,
                 starttoken=None,
                 blockstartonly=False, # ends at {
                 blockendonly=False, # ends at }
                 mediaendonly=False, # ends at }
                 importmediaqueryendonly=False, # ends at ; or a STRING
                 mediaqueryendonly=False, # ends at { or a STRING
                 semicolon=False, # ends at ;
                 propertynameendonly=False, # ends at : or ;
                 propertyvalueendonly=False, # ends at ; or !
                 propertypriorityendonly=False, # ends at ;
                 selectorattendonly=False, # ends at ]
                 funcendonly=False, # ends at )
                 listseponly=False, # ends at ,
                 separateEnd=False # returns (resulttokens, endtoken)
                 ):
    """
    Collect tokens from ``tokenizer`` up to and including an end token.

    tokenizer
        iterable of tokens ``(type, value, line, col)``; it is consumed
        only as far as needed. A falsy tokenizer is not read at all.
    starttoken
        if given it is put first into the result
    ``*only`` flags and ``semicolon``
        select which token ends the collection (see the comments in the
        signature). They are checked in the order of the signature and
        only the first true one is used. Without any flag the collection
        ends at ``;`` or ``}``.
    separateEnd
        if true ``(resulttokens[:-1], resulttokens[-1])`` is returned,
        so the last collected token is handed back separately, or
        ``([], None)`` if nothing was collected at all

    An end token only counts while no ``{}``, ``[]`` or ``()`` is open,
    nesting is tracked with the counters ``brace``, ``bracket`` and
    ``parant``. An EOF token is added to the result and always stops the
    collection.

    Returns the list of collected tokens (or the tuple described for
    ``separateEnd``).
    """
    # default: end at ";" or "}"
    ends = u';}'
    # token *types* which end the collection too
    endtypes = ()
    # nesting depth of {}, [] and ()
    brace = bracket = parant = 0
    if blockstartonly: # {
        ends = u'{'
        # starts "below" zero so the first { brings it to 0 and ends
        brace = -1
    elif blockendonly: # }
        ends = u'}'
        # block counts as already opened, its } brings brace back to 0
        brace = 1
    elif mediaendonly: # }
        ends = u'}'
        brace = 1 # rules } and mediarules }
    elif importmediaqueryendonly:
        # end of mediaquery which may be ; or STRING
        ends = u';'
        endtypes = ('STRING',)
    elif mediaqueryendonly:
        # end of mediaquery which may be { or STRING
        # special case, see below
        ends = u'{'
        brace = -1 # set to 0 with first {
        endtypes = ('STRING',)
    elif semicolon:
        ends = u';'
    elif propertynameendonly: # : and ; in case of an error
        ends = u':;'
    elif propertyvalueendonly: # ; or !important
        ends = u';!'
    elif propertypriorityendonly: # ;
        ends = u';'
    elif selectorattendonly: # ]
        ends = u']'
        # the opening [ was consumed by the caller already
        if starttoken and self._tokenvalue(starttoken) == u'[':
            bracket = 1
    elif funcendonly: # )
        ends = u')'
        # function counts as already opened
        parant = 1
    elif listseponly: # ,
        ends = u','
    resulttokens = []
    if starttoken:
        resulttokens.append(starttoken)
    if tokenizer:
        for token in tokenizer:
            typ, val, line, col = token
            if 'EOF' == typ:
                # EOF is part of the result but nothing follows it
                resulttokens.append(token)
                break
            # keep track of nesting, counters are updated *before* the
            # end check so a closing token may be the end token itself
            if u'{' == val:
                brace += 1
            elif u'}' == val:
                brace -= 1
            elif u'[' == val:
                bracket += 1
            elif u']' == val:
                bracket -= 1
            # function( or single (
            elif u'(' == val or \
                 Base._prods.FUNCTION == typ:
                parant += 1
            elif u')' == val:
                parant -= 1
            resulttokens.append(token)
            # end found and nothing is open anymore
            if (brace == bracket == parant == 0) and (
                val in ends or typ in endtypes):
                break
            elif mediaqueryendonly and brace == -1 and (
                 bracket == parant == 0) and typ in endtypes:
                # mediaqueryendonly with STRING: brace is still -1 as no
                # { was seen, so the check above cannot match
                break
    if separateEnd:
        # TODO: use this method as generator, then this makes sense
        if resulttokens:
            return resulttokens[:-1], resulttokens[-1]
        else:
            return resulttokens, None
    else:
        return resulttokens
def _valuestr(self, t):
"""
returns string value of t (t may be a string, a list of token tuples
or a single tuple in format (type, value, line, col).
Mainly used to get a string value of t for error messages.
"""
if not t:
return u''
elif isinstance(t, basestring):
return t
else:
return u''.join([x[1] for x in t])
def _adddefaultproductions(self, productions, new=None):
"""
adds default productions if not already present, used by
_parse only
each production should return the next expected token
normaly a name like "uri" or "EOF"
some have no expectation like S or COMMENT, so simply return
the current value of self.__expected
"""
def ATKEYWORD(expected, seq, token, tokenizer=None):
"default impl for unexpected @rule"
if expected != 'EOF':
# TODO: parentStyleSheet=self
rule = cssutils.css.CSSUnknownRule()
rule.cssText = self._tokensupto2(tokenizer, token)
if rule.wellformed:
seq.append(rule)
return expected
else:
new['wellformed'] = False
self._log.error(u'Expected EOF.', token=token)
return expected
def COMMENT(expected, seq, token, tokenizer=None):
"default implementation for COMMENT token adds CSSCommentRule"
seq.append(cssutils.css.CSSComment([token]))
return expected
def S(expected, seq, token, tokenizer=None):
"default implementation for S token, does nothing"
return expected
def EOF(expected=None, seq=None, token=None, tokenizer=None):
"default implementation for EOF token"
return 'EOF'
p = {'ATKEYWORD': ATKEYWORD,
'COMMENT': COMMENT,
'S': S,
'EOF': EOF # only available if fullsheet
}
p.update(productions)
return p
def _parse(self, expected, seq, tokenizer, productions, default=None,
new=None):
"""
puts parsed tokens in seq by calling a production with
(seq, tokenizer, token)
expected
a name what token or value is expected next, e.g. 'uri'
seq
to add rules etc to
tokenizer
call tokenizer.next() to get next token
productions
callbacks {tokentype: callback}
default
default callback if tokentype not in productions
new
used to init default productions
returns (wellformed, expected) which the last prod might have set
"""
wellformed = True
if tokenizer:
prods = self._adddefaultproductions(productions, new)
for token in tokenizer:
p = prods.get(token[0], default)
if p:
expected = p(expected, seq, token, tokenizer)
else:
wellformed = False
self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
return wellformed, expected
class Base2(Base):
    """
    Base class for new seq handling, used by Selector for now only.

    Keeps its items in a ``Seq`` available as property ``seq``.
    """
    def __init__(self):
        self._seq = Seq()

    def _setSeq(self, newseq):
        """
        sets newseq and makes it readonly
        """
        newseq._readonly = True
        self._seq = newseq

    seq = property(lambda self: self._seq, doc="seq for most classes")

    def _tempSeq(self, readonly=False):
        "get a writeable Seq() which is added later"
        return Seq(readonly=readonly)

    def _adddefaultproductions(self, productions, new=None):
        """
        Return a dict of productions ``{tokentype: callback}`` made of
        the default productions for ATKEYWORD, COMMENT, S and EOF, each
        of which is replaced by the entry in ``productions`` for the same
        token type if there is one. Used by ``_parse`` only.

        productions
            ``{tokentype: callback}``, not modified
        new
            optional dict; ``new['wellformed']`` is set to ``False`` if
            anything is found where EOF was expected. May be ``None``
            (the default), which raised ``TypeError`` in that case
            before.

        Each production is called with
        ``(expected, seq, token, tokenizer)`` and should return the next
        expected token, normally a name like "uri" or "EOF". Some have
        no expectation like S or COMMENT, so they simply return the
        ``expected`` they were given.
        """
        def notwellformed():
            "flags the result as invalid if the caller wants to know"
            if new is not None:
                new['wellformed'] = False

        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected != 'EOF':
                # TODO: parentStyleSheet=self
                rule = cssutils.css.CSSUnknownRule()
                rule.cssText = self._tokensupto2(tokenizer, token)
                if rule.wellformed:
                    seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
                               line=token[2], col=token[3])
                return expected
            else:
                notwellformed()
                self._log.error(u'Expected EOF.', token=token)
                return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default impl, adds CSSComment, an error if EOF was expected"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found comment.', token=token)
            # the comment is kept in any case
            seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, does nothing but is an error if EOF was expected"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found whitespace.', token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        defaultproductions = {'ATKEYWORD': ATKEYWORD,
                              'COMMENT': COMMENT,
                              'S': S,
                              'EOF': EOF # only available if fullsheet
                              }
        # given productions win over the defaults
        defaultproductions.update(productions)
        return defaultproductions
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing. The readonly
    flag is checked by ``append``, ``appendItem`` and ``replace`` only;
    ``seq[i] = ...`` and ``del seq[i]`` do not check it.
    """
    def __init__(self, readonly=True):
        """
        Start with an empty sequence.

        readonly
            if true (default) ``append``, ``appendItem`` and ``replace``
            raise ``AttributeError``
        """
        self._seq = []
        self._readonly = readonly

    def __delitem__(self, i):
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, itemtuple):
        """
        Replace the Item at ``i`` with a new Item built from
        ``itemtuple`` which is ``(value, type[, line[, col]])``;
        a missing line or col is set to ``None``.

        Raises ``ValueError`` for any other number of values.
        """
        # no tuple parameter in the signature: that syntax was removed
        # from the language later (PEP 3113)
        fields = tuple(itemtuple)
        if len(fields) in (2, 3):
            fields += (None,) * (4 - len(fields))
        val, typ, line, col = fields
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ, line=None, col=None):
        "if not readonly add new Item(), else raise AttributeError"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(Item(val, typ, line, col))

    def appendItem(self, item):
        """
        if not readonly add item which must be an Item (this is not
        checked), else raise AttributeError
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(item)

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace Item at index with a new
        ``Item(val, typ, line, col)``, else raise AttributeError

        The old Item is discarded completely, so values not given are
        ``None`` in the new Item and not taken over from the old one.
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq[index] = Item(val, typ, line, col)

    def __repr__(self):
        "returns a repr listing a tuple (type, value) for each item"
        return u'cssutils.%s.%s([\n    %s])' % (self.__module__,
                                          self.__class__.__name__,
            u',\n    '.join([u'(%r, %r)' % (item.type, item.value)
                          for item in self._seq]
            ))

    def __str__(self):
        return "<cssutils.%s.%s object length=%r at 0x%x>" % (
                self.__module__, self.__class__.__name__, len(self), id(self))
class Item(object):
    """
    an item in the seq list of classes (successor to tuple items in old seq)

    each item has the read-only attributes:

    type
        a semantic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line, col
        whatever was given to the constructor for them, ``None`` by
        default
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value, self.__type = value, type
        self.__line, self.__col = line, col

    # plain getters only, so an Item cannot be changed via its attributes
    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        details = (self.__module__, self.__class__.__name__,
                   self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % details
class ListSeq(object):
    """
    (EXPERIMENTAL)
    A base class used for list classes like css.SelectorList or
    stylesheets.MediaList

    adds list like behaviour running on inheriting class' property
    ``seq``

    - item in x => bool
    - len(x) => integer
    - get, set and del x[i]
    - for item in x
    - append(item)

    ``x[i] = item`` and ``append(item)`` raise ``NotImplementedError``
    here and must be overwritten in the inheriting class
    """
    def __init__(self):
        # a simple list is enough, no need for a ``Seq``
        self.seq = []

    def __contains__(self, item):
        return item in self.seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, index):
        return self.seq[index]

    def __delitem__(self, index):
        del self.seq[index]

    def __iter__(self):
        # generator, ``self.seq`` is looked up when iteration starts
        for entry in self.seq:
            yield entry

    def __setitem__(self, index, item):
        "must be overwritten"
        raise NotImplementedError

    def append(self, item):
        "must be overwritten"
        raise NotImplementedError
class _Namespaces(object):
"""
A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
Works on effective namespaces, so e.g. if::
@namespace p1 "uri";
@namespace p2 "uri";
only the second rule is effective and kept.
namespaces
a dictionary {prefix: namespaceURI} containing the effective namespaces
only. These are the latest set in the CSSStyleSheet.
parentStyleSheet
the parent CSSStyleSheet
"""
def __init__(self, parentStyleSheet, log=None, *args):
"no initial values are set, only the relevant sheet is"
self.parentStyleSheet = parentStyleSheet
self._log = log
def __contains__(self, prefix):
return prefix in self.namespaces
def __delitem__(self, prefix):
"""deletes CSSNamespaceRule(s) with rule.prefix == prefix
prefix '' and None are handled the same
"""
if not prefix:
prefix = u''
delrule = self.__findrule(prefix)
for i, rule in enumerate(ifilter(lambda r: r.type == r.NAMESPACE_RULE,
self.parentStyleSheet.cssRules)):
if rule == delrule:
self.parentStyleSheet.deleteRule(i)
return
self._log.error('Prefix %r not found.' % prefix,
error=xml.dom.NamespaceErr)
def __getitem__(self, prefix):
try:
return self.namespaces[prefix]
except KeyError, e:
self._log.error('Prefix %r not found.' % prefix,
error=xml.dom.NamespaceErr)
def __iter__(self):
return self.namespaces.__iter__()
def __len__(self):
return len(self.namespaces)
def __setitem__(self, prefix, namespaceURI):
"replaces prefix or sets new rule, may raise NoModificationAllowedErr"
if not prefix:
prefix = u'' # None or ''
rule = self.__findrule(prefix)
if not rule:
self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule(
prefix=prefix,
namespaceURI=namespaceURI),
inOrder=True)
else:
if prefix in self.namespaces:
rule.namespaceURI = namespaceURI # raises NoModificationAllowedErr
if namespaceURI in self.namespaces.values():
rule.prefix = prefix
def __findrule(self, prefix):
# returns namespace rule where prefix == key
for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE,
reversed(self.parentStyleSheet.cssRules)):
if rule.prefix == prefix:
return rule
def __getNamespaces(self):
namespaces = {}
for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE,
reversed(self.parentStyleSheet.cssRules)):
if rule.namespaceURI not in namespaces.values():
namespaces[rule.prefix] = rule.namespaceURI
return namespaces
namespaces = property(__getNamespaces,
doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
'@namespace rules.')
def get(self, prefix, default):
return self.namespaces.get(prefix, default)
def items(self):
return self.namespaces.items()
def keys(self):
return self.namespaces.keys()
def values(self):
return self.namespaces.values()
def prefixForNamespaceURI(self, namespaceURI):
"""
returns effective prefix for given namespaceURI or raises IndexError
if this cannot be found"""
for prefix, uri in self.namespaces.items():
if uri == namespaceURI:
return prefix
raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
def __str__(self):
return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % (
self.__class__.__name__, str(self.parentStyleSheet), id(self))
class _SimpleNamespaces(_Namespaces):
    """
    namespaces used in objects like Selector as long as they are not connected
    to a CSSStyleSheet

    The namespaces are kept in a plain dict of this object instead of
    being read from a sheet; ``parentStyleSheet`` is always ``None``.
    """
    def __init__(self, log=None, *args):
        """
        log
            passed on to ``_Namespaces``
        args
            passed on to ``dict()`` to build the initial namespaces
        """
        super(_SimpleNamespaces, self).__init__(parentStyleSheet=None, log=log)
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        self.__namespaces[prefix] = namespaceURI

    def __getNamespaces(self):
        return self.__namespaces

    namespaces = property(__getNamespaces,
                          doc=u'Dict Wrapper for self.sheets @namespace rules.')

    def __str__(self):
        details = (self.__class__.__name__, self.namespaces, id(self))
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % details

    def __repr__(self):
        details = (self.__class__.__name__, self.namespaces)
        return u"cssutils.util.%s(%r)" % details
def _defaultFetcher(url):
"""Retrieve data from ``url``. cssutils default implementation of fetch
URL function.
Returns ``(encoding, string)`` or ``None``
"""
try:
res = urllib2.urlopen(url)
except OSError, e:
# e.g if file URL and not found
cssutils.log.warn(e, error=OSError)
except (OSError, ValueError), e:
# invalid url, e.g. "1"
cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError)
except urllib2.HTTPError, e:
# http error, e.g. 404, e can be raised
cssutils.log.warn(u'HTTPError opening url=%r: %s %s' %
(url, e.code, e.msg), error=e)
except urllib2.URLError, e:
# URLError like mailto: or other IO errors, e can be raised
cssutils.log.warn(u'URLError, %s' % e.reason, error=e)
else:
if res:
mimeType, encoding = encutils.getHTTPInfo(res)
if mimeType != u'text/css':
cssutils.log.error(u'Expected "text/css" mime type for url=%s but found: %r' %
(url, mimeType), error=ValueError)
return encoding, res.read()
def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
"""
Read cssText from url and decode it using all relevant methods (HTTP
header, BOM, @charset). Returns
- encoding used to decode text (which is needed to set encoding of
stylesheet properly)
- type of encoding (how it was retrieved, see list below)
- decodedCssText
``fetcher``
see cssutils.registerFetchUrl for details
``overrideEncoding``
If given this encoding is used and all other encoding information is
ignored (HTTP, BOM etc)
``parentEncoding``
Encoding of parent stylesheet (while e.g. reading @import references sheets)
or document if available.
Priority or encoding information
--------------------------------
**cssutils only**: 0. overrideEncoding
1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols)
2. BOM and/or @charset (see below)
3. <link charset=""> or other metadata from the linking mechanism (if any)
4. charset of referring style sheet or document (if any)
5. Assume UTF-8
"""
enctype = None
if not fetcher:
fetcher = _defaultFetcher
r = fetcher(url)
if r and len(r) == 2 and r[1] is not None:
httpEncoding, content = r
if overrideEncoding:
enctype = 0 # 0. override encoding
encoding = overrideEncoding
elif httpEncoding:
enctype = 1 # 1. HTTP
encoding = httpEncoding
else:
# check content
contentEncoding, explicit = cssutils.codec.detectencoding_str(content)
if explicit:
enctype = 2 # 2. BOM/@charset: explicitly
encoding = contentEncoding
elif parentEncoding:
enctype = 4 # 4. parent stylesheet or document
# may also be None in which case 5. is used in next step anyway
encoding = parentEncoding
else:
enctype = 5 # 5. assume UTF-8
encoding = 'utf-8'
try:
# encoding may still be wrong if encoding *is lying*!
if content is not None:
decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0]
else:
decodedCssText = None
except UnicodeDecodeError, e:
cssutils.log.warn(e, neverraise=True)
decodedCssText = None
return encoding, enctype, decodedCssText
else:
return None, None, None