From 6b6b18e771ab4df7fd559e557a02b9f9f18f76e2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Aug 2008 15:48:32 -0700 Subject: [PATCH] IGN:Initial framework for html2epub --- src/calibre/ebooks/__init__.py | 1 + src/calibre/ebooks/epub/__init__.py | 45 + src/calibre/ebooks/epub/from_html.py | 212 ++++ src/calibre/ebooks/epub/traverse.py | 25 +- src/calibre/ebooks/html.py | 74 ++ src/calibre/linux.py | 1 + src/calibre/startup.py | 7 +- src/calibre/utils/config.py | 4 +- src/calibre/utils/cssutils/__init__.py | 254 ++++ src/calibre/utils/cssutils/codec.py | 581 +++++++++ src/calibre/utils/cssutils/css/__init__.py | 63 + .../utils/cssutils/css/csscharsetrule.py | 165 +++ src/calibre/utils/cssutils/css/csscomment.py | 92 ++ .../utils/cssutils/css/cssfontfacerule.py | 163 +++ .../utils/cssutils/css/cssimportrule.py | 399 ++++++ .../utils/cssutils/css/cssmediarule.py | 349 ++++++ .../utils/cssutils/css/cssnamespacerule.py | 306 +++++ src/calibre/utils/cssutils/css/csspagerule.py | 286 +++++ .../utils/cssutils/css/cssproperties.py | 349 ++++++ src/calibre/utils/cssutils/css/cssrule.py | 134 ++ src/calibre/utils/cssutils/css/cssrulelist.py | 60 + .../utils/cssutils/css/cssstyledeclaration.py | 651 ++++++++++ .../utils/cssutils/css/cssstylerule.py | 242 ++++ .../utils/cssutils/css/cssstylesheet.py | 674 ++++++++++ .../utils/cssutils/css/cssunknownrule.py | 208 ++++ src/calibre/utils/cssutils/css/cssvalue.py | 1086 +++++++++++++++++ src/calibre/utils/cssutils/css/property.py | 414 +++++++ src/calibre/utils/cssutils/css/selector.py | 800 ++++++++++++ .../utils/cssutils/css/selectorlist.py | 249 ++++ src/calibre/utils/cssutils/css2productions.py | 131 ++ src/calibre/utils/cssutils/css3productions.py | 62 + src/calibre/utils/cssutils/cssproductions.py | 121 ++ src/calibre/utils/cssutils/errorhandler.py | 117 ++ src/calibre/utils/cssutils/helper.py | 51 + src/calibre/utils/cssutils/parse.py | 183 +++ src/calibre/utils/cssutils/script.py | 371 ++++++ 
.../utils/cssutils/scripts/__init__.py | 4 + .../utils/cssutils/scripts/csscapture.py | 69 ++ .../utils/cssutils/scripts/csscombine.py | 90 ++ .../utils/cssutils/scripts/cssparse.py | 59 + src/calibre/utils/cssutils/serialize.py | 882 +++++++++++++ .../utils/cssutils/stylesheets/__init__.py | 18 + .../utils/cssutils/stylesheets/medialist.py | 256 ++++ .../utils/cssutils/stylesheets/mediaquery.py | 237 ++++ .../utils/cssutils/stylesheets/stylesheet.py | 101 ++ .../cssutils/stylesheets/stylesheetlist.py | 35 + src/calibre/utils/cssutils/tokenize2.py | 177 +++ src/calibre/utils/cssutils/util.py | 817 +++++++++++++ 48 files changed, 11670 insertions(+), 5 deletions(-) create mode 100644 src/calibre/ebooks/epub/from_html.py create mode 100644 src/calibre/ebooks/html.py create mode 100644 src/calibre/utils/cssutils/__init__.py create mode 100644 src/calibre/utils/cssutils/codec.py create mode 100644 src/calibre/utils/cssutils/css/__init__.py create mode 100644 src/calibre/utils/cssutils/css/csscharsetrule.py create mode 100644 src/calibre/utils/cssutils/css/csscomment.py create mode 100644 src/calibre/utils/cssutils/css/cssfontfacerule.py create mode 100644 src/calibre/utils/cssutils/css/cssimportrule.py create mode 100644 src/calibre/utils/cssutils/css/cssmediarule.py create mode 100644 src/calibre/utils/cssutils/css/cssnamespacerule.py create mode 100644 src/calibre/utils/cssutils/css/csspagerule.py create mode 100644 src/calibre/utils/cssutils/css/cssproperties.py create mode 100644 src/calibre/utils/cssutils/css/cssrule.py create mode 100644 src/calibre/utils/cssutils/css/cssrulelist.py create mode 100644 src/calibre/utils/cssutils/css/cssstyledeclaration.py create mode 100644 src/calibre/utils/cssutils/css/cssstylerule.py create mode 100644 src/calibre/utils/cssutils/css/cssstylesheet.py create mode 100644 src/calibre/utils/cssutils/css/cssunknownrule.py create mode 100644 src/calibre/utils/cssutils/css/cssvalue.py create mode 100644 
src/calibre/utils/cssutils/css/property.py create mode 100644 src/calibre/utils/cssutils/css/selector.py create mode 100644 src/calibre/utils/cssutils/css/selectorlist.py create mode 100644 src/calibre/utils/cssutils/css2productions.py create mode 100644 src/calibre/utils/cssutils/css3productions.py create mode 100644 src/calibre/utils/cssutils/cssproductions.py create mode 100644 src/calibre/utils/cssutils/errorhandler.py create mode 100644 src/calibre/utils/cssutils/helper.py create mode 100644 src/calibre/utils/cssutils/parse.py create mode 100644 src/calibre/utils/cssutils/script.py create mode 100644 src/calibre/utils/cssutils/scripts/__init__.py create mode 100644 src/calibre/utils/cssutils/scripts/csscapture.py create mode 100644 src/calibre/utils/cssutils/scripts/csscombine.py create mode 100644 src/calibre/utils/cssutils/scripts/cssparse.py create mode 100644 src/calibre/utils/cssutils/serialize.py create mode 100644 src/calibre/utils/cssutils/stylesheets/__init__.py create mode 100644 src/calibre/utils/cssutils/stylesheets/medialist.py create mode 100644 src/calibre/utils/cssutils/stylesheets/mediaquery.py create mode 100644 src/calibre/utils/cssutils/stylesheets/stylesheet.py create mode 100644 src/calibre/utils/cssutils/stylesheets/stylesheetlist.py create mode 100644 src/calibre/utils/cssutils/tokenize2.py create mode 100644 src/calibre/utils/cssutils/util.py diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index 2716963e8d..d678fdc089 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -15,6 +15,7 @@ class ConversionError(Exception): class UnknownFormatError(Exception): pass + BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', 'html', 'xhtml', 'epub', 'pdf', 'prc', 'mobi', 'azw', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz'] diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py index 75527d6323..c2309d9f8b 100644 --- 
a/src/calibre/ebooks/epub/__init__.py +++ b/src/calibre/ebooks/epub/__init__.py @@ -6,3 +6,48 @@ __docformat__ = 'restructuredtext en' ''' Conversion to EPUB. ''' +import sys +from calibre.utils.config import Config, StringConfig + +def config(defaults=None): + desc = _('Options to control the conversion to EPUB') + if defaults is None: + c = Config('epub', desc) + else: + c = StringConfig(defaults, desc) + + c.add_opt('output', ['-o', '--output'], default=None, + help=_('The output EPUB file. If not specified, it is derived from the input file name.')) + c.add_opt('encoding', ['--encoding'], default=None, + help=_('Character encoding for HTML files. Default is to auto detect.')) + + metadata = c.add_group('metadata', _('Set metadata of the generated ebook')) + metadata('title', ['-t', '--title'], default=None, + help=_('Set the title. Default is to autodetect.')) + metadata('authors', ['-a', '--authors'], default=_('Unknown'), + help=_('The author(s) of the ebook, as a comma separated list.')) + + traversal = c.add_group('traversal', _('Control the following of links in HTML files.')) + traversal('breadth_first', ['--breadth-first'], default=False, + help=_('Traverse links in HTML files breadth first. Normally, they are traversed depth first')) + traversal('max_levels', ['--max-levels'], default=sys.getrecursionlimit(), group='traversal', + help=_('Maximum levels of recursion when following links in HTML files. Must be non-negative. 0 implies that no links in the root HTML file are followed.')) + + structure = c.add_group('structure detection', _('Control auto-detection of document structure.')) + structure('chapter', ['--chapter'], default="//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]", + help=_('''\ +An XPath expression to detect chapter titles. The default is to consider

<h1> or +<h2>

tags that contain the text "chapter" or "book" or "section" as chapter titles. This +is achieved by the expression: "//*[re:match(name(), 'h[1-2]') and re:test(., 'chapter|book|section', 'i')]" +The expression used must evaluate to a list of elements. To disable chapter detection, +use the expression "/". +''').replace('\n', ' ')) + structure('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False, + help=_('Don\'t add detected chapters to the Table of Contents')) + structure('no_links_in_toc', ['--no-links-in-toc'], default=False, + help=_('Don\'t add links in the root HTML file to the Table of Contents')) + debug = c.add_group('debug', _('Options useful for debugging')) + debug('verbose', ['-v', '--verbose'], default=0, action='count', + help=_('Be more verbose while processing. Can be specified multiple times to increase verbosity.')) + + return c \ No newline at end of file diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py new file mode 100644 index 0000000000..957b91e9bf --- /dev/null +++ b/src/calibre/ebooks/epub/from_html.py @@ -0,0 +1,212 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' +import os, sys, logging +from lxml import html +from lxml.etree import XPath +get_text = XPath("//text()") + +from calibre import LoggingInterface +from calibre.ebooks.html import PreProcessor +from calibre.ebooks.epub import config as common_config +from calibre.ebooks.epub.traverse import traverse, opf_traverse +from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.metadata.meta import get_metadata +from calibre.ebooks.metadata.opf import OPFReader +from calibre.ptempfile import PersistentTemporaryDirectory + + +class HTMLProcessor(PreProcessor, LoggingInterface): + + ENCODING_PATS = [re.compile(r'<[^<>]+encoding=[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE), + re.compile(r'', re.IGNORECASE)] + + def __init__(self, htmlfile, opts, tdir, 
resource_map, htmlfiles): + LoggingInterface.__init__(self, logging.getLogger('html2epub')) + self.htmlfile = htmlfile + self.opts = opts + self.tdir = tdir + self.resource_map = resource_map + self.resource_dir = os.path.join(tdir, 'resources') + self.htmlfiles = htmlfiles + self.parse_html() + self.root.rewrite_links(self.rewrite_links, resolve_base_href=False) + self.rewrite_links(htmlfiles) + self.extract_css() + self.collect_font_statistics() + self.split() + + def parse_html(self): + ''' Create lxml ElementTree from HTML ''' + src = open(self.htmlfile.path, 'rb').decode(self.htmlfile.encoding, 'replace') + src = self.preprocess(src) + # lxml chokes on unicode input when it contains encoding declarations + for pat in self.ENCODING_PATS: + src = pat.sub('', src) + try: + self.root = html.document_fromstring(src) + except: + if self.opts.verbose: + self.log_exception('lxml based parsing failed') + self.root = html.soupparser.fromstring() + self.head = self.body = None + head = self.root.xpath('//head') + if head: + self.head = head[0] + body = self.root.xpath('//body') + if body: + self.body = body[0] + self.detected_chapters = self.opts.chapter(self.root) + + def rewrite_links(self, olink): + link = self.htmlfile.resolve(olink) + if not link.path or not os.path.exists(link.path) or not os.path.isfile(link.path): + return olink + if link.path in self.htmlfiles: + return os.path.basename(link.path) + if link.path in self.resource_map.keys(): + return self.resource_map[] + name = os.path.basename(link.path) + name, ext = os.path.splitext(name) + name += ('_%d'%len(self.resource_map)) + ext + shutil.copyfile(link.path, os.path.join(self.resource_dir, name)) + name = 'resources/'+name + self.resource_map[link.path] = name + return name + + + def extract_css(self): + css = [] + for link in self.root.xpath('//link'): + if 'css' in link.get('type', 'text/css').lower(): + file = self.htmlfile.resolve(link.get('href', '')) + if os.path.exists(file) and 
os.path.isfile(file): + css.append(open(file, 'rb').read().decode('utf-8')) + link.getparent().remove(link) + + for style in self.root.xpath('//style'): + if 'css' in style.get('type', 'text/css').lower(): + css.append('\n'.join(get_text(style))) + style.getparent().remove(style) + + css_counter = 1 + for elem in self.root.xpath('//*[@style]'): + if 'id' not in elem.keys(): + elem['id'] = 'calibre_css_id_%d'%css_counter + css_counter += 1 + css.append('#%s {%s}'%(elem['id'], elem['style'])) + elem.attrib.pop('style') + chapter_counter = 1 + for chapter in self.detected_chapters: + if chapter.tag.lower() == 'a': + if 'name' in chapter.keys(): + chapter['id'] = id = chapter['name'] + elif 'id' in chapter.keys(): + id = chapter['id'] + else: + id = 'calibre_detected_chapter_%d'%chapter_counter + chapter_counter += 1 + chapter['id'] = id + else: + if 'id' not in chapter.keys(): + id = 'calibre_detected_chapter_%d'%chapter_counter + chapter_counter += 1 + chapter['id'] = id + css.append('#%s {%s}'%(id, 'page-break-before:always')) + + self.raw_css = '\n\n'.join(css) + # TODO: Figure out what to do about CSS imports from linked stylesheets + + def collect_font_statistics(self): + ''' + Collect font statistics to figure out the base font size used in this + HTML document. 
+ ''' + self.font_statistics = {} #: A mapping of font size (in pts) to number of characters rendered at that font size + for text in get_text(self.body if self.body is not None else self.root): + length, parent = len(re.sub(r'\s+', '', text)), text.getparent() + #TODO: Use cssutils on self.raw_css to figure out the font size + # of this piece text and update statistics accordingly + + def split(self): + ''' Split into individual flows to accommodate Adobe's incompetence ''' + # TODO: Split on page breaks, keeping track of anchors (a.name and id) + # and preserving tree structure so that CSS continues to apply + pass + + +def config(): + c = common_config() + return c + +def option_parser(): + c = config() + return c.option_parser(usage=_('''\ +%prog [options] file.html + +Convert a HTML file to an EPUB ebook. Follows links in the HTML file. +''')) + +def search_for_opf(dir): + for f in os.listdir(dir): + if f.lower().endswith('.opf'): + return OPFReader(open(os.path.join(dir, f), 'rb'), dir) + +def parse_content(filelist, opts): + tdir = PersistentTemporaryDirectory('_html2epub') + os.makedirs(os.path.join(tdir, 'content', 'resources')) + resource_map = {} + for htmlfile in filelist: + hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'), resource_map) + +def convert(htmlfile, opts, notification=None): + if opts.output is None: + opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub' + opts.output = os.path.abspath(opts.output) + opf = search_for_opf(os.path.dirname(htmlfile)) + if opf: + mi = MetaInformation(opf) + else: + mi = get_metadata(open(htmlfile, 'rb'), 'html') + if opts.title: + mi.title = opts.title + if opts.authors != _('Unknown'): + opts.authors = opts.authors.split(',') + opts.authors = [a.strip() for a in opts.authors] + mi.authors = opts.authors + + if not mi.title: + mi.title = os.path.splitext(os.path.basename(htmlfile))[0] + if not mi.authors: + mi.authors = [_('Unknown')] + + opts.chapter = XPath(opts.chapter, + 
namespaces={'re':'http://exslt.org/regular-expressions'}) + + filelist = None + print 'Building file list...' + if opf is not None: + filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding) + if not filelist: + filelist = traverse(htmlfile, verbose=opts.verbose, encoding=opts.encoding)\ + [0 if opts.breadth_first else 1] + if opts.verbose: + print '\tFound files...' + for f in filelist: + print '\t\t', f + + parse_content(filelist, opts) + +def main(args=sys.argv): + parser = option_parser() + opts, args = parser.parse_args(args) + if len(args) < 2: + parser.print_help() + print _('You must specify an input HTML file') + return 1 + convert(args[1], opts) + return 0 + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/src/calibre/ebooks/epub/traverse.py b/src/calibre/ebooks/epub/traverse.py index d5d019f376..6f942de3b9 100644 --- a/src/calibre/ebooks/epub/traverse.py +++ b/src/calibre/ebooks/epub/traverse.py @@ -123,9 +123,12 @@ class HTMLFile(object): url = match.group(i) if url: break - link = Link(url, self.base) + link = self.resolve(url) if link not in self.links: self.links.append(link) + + def resolve(self, url): + return Link(url, self.base) def depth_first(root, flat, visited=set([])): @@ -152,7 +155,7 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None) Recursively traverse all links in the HTML file. :param max_levels: Maximum levels of recursion. Must be non-negative. 0 - implies that no links in hte root HTML file are followed. + implies that no links in the root HTML file are followed. :param encoding: Specify character encoding of HTML files. If `None` it is auto-detected. :return: A pair of lists (breadth_first, depth_first). 
Each list contains @@ -186,7 +189,23 @@ def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None) return flat, list(depth_first(flat[0], flat)) - +def opf_traverse(opf_reader, verbose=0, encoding=None): + ''' + Return a list of :class:`HTMLFile` objects in the order specified by the + `` element of the OPF. + + :param opf_reader: An :class:`calibre.ebooks.metadata.opf.OPFReader` instance. + :param encoding: Specify character encoding of HTML files. If `None` it is + auto-detected. + ''' + if not opf_reader.spine: + raise ValueError('OPF does not have a spine') + flat = [] + for path in opf_reader.spine.items(): + if path not in flat: + flat.append(os.path.abspath(path)) + flat = [HTMLFile(path, 0, encoding, verbose) for path in flat] + return flat diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py new file mode 100644 index 0000000000..dc2114f14a --- /dev/null +++ b/src/calibre/ebooks/html.py @@ -0,0 +1,74 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' +__docformat__ = 'restructuredtext en' + +import re + + +class PreProcessor(object): + PREPROCESS = [] + # Fix Baen markup + BAEN = [ + (re.compile(r'page-break-before:\s*\w+([\s;\}])', re.IGNORECASE), + lambda match: match.group(1)), + (re.compile(r'

\s*(\s*)\s*

', re.IGNORECASE), + lambda match: match.group(1)), + (re.compile(r'<\s*a\s+id="p[0-9]+"\s+name="p[0-9]+"\s*>\s*', re.IGNORECASE), + lambda match: ''), + ] + # Fix pdftohtml markup + PDFTOHTML = [ + # Remove
tags + (re.compile(r'', re.IGNORECASE), lambda match: ' '), + # Remove page numbers + (re.compile(r'\d+
', re.IGNORECASE), lambda match: ''), + # Remove
and replace

with

+ (re.compile(r'\s*', re.IGNORECASE), lambda match: '

'), + (re.compile(r'(.*)', re.IGNORECASE), + lambda match: match.group() if re.match('<', match.group(1).lstrip()) or len(match.group(1)) < 40 + else match.group(1)), + # Remove hyphenation + (re.compile(r'-\n\r?'), lambda match: ''), + + ] + + # Fix Book Designer markup + BOOK_DESIGNER = [ + # HR + (re.compile('


', re.IGNORECASE), + lambda match : ' '), + # Create header tags + (re.compile('<]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?

', re.IGNORECASE), + lambda match : '

%s

'%(match.group(2) if match.group(2) else 'center', match.group(3))), + (re.compile('<]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?', re.IGNORECASE), + lambda match : '

%s

'%(match.group(2) if match.group(2) else 'center', match.group(3))), + (re.compile('<]*?id=title[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL), + lambda match : '

%s

'%(match.group(1),)), + (re.compile('<]*?id=subtitle[^><]*?>(.*?)', re.IGNORECASE|re.DOTALL), + lambda match : '

%s

'%(match.group(1),)), + ] + + def is_baen(self, src): + return re.compile(r'<]*id=BookTitle', raw) is not None + + def is_pdftohtml(self, src): + return src.startswith('') + + def preprocess(self, html): + if self.is_baen(html): + rules = self.BAEN + elif self.is_book_designer(html): + rules = self.BOOK_DESIGNER + elif self.is_pdftohtml(html): + rules = self.PDFTOHTML + else: + rules = [] + for rule in self.PREPROCESS + rules: + html = rule[0].sub(rule[1], html) + + return html + diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 8fbab59bcf..4b3a0bffaa 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -25,6 +25,7 @@ entry_points = { 'epub-meta = calibre.ebooks.metadata.epub:main', 'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main', 'html2lrf = calibre.ebooks.lrf.html.convert_from:main', + 'html2epub = calibre.ebooks.epub.from_html:main', 'markdown-calibre = calibre.ebooks.markdown.markdown:main', 'lit2lrf = calibre.ebooks.lrf.lit.convert_from:main', 'epub2lrf = calibre.ebooks.lrf.epub.convert_from:main', diff --git a/src/calibre/startup.py b/src/calibre/startup.py index 347a1fab93..2f4467b0f8 100644 --- a/src/calibre/startup.py +++ b/src/calibre/startup.py @@ -13,7 +13,8 @@ from gettext import GNUTranslations import __builtin__ __builtin__.__dict__['_'] = lambda s: s -from calibre.constants import iswindows, isosx, islinux, isfrozen +from calibre.constants import iswindows, isosx, islinux, isfrozen,\ + preferred_encoding from calibre.translations.msgfmt import make _run_once = False @@ -146,4 +147,8 @@ if not _run_once: sys.argv[1:] = winutil.argv()[1-len(sys.argv):] ################################################################################ + # Convert command line arguments to unicode + for i in range(1, len(sys.argv)): + if not isinstance(sys.argv[i], unicode): + sys.argv[i] = sys.argv[i].decode(preferred_encoding, 'replace') \ No newline at end of file diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py index 
baf85186c1..61cc8516ff 100644 --- a/src/calibre/utils/config.py +++ b/src/calibre/utils/config.py @@ -8,6 +8,7 @@ Manage application-wide preferences. ''' import os, re, cPickle, textwrap from copy import deepcopy +from functools import partial from optparse import OptionParser as _OptionParser from optparse import IndentedHelpFormatter from PyQt4.QtCore import QString @@ -200,6 +201,7 @@ class OptionSet(object): raise ValueError('A group by the name %s already exists in this set'%name) self.groups[name] = description self.group_list.append(name) + return partial(self.add_opt, group=name) def add_opt(self, name, switches=[], help=None, type=None, choices=None, group=None, default=None, action=None, metavar=None): @@ -234,7 +236,7 @@ class OptionSet(object): parser = OptionParser(usage, gui_mode=gui_mode) groups = defaultdict(lambda : parser) for group, desc in self.groups.items(): - groups[group] = parser.add_group(group, desc) + groups[group] = parser.add_option_group(group.upper(), desc) for pref in self.preferences: if not pref.switches: diff --git a/src/calibre/utils/cssutils/__init__.py b/src/calibre/utils/cssutils/__init__.py new file mode 100644 index 0000000000..1bbc68aaa9 --- /dev/null +++ b/src/calibre/utils/cssutils/__init__.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +"""cssutils - CSS Cascading Style Sheets library for Python + + Copyright (C) 2004-2008 Christof Hoeke + + cssutils is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . + + +A Python package to parse and build CSS Cascading Style Sheets. DOM only, not any rendering facilities! + +Based upon and partly implementing the following specifications : + +`CSS 2.1 `__ + General CSS rules and properties are defined here +`CSS 2.1 Errata `__ + A few errata, mainly the definition of CHARSET_SYM tokens +`CSS3 Module: Syntax `__ + Used in parts since cssutils 0.9.4. cssutils tries to use the features from CSS 2.1 and CSS 3 with preference to CSS3 but as this is not final yet some parts are from CSS 2.1 +`MediaQueries `__ + MediaQueries are part of ``stylesheets.MediaList`` since v0.9.4, used in @import and @media rules. +`Namespaces `__ + Added in v0.9.1, updated to definition in CSSOM in v0.9.4, updated in 0.9.5 for dev version +`Selectors `__ + The selector syntax defined here (and not in CSS 2.1) should be parsable with cssutils (*should* mind though ;) ) + +`DOM Level 2 Style CSS `__ + DOM for package css +`DOM Level 2 Style Stylesheets `__ + DOM for package stylesheets +`CSSOM `__ + A few details (mainly the NamespaceRule DOM) is taken from here. Plan is to move implementation to the stuff defined here which is newer but still no REC so might change anytime... + + +The cssutils tokenizer is a customized implementation of `CSS3 Module: Syntax (W3C Working Draft 13 August 2003) `__ which itself is based on the CSS 2.1 tokenizer. It tries to be as compliant as possible but uses some (helpful) parts of the CSS 2.1 tokenizer. + +I guess cssutils is neither CSS 2.1 nor CSS 3 compliant but tries to at least be able to parse both grammars including some more real world cases (some CSS hacks are actually parsed and serialized). Both official grammars are not final nor bugfree but still feasible. 
cssutils aim is not to be fully compliant to any CSS specification (the specifications seem to be in a constant flow anyway) but cssutils *should* be able to read and write as many as possible CSS stylesheets "in the wild" while at the same time implement the official APIs which are well documented. Some minor extensions are provided as well. + +Please visit http://cthedot.de/cssutils/ for more details. + + +Tested with Python 2.5 on Windows Vista mainly. + + +This library may be used ``from cssutils import *`` which +import subpackages ``css`` and ``stylesheets``, CSSParser and +CSSSerializer classes only. + +Usage may be:: + + >>> from cssutils import * + >>> parser = CSSParser() + >>> sheet = parser.parseString(u'a { color: red}') + >>> print sheet.cssText + a { + color: red + } + +""" +__all__ = ['css', 'stylesheets', 'CSSParser', 'CSSSerializer'] +__docformat__ = 'restructuredtext' +__author__ = 'Christof Hoeke with contributions by Walter Doerwald' +__date__ = '$LastChangedDate:: 2008-08-11 20:11:23 +0200 #$:' + +VERSION = '0.9.5.1' + +__version__ = '%s $Id: __init__.py 1426 2008-08-11 18:11:23Z cthedot $' % VERSION + +import codec +import xml.dom + +# order of imports is important (partly circular) +from helper import Deprecated +import errorhandler +log = errorhandler.ErrorHandler() + +import util +import css +import stylesheets +from parse import CSSParser + +from serialize import CSSSerializer +ser = CSSSerializer() + +# used by Selector defining namespace prefix '*' +_ANYNS = -1 + +class DOMImplementationCSS(object): + """ + This interface allows the DOM user to create a CSSStyleSheet + outside the context of a document. There is no way to associate + the new CSSStyleSheet with a document in DOM Level 2. + + This class is its *own factory*, as it is given to + xml.dom.registerDOMImplementation which simply calls it and receives + an instance of this class then. 
+ """ + _features = [ + ('css', '1.0'), + ('css', '2.0'), + ('stylesheets', '1.0'), + ('stylesheets', '2.0') + ] + + def createCSSStyleSheet(self, title, media): + """ + Creates a new CSSStyleSheet. + + title of type DOMString + The advisory title. See also the Style Sheet Interfaces + section. + media of type DOMString + The comma-separated list of media associated with the new style + sheet. See also the Style Sheet Interfaces section. + + returns + CSSStyleSheet: A new CSS style sheet. + + TODO: DOMException + SYNTAX_ERR: Raised if the specified media string value has a + syntax error and is unparsable. + """ + return css.CSSStyleSheet(title=title, media=media) + + def createDocument(self, *args): + # not needed to HTML, also not for CSS? + raise NotImplementedError + + def createDocumentType(self, *args): + # not needed to HTML, also not for CSS? + raise NotImplementedError + + def hasFeature(self, feature, version): + return (feature.lower(), unicode(version)) in self._features + +xml.dom.registerDOMImplementation('cssutils', DOMImplementationCSS) + + +def parseString(*a, **k): + return CSSParser().parseString(*a, **k) +parseString.__doc__ = CSSParser.parseString.__doc__ + +def parseFile(*a, **k): + return CSSParser().parseFile(*a, **k) +parseFile.__doc__ = CSSParser.parseFile.__doc__ + +def parseUrl(*a, **k): + return CSSParser().parseUrl(*a, **k) +parseUrl.__doc__ = CSSParser.parseUrl.__doc__ + +@Deprecated('Use cssutils.parseFile() instead.') +def parse(*a, **k): + return parseFile(*a, **k) +parse.__doc__ = CSSParser.parse.__doc__ + + +# set "ser", default serializer +def setSerializer(serializer): + """ + sets the global serializer used by all class in cssutils + """ + global ser + ser = serializer + + +def getUrls(sheet): + """ + Utility function to get all ``url(urlstring)`` values in + ``CSSImportRules`` and ``CSSStyleDeclaration`` objects (properties) + of given CSSStyleSheet ``sheet``. + + This function is a generator. 
The url values exclude ``url(`` and ``)`` + and surrounding single or double quotes. + """ + for importrule in (r for r in sheet if r.type == r.IMPORT_RULE): + yield importrule.href + + def getUrl(v): + if v.CSS_PRIMITIVE_VALUE == v.cssValueType and\ + v.CSS_URI == v.primitiveType: + return v.getStringValue() + + def styleDeclarations(base): + "recursive generator to find all CSSStyleDeclarations" + if hasattr(base, 'cssRules'): + for rule in base.cssRules: + for s in styleDeclarations(rule): + yield s + elif hasattr(base, 'style'): + yield base.style + + for style in styleDeclarations(sheet): + for p in style.getProperties(all=True): + v = p.cssValue + if v.CSS_VALUE_LIST == v.cssValueType: + for item in v: + u = getUrl(item) + if u is not None: + yield u + elif v.CSS_PRIMITIVE_VALUE == v.cssValueType: + u = getUrl(v) + if u is not None: + yield u + +def replaceUrls(sheet, replacer): + """ + Utility function to replace all ``url(urlstring)`` values in + ``CSSImportRules`` and ``CSSStyleDeclaration`` objects (properties) + of given CSSStyleSheet ``sheet``. + + ``replacer`` must be a function which is called with a single + argument ``urlstring`` which is the current value of url() + excluding ``url(`` and ``)`` and surrounding single or double quotes. 
+ """ + for importrule in (r for r in sheet if r.type == r.IMPORT_RULE): + importrule.href = replacer(importrule.href) + + def setProperty(v): + if v.CSS_PRIMITIVE_VALUE == v.cssValueType and\ + v.CSS_URI == v.primitiveType: + v.setStringValue(v.CSS_URI, + replacer(v.getStringValue())) + + def styleDeclarations(base): + "recursive generator to find all CSSStyleDeclarations" + if hasattr(base, 'cssRules'): + for rule in base.cssRules: + for s in styleDeclarations(rule): + yield s + elif hasattr(base, 'style'): + yield base.style + + for style in styleDeclarations(sheet): + for p in style.getProperties(all=True): + v = p.cssValue + if v.CSS_VALUE_LIST == v.cssValueType: + for item in v: + setProperty(item) + elif v.CSS_PRIMITIVE_VALUE == v.cssValueType: + setProperty(v) + + +if __name__ == '__main__': + print __doc__ diff --git a/src/calibre/utils/cssutils/codec.py b/src/calibre/utils/cssutils/codec.py new file mode 100644 index 0000000000..8de34a012f --- /dev/null +++ b/src/calibre/utils/cssutils/codec.py @@ -0,0 +1,581 @@ +#!/usr/bin/env python +"""Python codec for CSS.""" +__docformat__ = 'restructuredtext' +__author__ = 'Walter Doerwald' +__version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $' + +import codecs, marshal + +# We're using bits to store all possible candidate encodings (or variants, i.e. +# we have two bits for the variants of UTF-16 and two for the +# variants of UTF-32). +# +# Prefixes for various CSS encodings +# UTF-8-SIG xEF xBB xBF +# UTF-16 (LE) xFF xFE ~x00|~x00 +# UTF-16 (BE) xFE xFF +# UTF-16-LE @ x00 @ x00 +# UTF-16-BE x00 @ +# UTF-32 (LE) xFF xFE x00 x00 +# UTF-32 (BE) x00 x00 xFE xFF +# UTF-32-LE @ x00 x00 x00 +# UTF-32-BE x00 x00 x00 @ +# CHARSET @ c h a ... + + +def detectencoding_str(input, final=False): + """ + Detect the encoding of the byte string ``input``, which contains the + beginning of a CSS file. 
This function returns the detected encoding (or + ``None`` if it hasn't got enough data), and a flag that indicates whether + the encoding has been detected explicitly or implicitly. To detect the + encoding the first few bytes are used (or if ``input`` is ASCII compatible + and starts with a charset rule the encoding name from the rule). "Explicit" + detection means that the bytes start with a BOM or a charset rule. + + If the encoding can't be detected yet, ``None`` is returned as the encoding. + ``final`` specifies whether more data is available in later calls or not. + If ``final`` is true, ``detectencoding_str()`` will never return ``None`` + as the encoding. + """ + + # A bit for every candidate + CANDIDATE_UTF_8_SIG = 1 + CANDIDATE_UTF_16_AS_LE = 2 + CANDIDATE_UTF_16_AS_BE = 4 + CANDIDATE_UTF_16_LE = 8 + CANDIDATE_UTF_16_BE = 16 + CANDIDATE_UTF_32_AS_LE = 32 + CANDIDATE_UTF_32_AS_BE = 64 + CANDIDATE_UTF_32_LE = 128 + CANDIDATE_UTF_32_BE = 256 + CANDIDATE_CHARSET = 512 + + candidates = 1023 # all candidates + + li = len(input) + if li>=1: + # Check first byte + c = input[0] + if c != "\xef": + candidates &= ~CANDIDATE_UTF_8_SIG + if c != "\xff": + candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_16_AS_LE) + if c != "\xfe": + candidates &= ~CANDIDATE_UTF_16_AS_BE + if c != "@": + candidates &= ~(CANDIDATE_UTF_32_LE|CANDIDATE_UTF_16_LE|CANDIDATE_CHARSET) + if c != "\x00": + candidates &= ~(CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_BE|CANDIDATE_UTF_16_BE) + if li>=2: + # Check second byte + c = input[1] + if c != "\xbb": + candidates &= ~CANDIDATE_UTF_8_SIG + if c != "\xfe": + candidates &= ~(CANDIDATE_UTF_16_AS_LE|CANDIDATE_UTF_32_AS_LE) + if c != "\xff": + candidates &= ~CANDIDATE_UTF_16_AS_BE + if c != "\x00": + candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE) + if c != "@": + candidates &= ~CANDIDATE_UTF_16_BE + if c != "c": + candidates &= ~CANDIDATE_CHARSET + if li>=3: + # Check third byte + c = input[2]
+ if c != "\xbf": + candidates &= ~CANDIDATE_UTF_8_SIG + if c != "c": + candidates &= ~CANDIDATE_UTF_16_LE + if c != "\x00": + candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE) + if c != "\xfe": + candidates &= ~CANDIDATE_UTF_32_AS_BE + if c != "h": + candidates &= ~CANDIDATE_CHARSET + if li>=4: + # Check fourth byte + c = input[3] + if input[2:4] == "\x00\x00": + candidates &= ~CANDIDATE_UTF_16_AS_LE + if c != "\x00": + candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE) + if c != "\xff": + candidates &= ~CANDIDATE_UTF_32_AS_BE + if c != "@": + candidates &= ~CANDIDATE_UTF_32_BE + if c != "a": + candidates &= ~CANDIDATE_CHARSET + if candidates == 0: + return ("utf-8", False) + if not (candidates & (candidates-1)): # only one candidate remaining + if candidates == CANDIDATE_UTF_8_SIG and li >= 3: + return ("utf-8-sig", True) + elif candidates == CANDIDATE_UTF_16_AS_LE and li >= 2: + return ("utf-16", True) + elif candidates == CANDIDATE_UTF_16_AS_BE and li >= 2: + return ("utf-16", True) + elif candidates == CANDIDATE_UTF_16_LE and li >= 4: + return ("utf-16-le", False) + elif candidates == CANDIDATE_UTF_16_BE and li >= 2: + return ("utf-16-be", False) + elif candidates == CANDIDATE_UTF_32_AS_LE and li >= 4: + return ("utf-32", True) + elif candidates == CANDIDATE_UTF_32_AS_BE and li >= 4: + return ("utf-32", True) + elif candidates == CANDIDATE_UTF_32_LE and li >= 4: + return ("utf-32-le", False) + elif candidates == CANDIDATE_UTF_32_BE and li >= 4: + return ("utf-32-be", False) + elif candidates == CANDIDATE_CHARSET and li >= 4: + prefix = '@charset "' + if input[:len(prefix)] == prefix: + pos = input.find('"', len(prefix)) + if pos >= 0: + return (input[len(prefix):pos], True) + # if this is the last call, and we haven't determined an encoding yet, + # we default to UTF-8 + if final: + return ("utf-8", False) + return (None, False) # dont' know yet + + +def detectencoding_unicode(input, final=False): + 
""" + Detect the encoding of the unicode string ``input``, which contains the + beginning of a CSS file. The encoding is detected from the charset rule + at the beginning of ``input``. If there is no charset rule, ``"utf-8"`` + will be returned. + + If the encoding can't be detected yet, ``None`` is returned. ``final`` + specifies whether more data will be available in later calls or not. If + ``final`` is true, ``detectencoding_unicode()`` will never return ``None``. + """ + prefix = u'@charset "' + if input.startswith(prefix): + pos = input.find(u'"', len(prefix)) + if pos >= 0: + return (input[len(prefix):pos], True) + elif final or not prefix.startswith(input): + # if this is the last call, and we haven't determined an encoding yet, + # (or the string definitely doesn't start with prefix) we default to UTF-8 + return ("utf-8", False) + return (None, False) # don't know yet + + +def _fixencoding(input, encoding, final=False): + """ + Replace the name of the encoding in the charset rule at the beginning of + ``input`` with ``encoding``. If ``input`` doesn't starts with a charset + rule, ``input`` will be returned unmodified. + + If the encoding can't be found yet, ``None`` is returned. ``final`` + specifies whether more data will be available in later calls or not. + If ``final`` is true, ``_fixencoding()`` will never return ``None``. 
+ """ + prefix = u'@charset "' + if len(input) > len(prefix): + if input.startswith(prefix): + pos = input.find(u'"', len(prefix)) + if pos >= 0: + if encoding.replace("_", "-").lower() == "utf-8-sig": + encoding = u"utf-8" + return prefix + encoding + input[pos:] + # we haven't seen the end of the encoding name yet => fall through + else: + return input # doesn't start with prefix, so nothing to fix + elif not prefix.startswith(input) or final: + # can't turn out to be a @charset rule later (or there is no "later") + return input + if final: + return input + return None # don't know yet + + +def decode(input, errors="strict", encoding=None, force=True): + if encoding is None or not force: + (_encoding, explicit) = detectencoding_str(input, True) + if _encoding == "css": + raise ValueError("css not allowed as encoding name") + if (explicit and not force) or encoding is None: # Take the encoding from the input + encoding = _encoding + (input, consumed) = codecs.getdecoder(encoding)(input, errors) + return (_fixencoding(input, unicode(encoding), True), consumed) + + +def encode(input, errors="strict", encoding=None): + consumed = len(input) + if encoding is None: + encoding = detectencoding_unicode(input, True)[0] + if encoding.replace("_", "-").lower() == "utf-8-sig": + input = _fixencoding(input, u"utf-8", True) + else: + input = _fixencoding(input, unicode(encoding), True) + if encoding == "css": + raise ValueError("css not allowed as encoding name") + encoder = codecs.getencoder(encoding) + return (encoder(input, errors)[0], consumed) + + +def _bytes2int(bytes): + # Helper: convert an 8 bit string into an ``int``. + i = 0 + for byte in bytes: + i = (i<<8) + ord(byte) + return i + + +def _int2bytes(i): + # Helper: convert an ``int`` into an 8-bit string. 
+ v = [] + while i: + v.insert(0, chr(i&0xff)) + i >>= 8 + return "".join(v) + + +if hasattr(codecs, "IncrementalDecoder"): + class IncrementalDecoder(codecs.IncrementalDecoder): + def __init__(self, errors="strict", encoding=None, force=True): + self.decoder = None + self.encoding = encoding + self.force = force + codecs.IncrementalDecoder.__init__(self, errors) + # Store ``errors`` somewhere else, + # because we have to hide it in a property + self._errors = errors + self.buffer = "" + self.headerfixed = False + + def iterdecode(self, input): + for part in input: + result = self.decode(part, False) + if result: + yield result + result = self.decode("", True) + if result: + yield result + + def decode(self, input, final=False): + # We're doing basically the same as a ``BufferedIncrementalDecoder``, + # but since the buffer is only relevant until the encoding has been + # detected (in which case the buffer of the underlying codec might + # kick in), we're implementing buffering ourselves to avoid some + # overhead. + if self.decoder is None: + input = self.buffer + input + # Do we have to detect the encoding from the input? 
+ if self.encoding is None or not self.force: + (encoding, explicit) = detectencoding_str(input, final) + if encoding is None: # no encoding determined yet + self.buffer = input # retry the complete input on the next call + return u"" # no encoding determined yet, so no output + elif encoding == "css": + raise ValueError("css not allowed as encoding name") + if (explicit and not self.force) or self.encoding is None: # Take the encoding from the input + self.encoding = encoding + self.buffer = "" # drop buffer, as the decoder might keep its own + decoder = codecs.getincrementaldecoder(self.encoding) + self.decoder = decoder(self._errors) + if self.headerfixed: + return self.decoder.decode(input, final) + # If we haven't fixed the header yet, + # the content of ``self.buffer`` is a ``unicode`` object + output = self.buffer + self.decoder.decode(input, final) + encoding = self.encoding + if encoding.replace("_", "-").lower() == "utf-8-sig": + encoding = "utf-8" + newoutput = _fixencoding(output, unicode(encoding), final) + if newoutput is None: + # retry fixing the @charset rule (but keep the decoded stuff) + self.buffer = output + return u"" + self.headerfixed = True + return newoutput + + def reset(self): + codecs.IncrementalDecoder.reset(self) + self.decoder = None + self.buffer = "" + self.headerfixed = False + + def _geterrors(self): + return self._errors + + def _seterrors(self, errors): + # Setting ``errors`` must be done on the real decoder too + if self.decoder is not None: + self.decoder.errors = errors + self._errors = errors + errors = property(_geterrors, _seterrors) + + def getstate(self): + if self.decoder is not None: + state = (self.encoding, self.buffer, self.headerfixed, True, self.decoder.getstate()) + else: + state = (self.encoding, self.buffer, self.headerfixed, False, None) + return ("", _bytes2int(marshal.dumps(state))) + + def setstate(self, state): + state = _int2bytes(marshal.loads(state[1])) # ignore buffered input + self.encoding = 
state[0] + self.buffer = state[1] + self.headerfixed = state[2] + if state[3] is not None: + self.decoder = codecs.getincrementaldecoder(self.encoding)(self._errors) + self.decoder.setstate(state[4]) + else: + self.decoder = None + + +if hasattr(codecs, "IncrementalEncoder"): + class IncrementalEncoder(codecs.IncrementalEncoder): + def __init__(self, errors="strict", encoding=None): + self.encoder = None + self.encoding = encoding + codecs.IncrementalEncoder.__init__(self, errors) + # Store ``errors`` somewhere else, + # because we have to hide it in a property + self._errors = errors + self.buffer = u"" + + def iterencode(self, input): + for part in input: + result = self.encode(part, False) + if result: + yield result + result = self.encode(u"", True) + if result: + yield result + + def encode(self, input, final=False): + if self.encoder is None: + input = self.buffer + input + if self.encoding is not None: + # Replace encoding in the @charset rule with the specified one + encoding = self.encoding + if encoding.replace("_", "-").lower() == "utf-8-sig": + encoding = "utf-8" + newinput = _fixencoding(input, unicode(encoding), final) + if newinput is None: # @charset rule incomplete => Retry next time + self.buffer = input + return "" + input = newinput + else: + # Use encoding from the @charset declaration + self.encoding = detectencoding_unicode(input, final)[0] + if self.encoding is not None: + if self.encoding == "css": + raise ValueError("css not allowed as encoding name") + info = codecs.lookup(self.encoding) + encoding = self.encoding + if self.encoding.replace("_", "-").lower() == "utf-8-sig": + input = _fixencoding(input, u"utf-8", True) + self.encoder = info.incrementalencoder(self._errors) + self.buffer = u"" + else: + self.buffer = input + return "" + return self.encoder.encode(input, final) + + def reset(self): + codecs.IncrementalEncoder.reset(self) + self.encoder = None + self.buffer = u"" + + def _geterrors(self): + return self._errors + + def 
_seterrors(self, errors): + # Setting ``errors ``must be done on the real encoder too + if self.encoder is not None: + self.encoder.errors = errors + self._errors = errors + errors = property(_geterrors, _seterrors) + + def getstate(self): + if self.encoder is not None: + state = (self.encoding, self.buffer, True, self.encoder.getstate()) + else: + state = (self.encoding, self.buffer, False, None) + return _bytes2int(marshal.dumps(state)) + + def setstate(self, state): + state = _int2bytes(marshal.loads(state)) + self.encoding = state[0] + self.buffer = state[1] + if state[2] is not None: + self.encoder = codecs.getincrementalencoder(self.encoding)(self._errors) + self.encoder.setstate(state[4]) + else: + self.encoder = None + + +class StreamWriter(codecs.StreamWriter): + def __init__(self, stream, errors="strict", encoding=None, header=False): + codecs.StreamWriter.__init__(self, stream, errors) + self.streamwriter = None + self.encoding = encoding + self._errors = errors + self.buffer = u"" + + def encode(self, input, errors='strict'): + li = len(input) + if self.streamwriter is None: + input = self.buffer + input + li = len(input) + if self.encoding is not None: + # Replace encoding in the @charset rule with the specified one + encoding = self.encoding + if encoding.replace("_", "-").lower() == "utf-8-sig": + encoding = "utf-8" + newinput = _fixencoding(input, unicode(encoding), False) + if newinput is None: # @charset rule incomplete => Retry next time + self.buffer = input + return ("", 0) + input = newinput + else: + # Use encoding from the @charset declaration + self.encoding = detectencoding_unicode(input, False)[0] + if self.encoding is not None: + if self.encoding == "css": + raise ValueError("css not allowed as encoding name") + self.streamwriter = codecs.getwriter(self.encoding)(self.stream, self._errors) + encoding = self.encoding + if self.encoding.replace("_", "-").lower() == "utf-8-sig": + input = _fixencoding(input, u"utf-8", True) + self.buffer = 
u"" + else: + self.buffer = input + return ("", 0) + return (self.streamwriter.encode(input, errors)[0], li) + + def _geterrors(self): + return self._errors + + def _seterrors(self, errors): + # Setting ``errors`` must be done on the streamwriter too + if self.streamwriter is not None: + self.streamwriter.errors = errors + self._errors = errors + errors = property(_geterrors, _seterrors) + + +class StreamReader(codecs.StreamReader): + def __init__(self, stream, errors="strict", encoding=None, force=True): + codecs.StreamReader.__init__(self, stream, errors) + self.streamreader = None + self.encoding = encoding + self.force = force + self._errors = errors + + def decode(self, input, errors='strict'): + if self.streamreader is None: + if self.encoding is None or not self.force: + (encoding, explicit) = detectencoding_str(input, False) + if encoding is None: # no encoding determined yet + return (u"", 0) # no encoding determined yet, so no output + elif encoding == "css": + raise ValueError("css not allowed as encoding name") + if (explicit and not self.force) or self.encoding is None: # Take the encoding from the input + self.encoding = encoding + streamreader = codecs.getreader(self.encoding) + streamreader = streamreader(self.stream, self._errors) + (output, consumed) = streamreader.decode(input, errors) + encoding = self.encoding + if encoding.replace("_", "-").lower() == "utf-8-sig": + encoding = "utf-8" + newoutput = _fixencoding(output, unicode(encoding), False) + if newoutput is not None: + self.streamreader = streamreader + return (newoutput, consumed) + return (u"", 0) # we will create a new streamreader on the next call + return self.streamreader.decode(input, errors) + + def _geterrors(self): + return self._errors + + def _seterrors(self, errors): + # Setting ``errors`` must be done on the streamreader too + if self.streamreader is not None: + self.streamreader.errors = errors + self._errors = errors + errors = property(_geterrors, _seterrors) + + +if 
hasattr(codecs, "CodecInfo"): + # We're running on Python 2.5 or better + def search_function(name): + if name == "css": + return codecs.CodecInfo( + name="css", + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamwriter=StreamWriter, + streamreader=StreamReader, + ) +else: + # If we're running on Python 2.4, define the utf-8-sig codec here + def utf8sig_encode(input, errors='strict'): + return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input)) + + def utf8sig_decode(input, errors='strict'): + prefix = 0 + if input[:3] == codecs.BOM_UTF8: + input = input[3:] + prefix = 3 + (output, consumed) = codecs.utf_8_decode(input, errors, True) + return (output, consumed+prefix) + + class UTF8SigStreamWriter(codecs.StreamWriter): + def reset(self): + codecs.StreamWriter.reset(self) + try: + del self.encode + except AttributeError: + pass + + def encode(self, input, errors='strict'): + self.encode = codecs.utf_8_encode + return utf8sig_encode(input, errors) + + class UTF8SigStreamReader(codecs.StreamReader): + def reset(self): + codecs.StreamReader.reset(self) + try: + del self.decode + except AttributeError: + pass + + def decode(self, input, errors='strict'): + if len(input) < 3 and codecs.BOM_UTF8.startswith(input): + # not enough data to decide if this is a BOM + # => try again on the next call + return (u"", 0) + self.decode = codecs.utf_8_decode + return utf8sig_decode(input, errors) + + def search_function(name): + import encodings + name = encodings.normalize_encoding(name) + if name == "css": + return (encode, decode, StreamReader, StreamWriter) + elif name == "utf_8_sig": + return (utf8sig_encode, utf8sig_decode, UTF8SigStreamReader, UTF8SigStreamWriter) + + +codecs.register(search_function) + + +# Error handler for CSS escaping + +def cssescape(exc): + if not isinstance(exc, UnicodeEncodeError): + raise TypeError("don't know how to handle %r" % exc) + return 
(u"".join(u"\\%06x" % ord(c) for c in exc.object[exc.start:exc.end]), exc.end) + +codecs.register_error("cssescape", cssescape) diff --git a/src/calibre/utils/cssutils/css/__init__.py b/src/calibre/utils/cssutils/css/__init__.py new file mode 100644 index 0000000000..145ac43fca --- /dev/null +++ b/src/calibre/utils/cssutils/css/__init__.py @@ -0,0 +1,63 @@ +""" +Document Object Model Level 2 CSS +http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/css.html + +currently implemented + - CSSStyleSheet + - CSSRuleList + - CSSRule + - CSSComment (cssutils addon) + - CSSCharsetRule + - CSSFontFaceRule + - CSSImportRule + - CSSMediaRule + - CSSNamespaceRule (WD) + - CSSPageRule + - CSSStyleRule + - CSSUnkownRule + - Selector and SelectorList + - CSSStyleDeclaration + - CSS2Properties + - CSSValue + - CSSPrimitiveValue + - CSSValueList + +todo + - RGBColor, Rect, Counter +""" +__all__ = [ + 'CSSStyleSheet', + 'CSSRuleList', + 'CSSRule', + 'CSSComment', + 'CSSCharsetRule', + 'CSSFontFaceRule' + 'CSSImportRule', + 'CSSMediaRule', + 'CSSNamespaceRule', + 'CSSPageRule', + 'CSSStyleRule', + 'CSSUnknownRule', + 'Selector', 'SelectorList', + 'CSSStyleDeclaration', 'Property', + 'CSSValue', 'CSSPrimitiveValue', 'CSSValueList' + ] +__docformat__ = 'restructuredtext' +__version__ = '$Id: __init__.py 1116 2008-03-05 13:52:23Z cthedot $' + +from cssstylesheet import * +from cssrulelist import * +from cssrule import * +from csscomment import * +from csscharsetrule import * +from cssfontfacerule import * +from cssimportrule import * +from cssmediarule import * +from cssnamespacerule import * +from csspagerule import * +from cssstylerule import * +from cssunknownrule import * +from selector import * +from selectorlist import * +from cssstyledeclaration import * +from cssvalue import * diff --git a/src/calibre/utils/cssutils/css/csscharsetrule.py b/src/calibre/utils/cssutils/css/csscharsetrule.py new file mode 100644 index 0000000000..956f24891f --- /dev/null +++ 
b/src/calibre/utils/cssutils/css/csscharsetrule.py @@ -0,0 +1,165 @@ +"""CSSCharsetRule implements DOM Level 2 CSS CSSCharsetRule. + +TODO: + - check encoding syntax and not codecs.lookup? +""" +__all__ = ['CSSCharsetRule'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: csscharsetrule.py 1170 2008-03-20 17:42:07Z cthedot $' + +import codecs +import xml.dom +import cssrule +import cssutils + +class CSSCharsetRule(cssrule.CSSRule): + """ + The CSSCharsetRule interface represents an @charset rule in a CSS style + sheet. The value of the encoding attribute does not affect the encoding + of text data in the DOM objects; this encoding is always UTF-16 + (also in Python?). After a stylesheet is loaded, the value of the + encoding attribute is the value found in the @charset rule. If there + was no @charset in the original document, then no CSSCharsetRule is + created. The value of the encoding attribute may also be used as a hint + for the encoding used on serialization of the style sheet. + + The value of the @charset rule (and therefore of the CSSCharsetRule) + may not correspond to the encoding the document actually came in; + character encoding information e.g. in an HTTP header, has priority + (see CSS document representation) but this is not reflected in the + CSSCharsetRule. + + Properties + ========== + cssText: of type DOMString + The parsable textual representation of this rule + encoding: of type DOMString + The encoding information used in this @charset rule. 
+ + Inherits properties from CSSRule + + Format + ====== + charsetrule: + CHARSET_SYM S* STRING S* ';' + + BUT: Only valid format is: + @charset "ENCODING"; + """ + type = property(lambda self: cssrule.CSSRule.CHARSET_RULE) + + def __init__(self, encoding=None, parentRule=None, + parentStyleSheet=None, readonly=False): + """ + encoding: + a valid character encoding + readonly: + defaults to False, not used yet + + if readonly allows setting of properties in constructor only + """ + super(CSSCharsetRule, self).__init__(parentRule=parentRule, + parentStyleSheet=parentStyleSheet) + self._atkeyword = '@charset' + self._encoding = None + if encoding: + self.encoding = encoding + + self._readonly = readonly + + def _getCssText(self): + """returns serialized property cssText""" + return cssutils.ser.do_CSSCharsetRule(self) + + def _setCssText(self, cssText): + """ + DOMException on setting + + - SYNTAX_ERR: (self) + Raised if the specified CSS string value has a syntax error and + is unparsable. + - INVALID_MODIFICATION_ERR: (self) + Raised if the specified CSS string value represents a different + type of rule than the current one. + - HIERARCHY_REQUEST_ERR: (CSSStylesheet) + Raised if the rule cannot be inserted at this point in the + style sheet. + - NO_MODIFICATION_ALLOWED_ERR: (CSSRule) + Raised if the rule is readonly. + """ + super(CSSCharsetRule, self)._setCssText(cssText) + + wellformed = True + tokenizer = self._tokenize2(cssText) + + if self._type(self._nexttoken(tokenizer)) != self._prods.CHARSET_SYM: + wellformed = False + self._log.error(u'CSSCharsetRule must start with "@charset "', + error=xml.dom.InvalidModificationErr) + + encodingtoken = self._nexttoken(tokenizer) + encodingtype = self._type(encodingtoken) + encoding = self._stringtokenvalue(encodingtoken) + if self._prods.STRING != encodingtype or not encoding: + wellformed = False + self._log.error(u'CSSCharsetRule: no encoding found; %r.' 
% + self._valuestr(cssText)) + + semicolon = self._tokenvalue(self._nexttoken(tokenizer)) + EOFtype = self._type(self._nexttoken(tokenizer)) + if u';' != semicolon or EOFtype not in ('EOF', None): + wellformed = False + self._log.error(u'CSSCharsetRule: Syntax Error: %r.' % + self._valuestr(cssText)) + + if wellformed: + self.encoding = encoding + + cssText = property(fget=_getCssText, fset=_setCssText, + doc="(DOM) The parsable textual representation.") + + def _setEncoding(self, encoding): + """ + DOMException on setting + + - NO_MODIFICATION_ALLOWED_ERR: (CSSRule) + Raised if this encoding rule is readonly. + - SYNTAX_ERR: (self) + Raised if the specified encoding value has a syntax error and + is unparsable. + Currently only valid Python encodings are allowed. + """ + self._checkReadonly() + tokenizer = self._tokenize2(encoding) + encodingtoken = self._nexttoken(tokenizer) + unexpected = self._nexttoken(tokenizer) + + valid = True + if not encodingtoken or unexpected or\ + self._prods.IDENT != self._type(encodingtoken): + valid = False + self._log.error( + 'CSSCharsetRule: Syntax Error in encoding value %r.' % + encoding) + else: + try: + codecs.lookup(encoding) + except LookupError: + valid = False + self._log.error('CSSCharsetRule: Unknown (Python) encoding %r.' 
% + encoding) + else: + self._encoding = encoding.lower() + + encoding = property(lambda self: self._encoding, _setEncoding, + doc="(DOM)The encoding information used in this @charset rule.") + + wellformed = property(lambda self: bool(self.encoding)) + + def __repr__(self): + return "cssutils.css.%s(encoding=%r)" % ( + self.__class__.__name__, self.encoding) + + def __str__(self): + return "" % ( + self.__class__.__name__, self.encoding, id(self)) diff --git a/src/calibre/utils/cssutils/css/csscomment.py b/src/calibre/utils/cssutils/css/csscomment.py new file mode 100644 index 0000000000..e78d664107 --- /dev/null +++ b/src/calibre/utils/cssutils/css/csscomment.py @@ -0,0 +1,92 @@ +"""CSSComment is not defined in DOM Level 2 at all but a cssutils defined +class only. +Implements CSSRule which is also extended for a CSSComment rule type +""" +__all__ = ['CSSComment'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: csscomment.py 1170 2008-03-20 17:42:07Z cthedot $' + +import xml.dom +import cssrule +import cssutils + +class CSSComment(cssrule.CSSRule): + """ + (cssutils) a CSS comment + + Properties + ========== + cssText: of type DOMString + The comment text including comment delimiters + + Inherits properties from CSSRule + + Format + ====== + :: + + /*...*/ + """ + type = property(lambda self: cssrule.CSSRule.COMMENT) # value = -1 + # constant but needed: + wellformed = True + + def __init__(self, cssText=None, parentRule=None, + parentStyleSheet=None, readonly=False): + super(CSSComment, self).__init__(parentRule=parentRule, + parentStyleSheet=parentStyleSheet) + + self._cssText = None + if cssText: + self._setCssText(cssText) + + self._readonly = readonly + + def _getCssText(self): + """returns serialized property cssText""" + return cssutils.ser.do_CSSComment(self) + + def _setCssText(self, cssText): + """ + cssText + textual text to set or tokenlist which is not tokenized + anymore. 
May also be a single token for this rule + parser + if called from cssparser directly this is Parser instance + + DOMException on setting + + - SYNTAX_ERR: (self) + Raised if the specified CSS string value has a syntax error and + is unparsable. + - INVALID_MODIFICATION_ERR: (self) + Raised if the specified CSS string value represents a different + type of rule than the current one. + - NO_MODIFICATION_ALLOWED_ERR: (CSSRule) + Raised if the rule is readonly. + """ + super(CSSComment, self)._setCssText(cssText) + tokenizer = self._tokenize2(cssText) + + commenttoken = self._nexttoken(tokenizer) + unexpected = self._nexttoken(tokenizer) + + if not commenttoken or\ + self._type(commenttoken) != self._prods.COMMENT or\ + unexpected: + self._log.error(u'CSSComment: Not a CSSComment: %r' % + self._valuestr(cssText), + error=xml.dom.InvalidModificationErr) + else: + self._cssText = self._tokenvalue(commenttoken) + + cssText = property(_getCssText, _setCssText, + doc=u"(cssutils) Textual representation of this comment") + + def __repr__(self): + return "cssutils.css.%s(cssText=%r)" % ( + self.__class__.__name__, self.cssText) + + def __str__(self): + return "" % ( + self.__class__.__name__, self.cssText, id(self)) diff --git a/src/calibre/utils/cssutils/css/cssfontfacerule.py b/src/calibre/utils/cssutils/css/cssfontfacerule.py new file mode 100644 index 0000000000..1f839c6779 --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssfontfacerule.py @@ -0,0 +1,163 @@ +"""CSSFontFaceRule implements DOM Level 2 CSS CSSFontFaceRule. +""" +__all__ = ['CSSFontFaceRule'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: cssfontfacerule.py 1284 2008-06-05 16:29:17Z cthedot $' + +import xml.dom +import cssrule +import cssutils +from cssstyledeclaration import CSSStyleDeclaration + +class CSSFontFaceRule(cssrule.CSSRule): + """ + The CSSFontFaceRule interface represents a @font-face rule in a CSS + style sheet. The @font-face rule is used to hold a set of font + descriptions. 
+ + Properties + ========== + atkeyword (cssutils only) + the literal keyword used + cssText: of type DOMString + The parsable textual representation of this rule + style: of type CSSStyleDeclaration + The declaration-block of this rule. + + Inherits properties from CSSRule + + Format + ====== + :: + + font_face + : FONT_FACE_SYM S* + '{' S* declaration [ ';' S* declaration ]* '}' S* + ; + """ + type = property(lambda self: cssrule.CSSRule.FONT_FACE_RULE) + # constant but needed: + wellformed = True + + def __init__(self, style=None, parentRule=None, + parentStyleSheet=None, readonly=False): + """ + if readonly allows setting of properties in constructor only + + style + CSSStyleDeclaration for this CSSStyleRule + """ + super(CSSFontFaceRule, self).__init__(parentRule=parentRule, + parentStyleSheet=parentStyleSheet) + self._atkeyword = u'@font-face' + if style: + self.style = style + else: + self._style = CSSStyleDeclaration(parentRule=self) + + self._readonly = readonly + + def _getCssText(self): + """ + returns serialized property cssText + """ + return cssutils.ser.do_CSSFontFaceRule(self) + + def _setCssText(self, cssText): + """ + DOMException on setting + + - SYNTAX_ERR: (self, StyleDeclaration) + Raised if the specified CSS string value has a syntax error and + is unparsable. + - INVALID_MODIFICATION_ERR: (self) + Raised if the specified CSS string value represents a different + type of rule than the current one. + - HIERARCHY_REQUEST_ERR: (CSSStylesheet) + Raised if the rule cannot be inserted at this point in the + style sheet. + - NO_MODIFICATION_ALLOWED_ERR: (CSSRule) + Raised if the rule is readonly. 
+ """ + super(CSSFontFaceRule, self)._setCssText(cssText) + + tokenizer = self._tokenize2(cssText) + attoken = self._nexttoken(tokenizer, None) + if self._type(attoken) != self._prods.FONT_FACE_SYM: + self._log.error(u'CSSFontFaceRule: No CSSFontFaceRule found: %s' % + self._valuestr(cssText), + error=xml.dom.InvalidModificationErr) + else: + wellformed = True + beforetokens, brace = self._tokensupto2(tokenizer, + blockstartonly=True, + separateEnd=True) + if self._tokenvalue(brace) != u'{': + wellformed = False + self._log.error( + u'CSSFontFaceRule: No start { of style declaration found: %r' % + self._valuestr(cssText), brace) + + # parse stuff before { which should be comments and S only + new = {'wellformed': True} + newseq = self._tempSeq()#[] + + beforewellformed, expected = self._parse(expected=':', + seq=newseq, tokenizer=self._tokenize2(beforetokens), + productions={}) + wellformed = wellformed and beforewellformed and new['wellformed'] + + styletokens, braceorEOFtoken = self._tokensupto2(tokenizer, + blockendonly=True, + separateEnd=True) + + val, typ = self._tokenvalue(braceorEOFtoken), self._type(braceorEOFtoken) + if val != u'}' and typ != 'EOF': + wellformed = False + self._log.error( + u'CSSFontFaceRule: No "}" after style declaration found: %r' % + self._valuestr(cssText)) + + nonetoken = self._nexttoken(tokenizer) + if nonetoken: + wellformed = False + self._log.error(u'CSSFontFaceRule: Trailing content found.', + token=nonetoken) + + newstyle = CSSStyleDeclaration() + if 'EOF' == typ: + # add again as style needs it + styletokens.append(braceorEOFtoken) + newstyle.cssText = styletokens + + if wellformed: + self.style = newstyle + self._setSeq(newseq) # contains (probably comments) upto { only + + cssText = property(_getCssText, _setCssText, + doc="(DOM) The parsable textual representation of the rule.") + + def _getStyle(self): + return self._style + + def _setStyle(self, style): + """ + style + StyleDeclaration or string + """ + 
self._checkReadonly() + if isinstance(style, basestring): + self._style = CSSStyleDeclaration(parentRule=self, cssText=style) + else: + self._style._seq = style.seq + + style = property(_getStyle, _setStyle, + doc="(DOM) The declaration-block of this rule set.") + + def __repr__(self): + return "cssutils.css.%s(style=%r)" % ( + self.__class__.__name__, self.style.cssText) + + def __str__(self): + return "" % ( + self.__class__.__name__, self.style.cssText, id(self)) diff --git a/src/calibre/utils/cssutils/css/cssimportrule.py b/src/calibre/utils/cssutils/css/cssimportrule.py new file mode 100644 index 0000000000..f619353e48 --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssimportrule.py @@ -0,0 +1,399 @@ +"""CSSImportRule implements DOM Level 2 CSS CSSImportRule. + +plus: + +``name`` property + http://www.w3.org/TR/css3-cascade/#cascading + +""" +__all__ = ['CSSImportRule'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: cssimportrule.py 1401 2008-07-29 21:07:54Z cthedot $' + +import os +import urllib +import urlparse +import xml.dom +import cssrule +import cssutils + +class CSSImportRule(cssrule.CSSRule): + """ + Represents an @import rule within a CSS style sheet. The @import rule + is used to import style rules from other style sheets. + + Properties + ========== + atkeyword: (cssutils only) + the literal keyword used + cssText: of type DOMString + The parsable textual representation of this rule + href: of type DOMString, (DOM readonly, cssutils also writable) + The location of the style sheet to be imported. The attribute will + not contain the url(...) specifier around the URI. + hreftype: 'uri' (serializer default) or 'string' (cssutils only) + The original type of href, not really relevant as it may be + reconfigured in the serializer but it is kept anyway + media: of type stylesheets::MediaList (DOM readonly) + A list of media types for this rule of type MediaList. 
+ name: + An optional name used for cascading + styleSheet: of type CSSStyleSheet (DOM readonly) + The style sheet referred to by this rule. The value of this + attribute is None if the style sheet has not yet been loaded or if + it will not be loaded (e.g. if the stylesheet is for a media type + not supported by the user agent). + + Inherits properties from CSSRule + + Format + ====== + import + : IMPORT_SYM S* + [STRING|URI] S* [ medium [ COMMA S* medium]* ]? S* STRING? S* ';' S* + ; + """ + type = property(lambda self: cssrule.CSSRule.IMPORT_RULE) + + def __init__(self, href=None, mediaText=u'all', name=None, + parentRule=None, parentStyleSheet=None, readonly=False): + """ + if readonly allows setting of properties in constructor only + + Do not use as positional but as keyword attributes only! + + href + location of the style sheet to be imported. + mediaText + A list of media types for which this style sheet may be used + as a string + """ + super(CSSImportRule, self).__init__(parentRule=parentRule, + parentStyleSheet=parentStyleSheet) + self._atkeyword = u'@import' + self.hreftype = None + self._styleSheet = None + + self._href = None + self.href = href + + self._media = cssutils.stylesheets.MediaList() + if mediaText: + self._media.mediaText = mediaText + + self._name = name + + seq = self._tempSeq() + seq.append(self.href, 'href') + seq.append(self.media, 'media') + seq.append(self.name, 'name') + self._setSeq(seq) + self._readonly = readonly + + _usemedia = property(lambda self: self.media.mediaText not in (u'', u'all'), + doc="if self._media is used (or simply empty)") + + def _getCssText(self): + """ + returns serialized property cssText + """ + return cssutils.ser.do_CSSImportRule(self) + + def _setCssText(self, cssText): + """ + DOMException on setting + + - HIERARCHY_REQUEST_ERR: (CSSStylesheet) + Raised if the rule cannot be inserted at this point in the + style sheet. 
+ - INVALID_MODIFICATION_ERR: (self) + Raised if the specified CSS string value represents a different + type of rule than the current one. + - NO_MODIFICATION_ALLOWED_ERR: (CSSRule) + Raised if the rule is readonly. + - SYNTAX_ERR: (self) + Raised if the specified CSS string value has a syntax error and + is unparsable. + """ + super(CSSImportRule, self)._setCssText(cssText) + tokenizer = self._tokenize2(cssText) + attoken = self._nexttoken(tokenizer, None) + if self._type(attoken) != self._prods.IMPORT_SYM: + self._log.error(u'CSSImportRule: No CSSImportRule found: %s' % + self._valuestr(cssText), + error=xml.dom.InvalidModificationErr) + else: + # for closures: must be a mutable + new = {'keyword': self._tokenvalue(attoken), + 'href': None, + 'hreftype': None, + 'media': None, + 'name': None, + 'wellformed': True + } + + def __doname(seq, token): + # called by _string or _ident + new['name'] = self._stringtokenvalue(token) + seq.append(new['name'], 'name') + return ';' + + def _string(expected, seq, token, tokenizer=None): + if 'href' == expected: + # href + new['href'] = self._stringtokenvalue(token) + new['hreftype'] = 'string' + seq.append(new['href'], 'href') + return 'media name ;' + elif 'name' in expected: + # name + return __doname(seq, token) + else: + new['wellformed'] = False + self._log.error( + u'CSSImportRule: Unexpected string.', token) + return expected + + def _uri(expected, seq, token, tokenizer=None): + # href + if 'href' == expected: + uri = self._uritokenvalue(token) + new['hreftype'] = 'uri' + new['href'] = uri + seq.append(new['href'], 'href') + return 'media name ;' + else: + new['wellformed'] = False + self._log.error( + u'CSSImportRule: Unexpected URI.', token) + return expected + + def _ident(expected, seq, token, tokenizer=None): + # medialist ending with ; which is checked upon too + if expected.startswith('media'): + mediatokens = self._tokensupto2( + tokenizer, importmediaqueryendonly=True) + mediatokens.insert(0, token) # push 
found token + + last = mediatokens.pop() # retrieve ; + lastval, lasttyp = self._tokenvalue(last), self._type(last) + if lastval != u';' and lasttyp not in ('EOF', self._prods.STRING): + new['wellformed'] = False + self._log.error(u'CSSImportRule: No ";" found: %s' % + self._valuestr(cssText), token=token) + + media = cssutils.stylesheets.MediaList() + media.mediaText = mediatokens + if media.wellformed: + new['media'] = media + seq.append(media, 'media') + else: + new['wellformed'] = False + self._log.error(u'CSSImportRule: Invalid MediaList: %s' % + self._valuestr(cssText), token=token) + + if lasttyp == self._prods.STRING: + # name + return __doname(seq, last) + else: + return 'EOF' # ';' is token "last" + else: + new['wellformed'] = False + self._log.error( + u'CSSImportRule: Unexpected ident.', token) + return expected + + def _char(expected, seq, token, tokenizer=None): + # final ; + val = self._tokenvalue(token) + if expected.endswith(';') and u';' == val: + return 'EOF' + else: + new['wellformed'] = False + self._log.error( + u'CSSImportRule: Unexpected char.', token) + return expected + + # import : IMPORT_SYM S* [STRING|URI] + # S* [ medium [ ',' S* medium]* ]? ';' S* + # STRING? 
# see http://www.w3.org/TR/css3-cascade/#cascading + # ; + newseq = self._tempSeq() + wellformed, expected = self._parse(expected='href', + seq=newseq, tokenizer=tokenizer, + productions={'STRING': _string, + 'URI': _uri, + 'IDENT': _ident, + 'CHAR': _char}, + new=new) + + # wellformed set by parse + wellformed = wellformed and new['wellformed'] + + # post conditions + if not new['href']: + wellformed = False + self._log.error(u'CSSImportRule: No href found: %s' % + self._valuestr(cssText)) + + if expected != 'EOF': + wellformed = False + self._log.error(u'CSSImportRule: No ";" found: %s' % + self._valuestr(cssText)) + + # set all + if wellformed: + self.atkeyword = new['keyword'] + self.hreftype = new['hreftype'] + if new['media']: + # use same object + self.media.mediaText = new['media'].mediaText + # put it in newseq too + for index, x in enumerate(newseq): + if x.type == 'media': + newseq.replace(index, self.media, + x.type, x.line, x.col) + break + else: + # reset media + self.media.mediaText = u'all' + newseq.append(self.media, 'media') + self.name = new['name'] + self._setSeq(newseq) + self.href = new['href'] + + if self.styleSheet: + # title is set by href + #self.styleSheet._href = self.href + self.styleSheet._parentStyleSheet = self.parentStyleSheet + + cssText = property(fget=_getCssText, fset=_setCssText, + doc="(DOM attribute) The parsable textual representation.") + + def _setHref(self, href): + # update seq + for i, item in enumerate(self.seq): + val, typ = item.value, item.type + if 'href' == typ: + self._seq[i] = (href, typ, item.line, item.col) + break + else: + seq = self._tempSeq() + seq.append(self.href, 'href') + self._setSeq(seq) + # set new href + self._href = href + if not self.styleSheet: + # set only if not set before + self.__setStyleSheet() + + href = property(lambda self: self._href, _setHref, + doc="Location of the style sheet to be imported.") + + media = property(lambda self: self._media, + doc=u"(DOM readonly) A list of media types 
for this rule" + " of type MediaList") + + def _setName(self, name): + """raises xml.dom.SyntaxErr if name is not a string""" + if isinstance(name, basestring) or name is None: + # "" or '' + if not name: + name = None + # update seq + for i, item in enumerate(self.seq): + val, typ = item.value, item.type + if 'name' == typ: + self._seq[i] = (name, typ, item.line, item.col) + break + else: + # append + seq = self._tempSeq() + for item in self.seq: + # copy current seq + seq.append(item.value, item.type, item.line, item.col) + seq.append(name, 'name') + self._setSeq(seq) + self._name = name + # set title of referred sheet + if self.styleSheet: + self.styleSheet.title = name + else: + self._log.error(u'CSSImportRule: Not a valid name: %s' % name) + + name = property(lambda self: self._name, _setName, + doc=u"An optional name for the imported sheet") + + def __setStyleSheet(self): + """Read new CSSStyleSheet cssText from href using parentStyleSheet.href + + Indirectly called if setting ``href``. In case of any error styleSheet + is set to ``None``. + """ + # should simply fail so all errors are catched! + if self.parentStyleSheet and self.href: + # relative href + parentHref = self.parentStyleSheet.href + if parentHref is None: + # use cwd instead + parentHref = u'file:' + urllib.pathname2url(os.getcwd()) + '/' + href = urlparse.urljoin(parentHref, self.href) + + # all possible exceptions are ignored (styleSheet is None then) + try: + usedEncoding, enctype, cssText = self.parentStyleSheet._resolveImport(href) + if cssText is None: + # catched in next except below! 
+ raise IOError('Cannot read Stylesheet.') + styleSheet = cssutils.css.CSSStyleSheet(href=href, + media=self.media, + ownerRule=self, + title=self.name) + # inherit fetcher for @imports in styleSheet + styleSheet._setFetcher(self.parentStyleSheet._fetcher) + # contentEncoding with parentStyleSheet.overrideEncoding, + # HTTP or parent + encodingOverride, encoding = None, None + if enctype == 0: + encodingOverride = usedEncoding + elif 5 > enctype > 0: + encoding = usedEncoding + + styleSheet._setCssTextWithEncodingOverride(cssText, + encodingOverride=encodingOverride, + encoding=encoding) + + except (OSError, IOError, ValueError), e: + self._log.warn(u'CSSImportRule: While processing imported style sheet href=%r: %r' + % (self.href, e), neverraise=True) + else: + self._styleSheet = styleSheet + + styleSheet = property(lambda self: self._styleSheet, + doc="(readonly) The style sheet referred to by this rule.") + + def _getWellformed(self): + "depending if media is used at all" + if self._usemedia: + return bool(self.href and self.media.wellformed) + else: + return bool(self.href) + + wellformed = property(_getWellformed) + + def __repr__(self): + if self._usemedia: + mediaText = self.media.mediaText + else: + mediaText = None + return "cssutils.css.%s(href=%r, mediaText=%r, name=%r)" % ( + self.__class__.__name__, + self.href, self.media.mediaText, self.name) + + def __str__(self): + if self._usemedia: + mediaText = self.media.mediaText + else: + mediaText = None + return "" % ( + self.__class__.__name__, self.href, mediaText, self.name, id(self)) diff --git a/src/calibre/utils/cssutils/css/cssmediarule.py b/src/calibre/utils/cssutils/css/cssmediarule.py new file mode 100644 index 0000000000..ee839b5c1d --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssmediarule.py @@ -0,0 +1,349 @@ +"""CSSMediaRule implements DOM Level 2 CSS CSSMediaRule. 
class CSSMediaRule(cssrule.CSSRule):
    """
    Objects implementing the CSSMediaRule interface can be identified by the
    MEDIA_RULE constant. On these objects the type attribute must return the
    value of that constant.

    Properties
    ==========
    atkeyword: (cssutils only)
        the literal keyword used
    cssRules: A css::CSSRuleList of all CSS rules contained within the
        media block.
    cssText: of type DOMString
        The parsable textual representation of this rule
    media: of type stylesheets::MediaList, (DOM readonly)
        A list of media types for this rule of type MediaList.
    name:
        An optional name used for cascading

    Format
    ======
    media
      : MEDIA_SYM S* medium [ COMMA S* medium ]*

          STRING? # the name

      LBRACE S* ruleset* '}' S*;
    """
    # CONSTANT
    type = property(lambda self: cssrule.CSSRule.MEDIA_RULE)

    def __init__(self, mediaText='all', name=None,
                 parentRule=None, parentStyleSheet=None, readonly=False):
        """
        mediaText
            media list of this rule as a string
        name
            optional name of this rule
        readonly
            if True the rule is readonly once the constructor has run
        """
        super(CSSMediaRule, self).__init__(parentRule=parentRule,
                                           parentStyleSheet=parentStyleSheet)
        self._atkeyword = u'@media'
        self._media = cssutils.stylesheets.MediaList(mediaText,
                                                     readonly=readonly)
        self.name = name
        self.cssRules = cssutils.css.cssrulelist.CSSRuleList()
        # route the list's modifying methods through this rule
        self.cssRules.append = self.insertRule
        self.cssRules.extend = self.insertRule
        # assignment, not comparison: ``==`` here was a statement
        # without any effect.
        # NOTE(review): presumably CSSRuleList subclasses ``list``; then
        # a ``del rules[i]`` statement still uses the type's own
        # __delitem__ and only explicit ``rules.__delitem__(i)`` calls
        # end up in deleteRule - confirm against cssrulelist.py
        self.cssRules.__delitem__ = self.deleteRule

        self._readonly = readonly

    def __iter__(self):
        """generator which iterates over cssRules."""
        for rule in self.cssRules:
            yield rule

    def _getCssText(self):
        """return serialized property cssText"""
        return cssutils.ser.do_CSSMediaRule(self)

    def _setCssText(self, cssText):
        """
        :param cssText:
            a parseable string or a tuple of (cssText, dict-of-namespaces)
        :Exceptions:
            - `NAMESPACE_ERR`: (Selector)
              Raised if a specified selector uses an unknown namespace
              prefix.
            - `SYNTAX_ERR`: (self, StyleDeclaration, etc)
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
            - `INVALID_MODIFICATION_ERR`: (self)
              Raised if the specified CSS string value represents a different
              type of rule than the current one.
            - `HIERARCHY_REQUEST_ERR`: (CSSStylesheet)
              Raised if the rule cannot be inserted at this point in the
              style sheet.
            - `NO_MODIFICATION_ALLOWED_ERR`: (CSSRule)
              Raised if the rule is readonly.
        """
        super(CSSMediaRule, self)._setCssText(cssText)

        # might be (cssText, namespaces)
        cssText, namespaces = self._splitNamespacesOff(cssText)
        try:
            # use parent style sheet ones if available
            namespaces = self.parentStyleSheet.namespaces
        except AttributeError:
            pass

        tokenizer = self._tokenize2(cssText)
        attoken = self._nexttoken(tokenizer, None)
        if self._type(attoken) != self._prods.MEDIA_SYM:
            self._log.error(u'CSSMediaRule: No CSSMediaRule found: %s' %
                self._valuestr(cssText),
                error=xml.dom.InvalidModificationErr)
        else:
            # media "name"? { cssRules }

            # media
            wellformed = True
            mediatokens, end = self._tokensupto2(tokenizer,
                                                 mediaqueryendonly=True,
                                                 separateEnd=True)
            if u'{' == self._tokenvalue(end) or self._prods.STRING == self._type(end):
                newmedia = cssutils.stylesheets.MediaList()
                newmedia.mediaText = mediatokens

            # name (optional)
            name = None
            nameseq = self._tempSeq()
            if self._prods.STRING == self._type(end):
                name = self._stringtokenvalue(end)
                # TODO: for now comments are lost after name
                nametokens, end = self._tokensupto2(tokenizer,
                                                    blockstartonly=True,
                                                    separateEnd=True)
                wellformed, expected = self._parse(None, nameseq, nametokens, {})
                if not wellformed:
                    self._log.error(u'CSSMediaRule: Syntax Error: %s' %
                                    self._valuestr(cssText))

            # check for {
            # (leaving here also covers the case that newmedia was never
            # created above)
            if u'{' != self._tokenvalue(end):
                self._log.error(u'CSSMediaRule: No "{" found: %s' %
                                self._valuestr(cssText))
                return

            # cssRules
            cssrulestokens, braceOrEOF = self._tokensupto2(tokenizer,
                                                           mediaendonly=True,
                                                           separateEnd=True)
            nonetoken = self._nexttoken(tokenizer, None)
            if (u'}' != self._tokenvalue(braceOrEOF) and
                'EOF' != self._type(braceOrEOF)):
                self._log.error(u'CSSMediaRule: No "}" found.',
                                token=braceOrEOF)
            elif nonetoken:
                self._log.error(u'CSSMediaRule: Trailing content found.',
                                token=nonetoken)
            else:
                # for closures: must be a mutable
                newcssrules = [] #cssutils.css.CSSRuleList()
                new = {'wellformed': True }

                def ruleset(expected, seq, token, tokenizer):
                    # default production: everything not handled below
                    rule = cssutils.css.CSSStyleRule(parentRule=self)
                    rule.cssText = (self._tokensupto2(tokenizer, token),
                                    namespaces)
                    if rule.wellformed:
                        rule._parentStyleSheet=self.parentStyleSheet
                        seq.append(rule)
                    return expected

                def atrule(expected, seq, token, tokenizer):
                    # TODO: get complete rule!
                    tokens = self._tokensupto2(tokenizer, token)
                    atval = self._tokenvalue(token)
                    if atval in ('@charset ', '@font-face', '@import', '@namespace',
                                 '@page', '@media'):
                        self._log.error(
                            u'CSSMediaRule: This rule is not allowed in CSSMediaRule - ignored: %s.'
                                % self._valuestr(tokens),
                                token = token,
                                error=xml.dom.HierarchyRequestErr)
                    else:
                        rule = cssutils.css.CSSUnknownRule(parentRule=self,
                                           parentStyleSheet=self.parentStyleSheet)
                        rule.cssText = tokens
                        if rule.wellformed:
                            seq.append(rule)
                    return expected

                def COMMENT(expected, seq, token, tokenizer=None):
                    seq.append(cssutils.css.CSSComment([token]))
                    return expected

                tokenizer = (t for t in cssrulestokens) # TODO: not elegant!
                wellformed, expected = self._parse(braceOrEOF,
                                                   newcssrules,
                                                   tokenizer, {
                                                     'COMMENT': COMMENT,
                                                     'CHARSET_SYM': atrule,
                                                     'FONT_FACE_SYM': atrule,
                                                     'IMPORT_SYM': atrule,
                                                     'NAMESPACE_SYM': atrule,
                                                     'PAGE_SYM': atrule,
                                                     'MEDIA_SYM': atrule,
                                                     'ATKEYWORD': atrule
                                                     },
                                                   default=ruleset,
                                                   new=new)

                # no post condition
                if newmedia.wellformed and wellformed:
                    # keep reference
                    self._media.mediaText = newmedia.mediaText
                    self.name = name
                    self._setSeq(nameseq)
                    del self.cssRules[:]
                    for r in newcssrules:
                        self.cssRules.append(r)

    cssText = property(_getCssText, _setCssText,
        doc="(DOM attribute) The parsable textual representation.")

    def _setName(self, name):
        """
        Set the optional name; an empty string is stored as None.

        Logs an error and leaves the name unchanged if ``name`` is
        neither a string nor None.
        """
        if isinstance(name, basestring) or name is None:
            # "" or ''
            if not name:
                name = None

            self._name = name
        else:
            # report under this class' own name
            self._log.error(u'CSSMediaRule: Not a valid name: %s' % name)

    name = property(lambda self: self._name, _setName,
                    doc=u"An optional name for the media rules")

    media = property(lambda self: self._media,
                     doc=u"(DOM readonly) A list of media types for this rule of type"
                     u" MediaList")

    wellformed = property(lambda self: self.media.wellformed)

    def deleteRule(self, index):
        """
        index
            within the media block's rule collection of the rule to remove.

        Used to delete a rule from the media block.

        DOMExceptions

        - INDEX_SIZE_ERR: (self)
          Raised if the specified index does not correspond to a rule in
          the media rule list.
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this media rule is readonly.
        """
        self._checkReadonly()

        try:
            self.cssRules[index]._parentRule = None # detach
            del self.cssRules[index] # remove from @media
        except IndexError:
            raise xml.dom.IndexSizeErr(
                u'CSSMediaRule: %s is not a valid index in the rulelist of length %i' % (
                index, self.cssRules.length))

    def add(self, rule):
        """Add rule to end of this mediarule. Same as ``.insertRule(rule)``.

        returns what ``insertRule`` returns: the index of the newly
        inserted rule, or None if the rule was rejected.
        """
        return self.insertRule(rule, index=None)

    def insertRule(self, rule, index=None):
        """
        rule
            The parsable text representing the rule. For rule sets this
            contains both the selector and the style declaration. For
            at-rules, this specifies both the at-identifier and the rule
            content.

            cssutils also allows rule to be a valid **CSSRule** object

        index
            within the media block's rule collection of the rule before
            which to insert the specified rule. If the specified index is
            equal to the length of the media blocks's rule collection, the
            rule will be added to the end of the media block.
            If index is not given or None rule will be appended to rule
            list.

        Used to insert a new rule into the media block.

        DOMException on setting

        - HIERARCHY_REQUEST_ERR:
          (no use case yet as no @charset or @import allowed))
          Raised if the rule cannot be inserted at the specified index,
          e.g., if an @import rule is inserted after a standard rule set
          or other at-rule.
        - INDEX_SIZE_ERR: (self)
          Raised if the specified index is not a valid insertion point.
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this media rule is readonly.
        - SYNTAX_ERR: (CSSStyleRule)
          Raised if the specified rule has a syntax error and is
          unparsable.

        returns the index within the media block's rule collection of the
        newly inserted rule.

        """
        self._checkReadonly()

        # check position
        if index is None:
            index = len(self.cssRules)
        elif index < 0 or index > self.cssRules.length:
            raise xml.dom.IndexSizeErr(
                u'CSSMediaRule: Invalid index %s for CSSRuleList with a length of %s.' % (
                    index, self.cssRules.length))

        # parse
        if isinstance(rule, basestring):
            # text has to parse to exactly one rule
            tempsheet = cssutils.css.CSSStyleSheet()
            tempsheet.cssText = rule
            if len(tempsheet.cssRules) != 1 or (tempsheet.cssRules and
             not isinstance(tempsheet.cssRules[0], cssutils.css.CSSRule)):
                self._log.error(u'CSSMediaRule: Invalid Rule: %s' % rule)
                return
            rule = tempsheet.cssRules[0]
        elif not isinstance(rule, cssutils.css.CSSRule):
            self._log.error(u'CSSMediaRule: Not a CSSRule: %s' % rule)
            return

        # CHECK HIERARCHY
        # @charset @import @page @namespace @media
        if isinstance(rule, cssutils.css.CSSCharsetRule) or \
           isinstance(rule, cssutils.css.CSSFontFaceRule) or \
           isinstance(rule, cssutils.css.CSSImportRule) or \
           isinstance(rule, cssutils.css.CSSNamespaceRule) or \
           isinstance(rule, cssutils.css.CSSPageRule) or \
           isinstance(rule, CSSMediaRule):
            self._log.error(u'CSSMediaRule: This type of rule is not allowed here: %s' %
                            rule.cssText,
                            error=xml.dom.HierarchyRequestErr)
            return

        self.cssRules.insert(index, rule)
        rule._parentRule = self
        rule._parentStyleSheet = self.parentStyleSheet
        return index

    def __repr__(self):
        return "cssutils.css.%s(mediaText=%r)" % (
                self.__class__.__name__, self.media.mediaText)

    def __str__(self):
        # the format string needs one placeholder per value below
        return "<cssutils.css.%s object mediaText=%r at 0x%x>" % (
                self.__class__.__name__, self.media.mediaText, id(self))
class CSSNamespaceRule(cssrule.CSSRule):
    """
    Represents an @namespace rule within a CSS style sheet.

    The @namespace at-rule declares a namespace prefix and associates
    it with a given namespace (a string). This namespace prefix can then be
    used in namespace-qualified names such as those described in the
    Selectors Module [SELECT] or the Values and Units module [CSS3VAL].

    Properties
    ==========
    atkeyword (cssutils only)
        the literal keyword used
    cssText: of type DOMString
        The parsable textual representation of this rule
    namespaceURI: of type DOMString
        The namespace URI (a simple string!) which is bound to the given
        prefix. If no prefix is set (``CSSNamespaceRule.prefix==''``)
        the namespace defined by ``namespaceURI`` is set as the default
        namespace.
    prefix: of type DOMString
        The prefix used in the stylesheet for the given
        ``CSSNamespaceRule.namespaceURI``. If prefix is empty namespaceURI
        sets a default namespace for the stylesheet.

    Inherits properties from CSSRule

    Format
    ======
    namespace
      : NAMESPACE_SYM S* [namespace_prefix S*]? [STRING|URI] S* ';' S*
      ;
    namespace_prefix
      : IDENT
      ;
    """
    type = property(lambda self: cssrule.CSSRule.NAMESPACE_RULE)

    def __init__(self, namespaceURI=None, prefix=None, cssText=None,
                 parentRule=None, parentStyleSheet=None, readonly=False):
        """
        :Parameters:
            namespaceURI
                The namespace URI (a simple string!) which is bound to the
                given prefix. If no prefix is set
                (``CSSNamespaceRule.prefix==''``) the namespace defined by
                namespaceURI is set as the default namespace
            prefix
                The prefix used in the stylesheet for the given
                ``CSSNamespaceRule.namespaceURI``.
            cssText
                if no namespaceURI is given cssText must be given to set
                a namespaceURI as this is readonly later on
            parentStyleSheet
                sheet where this rule belongs to

        Do not use as positional but as keyword parameters only!

        If readonly allows setting of properties in constructor only

        format namespace::

            namespace
              : NAMESPACE_SYM S* [namespace_prefix S*]? [STRING|URI] S* ';' S*
              ;
            namespace_prefix
              : IDENT
              ;
        """
        super(CSSNamespaceRule, self).__init__(parentRule=parentRule,
                                               parentStyleSheet=parentStyleSheet)
        self._atkeyword = u'@namespace'
        self._prefix = u''
        self._namespaceURI = None

        # an explicit namespaceURI wins over cssText
        if namespaceURI:
            self.namespaceURI = namespaceURI
            self.prefix = prefix
            # whatever the two setters left in seq is replaced here
            tempseq = self._tempSeq()
            tempseq.append(self.prefix, 'prefix')
            tempseq.append(self.namespaceURI, 'namespaceURI')
            self._setSeq(tempseq)
        elif cssText is not None:
            self.cssText = cssText

        if parentStyleSheet:
            self._parentStyleSheet = parentStyleSheet

        self._readonly = readonly

    def _getCssText(self):
        """
        returns serialized property cssText
        """
        return cssutils.ser.do_CSSNamespaceRule(self)

    def _setCssText(self, cssText):
        """
        DOMException on setting

        :param cssText: initial value for this rules cssText which is parsed
        :Exceptions:
            - `HIERARCHY_REQUEST_ERR`: (CSSStylesheet)
              Raised if the rule cannot be inserted at this point in the
              style sheet.
            - `INVALID_MODIFICATION_ERR`: (self)
              Raised if the specified CSS string value represents a different
              type of rule than the current one.
            - `NO_MODIFICATION_ALLOWED_ERR`: (CSSRule)
              Raised if the rule is readonly.
            - `SYNTAX_ERR`: (self)
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
        """
        super(CSSNamespaceRule, self)._setCssText(cssText)
        tokenizer = self._tokenize2(cssText)
        attoken = self._nexttoken(tokenizer, None)
        if self._type(attoken) != self._prods.NAMESPACE_SYM:
            self._log.error(u'CSSNamespaceRule: No CSSNamespaceRule found: %s' %
                self._valuestr(cssText),
                error=xml.dom.InvalidModificationErr)
        else:
            # for closures: must be a mutable
            new = {'keyword': self._tokenvalue(attoken),
                   'prefix': u'',
                   'uri': None,
                   'wellformed': True
                   }

            # Each production below receives the currently expected state
            # and returns the state expected next.

            def _ident(expected, seq, token, tokenizer=None):
                # the namespace prefix, optional
                if 'prefix or uri' == expected:
                    new['prefix'] = self._tokenvalue(token)
                    seq.append(new['prefix'], 'prefix')
                    return 'uri'
                else:
                    new['wellformed'] = False
                    self._log.error(
                        u'CSSNamespaceRule: Unexpected ident.', token)
                    return expected

            def _string(expected, seq, token, tokenizer=None):
                # the namespace URI as a STRING
                if expected.endswith('uri'):
                    new['uri'] = self._stringtokenvalue(token)
                    seq.append(new['uri'], 'namespaceURI')
                    return ';'

                else:
                    new['wellformed'] = False
                    self._log.error(
                        u'CSSNamespaceRule: Unexpected string.', token)
                    return expected

            def _uri(expected, seq, token, tokenizer=None):
                # the namespace URI as URI which is DEPRECATED
                if expected.endswith('uri'):
                    uri = self._uritokenvalue(token)
                    new['uri'] = uri
                    seq.append(new['uri'], 'namespaceURI')
                    return ';'
                else:
                    new['wellformed'] = False
                    self._log.error(
                        u'CSSNamespaceRule: Unexpected URI.', token)
                    return expected

            def _char(expected, seq, token, tokenizer=None):
                # final ;
                val = self._tokenvalue(token)
                if ';' == expected and u';' == val:
                    return 'EOF'
                else:
                    new['wellformed'] = False
                    self._log.error(
                        u'CSSNamespaceRule: Unexpected char.', token)
                    return expected

            # "NAMESPACE_SYM S* [namespace_prefix S*]? [STRING|URI] S* ';' S*"
            newseq = self._tempSeq()
            wellformed, expected = self._parse(expected='prefix or uri',
                seq=newseq, tokenizer=tokenizer,
                productions={'IDENT': _ident,
                             'STRING': _string,
                             'URI': _uri,
                             'CHAR': _char},
                new=new)

            # wellformed set by parse
            wellformed = wellformed and new['wellformed']

            # post conditions
            if new['uri'] is None:
                wellformed = False
                self._log.error(u'CSSNamespaceRule: No namespace URI found: %s' %
                    self._valuestr(cssText))

            if expected != 'EOF':
                wellformed = False
                self._log.error(u'CSSNamespaceRule: No ";" found: %s' %
                    self._valuestr(cssText))

            # set all
            if wellformed:
                self.atkeyword = new['keyword']
                # written directly, the prefix setter is not used here
                self._prefix = new['prefix']
                self.namespaceURI = new['uri']
                self._setSeq(newseq)

    cssText = property(fget=_getCssText, fset=_setCssText,
                       doc="(DOM attribute) The parsable textual representation.")

    def _setNamespaceURI(self, namespaceURI):
        """
        DOMException on setting

        :param namespaceURI: the initial value for this rules namespaceURI
        :Exceptions:
            - `NO_MODIFICATION_ALLOWED_ERR`:
              (CSSRule) Raised if this rule is readonly or a namespaceURI is
              already set in this rule.
        """
        self._checkReadonly()
        if not self._namespaceURI:
            # initial setting
            self._namespaceURI = namespaceURI
            tempseq = self._tempSeq()
            tempseq.append(namespaceURI, 'namespaceURI')
            self._setSeq(tempseq) # makes seq readonly!
        elif self._namespaceURI != namespaceURI:
            # assigning the same value again is silently accepted
            self._log.error(u'CSSNamespaceRule: namespaceURI is readonly.',
                            error=xml.dom.NoModificationAllowedErr)

    namespaceURI = property(lambda self: self._namespaceURI, _setNamespaceURI,
                            doc="URI (string!) of the defined namespace.")

    def _setPrefix(self, prefix=None):
        """
        DOMException on setting

        :param prefix: the new prefix; None or '' stands for no prefix
        :Exceptions:
            - `SYNTAX_ERR`: (TODO)
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
            - `NO_MODIFICATION_ALLOWED_ERR`: (CSSRule)
              Raised if this rule is readonly.
        """
        self._checkReadonly()
        if not prefix:
            prefix = u''
        else:
            # only the first token is looked at and it must be an IDENT
            tokenizer = self._tokenize2(prefix)
            prefixtoken = self._nexttoken(tokenizer, None)
            if not prefixtoken or self._type(prefixtoken) != self._prods.IDENT:
                self._log.error(u'CSSNamespaceRule: No valid prefix "%s".' %
                    self._valuestr(prefix),
                    error=xml.dom.SyntaxErr)
                return
            else:
                prefix = self._tokenvalue(prefixtoken)
        # update seq
        # NOTE(review): this compares the seq item itself with the prefix
        # string; if items are (value, type, ...) objects as elsewhere in
        # this package the comparison presumably never matches and the
        # else branch below replaces item 0 - confirm against the seq
        # implementation before relying on it
        for i, x in enumerate(self._seq):
            if x == self._prefix:
                self._seq[i] = (prefix, 'prefix', None, None)
                break
        else:
            # put prefix at the beginning!
            self._seq[0] = (prefix, 'prefix', None, None)

        # set new prefix
        self._prefix = prefix

    prefix = property(lambda self: self._prefix, _setPrefix,
                      doc="Prefix used for the defined namespace.")

#    def _setParentStyleSheet(self, parentStyleSheet):
#        self._parentStyleSheet = parentStyleSheet
#
#    parentStyleSheet = property(lambda self: self._parentStyleSheet,
#                                _setParentStyleSheet,
#                                doc=u"Containing CSSStyleSheet.")

    wellformed = property(lambda self: self.namespaceURI is not None)

    def __repr__(self):
        return "cssutils.css.%s(namespaceURI=%r, prefix=%r)" % (
                self.__class__.__name__, self.namespaceURI, self.prefix)

    def __str__(self):
        # the format string needs one placeholder per value below
        return "<cssutils.css.%s object namespaceURI=%r prefix=%r at 0x%x>" % (
                self.__class__.__name__, self.namespaceURI, self.prefix, id(self))
+""" +__all__ = ['CSSPageRule'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: csspagerule.py 1284 2008-06-05 16:29:17Z cthedot $' + +import xml.dom +import cssrule +import cssutils +from selectorlist import SelectorList +from cssstyledeclaration import CSSStyleDeclaration + +class CSSPageRule(cssrule.CSSRule): + """ + The CSSPageRule interface represents a @page rule within a CSS style + sheet. The @page rule is used to specify the dimensions, orientation, + margins, etc. of a page box for paged media. + + Properties + ========== + atkeyword (cssutils only) + the literal keyword used + cssText: of type DOMString + The parsable textual representation of this rule + selectorText: of type DOMString + The parsable textual representation of the page selector for the rule. + style: of type CSSStyleDeclaration + The declaration-block of this rule. + + Inherits properties from CSSRule + + Format + ====== + :: + + page + : PAGE_SYM S* pseudo_page? S* + LBRACE S* declaration [ ';' S* declaration ]* '}' S* + ; + pseudo_page + : ':' IDENT # :first, :left, :right in CSS 2.1 + ; + + """ + type = property(lambda self: cssrule.CSSRule.PAGE_RULE) + # constant but needed: + wellformed = True + + def __init__(self, selectorText=None, style=None, parentRule=None, + parentStyleSheet=None, readonly=False): + """ + if readonly allows setting of properties in constructor only + + selectorText + type string + style + CSSStyleDeclaration for this CSSStyleRule + """ + super(CSSPageRule, self).__init__(parentRule=parentRule, + parentStyleSheet=parentStyleSheet) + self._atkeyword = u'@page' + tempseq = self._tempSeq() + if selectorText: + self.selectorText = selectorText + tempseq.append(self.selectorText, 'selectorText') + else: + self._selectorText = u'' + if style: + self.style = style + tempseq.append(self.style, 'style') + else: + self._style = CSSStyleDeclaration(parentRule=self) + self._setSeq(tempseq) + + self._readonly = readonly + + def __parseSelectorText(self, 
selectorText): + """ + parses selectorText which may also be a list of tokens + and returns (selectorText, seq) + + see _setSelectorText for details + """ + # for closures: must be a mutable + new = {'selector': None, 'wellformed': True} + + def _char(expected, seq, token, tokenizer=None): + # pseudo_page, :left, :right or :first + val = self._tokenvalue(token) + if ':' == expected and u':' == val: + try: + identtoken = tokenizer.next() + except StopIteration: + self._log.error( + u'CSSPageRule selectorText: No IDENT found.', token) + else: + ival, ityp = self._tokenvalue(identtoken), self._type(identtoken) + if self._prods.IDENT != ityp: + self._log.error( + u'CSSPageRule selectorText: Expected IDENT but found: %r' % + ival, token) + else: + new['selector'] = val + ival + seq.append(new['selector'], 'selector') + return 'EOF' + return expected + else: + new['wellformed'] = False + self._log.error( + u'CSSPageRule selectorText: Unexpected CHAR: %r' % val, token) + return expected + + def S(expected, seq, token, tokenizer=None): + "Does not raise if EOF is found." + return expected + + def COMMENT(expected, seq, token, tokenizer=None): + "Does not raise if EOF is found." 
+ seq.append(cssutils.css.CSSComment([token]), 'COMMENT') + return expected + + newseq = self._tempSeq() + wellformed, expected = self._parse(expected=':', + seq=newseq, tokenizer=self._tokenize2(selectorText), + productions={'CHAR': _char, + 'COMMENT': COMMENT, + 'S': S}, + new=new) + wellformed = wellformed and new['wellformed'] + newselector = new['selector'] + + # post conditions + if expected == 'ident': + self._log.error( + u'CSSPageRule selectorText: No valid selector: %r' % + self._valuestr(selectorText)) + + if not newselector in (None, u':first', u':left', u':right'): + self._log.warn(u'CSSPageRule: Unknown CSS 2.1 @page selector: %r' % + newselector, neverraise=True) + + return newselector, newseq + + def _getCssText(self): + """ + returns serialized property cssText + """ + return cssutils.ser.do_CSSPageRule(self) + + def _setCssText(self, cssText): + """ + DOMException on setting + + - SYNTAX_ERR: (self, StyleDeclaration) + Raised if the specified CSS string value has a syntax error and + is unparsable. + - INVALID_MODIFICATION_ERR: (self) + Raised if the specified CSS string value represents a different + type of rule than the current one. + - HIERARCHY_REQUEST_ERR: (CSSStylesheet) + Raised if the rule cannot be inserted at this point in the + style sheet. + - NO_MODIFICATION_ALLOWED_ERR: (CSSRule) + Raised if the rule is readonly. 
+ """ + super(CSSPageRule, self)._setCssText(cssText) + + tokenizer = self._tokenize2(cssText) + if self._type(self._nexttoken(tokenizer)) != self._prods.PAGE_SYM: + self._log.error(u'CSSPageRule: No CSSPageRule found: %s' % + self._valuestr(cssText), + error=xml.dom.InvalidModificationErr) + else: + wellformed = True + selectortokens, startbrace = self._tokensupto2(tokenizer, + blockstartonly=True, + separateEnd=True) + styletokens, braceorEOFtoken = self._tokensupto2(tokenizer, + blockendonly=True, + separateEnd=True) + nonetoken = self._nexttoken(tokenizer) + if self._tokenvalue(startbrace) != u'{': + wellformed = False + self._log.error( + u'CSSPageRule: No start { of style declaration found: %r' % + self._valuestr(cssText), startbrace) + elif nonetoken: + wellformed = False + self._log.error( + u'CSSPageRule: Trailing content found.', token=nonetoken) + + + newselector, newselectorseq = self.__parseSelectorText(selectortokens) + + newstyle = CSSStyleDeclaration() + val, typ = self._tokenvalue(braceorEOFtoken), self._type(braceorEOFtoken) + if val != u'}' and typ != 'EOF': + wellformed = False + self._log.error( + u'CSSPageRule: No "}" after style declaration found: %r' % + self._valuestr(cssText)) + else: + if 'EOF' == typ: + # add again as style needs it + styletokens.append(braceorEOFtoken) + newstyle.cssText = styletokens + + if wellformed: + self._selectorText = newselector # already parsed + self.style = newstyle + self._setSeq(newselectorseq) # contains upto style only + + cssText = property(_getCssText, _setCssText, + doc="(DOM) The parsable textual representation of the rule.") + + def _getSelectorText(self): + """ + wrapper for cssutils Selector object + """ + return self._selectorText + + def _setSelectorText(self, selectorText): + """ + wrapper for cssutils Selector object + + selector: DOM String + in CSS 2.1 one of + - :first + - :left + - :right + - empty + + If WS or Comments are included they are ignored here! 
Only + way to add a comment is via setting ``cssText`` + + DOMException on setting + + - SYNTAX_ERR: + Raised if the specified CSS string value has a syntax error + and is unparsable. + - NO_MODIFICATION_ALLOWED_ERR: (self) + Raised if this rule is readonly. + """ + self._checkReadonly() + + # may raise SYNTAX_ERR + newselectortext, newseq = self.__parseSelectorText(selectorText) + + if newselectortext: + for i, x in enumerate(self.seq): + if x == self._selectorText: + self.seq[i] = newselectortext + self._selectorText = newselectortext + + selectorText = property(_getSelectorText, _setSelectorText, + doc="""(DOM) The parsable textual representation of the page selector for the rule.""") + + def _getStyle(self): + + return self._style + + def _setStyle(self, style): + """ + style + StyleDeclaration or string + """ + self._checkReadonly() + + if isinstance(style, basestring): + self._style.cssText = style + else: + # cssText would be serialized with optional preferences + # so use seq! + self._style._seq = style.seq + + style = property(_getStyle, _setStyle, + doc="(DOM) The declaration-block of this rule set.") + + def __repr__(self): + return "cssutils.css.%s(selectorText=%r, style=%r)" % ( + self.__class__.__name__, self.selectorText, self.style.cssText) + + def __str__(self): + return "" % ( + self.__class__.__name__, self.selectorText, self.style.cssText, + id(self)) diff --git a/src/calibre/utils/cssutils/css/cssproperties.py b/src/calibre/utils/cssutils/css/cssproperties.py new file mode 100644 index 0000000000..2d46ea4f59 --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssproperties.py @@ -0,0 +1,349 @@ +"""CSS2Properties (partly!) 
implements DOM Level 2 CSS CSS2Properties used +by CSSStyleDeclaration + +TODO: CSS2Properties + If an implementation does implement this interface, it is expected to + understand the specific syntax of the shorthand properties, and apply + their semantics; when the margin property is set, for example, the + marginTop, marginRight, marginBottom and marginLeft properties are + actually being set by the underlying implementation. + + When dealing with CSS "shorthand" properties, the shorthand properties + should be decomposed into their component longhand properties as + appropriate, and when querying for their value, the form returned + should be the shortest form exactly equivalent to the declarations made + in the ruleset. However, if there is no shorthand declaration that + could be added to the ruleset without changing in any way the rules + already declared in the ruleset (i.e., by adding longhand rules that + were previously not declared in the ruleset), then the empty string + should be returned for the shorthand property. + + For example, querying for the font property should not return + "normal normal normal 14pt/normal Arial, sans-serif", when + "14pt Arial, sans-serif" suffices. (The normals are initial values, and + are implied by use of the longhand property.) + + If the values for all the longhand properties that compose a particular + string are the initial values, then a string consisting of all the + initial values should be returned (e.g. a border-width value of + "medium" should be returned as such, not as ""). + + For some shorthand properties that take missing values from other + sides, such as the margin, padding, and border-[width|style|color] + properties, the minimum number of sides possible should be used; i.e., + "0px 10px" will be returned instead of "0px 10px 0px 10px". 
+ + If the value of a shorthand property can not be decomposed into its + component longhand properties, as is the case for the font property + with a value of "menu", querying for the values of the component + longhand properties should return the empty string. + +TODO: CSS2Properties DOMImplementation + The interface found within this section are not mandatory. A DOM + application can use the hasFeature method of the DOMImplementation + interface to determine whether it is supported or not. The feature + string for this extended interface listed in this section is "CSS2" + and the version is "2.0". + + +cssvalues +========= +contributed by Kevin D. Smith, thanks! + +"cssvalues" is used as a property validator. +it is an importable object that contains a dictionary of compiled regular +expressions. The keys of this dictionary are all of the valid CSS property +names. The values are compiled regular expressions that can be used to +validate the values for that property. (Actually, the values are references +to the 'match' method of a compiled regular expression, so that they are +simply called like functions.) + +""" +__all__ = ['CSS2Properties', 'cssvalues'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: cssproperties.py 1116 2008-03-05 13:52:23Z cthedot $' + +import re + +""" +Define some regular expression fragments that will be used as +macros within the CSS property value regular expressions. 
+""" +MACROS = { + 'ident': r'[-]?{nmstart}{nmchar}*', + 'name': r'{nmchar}+', + 'nmstart': r'[_a-z]|{nonascii}|{escape}', + 'nonascii': r'[^\0-\177]', + 'unicode': r'\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?', + 'escape': r'{unicode}|\\[ -~\200-\777]', +# 'escape': r'{unicode}|\\[ -~\200-\4177777]', + 'int': r'[-]?\d+', + 'nmchar': r'[\w-]|{nonascii}|{escape}', + 'num': r'[-]?\d+|[-]?\d*\.\d+', + 'number': r'{num}', + 'string': r'{string1}|{string2}', + 'string1': r'"(\\\"|[^\"])*"', + 'string2': r"'(\\\'|[^\'])*'", + 'nl': r'\n|\r\n|\r|\f', + 'w': r'\s*', + + 'integer': r'{int}', + 'length': r'0|{num}(em|ex|px|in|cm|mm|pt|pc)', + 'angle': r'0|{num}(deg|grad|rad)', + 'time': r'0|{num}m?s', + 'frequency': r'0|{num}k?Hz', + 'color': r'(maroon|red|orange|yellow|olive|purple|fuchsia|white|lime|green|navy|blue|aqua|teal|black|silver|gray|ActiveBorder|ActiveCaption|AppWorkspace|Background|ButtonFace|ButtonHighlight|ButtonShadow|ButtonText|CaptionText|GrayText|Highlight|HighlightText|InactiveBorder|InactiveCaption|InactiveCaptionText|InfoBackground|InfoText|Menu|MenuText|Scrollbar|ThreeDDarkShadow|ThreeDFace|ThreeDHighlight|ThreeDLightShadow|ThreeDShadow|Window|WindowFrame|WindowText)|#[0-9a-f]{3}|#[0-9a-f]{6}|rgb\({w}{int}{w},{w}{int}{w},{w}{int}{w}\)|rgb\({w}{num}%{w},{w}{num}%{w},{w}{num}%{w}\)', + 'uri': r'url\({w}({string}|(\\\)|[^\)])+){w}\)', + 'percentage': r'{num}%', + 'border-style': 'none|hidden|dotted|dashed|solid|double|groove|ridge|inset|outset', + 'border-color': '{color}', + 'border-width': '{length}|thin|medium|thick', + + 'background-color': r'{color}|transparent|inherit', + 'background-image': r'{uri}|none|inherit', + 'background-position': r'({percentage}|{length})(\s*({percentage}|{length}))?|((top|center|bottom)\s*(left|center|right))|((left|center|right)\s*(top|center|bottom))|inherit', + 'background-repeat': r'repeat|repeat-x|repeat-y|no-repeat|inherit', + 'background-attachment': r'scroll|fixed|inherit', + + 'shape': 
r'rect\(({w}({length}|auto}){w},){3}{w}({length}|auto){w}\)', + 'counter': r'counter\({w}{identifier}{w}(?:,{w}{list-style-type}{w})?\)', + 'identifier': r'{ident}', + 'family-name': r'{string}|{identifier}', + 'generic-family': r'serif|sans-serif|cursive|fantasy|monospace', + 'absolute-size': r'(x?x-)?(small|large)|medium', + 'relative-size': r'smaller|larger', + 'font-family': r'(({family-name}|{generic-family}){w},{w})*({family-name}|{generic-family})|inherit', + 'font-size': r'{absolute-size}|{relative-size}|{length}|{percentage}|inherit', + 'font-style': r'normal|italic|oblique|inherit', + 'font-variant': r'normal|small-caps|inherit', + 'font-weight': r'normal|bold|bolder|lighter|[1-9]00|inherit', + 'line-height': r'normal|{number}|{length}|{percentage}|inherit', + 'list-style-image': r'{uri}|none|inherit', + 'list-style-position': r'inside|outside|inherit', + 'list-style-type': r'disc|circle|square|decimal|decimal-leading-zero|lower-roman|upper-roman|lower-greek|lower-(latin|alpha)|upper-(latin|alpha)|armenian|georgian|none|inherit', + 'margin-width': r'{length}|{percentage}|auto', + 'outline-color': r'{color}|invert|inherit', + 'outline-style': r'{border-style}|inherit', + 'outline-width': r'{border-width}|inherit', + 'padding-width': r'{length}|{percentage}', + 'specific-voice': r'{identifier}', + 'generic-voice': r'male|female|child', + 'content': r'{string}|{uri}|{counter}|attr\({w}{identifier}{w}\)|open-quote|close-quote|no-open-quote|no-close-quote', + 'border-attrs': r'{border-width}|{border-style}|{border-color}', + 'background-attrs': r'{background-color}|{background-image}|{background-repeat}|{background-attachment}|{background-position}', + 'list-attrs': r'{list-style-type}|{list-style-position}|{list-style-image}', + 'font-attrs': r'{font-style}|{font-variant}|{font-weight}', + 'outline-attrs': r'{outline-color}|{outline-style}|{outline-width}', + 'text-attrs': r'underline|overline|line-through|blink', +} + +""" +Define the regular expressions for 
validation all CSS values +""" +cssvalues = { + 'azimuth': r'{angle}|(behind\s+)?(left-side|far-left|left|center-left|center|center-right|right|far-right|right-side)(\s+behind)?|behind|leftwards|rightwards|inherit', + 'background-attachment': r'{background-attachment}', + 'background-color': r'{background-color}', + 'background-image': r'{background-image}', + 'background-position': r'{background-position}', + 'background-repeat': r'{background-repeat}', + # Each piece should only be allowed one time + 'background': r'{background-attrs}(\s+{background-attrs})*|inherit', + 'border-collapse': r'collapse|separate|inherit', + 'border-color': r'({border-color}|transparent)(\s+({border-color}|transparent)){0,3}|inherit', + 'border-spacing': r'{length}(\s+{length})?|inherit', + 'border-style': r'{border-style}(\s+{border-style}){0,3}|inherit', + 'border-top': r'{border-attrs}(\s+{border-attrs})*|inherit', + 'border-right': r'{border-attrs}(\s+{border-attrs})*|inherit', + 'border-bottom': r'{border-attrs}(\s+{border-attrs})*|inherit', + 'border-left': r'{border-attrs}(\s+{border-attrs})*|inherit', + 'border-top-color': r'{border-color}|transparent|inherit', + 'border-right-color': r'{border-color}|transparent|inherit', + 'border-bottom-color': r'{border-color}|transparent|inherit', + 'border-left-color': r'{border-color}|transparent|inherit', + 'border-top-style': r'{border-style}|inherit', + 'border-right-style': r'{border-style}|inherit', + 'border-bottom-style': r'{border-style}|inherit', + 'border-left-style': r'{border-style}|inherit', + 'border-top-width': r'{border-width}|inherit', + 'border-right-width': r'{border-width}|inherit', + 'border-bottom-width': r'{border-width}|inherit', + 'border-right-width': r'{border-width}|inherit', + 'border-width': r'{border-width}(\s+{border-width}){0,3}|inherit', + 'border': r'{border-attrs}(\s+{border-attrs})*|inherit', + 'bottom': r'{length}|{percentage}|auto|inherit', + 'caption-side': r'top|bottom|inherit', + 'clear': 
r'none|left|right|both|inherit', + 'clip': r'{shape}|auto|inherit', + 'color': r'{color}|inherit', + 'content': r'normal|{content}(\s+{content})*|inherit', + 'counter-increment': r'({identifier}(\s+{integer})?)(\s+({identifier}(\s+{integer})))*|none|inherit', + 'counter-reset': r'({identifier}(\s+{integer})?)(\s+({identifier}(\s+{integer})))*|none|inherit', + 'cue-after': r'{uri}|none|inherit', + 'cue-before': r'{uri}|none|inherit', + 'cue': r'({uri}|none|inherit){1,2}|inherit', + 'cursor': r'((({uri}{w},{w})*)?(auto|crosshair|default|pointer|move|(e|ne|nw|n|se|sw|s|w)-resize|text|wait|help|progress))|inherit', + 'direction': r'ltr|rtl|inherit', + 'display': r'inline|block|list-item|run-in|inline-block|table|inline-table|table-row-group|table-header-group|table-footer-group|table-row|table-column-group|table-column|table-cell|table-caption|none|inherit', + 'elevation': r'{angle}|below|level|above|higher|lower|inherit', + 'empty-cells': r'show|hide|inherit', + 'float': r'left|right|none|inherit', + 'font-family': r'{font-family}', + 'font-size': r'{font-size}', + 'font-style': r'{font-style}', + 'font-variant': r'{font-variant}', + 'font-weight': r'{font-weight}', + 'font': r'({font-attrs}\s+)*{font-size}({w}/{w}{line-height})?\s+{font-family}|caption|icon|menu|message-box|small-caption|status-bar|inherit', + 'height': r'{length}|{percentage}|auto|inherit', + 'left': r'{length}|{percentage}|auto|inherit', + 'letter-spacing': r'normal|{length}|inherit', + 'line-height': r'{line-height}', + 'list-style-image': r'{list-style-image}', + 'list-style-position': r'{list-style-position}', + 'list-style-type': r'{list-style-type}', + 'list-style': r'{list-attrs}(\s+{list-attrs})*|inherit', + 'margin-right': r'{margin-width}|inherit', + 'margin-left': r'{margin-width}|inherit', + 'margin-top': r'{margin-width}|inherit', + 'margin-bottom': r'{margin-width}|inherit', + 'margin': r'{margin-width}(\s+{margin-width}){0,3}|inherit', + 'max-height': 
r'{length}|{percentage}|none|inherit', + 'max-width': r'{length}|{percentage}|none|inherit', + 'min-height': r'{length}|{percentage}|none|inherit', + 'min-width': r'{length}|{percentage}|none|inherit', + 'orphans': r'{integer}|inherit', + 'outline-color': r'{outline-color}', + 'outline-style': r'{outline-style}', + 'outline-width': r'{outline-width}', + 'outline': r'{outline-attrs}(\s+{outline-attrs})*|inherit', + 'overflow': r'visible|hidden|scroll|auto|inherit', + 'padding-top': r'{padding-width}|inherit', + 'padding-right': r'{padding-width}|inherit', + 'padding-bottom': r'{padding-width}|inherit', + 'padding-left': r'{padding-width}|inherit', + 'padding': r'{padding-width}(\s+{padding-width}){0,3}|inherit', + 'page-break-after': r'auto|always|avoid|left|right|inherit', + 'page-break-before': r'auto|always|avoid|left|right|inherit', + 'page-break-inside': r'avoid|auto|inherit', + 'pause-after': r'{time}|{percentage}|inherit', + 'pause-before': r'{time}|{percentage}|inherit', + 'pause': r'({time}|{percentage}){1,2}|inherit', + 'pitch-range': r'{number}|inherit', + 'pitch': r'{frequency}|x-low|low|medium|high|x-high|inherit', + 'play-during': r'{uri}(\s+(mix|repeat))*|auto|none|inherit', + 'position': r'static|relative|absolute|fixed|inherit', + 'quotes': r'({string}\s+{string})(\s+{string}\s+{string})*|none|inherit', + 'richness': r'{number}|inherit', + 'right': r'{length}|{percentage}|auto|inherit', + 'speak-header': r'once|always|inherit', + 'speak-numeral': r'digits|continuous|inherit', + 'speak-punctuation': r'code|none|inherit', + 'speak': r'normal|none|spell-out|inherit', + 'speech-rate': r'{number}|x-slow|slow|medium|fast|x-fast|faster|slower|inherit', + 'stress': r'{number}|inherit', + 'table-layout': r'auto|fixed|inherit', + 'text-align': r'left|right|center|justify|inherit', + 'text-decoration': r'none|{text-attrs}(\s+{text-attrs})*|inherit', + 'text-indent': r'{length}|{percentage}|inherit', + 'text-transform': 
r'capitalize|uppercase|lowercase|none|inherit', + 'top': r'{length}|{percentage}|auto|inherit', + 'unicode-bidi': r'normal|embed|bidi-override|inherit', + 'vertical-align': r'baseline|sub|super|top|text-top|middle|bottom|text-bottom|{percentage}|{length}|inherit', + 'visibility': r'visible|hidden|collapse|inherit', + 'voice-family': r'({specific-voice}|{generic-voice}{w},{w})*({specific-voice}|{generic-voice})|inherit', + 'volume': r'{number}|{percentage}|silent|x-soft|soft|medium|loud|x-loud|inherit', + 'white-space': r'normal|pre|nowrap|pre-wrap|pre-line|inherit', + 'widows': r'{integer}|inherit', + 'width': r'{length}|{percentage}|auto|inherit', + 'word-spacing': r'normal|{length}|inherit', + 'z-index': r'auto|{integer}|inherit', +} + +def _expand_macros(tokdict): + """ Expand macros in token dictionary """ + def macro_value(m): + return '(?:%s)' % MACROS[m.groupdict()['macro']] + for key, value in tokdict.items(): + while re.search(r'{[a-z][a-z0-9-]*}', value): + value = re.sub(r'{(?P[a-z][a-z0-9-]*)}', + macro_value, value) + tokdict[key] = value + return tokdict + +def _compile_regexes(tokdict): + """ Compile all regular expressions into callable objects """ + for key, value in tokdict.items(): + tokdict[key] = re.compile('^(?:%s)$' % value, re.I).match + return tokdict + +_compile_regexes(_expand_macros(cssvalues)) + + +# functions to convert between CSS and DOM name + +_reCSStoDOMname = re.compile('-[a-z]', re.I) +def _toDOMname(CSSname): + """ + returns DOMname for given CSSname e.g. for CSSname 'font-style' returns + 'fontStyle' + """ + def _doCSStoDOMname2(m): return m.group(0)[1].capitalize() + return _reCSStoDOMname.sub(_doCSStoDOMname2, CSSname) + +_reDOMtoCSSname = re.compile('([A-Z])[a-z]+') +def _toCSSname(DOMname): + """ + returns CSSname for given DOMname e.g. 
for DOMname 'fontStyle' returns + 'font-style' + """ + def _doDOMtoCSSname2(m): return '-' + m.group(0).lower() + return _reDOMtoCSSname.sub(_doDOMtoCSSname2, DOMname) + + +class CSS2Properties(object): + """ + The CSS2Properties interface represents a convenience mechanism + for retrieving and setting properties within a CSSStyleDeclaration. + The attributes of this interface correspond to all the properties + specified in CSS2. Getting an attribute of this interface is + equivalent to calling the getPropertyValue method of the + CSSStyleDeclaration interface. Setting an attribute of this + interface is equivalent to calling the setProperty method of the + CSSStyleDeclaration interface. + + cssutils actually also allows usage of ``del`` to remove a CSS property + from a CSSStyleDeclaration. + + This is an abstract class, the following functions need to be present + in inheriting class: + + - ``_getP`` + - ``_setP`` + - ``_delP`` + """ + # actual properties are set after the class definition! + def _getP(self, CSSname): pass + def _setP(self, CSSname, value): pass + def _delP(self, CSSname): pass + +# add list of DOMname properties to CSS2Properties +# used for CSSStyleDeclaration to check if allowed properties +# but somehow doubled, any better way? 
+CSS2Properties._properties = [_toDOMname(p) for p in cssvalues.keys()] + +# add CSS2Properties to CSSStyleDeclaration: +def __named_property_def(DOMname): + """ + closure to keep name known in each properties accessor function + DOMname is converted to CSSname here, so actual calls use CSSname + """ + CSSname = _toCSSname(DOMname) + def _get(self): return self._getP(CSSname) + def _set(self, value): self._setP(CSSname, value) + def _del(self): self._delP(CSSname) + return _get, _set, _del + +# add all CSS2Properties to CSSStyleDeclaration +for DOMname in CSS2Properties._properties: + setattr(CSS2Properties, DOMname, + property(*__named_property_def(DOMname))) diff --git a/src/calibre/utils/cssutils/css/cssrule.py b/src/calibre/utils/cssutils/css/cssrule.py new file mode 100644 index 0000000000..e518d40425 --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssrule.py @@ -0,0 +1,134 @@ +"""CSSRule implements DOM Level 2 CSS CSSRule.""" +__all__ = ['CSSRule'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: cssrule.py 1177 2008-03-20 17:47:23Z cthedot $' + +import xml.dom +import cssutils + +class CSSRule(cssutils.util.Base2): + """ + Abstract base interface for any type of CSS statement. This includes + both rule sets and at-rules. An implementation is expected to preserve + all rules specified in a CSS style sheet, even if the rule is not + recognized by the parser. Unrecognized rules are represented using the + CSSUnknownRule interface. + + Properties + ========== + cssText: of type DOMString + The parsable textual representation of the rule. This reflects the + current state of the rule and not its initial value. + parentRule: of type CSSRule, readonly + If this rule is contained inside another rule (e.g. a style rule + inside an @media block), this is the containing rule. If this rule + is not nested inside any other rules, this returns None. + parentStyleSheet: of type CSSStyleSheet, readonly + The style sheet that contains this rule. 
+ type: of type unsigned short, readonly + The type of the rule, as defined above. The expectation is that + binding-specific casting methods can be used to cast down from an + instance of the CSSRule interface to the specific derived interface + implied by the type. + + cssutils only + ------------- + seq (READONLY): + contains sequence of parts of the rule including comments but + excluding @KEYWORD and braces + typeString: string + A string name of the type of this rule, e.g. 'STYLE_RULE'. Mainly + useful for debugging + wellformed: + if a rule is valid + """ + + """ + CSSRule type constants. + An integer indicating which type of rule this is. + """ + COMMENT = -1 # cssutils only + UNKNOWN_RULE = 0 #u + STYLE_RULE = 1 #s + CHARSET_RULE = 2 #c + IMPORT_RULE = 3 #i + MEDIA_RULE = 4 #m + FONT_FACE_RULE = 5 #f + PAGE_RULE = 6 #p + NAMESPACE_RULE = 7 # CSSOM + + _typestrings = ['UNKNOWN_RULE', 'STYLE_RULE', 'CHARSET_RULE', 'IMPORT_RULE', + 'MEDIA_RULE', 'FONT_FACE_RULE', 'PAGE_RULE', 'NAMESPACE_RULE', + 'COMMENT'] + + type = UNKNOWN_RULE + """ + The type of this rule, as defined by a CSSRule type constant. + Overwritten in derived classes. + + The expectation is that binding-specific casting methods can be used to + cast down from an instance of the CSSRule interface to the specific + derived interface implied by the type. + (Casting not for this Python implementation I guess...) + """ + + def __init__(self, parentRule=None, parentStyleSheet=None, readonly=False): + """ + set common attributes for all rules + """ + super(CSSRule, self).__init__() + self._parentRule = parentRule + self._parentStyleSheet = parentStyleSheet + self._setSeq(self._tempSeq()) + # must be set after initialization of #inheriting rule is done + self._readonly = False + + def _setCssText(self, cssText): + """ + DOMException on setting + + - SYNTAX_ERR: + Raised if the specified CSS string value has a syntax error and + is unparsable. 
+ - INVALID_MODIFICATION_ERR: + Raised if the specified CSS string value represents a different + type of rule than the current one. + - HIERARCHY_REQUEST_ERR: + Raised if the rule cannot be inserted at this point in the + style sheet. + - NO_MODIFICATION_ALLOWED_ERR: (self) + Raised if the rule is readonly. + """ + self._checkReadonly() + + cssText = property(lambda self: u'', _setCssText, + doc="""(DOM) The parsable textual representation of the rule. This + reflects the current state of the rule and not its initial value. + The initial value is saved, but this may be removed in a future + version! + MUST BE OVERWRITTEN IN SUBCLASS TO WORK!""") + + def _setAtkeyword(self, akw): + """checks if new keyword is normalized same as old""" + if not self.atkeyword or (self._normalize(akw) == + self._normalize(self.atkeyword)): + self._atkeyword = akw + else: + self._log.error(u'%s: Invalid atkeyword for this rule: %r' % + (self._normalize(self.atkeyword), akw), + error=xml.dom.InvalidModificationErr) + + atkeyword = property(lambda self: self._atkeyword, _setAtkeyword, + doc=u"@keyword for @rules") + + parentRule = property(lambda self: self._parentRule, + doc=u"READONLY") + + parentStyleSheet = property(lambda self: self._parentStyleSheet, + doc=u"READONLY") + + wellformed = property(lambda self: False, + doc=u"READONLY") + + typeString = property(lambda self: CSSRule._typestrings[self.type], + doc="Name of this rules type.") diff --git a/src/calibre/utils/cssutils/css/cssrulelist.py b/src/calibre/utils/cssutils/css/cssrulelist.py new file mode 100644 index 0000000000..96262dc66e --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssrulelist.py @@ -0,0 +1,60 @@ +""" +CSSRuleList implements DOM Level 2 CSS CSSRuleList. 
+ +Partly also + * http://dev.w3.org/csswg/cssom/#the-cssrulelist +""" +__all__ = ['CSSRuleList'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: cssrulelist.py 1116 2008-03-05 13:52:23Z cthedot $' + +class CSSRuleList(list): + """ + The CSSRuleList object represents an (ordered) list of statements. + + The items in the CSSRuleList are accessible via an integral index, + starting from 0. + + Subclasses a standard Python list so theoretically all standard list + methods are available. Setting methods like ``__init__``, ``append``, + ``extend`` or ``__setslice__`` are added later on instances of this + class if so desired. + E.g. CSSStyleSheet adds ``append`` which is not available in a simple + instance of this class! + + Properties + ========== + length: of type unsigned long, readonly + The number of CSSRules in the list. The range of valid child rule + indices is 0 to length-1 inclusive. + """ + def __init__(self, *ignored): + "nothing is set as this must also be defined later" + pass + + def __notimplemented(self, *ignored): + "no direct setting possible" + raise NotImplementedError( + 'Must be implemented by class using an instance of this class.') + + append = extend = __setitem__ = __setslice__ = __notimplemented + + def item(self, index): + """ + (DOM) + Used to retrieve a CSS rule by ordinal index. The order in this + collection represents the order of the rules in the CSS style + sheet. If index is greater than or equal to the number of rules in + the list, this returns None. + + Returns CSSRule, the style rule at the index position in the + CSSRuleList, or None if that is not a valid index. 
+ """ + try: + return self[index] + except IndexError: + return None + + length = property(lambda self: len(self), + doc="(DOM) The number of CSSRules in the list.") + diff --git a/src/calibre/utils/cssutils/css/cssstyledeclaration.py b/src/calibre/utils/cssutils/css/cssstyledeclaration.py new file mode 100644 index 0000000000..e9d90a9e99 --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssstyledeclaration.py @@ -0,0 +1,651 @@ +"""CSSStyleDeclaration implements DOM Level 2 CSS CSSStyleDeclaration and +extends CSS2Properties + +see + http://www.w3.org/TR/1998/REC-CSS2-19980512/syndata.html#parsing-errors + +Unknown properties +------------------ +User agents must ignore a declaration with an unknown property. +For example, if the style sheet is:: + + H1 { color: red; rotation: 70minutes } + +the user agent will treat this as if the style sheet had been:: + + H1 { color: red } + +Cssutils gives a message about any unknown properties but +keeps any property (if syntactically correct). + +Illegal values +-------------- +User agents must ignore a declaration with an illegal value. For example:: + + IMG { float: left } /* correct CSS2 */ + IMG { float: left here } /* "here" is not a value of 'float' */ + IMG { background: "red" } /* keywords cannot be quoted in CSS2 */ + IMG { border-width: 3 } /* a unit must be specified for length values */ + +A CSS2 parser would honor the first rule and ignore the rest, as if the +style sheet had been:: + + IMG { float: left } + IMG { } + IMG { } + IMG { } + +Cssutils again will issue a message (WARNING in this case) about invalid +CSS2 property values. + +TODO: + This interface is also used to provide a read-only access to the + computed values of an element. See also the ViewCSS interface. + + - return computed values and not literal values + - simplify unit pairs/triples/quadruples + 2px 2px 2px 2px -> 2px for border/padding... 
class CSSStyleDeclaration(CSS2Properties, cssutils.util.Base2):
    """
    The CSSStyleDeclaration class represents a single CSS declaration
    block. This class may be used to determine the style properties
    currently set in a block or to set style properties explicitly
    within the block.

    While an implementation may not recognize all CSS properties within
    a CSS declaration block, it is expected to provide access to all
    specified properties in the style sheet through the
    CSSStyleDeclaration interface.
    Furthermore, implementations that support a specific level of CSS
    should correctly handle CSS shorthand properties for that level. For
    a further discussion of shorthand properties, see the CSS2Properties
    interface.

    Additionally the CSS2Properties interface is implemented.

    Properties
    ==========
    cssText
        The parsable textual representation of the declaration block
        (excluding the surrounding curly braces). Setting this attribute
        will result in the parsing of the new value and resetting of the
        properties in the declaration block. It also allows the insertion
        of additional properties and their values into the block.
    length: of type unsigned long, readonly
        The number of properties that have been explicitly set in this
        declaration block. The range of valid indices is 0 to length-1
        inclusive.
    parentRule: of type CSSRule, readonly
        The CSS rule that contains this declaration block or None if this
        CSSStyleDeclaration is not attached to a CSSRule.
    seq: a list (cssutils)
        All parts of this style declaration including CSSComments

    $css2propertyname
        All properties defined in the CSS2Properties class are available
        as direct properties of CSSStyleDeclaration with their respective
        DOM name, so e.g. ``fontStyle`` for property 'font-style'.

        These may be used as::

            >>> style = CSSStyleDeclaration(cssText='color: red')
            >>> style.color = 'green'
            >>> print style.color
            green
            >>> del style.color
            >>> print style.color # print empty string

    Format
    ======
    [Property: Value Priority?;]* [Property: Value Priority?]?
    """

    # Attribute names that may be assigned directly on an instance (see
    # ``__setattr__``). Kept at class level so the collection is not
    # rebuilt on every single attribute assignment.
    _KNOWN_ATTRS = ('_tokenizer', '_log', '_ttypes',
                    '_seq', 'seq', 'parentRule', '_parentRule', 'cssText',
                    'valid', 'wellformed',
                    '_readonly')

    def __init__(self, cssText=u'', parentRule=None, readonly=False):
        """
        cssText
            Shortcut, sets CSSStyleDeclaration.cssText
        parentRule
            The CSS rule that contains this declaration block or
            None if this CSSStyleDeclaration is not attached to a CSSRule.
        readonly
            defaults to False
        """
        super(CSSStyleDeclaration, self).__init__()
        self._parentRule = parentRule
        # cssText is assigned before ``_readonly`` so that the initial
        # content can be set even for a declaration created readonly
        self.cssText = cssText
        self._readonly = readonly

    def __contains__(self, nameOrProperty):
        """
        Check if a property (or a property with the given name) is in
        this declaration.

        nameOrProperty
            a string or Property, the normalized name and not the
            literalname is used for the comparison
        """
        if isinstance(nameOrProperty, Property):
            name = nameOrProperty.name
        else:
            name = self._normalize(nameOrProperty)
        return name in self.__nnames()

    def __iter__(self):
        """
        Iterator over the set Property objects, one for each different
        normalized name.
        """
        def properties():
            for name in self.__nnames():
                yield self.getProperty(name)
        return properties()

    def __setattr__(self, n, v):
        """
        Prevent setting of unknown properties on CSSStyleDeclaration
        which would not work anyway. For these
        ``CSSStyleDeclaration.setProperty`` MUST be called explicitly!

        Raises AttributeError if ``n`` is neither one of the internal
        attribute names nor listed in ``CSS2Properties._properties``.
        """
        if n in self._KNOWN_ATTRS or n in CSS2Properties._properties:
            super(CSSStyleDeclaration, self).__setattr__(n, v)
        else:
            raise AttributeError(
                'Unknown CSS Property, ``CSSStyleDeclaration.setProperty("%s", ...)`` MUST be used.'
                % n)

    def __nnames(self):
        """
        Return an iterator over all different (normalized) property names
        in the order they were set. If a name is set more than once its
        last occurrence determines the position (double reverse!).
        """
        names = []
        seen = set()  # O(1) duplicate check, ``names`` keeps the order
        for item in reversed(self.seq):
            val = item.value
            if isinstance(val, Property) and val.name not in seen:
                seen.add(val.name)
                names.append(val.name)
        return reversed(names)

    def __getitem__(self, CSSName):
        """Retrieve the value of property ``CSSName`` from this declaration.

        ``CSSName`` will be always normalized.
        """
        return self.getPropertyValue(CSSName)

    def __setitem__(self, CSSName, value):
        """Set value of property ``CSSName``. ``value`` may also be a tuple of
        (value, priority), e.g. style['color'] = ('red', 'important')

        ``CSSName`` will be always normalized.
        """
        priority = None
        if isinstance(value, tuple):
            value, priority = value

        return self.setProperty(CSSName, value, priority)

    def __delitem__(self, CSSName):
        """Delete property ``CSSName`` from this declaration.
        Delegates to ``removeProperty``, so deleting a property which is
        not set is not an error.

        ``CSSName`` will be always normalized.
        """
        return self.removeProperty(CSSName)

    # overwritten accessor functions for CSS2Properties' properties
    def _getP(self, CSSName):
        """
        (DOM CSS2Properties)
        Overwritten here and effectively the same as
        ``self.getPropertyValue(CSSname)``.

        Parameter is in CSSname format ('font-style'), see CSS2Properties.

        Example::

            >>> style = CSSStyleDeclaration(cssText='font-style:italic;')
            >>> print style.fontStyle
            italic
        """
        return self.getPropertyValue(CSSName)

    def _setP(self, CSSName, value):
        """
        (DOM CSS2Properties)
        Overwritten here and effectively the same as
        ``self.setProperty(CSSname, value)``.

        Only known CSS2Properties may be set this way, otherwise an
        AttributeError is raised (see ``__setattr__``).
        For unknown properties ``setProperty(CSSname, value)`` has to be
        called explicitly.
        Also setting the priority of properties needs to be done with a
        call like ``setProperty(CSSname, value, priority)``.

        Example::

            >>> style = CSSStyleDeclaration()
            >>> style.fontStyle = 'italic'
            >>> # or
            >>> style.setProperty('font-style', 'italic', '!important')
        """
        self.setProperty(CSSName, value)
        # TODO: Shorthand ones

    def _delP(self, CSSName):
        """
        (cssutils only)
        Overwritten here and effectively the same as
        ``self.removeProperty(CSSname)``.

        Example::

            >>> style = CSSStyleDeclaration(cssText='font-style:italic;')
            >>> del style.fontStyle
            >>> print style.fontStyle # prints u''

        """
        self.removeProperty(CSSName)

    def _getCssText(self):
        """
        returns serialized property cssText
        """
        return cssutils.ser.do_css_CSSStyleDeclaration(self)

    def _setCssText(self, cssText):
        """
        Setting this attribute will result in the parsing of the new value
        and resetting of all the properties in the declaration block
        including the removal or addition of properties.

        DOMException on setting

        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this declaration is readonly or a property is readonly.
        - SYNTAX_ERR: (self)
          Raised if the specified CSS string value has a syntax error and
          is unparsable.
        """
        self._checkReadonly()
        tokenizer = self._tokenize2(cssText)

        def ident(expected, seq, token, tokenizer=None):
            # a property: collect its tokens up to and including ";"
            tokens = self._tokensupto2(tokenizer, starttoken=token,
                                       semicolon=True)
            if self._tokenvalue(tokens[-1]) == u';':
                tokens.pop()
            prop = Property()
            prop.cssText = tokens
            if prop.wellformed:
                seq.append(prop, 'Property')
            else:
                self._log.error(u'CSSStyleDeclaration: Syntax Error in Property: %s'
                                % self._valuestr(tokens))
            # does not matter in this case
            return expected

        def unexpected(expected, seq, token, tokenizer=None):
            # error, find next ; or } to omit upto next property
            ignored = self._tokenvalue(token) + self._valuestr(
                self._tokensupto2(tokenizer, propertyvalueendonly=True))
            self._log.error(u'CSSStyleDeclaration: Unexpected token, ignoring upto %r.' %
                            ignored, token)
            # does not matter in this case
            return expected

        # [Property: Value;]* Property: Value?
        newseq = self._tempSeq()
        self._parse(expected=None,
                    seq=newseq, tokenizer=tokenizer,
                    productions={'IDENT': ident},
                    default=unexpected)

        # the result of _parse is deliberately not checked: anything
        # invalid has not been added to newseq in the first place
        self._setSeq(newseq)

    cssText = property(_getCssText, _setCssText,
        doc="(DOM) A parsable textual representation of the declaration "
            "block excluding the surrounding curly braces.")

    def getCssText(self, separator=None):
        """
        returns serialized property cssText, each property separated by
        given ``separator`` which may e.g. be u'' to be able to use
        cssText directly in an HTML style attribute. ";" is always part of
        each property (except the last one) and can **not** be set with
        separator!
        """
        return cssutils.ser.do_css_CSSStyleDeclaration(self, separator)

    def _getParentRule(self):
        return self._parentRule

    def _setParentRule(self, parentRule):
        self._parentRule = parentRule

    parentRule = property(_getParentRule, _setParentRule,
        doc="(DOM) The CSS rule that contains this declaration block or "
            "None if this CSSStyleDeclaration is not attached to a CSSRule.")

    def getProperties(self, name=None, all=False):
        """
        Returns a list of Property objects set in this declaration.

        name
            optional name of properties which are requested (a filter).
            Only properties with this **always normalized** name are returned.
        all=False
            if False (DEFAULT) only the effective properties (the ones set
            last) are returned. If name is given a list with only one property
            is returned.

            if True all properties including properties set multiple times with
            different values or priorities for different UAs are returned.
            The order of the properties is fully kept as in the original
            stylesheet.
        """
        if name and not all:
            # single prop but list
            p = self.getProperty(name)
            if p:
                return [p]
            else:
                return []
        elif not all:
            # effective Properties in name order
            return [self.getProperty(nname) for nname in self.__nnames()]
        else:
            # all properties or all with this name
            nname = self._normalize(name)
            properties = []
            for item in self.seq:
                val = item.value
                if isinstance(val, Property) and (
                        not nname or val.name == nname):
                    properties.append(val)
            return properties

    def getProperty(self, name, normalize=True):
        """
        Returns the effective Property object.

        name
            of the CSS property, always lowercase (even if not normalized)
        normalize
            if True (DEFAULT) name will be normalized (lowercase, no simple
            escapes) so "color", "COLOR" or "C\\olor" will all be equivalent

            If False may return **NOT** the effective value but the effective
            for the unnormalized name.

        Returns None if no matching property is set.
        """
        nname = self._normalize(name)
        found = None
        # search from the end: a later declaration wins, but a property
        # with a priority beats any property without one
        for item in reversed(self.seq):
            val = item.value
            if isinstance(val, Property):
                if (normalize and nname == val.name) or name == val.literalname:
                    if val.priority:
                        return val
                    elif not found:
                        found = val
        return found

    def getPropertyCSSValue(self, name, normalize=True):
        """
        Returns CSSValue, the value of the effective property if it has been
        explicitly set for this declaration block.

        name
            of the CSS property, always lowercase (even if not normalized)
        normalize
            if True (DEFAULT) name will be normalized (lowercase, no simple
            escapes) so "color", "COLOR" or "C\\olor" will all be equivalent

            If False may return **NOT** the effective value but the effective
            for the unnormalized name.

        (DOM)
        Used to retrieve the object representation of the value of a CSS
        property if it has been explicitly set within this declaration
        block. Returns None if the property has not been set.

        (This method returns None if the property is a shorthand
        property. Shorthand property values can only be accessed and
        modified as strings, using the getPropertyValue and setProperty
        methods.)

        **cssutils currently always returns a CSSValue if the property is
        set.**

        for more on shorthand properties see
            http://www.dustindiaz.com/css-shorthand/
        """
        nname = self._normalize(name)
        if nname in self._SHORTHANDPROPERTIES:
            self._log.info(
                u'CSSValue for shorthand property "%s" should be None, this may be implemented later.' %
                nname, neverraise=True)

        p = self.getProperty(name, normalize)
        if p:
            return p.cssValue
        else:
            return None

    def getPropertyValue(self, name, normalize=True):
        """
        Returns the value of the effective property if it has been explicitly
        set for this declaration block. Returns the empty string if the
        property has not been set.

        name
            of the CSS property, always lowercase (even if not normalized)
        normalize
            if True (DEFAULT) name will be normalized (lowercase, no simple
            escapes) so "color", "COLOR" or "C\\olor" will all be equivalent

            If False may return **NOT** the effective value but the effective
            for the unnormalized name.
        """
        p = self.getProperty(name, normalize)
        if p:
            return p.value
        else:
            return u''

    def getPropertyPriority(self, name, normalize=True):
        """
        Returns the priority of the effective CSS property (e.g. the
        "important" qualifier) if the property has been explicitly set in
        this declaration block. The empty string if none exists.

        name
            of the CSS property, always lowercase (even if not normalized)
        normalize
            if True (DEFAULT) name will be normalized (lowercase, no simple
            escapes) so "color", "COLOR" or "C\\olor" will all be equivalent

            If False may return **NOT** the effective value but the effective
            for the unnormalized name.
        """
        p = self.getProperty(name, normalize)
        if p:
            return p.priority
        else:
            return u''

    def removeProperty(self, name, normalize=True):
        """
        (DOM)
        Used to remove a CSS property if it has been explicitly set within
        this declaration block.

        Returns the value of the property if it has been explicitly set for
        this declaration block. Returns the empty string if the property
        has not been set or the property name does not correspond to a
        known CSS property

        name
            of the CSS property
        normalize
            if True (DEFAULT) name will be normalized (lowercase, no simple
            escapes) so "color", "COLOR" or "C\\olor" will all be equivalent.
            The effective Property value is returned and *all* Properties
            with ``Property.name == name`` are removed.

            If False may return **NOT** the effective value but the effective
            for the unnormalized ``name`` only. Also only the Properties with
            the literal name ``name`` are removed.

        raises DOMException

        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this declaration is readonly or the property is
          readonly.
        """
        self._checkReadonly()
        r = self.getPropertyValue(name, normalize=normalize)
        # rebuild the sequence without the matching properties, all other
        # items (e.g. comments) are kept in place
        newseq = self._tempSeq()
        if normalize:
            # remove all properties with name == nname
            nname = self._normalize(name)
            for item in self.seq:
                if not (isinstance(item.value, Property)
                        and item.value.name == nname):
                    newseq.appendItem(item)
        else:
            # remove all properties with literalname == name
            for item in self.seq:
                if not (isinstance(item.value, Property)
                        and item.value.literalname == name):
                    newseq.appendItem(item)
        self._setSeq(newseq)
        return r

    def setProperty(self, name, value=None, priority=u'', normalize=True):
        """
        (DOM)
        Used to set a property value and priority within this declaration
        block.

        name
            of the CSS property to set (in W3C DOM the parameter is called
            "propertyName"), always lowercase (even if not normalized)

            If a property with this name is present it will be reset

            cssutils also allows name to be a Property object, all other
            parameters are ignored in this case

        value
            the new value of the property, omit if name is already a Property
        priority
            the optional priority of the property (e.g. "important")
        normalize
            if True (DEFAULT) name will be normalized (lowercase, no simple
            escapes) so "color", "COLOR" or "C\\olor" will all be equivalent

        DOMException on setting

        - SYNTAX_ERR: (self)
          Raised if the specified value has a syntax error and is
          unparsable.
        - NO_MODIFICATION_ALLOWED_ERR: (self)
          Raised if this declaration is readonly or the property is
          readonly.
        """
        self._checkReadonly()

        if isinstance(name, Property):
            newp = name
            name = newp.literalname
        else:
            newp = Property(name, value, priority)

        if not newp.wellformed:
            self._log.warn(u'Invalid Property: %s: %s %s'
                           % (name, value, priority))
            return

        nname = self._normalize(name)
        candidates = self.getProperties(name, all=(not normalize))
        for prop in reversed(candidates):
            if (normalize and prop.name == nname) or prop.literalname == name:
                # reset the already present property in place
                prop.cssValue = newp.cssValue.cssText
                prop.priority = newp.priority
                return

        # not set before: add as new property. The sequence is only
        # writable while appending; restore the flag even if append fails.
        self.seq._readonly = False
        try:
            self.seq.append(newp, 'Property')
        finally:
            self.seq._readonly = True

    def item(self, index):
        """
        (DOM)
        Used to retrieve the properties that have been explicitly set in
        this declaration block. The order of the properties retrieved using
        this method does not have to be the order in which they were set.
        This method can be used to iterate over all properties in this
        declaration block.

        index
            of the property to retrieve, negative values behave like
            negative indexes on Python lists, so -1 is the last element

        returns the name of the property at this ordinal position. The
        empty string if no property exists at this position.

        ATTENTION:
        Only properties with a different name are counted. If two
        properties with the same name are present in this declaration
        only the effective one is included.

        ``item()`` and ``length`` work on the same set here.
        """
        names = list(self.__nnames())
        try:
            return names[index]
        except IndexError:
            return u''

    # __nnames() returns an iterator which does not support len(), so it
    # is materialized first
    length = property(lambda self: len(list(self.__nnames())),
        doc="(DOM) The number of distinct properties that have been "
            "explicitly set in this declaration block. The range of valid "
            "indices is 0 to length-1 inclusive. These are properties with "
            "a different ``name`` only. ``item()`` and ``length`` work on "
            "the same set here.")

    def __repr__(self):
        return "cssutils.css.%s(cssText=%r)" % (
            self.__class__.__name__, self.getCssText(separator=u' '))

    def __str__(self):
        # the format string needs one placeholder for each of the four
        # values, an empty format string raises a TypeError
        return "<cssutils.css.%s object length=%r (all: %r) at 0x%x>" % (
            self.__class__.__name__, self.length,
            len(self.getProperties(all=True)), id(self))
class CSSStyleRule(cssrule.CSSRule):
    """
    The CSSStyleRule object represents a ruleset specified (if any) in a CSS
    style sheet. It provides access to a declaration block as well as to the
    associated group of selectors.

    Properties
    ==========
    selectorList: of type SelectorList (cssutils only)
        A list of all Selector elements for the rule set.
    selectorText: of type DOMString
        The textual representation of the selector for the rule set. The
        implementation may have stripped out insignificant whitespace while
        parsing the selector.
    style: of type CSSStyleDeclaration, (DOM)
        The declaration-block of this rule set.
    type
        the type of this rule, constant cssutils.CSSRule.STYLE_RULE

    inherited properties:
        - cssText
        - parentRule
        - parentStyleSheet

    Format
    ======
    ruleset::

        : selector [ COMMA S* selector ]*
        LBRACE S* declaration [ ';' S* declaration ]* '}' S*
        ;
    """
    type = property(lambda self: cssrule.CSSRule.STYLE_RULE)

    def __init__(self, selectorText=None, style=None, parentRule=None,
                 parentStyleSheet=None, readonly=False):
        """
        :Parameters:
            selectorText
                string parsed into selectorList
            style
                string parsed into CSSStyleDeclaration for this CSSStyleRule
            readonly
                if True allows setting of properties in constructor only
        """
        super(CSSStyleRule, self).__init__(parentRule=parentRule,
                                           parentStyleSheet=parentStyleSheet)

        self._selectorList = SelectorList(parentRule=self)
        self._style = CSSStyleDeclaration(parentRule=self)
        if selectorText:
            self.selectorText = selectorText
        if style:
            self.style = style

        # set last so the assignments above are not rejected as readonly
        self._readonly = readonly

    def _getCssText(self):
        """
        returns serialized property cssText
        """
        return cssutils.ser.do_CSSStyleRule(self)

    def _setCssText(self, cssText):
        """
        :param cssText:
            a parseable string or a tuple of (cssText, dict-of-namespaces)
        :Exceptions:
            - `NAMESPACE_ERR`: (Selector)
              Raised if the specified selector uses an unknown namespace
              prefix.
            - `SYNTAX_ERR`: (self, StyleDeclaration, etc)
              Raised if the specified CSS string value has a syntax error and
              is unparsable.
            - `INVALID_MODIFICATION_ERR`: (self)
              Raised if the specified CSS string value represents a different
              type of rule than the current one.
            - `HIERARCHY_REQUEST_ERR`: (CSSStylesheet)
              Raised if the rule cannot be inserted at this point in the
              style sheet.
            - `NO_MODIFICATION_ALLOWED_ERR`: (CSSRule)
              Raised if the rule is readonly.
        """
        super(CSSStyleRule, self)._setCssText(cssText)

        # might be (cssText, namespaces)
        cssText, namespaces = self._splitNamespacesOff(cssText)
        try:
            # use parent style sheet ones if available
            namespaces = self.parentStyleSheet.namespaces
        except AttributeError:
            pass

        # split into "selector {" and "declarations }" token runs, anything
        # left over afterwards is trailing content
        tokenizer = self._tokenize2(cssText)
        selectortokens = self._tokensupto2(tokenizer, blockstartonly=True)
        styletokens = self._tokensupto2(tokenizer, blockendonly=True)
        trail = self._nexttoken(tokenizer)
        if trail:
            self._log.error(u'CSSStyleRule: Trailing content: %s' %
                            self._valuestr(cssText), token=trail)
        elif not selectortokens:
            self._log.error(u'CSSStyleRule: No selector found: %r' %
                            self._valuestr(cssText))
        elif self._tokenvalue(selectortokens[0]).startswith(u'@'):
            self._log.error(u'CSSStyleRule: No style rule: %r' %
                            self._valuestr(cssText),
                            error=xml.dom.InvalidModificationErr)
        else:
            wellformed = True

            # last token of the selector part must be the opening brace
            bracetoken = selectortokens.pop()
            if self._tokenvalue(bracetoken) != u'{':
                wellformed = False
                self._log.error(
                    u'CSSStyleRule: No start { of style declaration found: %r' %
                    self._valuestr(cssText), bracetoken)
            elif not selectortokens:
                wellformed = False
                self._log.error(u'CSSStyleRule: No selector found: %r.' %
                                self._valuestr(cssText), bracetoken)
            newselectorlist = SelectorList(selectorText=(selectortokens,
                                                         namespaces),
                                           parentRule=self)

            newstyle = CSSStyleDeclaration()
            if not styletokens:
                wellformed = False
                self._log.error(
                    u'CSSStyleRule: No style declaration or "}" found: %r' %
                    self._valuestr(cssText))
            else:
                braceorEOFtoken = styletokens.pop()
                val = self._tokenvalue(braceorEOFtoken)
                typ = self._type(braceorEOFtoken)
                if val != u'}' and typ != 'EOF':
                    wellformed = False
                    self._log.error(
                        u'CSSStyleRule: No "}" after style declaration found: %r' %
                        self._valuestr(cssText))
                else:
                    if 'EOF' == typ:
                        # add again as style needs it
                        styletokens.append(braceorEOFtoken)
                    newstyle.cssText = styletokens

            # only replace the current content if everything was parsed
            if wellformed:
                self._selectorList = newselectorlist
                self.style = newstyle

    cssText = property(_getCssText, _setCssText,
                       doc="(DOM) The parsable textual representation of the rule.")

    def __getNamespaces(self):
        "uses children namespaces if not attached to a sheet, else the sheet's ones"
        try:
            return self.parentStyleSheet.namespaces
        except AttributeError:
            return self.selectorList._namespaces

    _namespaces = property(__getNamespaces, doc=u"""if this Rule is
        attached to a CSSStyleSheet the namespaces of that sheet are mirrored
        here. While the Rule is not attached the namespaces of selectorList
        are used.""")

    def _setSelectorList(self, selectorList):
        """
        :param selectorList: selectorList, only content is used, not the actual
            object
        """
        self._checkReadonly()
        self.selectorText = selectorList.selectorText

    selectorList = property(lambda self: self._selectorList, _setSelectorList,
                            doc="The SelectorList of this rule.")

    def _setSelectorText(self, selectorText):
        """
        wrapper for cssutils SelectorList object

        :param selectorText: of type string, might also be a comma separated list
            of selectors
        :Exceptions:
            - `NAMESPACE_ERR`: (Selector)
              Raised if the specified selector uses an unknown namespace
              prefix.
            - `SYNTAX_ERR`: (SelectorList, Selector)
              Raised if the specified CSS string value has a syntax error
              and is unparsable.
            - `NO_MODIFICATION_ALLOWED_ERR`: (self)
              Raised if this rule is readonly.
        """
        self._checkReadonly()
        self._selectorList.selectorText = selectorText

    selectorText = property(lambda self: self._selectorList.selectorText,
                            _setSelectorText,
                            doc="""(DOM) The textual representation of the selector for the
            rule set.""")

    def _setStyle(self, style):
        """
        :param style: CSSStyleDeclaration or string, only the cssText of a
            declaration is used, not the actual object
        """
        self._checkReadonly()
        if isinstance(style, basestring):
            self._style.cssText = style
        else:
            # cssText would be serialized with optional preferences
            # so use _seq!
            self._style._seq = style._seq

    style = property(lambda self: self._style, _setStyle,
                     doc="(DOM) The declaration-block of this rule set.")

    wellformed = property(lambda self: self.selectorList.wellformed)

    def __repr__(self):
        if self._namespaces:
            st = (self.selectorText, self._namespaces)
        else:
            st = self.selectorText
        return "cssutils.css.%s(selectorText=%r, style=%r)" % (
            self.__class__.__name__, st, self.style.cssText)

    def __str__(self):
        # the format string needs one placeholder for each of the five
        # values, an empty format string raises a TypeError
        return "<cssutils.css.%s object selector=%r style=%r _namespaces=%r at 0x%x>" % (
            self.__class__.__name__, self.selectorText, self.style.cssText,
            self._namespaces, id(self))
+ A dict of {prefix: namespaceURI} mapping. + + Format + ====== + stylesheet + : [ CHARSET_SYM S* STRING S* ';' ]? + [S|CDO|CDC]* [ import [S|CDO|CDC]* ]* + [ namespace [S|CDO|CDC]* ]* # according to @namespace WD + [ [ ruleset | media | page ] [S|CDO|CDC]* ]* + """ + def __init__(self, href=None, media=None, title=u'', disabled=None, + ownerNode=None, parentStyleSheet=None, readonly=False, + ownerRule=None): + """ + init parameters are the same as for stylesheets.StyleSheet + """ + super(CSSStyleSheet, self).__init__( + 'text/css', href, media, title, disabled, + ownerNode, parentStyleSheet) + + self._ownerRule = ownerRule + self.cssRules = cssutils.css.CSSRuleList() + self.cssRules.append = self.insertRule + self.cssRules.extend = self.insertRule + self._namespaces = _Namespaces(parentStyleSheet=self, log=self._log) + self._readonly = readonly + + # used only during setting cssText by parse*() + self.__encodingOverride = None + self._fetcher = None + + def __iter__(self): + "generator which iterates over cssRules." + for rule in self.cssRules: + yield rule + + def _cleanNamespaces(self): + "removes all namespace rules with same namespaceURI but last one set" + rules = self.cssRules + namespaceitems = self.namespaces.items() + i = 0 + while i < len(rules): + rule = rules[i] + if rule.type == rule.NAMESPACE_RULE and \ + (rule.prefix, rule.namespaceURI) not in namespaceitems: + self.deleteRule(i) + else: + i += 1 + + def _getUsedURIs(self): + "returns set of URIs used in the sheet" + useduris = set() + for r1 in self: + if r1.STYLE_RULE == r1.type: + useduris.update(r1.selectorList._getUsedUris()) + elif r1.MEDIA_RULE == r1.type: + for r2 in r1: + if r2.type == r2.STYLE_RULE: + useduris.update(r2.selectorList._getUsedUris()) + return useduris + + def _getCssText(self): + return cssutils.ser.do_CSSStyleSheet(self) + + def _setCssText(self, cssText): + """ + (cssutils) + Parses ``cssText`` and overwrites the whole stylesheet. 
+ + :param cssText: + a parseable string or a tuple of (cssText, dict-of-namespaces) + :Exceptions: + - `NAMESPACE_ERR`: + If a namespace prefix is found which is not declared. + - `NO_MODIFICATION_ALLOWED_ERR`: (self) + Raised if the rule is readonly. + - `SYNTAX_ERR`: + Raised if the specified CSS string value has a syntax error and + is unparsable. + """ + self._checkReadonly() + + cssText, namespaces = self._splitNamespacesOff(cssText) + if not namespaces: + namespaces = _SimpleNamespaces(log=self._log) + + tokenizer = self._tokenize2(cssText) + newseq = [] #cssutils.css.CSSRuleList() + + # for closures: must be a mutable + new = {'encoding': None, # needed for setting encoding of @import rules + 'namespaces': namespaces} + def S(expected, seq, token, tokenizer=None): + # @charset must be at absolute beginning of style sheet + if expected == 0: + return 1 + else: + return expected + + def COMMENT(expected, seq, token, tokenizer=None): + "special: sets parent*" + comment = cssutils.css.CSSComment([token], + parentStyleSheet=self.parentStyleSheet) + seq.append(comment) + return expected + + def charsetrule(expected, seq, token, tokenizer): + rule = cssutils.css.CSSCharsetRule(parentStyleSheet=self) + rule.cssText = self._tokensupto2(tokenizer, token) + if expected > 0 or len(seq) > 0: + self._log.error( + u'CSSStylesheet: CSSCharsetRule only allowed at beginning of stylesheet.', + token, xml.dom.HierarchyRequestErr) + else: + if rule.wellformed: + seq.append(rule) + new['encoding'] = rule.encoding + return 1 + + def importrule(expected, seq, token, tokenizer): + if new['encoding']: + # set temporarily as used by _resolveImport + # save newEncoding which have been set by resolveImport + self.__newEncoding = new['encoding'] + + rule = cssutils.css.CSSImportRule(parentStyleSheet=self) + rule.cssText = self._tokensupto2(tokenizer, token) + if expected > 1: + self._log.error( + u'CSSStylesheet: CSSImportRule not allowed here.', + token, xml.dom.HierarchyRequestErr) + 
else: + if rule.wellformed: + #del rule._parentEncoding # remove as later it is read from this sheet! + seq.append(rule) + + try: + # remove as only used temporarily but may not be set at all + del self.__newEncoding + except AttributeError, e: + pass + + return 1 + + def namespacerule(expected, seq, token, tokenizer): + rule = cssutils.css.CSSNamespaceRule( + cssText=self._tokensupto2(tokenizer, token), + parentStyleSheet=self) + if expected > 2: + self._log.error( + u'CSSStylesheet: CSSNamespaceRule not allowed here.', + token, xml.dom.HierarchyRequestErr) + else: + if rule.wellformed: + seq.append(rule) + # temporary namespaces given to CSSStyleRule and @media + new['namespaces'][rule.prefix] = rule.namespaceURI + return 2 + + def fontfacerule(expected, seq, token, tokenizer): + rule = cssutils.css.CSSFontFaceRule(parentStyleSheet=self) + rule.cssText = self._tokensupto2(tokenizer, token) + if rule.wellformed: + seq.append(rule) + return 3 + + def mediarule(expected, seq, token, tokenizer): + rule = cssutils.css.CSSMediaRule() + rule.cssText = (self._tokensupto2(tokenizer, token), + new['namespaces']) + if rule.wellformed: + rule._parentStyleSheet=self + for r in rule: + r._parentStyleSheet=self + seq.append(rule) + return 3 + + def pagerule(expected, seq, token, tokenizer): + rule = cssutils.css.CSSPageRule(parentStyleSheet=self) + rule.cssText = self._tokensupto2(tokenizer, token) + if rule.wellformed: + seq.append(rule) + return 3 + + def unknownrule(expected, seq, token, tokenizer): + self._log.warn( + u'CSSStylesheet: Unknown @rule found.', + token, neverraise=True) + rule = cssutils.css.CSSUnknownRule(parentStyleSheet=self) + rule.cssText = self._tokensupto2(tokenizer, token) + if rule.wellformed: + seq.append(rule) + return expected + + def ruleset(expected, seq, token, tokenizer): + rule = cssutils.css.CSSStyleRule() + rule.cssText = (self._tokensupto2(tokenizer, token), + new['namespaces']) + if rule.wellformed: + rule._parentStyleSheet=self + 
def _resolveImport(self, url):
    """Fetch an @import target and return whatever ``_readUrl`` yields.

    The parent encoding handed to ``_readUrl`` is chosen in this order:

    1. ``self.__newEncoding`` when that attribute exists,
    2. the ``encoding`` attribute of the first rule in ``self.cssRules``,
    3. ``None`` (deliberately not UTF-8) when neither is available.

    :param url:
        location passed through unchanged to ``_readUrl``
    :returns:
        the result of ``_readUrl`` (described by the original author as
        ``(encoding, enctype, decodedContent)``)
    """
    parentEncoding = None  # final fallback: no encoding known
    try:
        # attribute may be missing; it is deleted again after parsing
        parentEncoding = self.__newEncoding
    except AttributeError:
        try:
            # fall back to an explicit @charset rule at position 0
            parentEncoding = self.cssRules[0].encoding
        except (IndexError, AttributeError):
            # no rules at all, or first rule carries no encoding
            pass

    options = dict(fetcher=self._fetcher,
                   overrideEncoding=self.__encodingOverride,
                   parentEncoding=parentEncoding)
    return _readUrl(url, **options)
def deleteRule(self, index):
    """
    Remove the rule at ``index`` from the style sheet and detach it.

    :param index:
        position of the rule in ``self.cssRules``. Ordinary Python list
        indexing is used, so a negative index counts from the end, e.g.
        ``deleteRule(-1)`` removes the last rule in cssRules.
    :Exceptions:
        - `INDEX_SIZE_ERR`: (self)
          ``xml.dom.IndexSizeErr`` is raised if ``index`` does not
          correspond to a rule in the style sheet's rule list.
        - `NO_MODIFICATION_ALLOWED_ERR`: (self)
          ``xml.dom.NoModificationAllowedErr`` is raised if the rule is
          the only @namespace rule declaring a namespace URI that
          ``self._getUsedURIs()`` reports as still in use.
          ``self._checkReadonly()`` is called first and is expected to
          reject readonly sheets.
    """
    self._checkReadonly()

    try:
        rule = self.cssRules[index]
    except IndexError:
        raise xml.dom.IndexSizeErr(
            u'CSSStyleSheet: %s is not a valid index in the rulelist of length %i' % (
            index, self.cssRules.length))

    if rule.type == rule.NAMESPACE_RULE:
        # A namespace declaration may only go away if nothing uses its
        # URI any more or if another @namespace rule declares it too.
        uris = [r.namespaceURI for r in self if r.type == r.NAMESPACE_RULE]
        useduris = self._getUsedURIs()
        if rule.namespaceURI in useduris and\
           uris.count(rule.namespaceURI) == 1:
            raise xml.dom.NoModificationAllowedErr(
                u'CSSStyleSheet: NamespaceURI defined in this rule is used, cannot remove.')

    rule._parentStyleSheet = None # detach
    del self.cssRules[index] # delete from StyleSheet
+ """ + self._checkReadonly() + + # check position + if index is None: + index = len(self.cssRules) + elif index < 0 or index > self.cssRules.length: + raise xml.dom.IndexSizeErr( + u'CSSStyleSheet: Invalid index %s for CSSRuleList with a length of %s.' % ( + index, self.cssRules.length)) + return + + if isinstance(rule, basestring): + # init a temp sheet which has the same properties as self + tempsheet = CSSStyleSheet(href=self.href, + media=self.media, + title=self.title, + parentStyleSheet=self.parentStyleSheet, + ownerRule=self.ownerRule) + tempsheet._ownerNode = self.ownerNode + tempsheet._fetcher = self._fetcher + + # prepend encoding if in this sheet to be able to use it in + # @import rules encoding resolution + # do not add if new rule startswith "@charset" (which is exact!) + if not rule.startswith(u'@charset') and (self.cssRules and + self.cssRules[0].type == self.cssRules[0].CHARSET_RULE): + # rule 0 is @charset! + newrulescount, newruleindex = 2, 1 + rule = self.cssRules[0].cssText + rule + else: + newrulescount, newruleindex = 1, 0 + + # parse the new rule(s) + tempsheet.cssText = (rule, self._namespaces) + + if len(tempsheet.cssRules) != newrulescount or (not isinstance( + tempsheet.cssRules[newruleindex], cssutils.css.CSSRule)): + self._log.error(u'CSSStyleSheet: Not a CSSRule: %s' % rule) + return + rule = tempsheet.cssRules[newruleindex] + rule._parentStyleSheet = None # done later? 
+ + # TODO: + #tempsheet._namespaces = self._namespaces + + elif isinstance(rule, cssutils.css.CSSRuleList): + # insert all rules + for i, r in enumerate(rule): + self.insertRule(r, index + i) + return index + + if not rule.wellformed: + self._log.error(u'CSSStyleSheet: Invalid rules cannot be added.') + return + + # CHECK HIERARCHY + # @charset + if rule.type == rule.CHARSET_RULE: + if inOrder: + index = 0 + # always first and only + if (self.cssRules and self.cssRules[0].type == rule.CHARSET_RULE): + self.cssRules[0].encoding = rule.encoding + else: + self.cssRules.insert(0, rule) + elif index != 0 or (self.cssRules and + self.cssRules[0].type == rule.CHARSET_RULE): + self._log.error( + u'CSSStylesheet: @charset only allowed once at the beginning of a stylesheet.', + error=xml.dom.HierarchyRequestErr) + return + else: + self.cssRules.insert(index, rule) + + # @unknown or comment + elif rule.type in (rule.UNKNOWN_RULE, rule.COMMENT) and not inOrder: + if index == 0 and self.cssRules and\ + self.cssRules[0].type == rule.CHARSET_RULE: + self._log.error( + u'CSSStylesheet: @charset must be the first rule.', + error=xml.dom.HierarchyRequestErr) + return + else: + self.cssRules.insert(index, rule) + + # @import + elif rule.type == rule.IMPORT_RULE: + if inOrder: + # automatic order + if rule.type in (r.type for r in self): + # find last of this type + for i, r in enumerate(reversed(self.cssRules)): + if r.type == rule.type: + index = len(self.cssRules) - i + break + else: + # find first point to insert + if self.cssRules and self.cssRules[0].type in (rule.CHARSET_RULE, + rule.COMMENT): + index = 1 + else: + index = 0 + else: + # after @charset + if index == 0 and self.cssRules and\ + self.cssRules[0].type == rule.CHARSET_RULE: + self._log.error( + u'CSSStylesheet: Found @charset at index 0.', + error=xml.dom.HierarchyRequestErr) + return + # before @namespace, @page, @font-face, @media and stylerule + for r in self.cssRules[:index]: + if r.type in (r.NAMESPACE_RULE, 
r.MEDIA_RULE, r.PAGE_RULE, + r.STYLE_RULE, r.FONT_FACE_RULE): + self._log.error( + u'CSSStylesheet: Cannot insert @import here, found @namespace, @media, @page or CSSStyleRule before index %s.' % + index, + error=xml.dom.HierarchyRequestErr) + return + self.cssRules.insert(index, rule) + + # @namespace + elif rule.type == rule.NAMESPACE_RULE: + if inOrder: + if rule.type in (r.type for r in self): + # find last of this type + for i, r in enumerate(reversed(self.cssRules)): + if r.type == rule.type: + index = len(self.cssRules) - i + break + else: + # find first point to insert + for i, r in enumerate(self.cssRules): + if r.type in (r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE, + r.FONT_FACE_RULE, r.UNKNOWN_RULE, r.COMMENT): + index = i # before these + break + else: + # after @charset and @import + for r in self.cssRules[index:]: + if r.type in (r.CHARSET_RULE, r.IMPORT_RULE): + self._log.error( + u'CSSStylesheet: Cannot insert @namespace here, found @charset or @import after index %s.' % + index, + error=xml.dom.HierarchyRequestErr) + return + # before @media and stylerule + for r in self.cssRules[:index]: + if r.type in (r.MEDIA_RULE, r.PAGE_RULE, r.STYLE_RULE, + r.FONT_FACE_RULE): + self._log.error( + u'CSSStylesheet: Cannot insert @namespace here, found @media, @page or CSSStyleRule before index %s.' % + index, + error=xml.dom.HierarchyRequestErr) + return + + if not (rule.prefix in self.namespaces and + self.namespaces[rule.prefix] == rule.namespaceURI): + # no doublettes + self.cssRules.insert(index, rule) + if _clean: + self._cleanNamespaces() + + # all other where order is not important + else: + if inOrder: + # simply add to end as no specific order + self.cssRules.append(rule) + index = len(self.cssRules) - 1 + else: + for r in self.cssRules[index:]: + if r.type in (r.CHARSET_RULE, r.IMPORT_RULE, r.NAMESPACE_RULE): + self._log.error( + u'CSSStylesheet: Cannot insert rule here, found @charset, @import or @namespace before index %s.' 
def setSerializer(self, cssserializer):
    """
    Replace the global serializer ``cssutils.ser`` that is used for
    the output of all stylesheets.

    :param cssserializer:
        must be an instance of ``cssutils.CSSSerializer``
    :Exceptions:
        - `ValueError`:
          Raised if ``cssserializer`` is not such an instance; the
          current serializer is left untouched in that case.
    """
    if not isinstance(cssserializer, cssutils.CSSSerializer):
        raise ValueError(u'Serializer must be an instance of cssutils.CSSSerializer.')
    cssutils.ser = cssserializer
+ """ + cssutils.ser.prefs.__setattr__(pref, value) + + def __repr__(self): + if self.media: + mediaText = self.media.mediaText + else: + mediaText = None + return "cssutils.css.%s(href=%r, media=%r, title=%r)" % ( + self.__class__.__name__, + self.href, mediaText, self.title) + + def __str__(self): + if self.media: + mediaText = self.media.mediaText + else: + mediaText = None + return "" % ( + self.__class__.__name__, self.encoding, self.href, + mediaText, self.title, self.namespaces.namespaces, + id(self)) diff --git a/src/calibre/utils/cssutils/css/cssunknownrule.py b/src/calibre/utils/cssutils/css/cssunknownrule.py new file mode 100644 index 0000000000..db96693305 --- /dev/null +++ b/src/calibre/utils/cssutils/css/cssunknownrule.py @@ -0,0 +1,208 @@ +"""CSSUnknownRule implements DOM Level 2 CSS CSSUnknownRule. +""" +__all__ = ['CSSUnknownRule'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: cssunknownrule.py 1170 2008-03-20 17:42:07Z cthedot $' + +import xml.dom +import cssrule +import cssutils + +class CSSUnknownRule(cssrule.CSSRule): + """ + represents an at-rule not supported by this user agent. 
+ + Properties + ========== + inherited from CSSRule + - cssText + - type + + cssutils only + ------------- + atkeyword + the literal keyword used + seq + All parts of this rule excluding @KEYWORD but including CSSComments + wellformed + if this Rule is wellformed, for Unknown rules if an atkeyword is set + at all + + Format + ====== + unknownrule: + @xxx until ';' or block {...} + """ + type = property(lambda self: cssrule.CSSRule.UNKNOWN_RULE) + + def __init__(self, cssText=u'', parentRule=None, + parentStyleSheet=None, readonly=False): + """ + cssText + of type string + """ + super(CSSUnknownRule, self).__init__(parentRule=parentRule, + parentStyleSheet=parentStyleSheet) + self._atkeyword = None + if cssText: + self.cssText = cssText + + self._readonly = readonly + + def _getCssText(self): + """ returns serialized property cssText """ + return cssutils.ser.do_CSSUnknownRule(self) + + def _setCssText(self, cssText): + """ + DOMException on setting + + - SYNTAX_ERR: + Raised if the specified CSS string value has a syntax error and + is unparsable. + - INVALID_MODIFICATION_ERR: + Raised if the specified CSS string value represents a different + type of rule than the current one. + - HIERARCHY_REQUEST_ERR: (never raised) + Raised if the rule cannot be inserted at this point in the + style sheet. + - NO_MODIFICATION_ALLOWED_ERR: (CSSRule) + Raised if the rule is readonly. 
def CHAR(expected, seq, token, tokenizer=None):
    """Handle a single-character token inside the unknown at-rule.

    Tracks open ``{``, ``[`` and ``(`` in ``new['nesting']`` and switches
    the parser state to ``'EOF'`` once a ``}`` or ``;`` is seen while no
    bracket is open. Returns the (possibly updated) ``expected`` state.
    """
    type_, val, line, col = token
    if expected != 'EOF':
        if val in u'{[(':
            # opening bracket: remember it until its partner shows up
            new['nesting'].append(val)
        elif val in u'}])':
            opening = {u'}': u'{', u']': u'[', u')': u'('}[val]
            try:
                if new['nesting'][-1] == opening:
                    new['nesting'].pop()
                else:
                    # mismatched bracket: reuse the handler below
                    raise IndexError()
            except IndexError:
                # also reached when nothing is open at all
                new['wellformed'] = False
                self._log.error(u'CSSUnknownRule: Wrong nesting of {, [ or (.',
                                token=token)

        # checked after the nesting update, so the "}" that closes the
        # outermost block ends the rule as well
        if val in u'};' and not new['nesting']:
            expected = 'EOF'

        # the character is stored even when it was reported as badly nested
        seq.append(val, type_, line=line, col=col)
        return expected
    else:
        # rule has already ended: any further character is an error
        new['wellformed'] = False
        self._log.error(u'CSSUnknownRule: Expected end of rule.',
                        token=token)
        return expected
tokenizer=None): + type_, val, line, col = token + val = self._uritokenvalue(token) + if expected != 'EOF': + seq.append(val, type_, line=line, col=col) + return expected + else: + new['wellformed'] = False + self._log.error(u'CSSUnknownRule: Expected end of rule.', + token=token) + return expected + + def default(expected, seq, token, tokenizer=None): + type_, val, line, col = token + if expected != 'EOF': + seq.append(val, type_, line=line, col=col) + return expected + else: + new['wellformed'] = False + self._log.error(u'CSSUnknownRule: Expected end of rule.', + token=token) + return expected + + # unknown : ATKEYWORD S* ... ; | } + newseq = self._tempSeq() + wellformed, expected = self._parse(expected=None, + seq=newseq, tokenizer=tokenizer, + productions={'CHAR': CHAR, + 'EOF': EOF, + 'INVALID': INVALID, + 'STRING': STRING, + 'URI': URI, + 'S': default # overwrite default default! + }, + default=default, + new=new) + + # wellformed set by parse + wellformed = wellformed and new['wellformed'] + + # post conditions + if expected != 'EOF': + wellformed = False + self._log.error( + u'CSSUnknownRule: No ending ";" or "}" found: %r' % + self._valuestr(cssText)) + elif new['nesting']: + wellformed = False + self._log.error( + u'CSSUnknownRule: Unclosed "{", "[" or "(": %r' % + self._valuestr(cssText)) + + # set all + if wellformed: + self.atkeyword = self._tokenvalue(attoken) + self._setSeq(newseq) + + cssText = property(fget=_getCssText, fset=_setCssText, + doc="(DOM) The parsable textual representation.") + + wellformed = property(lambda self: bool(self.atkeyword)) + + def __repr__(self): + return "cssutils.css.%s(cssText=%r)" % ( + self.__class__.__name__, self.cssText) + + def __str__(self): + return "" % ( + self.__class__.__name__, self.cssText, id(self)) diff --git a/src/calibre/utils/cssutils/css/cssvalue.py b/src/calibre/utils/cssutils/css/cssvalue.py new file mode 100644 index 0000000000..79948fa4d1 --- /dev/null +++ 
b/src/calibre/utils/cssutils/css/cssvalue.py @@ -0,0 +1,1086 @@ +"""CSSValue related classes + +- CSSValue implements DOM Level 2 CSS CSSValue +- CSSPrimitiveValue implements DOM Level 2 CSS CSSPrimitiveValue +- CSSValueList implements DOM Level 2 CSS CSSValueList + +""" +__all__ = ['CSSValue', 'CSSPrimitiveValue', 'CSSValueList'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: cssvalue.py 1228 2008-05-19 19:59:50Z cthedot $' + +import re +import xml.dom +import cssutils +import cssproperties + +class CSSValue(cssutils.util.Base): + """ + The CSSValue interface represents a simple or a complex value. + A CSSValue object only occurs in a context of a CSS property + + Properties + ========== + cssText + A string representation of the current value. + cssValueType + A (readonly) code defining the type of the value. + + seq: a list (cssutils) + All parts of this style declaration including CSSComments + valid: boolean + if the value is valid at all, False for e.g. color: #1 + wellformed + if this Property is syntactically ok + + _value (INTERNAL!) + value without any comments, used to validate + """ + + CSS_INHERIT = 0 + """ + The value is inherited and the cssText contains "inherit". + """ + CSS_PRIMITIVE_VALUE = 1 + """ + The value is a primitive value and an instance of the + CSSPrimitiveValue interface can be obtained by using binding-specific + casting methods on this instance of the CSSValue interface. + """ + CSS_VALUE_LIST = 2 + """ + The value is a CSSValue list and an instance of the CSSValueList + interface can be obtained by using binding-specific casting + methods on this instance of the CSSValue interface. + """ + CSS_CUSTOM = 3 + """ + The value is a custom value. 
def _getValue(self):
    """
    Build the comment-free text of this value from ``self.seq``.

    ``CSSComment`` items are skipped, string items are taken as they
    are and every other item contributes its ``cssText``. A last part
    consisting only of whitespace is dropped before joining.
    """
    parts = []
    for item in self.seq:
        if isinstance(item, cssutils.css.CSSComment):
            continue
        if isinstance(item, basestring):
            parts.append(item)
        else:
            # maybe CSSPrimitiveValue
            parts.append(item.cssText)
    # stripping the joined string instead would break escaped spaces
    if parts and not parts[-1].strip():
        parts.pop()
    return u''.join(parts)
+ */ + hexcolor + : HASH S* + ; + + DOMException on setting + + - SYNTAX_ERR: (self) + Raised if the specified CSS string value has a syntax error + (according to the attached property) or is unparsable. + - TODO: INVALID_MODIFICATION_ERR: + Raised if the specified CSS string value represents a different + type of values than the values allowed by the CSS property. + - NO_MODIFICATION_ALLOWED_ERR: (self) + Raised if this value is readonly. + """ + self._checkReadonly() + + # for closures: must be a mutable + new = {'values': [], + 'commas': 0, + 'valid': True, + 'wellformed': True } + + def _S(expected, seq, token, tokenizer=None): + val = self._tokenvalue(token) + if expected.endswith('operator'): + seq.append(u' ') + return 'term or operator' + elif expected.endswith('S'): + return 'term or S' + else: + return expected + + def _char(expected, seq, token, tokenizer=None): + val = self._tokenvalue(token) + if 'funcend' == expected and u')' == val: + # end of FUNCTION + seq[-1] += val + new['values'].append(seq[-1]) + return 'operator' + + elif expected in (')', ']', '}') and expected == val: + # end of any block: (), [], {} + seq[-1] += val + return 'operator' + + elif expected in ('funcend', ')', ']', '}'): + # content of func or block: (), [], {} + seq[-1] += val + return expected + + elif expected.endswith('operator') and ',' == val: + # term , term + new['commas'] += 1 + if seq and seq[-1] == u' ': + seq[-1] = val + else: + seq.append(val) + return 'term or S' + + elif expected.endswith('operator') and '/' == val: + # term / term + if seq and seq[-1] == u' ': + seq[-1] = val + else: + seq.append(val) + return 'term or S' + + elif expected.startswith('term') and u'(' == val: + # start of ( any* ) block + seq.append(val) + return ')' + elif expected.startswith('term') and u'[' == val: + # start of [ any* ] block + seq.append(val) + return ']' + elif expected.startswith('term') and u'{' == val: + # start of { any* } block + seq.append(val) + return '}' + elif 
expected.startswith('term') and u'-' == val or u'+' == 'val': + # unary operator + seq.append(val) + new['values'].append(val) + return 'number percentage dimension' + elif expected.startswith('term') and u'/' == val: + # font-size/line-height separator + seq.append(val) + new['values'].append(val) + return 'number percentage dimension' + else: + new['wellformed'] = False + self._log.error(u'CSSValue: Unexpected char.', token) + return expected + + def _number_percentage_dimension(expected, seq, token, tokenizer=None): + # NUMBER PERCENTAGE DIMENSION after -/+ or operator + if expected.startswith('term') or expected == 'number percentage dimension': + # normal value + val = self._tokenvalue(token) + if new['values'] and new['values'][-1] in (u'-', u'+'): + new['values'][-1] += val + else: + new['values'].append(val) + seq.append(val) + return 'operator' + elif 'operator' == expected: + # expected S but token which is ok + val = self._tokenvalue(token) + if new['values'] and new['values'][-1] in (u'-', u'+'): + new['values'][-1] += val + else: + new['values'].append(u' ') + seq.append(u' ') + new['values'].append(val) + seq.append(val) + return 'operator' + elif expected in ('funcend', ')', ']', '}'): + # a block + seq[-1] += self._tokenvalue(token) + return expected + else: + new['wellformed'] = False + self._log.error(u'CSSValue: Unexpected token.', token) + return expected + + def _string_ident_uri_hexcolor(expected, seq, token, tokenizer=None): + # STRING IDENT URI HASH + if expected.startswith('term'): + # normal value + + # TODO: use actual values, probably needs Base2 for this + typ = self._type(token) + if self._prods.STRING == typ: + val = u'"%s"' % self._stringtokenvalue(token) +# elif 'URI' == typ: +# val = u'url(%s)' % self._uritokenvalue(token) + else: + val = self._tokenvalue(token) + + new['values'].append(val) + seq.append(val) + return 'operator' + elif 'operator' == expected: + # expected S but still ok + + # TODO: use actual values, probably needs 
Base2 for this + typ = self._type(token) + if self._prods.STRING == typ: + val = u'"%s"' % self._stringtokenvalue(token) +# elif 'URI' == typ: +# val = u'url(%s)' % self._uritokenvalue(token) + else: + val = self._tokenvalue(token) + new['values'].append(u' ') + new['values'].append(val) + seq.append(u' ') + seq.append(val) + return 'operator' + elif expected in ('funcend', ')', ']', '}'): + # a block + seq[-1] += self._tokenvalue(token) + return expected + else: + new['wellformed'] = False + self._log.error(u'CSSValue: Unexpected token.', token) + return expected + + def _function(expected, seq, token, tokenizer=None): + # FUNCTION + if expected.startswith('term'): + # normal value but add if funcend if found + seq.append(self._tokenvalue(token)) + return 'funcend' + elif 'operator' == expected: + # normal value but add if funcend if found + seq.append(u' ') + seq.append(self._tokenvalue(token)) + return 'funcend' + elif expected in ('funcend', ')', ']', '}'): + # a block + seq[-1] += self._tokenvalue(token) + return expected + else: + new['wellformed'] = False + self._log.error(u'CSSValue: Unexpected token.', token) + return expected + + tokenizer = self._tokenize2(cssText) + + linetoken = self._nexttoken(tokenizer) + if not linetoken: + self._log.error(u'CSSValue: Unknown syntax or no value: %r.' % + self._valuestr(cssText)) + else: + # TODO: not very efficient tokenizing twice! + tokenizer = self._tokenize2(cssText) + newseq = [] + wellformed, expected = self._parse(expected='term', + seq=newseq, tokenizer=tokenizer, + productions={'S': _S, + 'CHAR': _char, + + 'NUMBER': _number_percentage_dimension, + 'PERCENTAGE': _number_percentage_dimension, + 'DIMENSION': _number_percentage_dimension, + + 'STRING': _string_ident_uri_hexcolor, + 'IDENT': _string_ident_uri_hexcolor, + 'URI': _string_ident_uri_hexcolor, + 'HASH': _string_ident_uri_hexcolor, + 'UNICODE-RANGE': _string_ident_uri_hexcolor, #? 
+ + 'FUNCTION': _function + }) + + wellformed = wellformed and new['wellformed'] + + # post conditions + if expected.startswith('term') and newseq and newseq[-1] != u' ' or ( + expected in ('funcend', ')', ']', '}')): + wellformed = False + self._log.error(u'CSSValue: Incomplete value: %r.' % + self._valuestr(cssText)) + + if not new['values']: + wellformed = False + self._log.error(u'CSSValue: Unknown syntax or no value: %r.' % + self._valuestr(cssText)) + + else: + self._linetoken = linetoken # used for line report + self.seq = newseq + self.valid = False + + self._validate() + + if len(new['values']) == 1 and new['values'][0] == u'inherit': + self._value = u'inherit' + self._cssValueType = CSSValue.CSS_INHERIT + self.__class__ = CSSValue # reset + elif len(new['values']) == 1: + self.__class__ = CSSPrimitiveValue + self._init() #inits CSSPrimitiveValue + elif len(new['values']) > 1 and\ + len(new['values']) == new['commas'] + 1: + # e.g. value for font-family: a, b + self.__class__ = CSSPrimitiveValue + self._init() #inits CSSPrimitiveValue + elif len(new['values']) > 1: + # separated by S + self.__class__ = CSSValueList + self._init() # inits CSSValueList + else: + self._cssValueType = CSSValue.CSS_CUSTOM + self.__class__ = CSSValue # reset + + self.wellformed = wellformed + + cssText = property(_getCssText, _setCssText, + doc="A string representation of the current value.") + + def _getCssValueType(self): + if hasattr(self, '_cssValueType'): + return self._cssValueType + + cssValueType = property(_getCssValueType, + doc="A (readonly) code defining the type of the value as defined above.") + + def _getCssValueTypeString(self): + t = self.cssValueType + if t is not None: # may be 0! 
def __str__(self):
    """
    returns a short debugging description of this value

    The description names the concrete class, the value type string,
    the serialized cssText, the property context, the valid flag and
    the object id.
    """
    # the format string must provide one placeholder per tuple item,
    # an empty format string raises TypeError as soon as str() is called
    return ("<cssutils.css.%s object cssValueType=%r cssText=%r "
            "propname=%r valid=%r at 0x%x>" % (
                self.__class__.__name__, self.cssValueTypeString,
                self.cssText, self._propertyName, self.valid, id(self)))
# (fromType, toType): converterfunc
#
# Each function converts a number given in ``fromType`` units into
# ``toType`` units. getFloatValue looks up (own type, requested type),
# setFloatValue looks up (given type, own type).
#
# Float literals are used throughout so that an int argument never
# triggers Python 2 integer division.
_converter = {
    # absolute lengths:
    # 1 inch is equal to 2.54 centimeters or 25.4 millimeters,
    # the points used by CSS 2.1 are equal to 1/72nd of an inch,
    # pc: picas - 1 pica is equal to 12 points (so 1in = 6pc)
    (CSS_CM, CSS_MM): lambda x: x * 10.0,
    (CSS_MM, CSS_CM): lambda x: x / 10.0,

    (CSS_PT, CSS_PC): lambda x: x / 12.0,
    (CSS_PC, CSS_PT): lambda x: x * 12.0,

    (CSS_CM, CSS_IN): lambda x: x / 2.54,
    (CSS_IN, CSS_CM): lambda x: x * 2.54,
    (CSS_MM, CSS_IN): lambda x: x / 25.4,
    (CSS_IN, CSS_MM): lambda x: x * 25.4,

    (CSS_IN, CSS_PT): lambda x: x * 72.0,
    (CSS_PT, CSS_IN): lambda x: x / 72.0,
    (CSS_CM, CSS_PT): lambda x: x / 2.54 * 72.0,
    (CSS_PT, CSS_CM): lambda x: x / 72.0 * 2.54,
    (CSS_MM, CSS_PT): lambda x: x / 25.4 * 72.0,
    (CSS_PT, CSS_MM): lambda x: x / 72.0 * 25.4,

    (CSS_IN, CSS_PC): lambda x: x * 6.0,
    (CSS_PC, CSS_IN): lambda x: x / 6.0,
    (CSS_CM, CSS_PC): lambda x: x / 2.54 * 6.0,
    (CSS_PC, CSS_CM): lambda x: x / 6.0 * 2.54,
    (CSS_MM, CSS_PC): lambda x: x / 25.4 * 6.0,
    (CSS_PC, CSS_MM): lambda x: x / 6.0 * 25.4,

    # hz <-> khz
    (CSS_KHZ, CSS_HZ): lambda x: x * 1000.0,
    (CSS_HZ, CSS_KHZ): lambda x: x / 1000.0,
    # s <-> ms
    (CSS_S, CSS_MS): lambda x: x * 1000.0,
    (CSS_MS, CSS_S): lambda x: x / 1000.0

    # TODO: convert deg <-> rad <-> grad
}
self._prods.DIMENSION, 'in'), + ('CSS_PT', self._prods.DIMENSION, 'pt'), + ('CSS_PC', self._prods.DIMENSION, 'pc'), + ('CSS_DEG', self._prods.DIMENSION, 'deg'), + ('CSS_RAD', self._prods.DIMENSION, 'rad'), + ('CSS_GRAD', self._prods.DIMENSION, 'grad'), + ('CSS_MS', self._prods.DIMENSION, 'ms'), + ('CSS_S', self._prods.DIMENSION, 's'), + ('CSS_HZ', self._prods.DIMENSION, 'hz'), + ('CSS_KHZ', self._prods.DIMENSION, 'khz'), + ('CSS_DIMENSION', self._prods.DIMENSION, None), + ('CSS_STRING', self._prods.STRING, None), + ('CSS_URI', self._prods.URI, None), + ('CSS_IDENT', self._prods.IDENT, None), + ('CSS_ATTR', self._prods.FUNCTION, 'attr('), + ('CSS_COUNTER', self._prods.FUNCTION, 'counter('), + ('CSS_RECT', self._prods.FUNCTION, 'rect('), + ('CSS_RGBCOLOR', self._prods.FUNCTION, 'rgb('), + ('CSS_RGBACOLOR', self._prods.FUNCTION, 'rgba('), + ] + + def __set_primitiveType(self): + """ + primitiveType is readonly but is set lazy if accessed + no value is given as self._value is used + """ + primitiveType = self.CSS_UNKNOWN + _floatType = False # if unary expect NUMBER DIMENSION or PERCENTAGE + tokenizer = self._tokenize2(self._value) + t = self._nexttoken(tokenizer) + if not t: + self._log.error(u'CSSPrimitiveValue: No value.') + + # unary operator: + if self._tokenvalue(t) in (u'-', u'+'): + t = self._nexttoken(tokenizer) + if not t: + self._log.error(u'CSSPrimitiveValue: No value.') + + _floatType = True + + # check for font1, "font2" etc which is treated as ONE string + fontstring = 0 # should be at leayst 2 + expected = 'ident or string' + tokenizer = self._tokenize2(self._value) # add used tokens again + for token in tokenizer: + val, typ = self._tokenvalue(token, normalize=True), self._type(token) + if expected == 'ident or string' and typ in ( + self._prods.IDENT, self._prods.STRING): + expected = 'comma' + fontstring += 1 + elif expected == 'comma' and val == ',': + expected = 'ident or string' + fontstring += 1 + elif typ in (self._prods.S, self._prods.COMMENT): 
def _getPrimitiveType(self):
    """
    getter behind the readonly ``primitiveType`` property

    The type is detected from the current value on every access.

    The former ``hasattr(self, '_primitivetype')`` guard tested a
    misspelled attribute name, so it never skipped the detection.
    It is removed instead of corrected: nothing visible in this class
    resets ``_primitiveType`` when the value text changes, so caching
    would return a stale type.
    """
    self.__set_primitiveType()
    return self._primitiveType
def setFloatValue(self, unitType, floatValue):
    """
    (DOM method) A method to set the float value with a specified unit.
    If the property attached with this value can not accept the
    specified unit or the float value, the value will be unchanged and
    a DOMException will be raised.

    unitType
        a unit code as defined above. The unit code can only be a float
        unit type
    floatValue
        the new float value which does not have to be a float value but
        may simple be an int e.g. if setting::

            setFloatValue(CSS_PX, 1)

    The given number is converted from ``unitType`` into the unit this
    value currently has, the unit of the value itself is kept.

    raises DOMException
        - INVALID_ACCESS_ERR: Raised if the attached property doesn't
          support the float value or the unit type. This includes
          ``floatValue`` arguments ``float()`` cannot handle at all
          (e.g. None or a tuple).
        - NO_MODIFICATION_ALLOWED_ERR: Raised if this property is readonly.
    """
    self._checkReadonly()
    if unitType not in self._floattypes:
        raise xml.dom.InvalidAccessErr(
            u'CSSPrimitiveValue: unitType %r is not a float type' %
            self._getCSSPrimitiveTypeString(unitType))
    try:
        val = float(floatValue)
    except (TypeError, ValueError):
        # float() raises TypeError for None, lists, tuples ... and
        # ValueError for unparsable strings; the 1-tuple keeps the
        # message formatting intact if floatValue itself is a tuple
        raise xml.dom.InvalidAccessErr(
            u'CSSPrimitiveValue: floatValue %r is not a float' %
            (floatValue,))

    # only the dimension is needed, but the call also rejects a current
    # value which is no float value at all
    _oldval, dim = self.__getValDim()

    if self.primitiveType != unitType:
        # convert if possible
        try:
            val = self._converter[
                unitType, self.primitiveType](val)
        except KeyError:
            raise xml.dom.InvalidAccessErr(
                u'CSSPrimitiveValue: Cannot coerce primitiveType %r to %r'
                % (self.primitiveTypeString,
                   self._getCSSPrimitiveTypeString(unitType)))

    # serialize whole numbers without a trailing ".0"
    if val == int(val):
        val = int(val)

    self.cssText = '%s%s' % (val, dim)
def setStringValue(self, stringType, stringValue):
    """
    (DOM method) A method to set the string value with the specified
    unit. If the property attached to this value can't accept the
    specified unit or the string value, the value will be unchanged and
    a DOMException will be raised.

    stringType
        a string code as defined above. The string code can only be a
        string unit type (i.e. CSS_STRING, CSS_URI, CSS_IDENT, and
        CSS_ATTR). It must be the same as the current primitiveType,
        no coercion between string types is done.
    stringValue
        the new string value
        Only the actual value is expected so for (CSS_URI, "a") the
        new value will be ``url(a)``. For CSS_STRING the value is put
        in double quotes and any double quote inside the value is
        escaped with a single backslash.

    raises
        DOMException

        - INVALID_ACCESS_ERR: Raised if the CSS value doesn't contain a
          string value or if the string value can't be converted into
          the specified unit.

        - NO_MODIFICATION_ALLOWED_ERR: Raised if this property is readonly.
    """
    self._checkReadonly()
    # self not stringType
    if self.primitiveType not in self._stringtypes:
        raise xml.dom.InvalidAccessErr(
            u'CSSPrimitiveValue %r is not a string type'
            % self.primitiveTypeString)
    # given stringType is no StringType
    if stringType not in self._stringtypes:
        raise xml.dom.InvalidAccessErr(
            u'CSSPrimitiveValue: stringType %s is not a string type'
            % self._getCSSPrimitiveTypeString(stringType))

    if self._primitiveType != stringType:
        raise xml.dom.InvalidAccessErr(
            u'CSSPrimitiveValue: Cannot coerce primitiveType %r to %r'
            % (self.primitiveTypeString,
               self._getCSSPrimitiveTypeString(stringType)))

    if self.CSS_STRING == self._primitiveType:
        # escape with ONE backslash: two backslashes would be an escaped
        # backslash followed by a bare quote which ends the string
        self.cssText = u'"%s"' % stringValue.replace(u'"', u'\\"')
    elif self.CSS_URI == self._primitiveType:
        # Some characters appearing in an unquoted URI, such as
        # parentheses, commas, whitespace characters, single quotes
        # (') and double quotes ("), must be escaped with a backslash
        # so that the resulting URI value is a URI token:
        # '\(', '\)', '\,'.
        #
        # Here the URI is set in quotes altogether!
        if any(char in stringValue for char in u'(),"\'\n\t\r\f '):
            stringValue = u'"%s"' % stringValue.replace(u'"', u'\\"')
        self.cssText = u'url(%s)' % stringValue
    elif self.CSS_ATTR == self._primitiveType:
        self.cssText = u'attr(%s)' % stringValue
    else:
        self.cssText = stringValue
    self._primitiveType = stringType
def __str__(self):
    """
    returns a short debugging description of this primitive value

    The description names the concrete class, the primitive type
    string, the serialized cssText, the property context, the valid
    flag and the object id.
    """
    # the format string must provide one placeholder per tuple item,
    # an empty format string raises TypeError as soon as str() is called
    return ("<cssutils.css.%s object primitiveType=%s cssText=%r "
            "_propertyName=%r valid=%r at 0x%x>" % (
                self.__class__.__name__, self.primitiveTypeString,
                self.cssText, self._propertyName, self.valid, id(self)))
def __init__(self, cssText=None, readonly=False, _propertyName=None):
    """
    inits a new CSSValueList

    All arguments are handed to the base class initializer by keyword,
    afterwards ``_init`` builds the item list.
    """
    base_options = dict(cssText=cssText,
                        readonly=readonly,
                        _propertyName=_propertyName)
    super(CSSValueList, self).__init__(**base_options)
    self._init()
def __str__(self):
    """
    returns a short debugging description of this list: the concrete
    class name, the number of items and the object id

    Formerly spelled ``__str_`` which Python never calls for ``str()``,
    and written with an empty format string which cannot take the three
    arguments.
    """
    return "<cssutils.css.%s object length=%s at 0x%x>" % (
        self.__class__.__name__, self.length, id(self))
+""" +__all__ = ['Property'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: property.py 1305 2008-06-22 18:42:51Z cthedot $' + +import xml.dom +import cssutils +import cssproperties +from cssvalue import CSSValue +from cssutils.helper import Deprecated + +class Property(cssutils.util.Base): + """ + (cssutils) a CSS property in a StyleDeclaration of a CSSStyleRule + + Properties + ========== + cssText + a parsable textual representation of this property + name + normalized name of the property, e.g. "color" when name is "c\olor" + (since 0.9.5) + literalname (since 0.9.5) + original name of the property in the source CSS which is not normalized + e.g. "C\\OLor" + cssValue + the relevant CSSValue instance for this property + value + the string value of the property, same as cssValue.cssText + priority + of the property (currently only u"important" or None) + literalpriority + original priority of the property in the source CSS which is not + normalized e.g. "IM\portant" + seqs + combination of a list for seq of name, a CSSValue object, and + a list for seq of priority (empty or [!important] currently) + valid + if this Property is valid + wellformed + if this Property is syntactically ok + + DEPRECATED normalname (since 0.9.5) + normalized name of the property, e.g. "color" when name is "c\olor" + + Format + ====== + :: + + property = name + : IDENT S* + ; + + expr = value + : term [ operator term ]* + ; + term + : unary_operator? + [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* | + TIME S* | FREQ S* | function ] + | STRING S* | IDENT S* | URI S* | hexcolor + ; + function + : FUNCTION S* expr ')' S* + ; + /* + * There is a constraint on the color that it must + * have either 3 or 6 hex-digits (i.e., [0-9a-fA-F]) + * after the "#"; e.g., "#000" is OK, but "#abcd" is not. 
def __init__(self, name=None, value=None, priority=u'', _mediaQuery=False):
    """
    inits property

    name
        a property name string (will be normalized)
    value
        a property value string
    priority
        an optional priority string which currently must be u'',
        u'!important' or u'important'
    _mediaQuery boolean
        if True value is optional as used by MediaQuery objects

    Given arguments go through the ``name``, ``cssValue`` and
    ``priority`` property setters (in this order), missing ones only
    initialize the backing attributes.
    """
    super(Property, self).__init__()

    # [seq of name, CSSValue object, seq of priority]
    self.seqs = [[], None, []]
    self.valid = self.wellformed = False
    self._mediaQuery = _mediaQuery

    if not name:
        self._name = u''
        self._literalname = u''
        self.__normalname = u'' # DEPRECATED
    else:
        self.name = name

    if not value:
        self.seqs[1] = CSSValue()
    else:
        self.cssValue = value

    if not priority:
        self._priority = u''
        self._literalpriority = u''
    else:
        self.priority = priority
def _setName(self, name):
    """
    setter of the ``name`` property: parses ``name`` and stores the
    literal and the normalized property name

    name
        handed to ``self._tokenize2``; if it is a list its first item
        is used as token for error reports

    On success ``_literalname``, ``_name`` and ``seqs[0]`` are set and
    ``valid`` reflects whether the normalized name is a key of
    ``cssproperties.cssvalues``. If an attached cssValue is present it
    is given the new property name and its ``valid`` flag is taken over.
    If parsing fails ``wellformed`` is set to False and the stored name
    is left as it was.

    DOMException on setting

    - SYNTAX_ERR: (self)
      Raised if the specified name has a syntax error and is
      unparsable.
    """
    # for closures: must be a mutable
    new = {'literalname': None,
           'wellformed': True}

    def _ident(expected, seq, token, tokenizer=None):
        # name
        # production callback for IDENT tokens: only the first IDENT is
        # accepted as the name, any further one is reported as an error
        if 'name' == expected:
            # NOTE: the stored "literal" name is lowercased here
            new['literalname'] = self._tokenvalue(token).lower()
            seq.append(new['literalname'])
            return 'EOF'
        else:
            new['wellformed'] = False
            self._log.error(u'Property: Unexpected ident.', token)
            return expected

    newseq = []
    wellformed, expected = self._parse(expected='name',
                                       seq=newseq,
                                       tokenizer=self._tokenize2(name),
                                       productions={'IDENT': _ident})
    # wellformed only if both the generic parser and the callback agree
    wellformed = wellformed and new['wellformed']

    # post conditions
    # define a token for error logging
    if isinstance(name, list):
        token = name[0]
    else:
        token = None

    if not new['literalname']:
        wellformed = False
        self._log.error(u'Property: No name found: %r' %
            self._valuestr(name), token=token)

    if wellformed:
        self.wellformed = True
        self._literalname = new['literalname']
        self._name = self._normalize(self._literalname)
        self.__normalname = self._name # DEPRECATED
        self.seqs[0] = newseq

        # validate
        if self._name not in cssproperties.cssvalues:
            self.valid = False
            # NOTE(review): ``tokenizer`` is assigned but never read in
            # this method - looks like a leftover, confirm before removing
            tokenizer=self._tokenize2(name)
            self._log.info(u'Property: No CSS2 Property: %r.' %
                     new['literalname'], token=token, neverraise=True)
        else:
            self.valid = True
            if self.cssValue:
                # revalidate the attached value against the new name
                self.cssValue._propertyName = self._name
                self.valid = self.cssValue.valid
    else:
        self.wellformed = False
+ - TODO: INVALID_MODIFICATION_ERR: + Raised if the specified CSS string value represents a different + type of values than the values allowed by the CSS property. + """ + if self._mediaQuery and not cssText: + self.seqs[1] = CSSValue() + else: + if not self.seqs[1]: + self.seqs[1] = CSSValue() + + cssvalue = self.seqs[1] + cssvalue._propertyName = self.name + cssvalue.cssText = cssText + if cssvalue._value and cssvalue.wellformed: + self.seqs[1] = cssvalue + self.valid = self.valid and cssvalue.valid + self.wellformed = self.wellformed and cssvalue.wellformed + + cssValue = property(_getCSSValue, _setCSSValue, + doc="(cssutils) CSSValue object of this property") + + def _getValue(self): + if self.cssValue: + return self.cssValue._value + else: + return u'' + + def _setValue(self, value): + self.cssValue.cssText = value + self.valid = self.valid and self.cssValue.valid + self.wellformed = self.wellformed and self.cssValue.wellformed + + value = property(_getValue, _setValue, + doc="The textual value of this Properties cssValue.") + + def _setPriority(self, priority): + """ + priority + a string, currently either u'', u'!important' or u'important' + + Format + ====== + :: + + prio + : IMPORTANT_SYM S* + ; + + "!"{w}"important" {return IMPORTANT_SYM;} + + DOMException on setting + + - SYNTAX_ERR: (self) + Raised if the specified priority has a syntax error and is + unparsable. + In this case a priority not equal to None, "" or "!{w}important". 
+ As CSSOM defines CSSStyleDeclaration.getPropertyPriority resulting in + u'important' this value is also allowed to set a Properties priority + """ + if self._mediaQuery: + self._priority = u'' + self._literalpriority = u'' + if priority: + self._log.error(u'Property: No priority in a MediaQuery - ignored.') + return + + if isinstance(priority, basestring) and\ + u'important' == self._normalize(priority): + priority = u'!%s' % priority + + # for closures: must be a mutable + new = {'literalpriority': u'', + 'wellformed': True} + + def _char(expected, seq, token, tokenizer=None): + # "!" + val = self._tokenvalue(token) + if u'!' == expected == val: + seq.append(val) + return 'important' + else: + new['wellformed'] = False + self._log.error(u'Property: Unexpected char.', token) + return expected + + def _ident(expected, seq, token, tokenizer=None): + # "important" + val = self._tokenvalue(token) + normalval = self._tokenvalue(token, normalize=True) + if 'important' == expected == normalval: + new['literalpriority'] = val + seq.append(val) + return 'EOF' + else: + new['wellformed'] = False + self._log.error(u'Property: Unexpected ident.', token) + return expected + + newseq = [] + wellformed, expected = self._parse(expected='!', + seq=newseq, + tokenizer=self._tokenize2(priority), + productions={'CHAR': _char, + 'IDENT': _ident}) + wellformed = wellformed and new['wellformed'] + + # post conditions + if priority and not new['literalpriority']: + wellformed = False + self._log.info(u'Property: Invalid priority: %r.' % + self._valuestr(priority)) + + if wellformed: + self.wellformed = self.wellformed and wellformed + self._literalpriority = new['literalpriority'] + self._priority = self._normalize(self.literalpriority) + self.seqs[2] = newseq + + # validate + if self._priority not in (u'', u'important'): + self.valid = False + self._log.info(u'Property: No CSS2 priority value: %r.' 
def __str__(self):
    """
    returns a short debugging description: module and class name,
    property name, serialized value, priority and object id
    """
    cls = self.__class__
    details = (cls.__module__, cls.__name__,
               self.name, self.cssValue.cssText, self.priority, id(self))
    return "<%s.%s object name=%r value=%r priority=%r at 0x%x>" % details
+ selectorText + textual representation of this Selector + seq + sequence of Selector parts including comments + specificity (READONLY) + tuple of (a, b, c, d) where: + + a + presence of style in document, always 0 if not used on a document + b + number of ID selectors + c + number of .class selectors + d + number of Element (type) selectors + + wellformed + if this selector is wellformed regarding the Selector spec + + Format + ====== + :: + + # implemented in SelectorList + selectors_group + : selector [ COMMA S* selector ]* + ; + + selector + : simple_selector_sequence [ combinator simple_selector_sequence ]* + ; + + combinator + /* combinators can be surrounded by white space */ + : PLUS S* | GREATER S* | TILDE S* | S+ + ; + + simple_selector_sequence + : [ type_selector | universal ] + [ HASH | class | attrib | pseudo | negation ]* + | [ HASH | class | attrib | pseudo | negation ]+ + ; + + type_selector + : [ namespace_prefix ]? element_name + ; + + namespace_prefix + : [ IDENT | '*' ]? '|' + ; + + element_name + : IDENT + ; + + universal + : [ namespace_prefix ]? '*' + ; + + class + : '.' IDENT + ; + + attrib + : '[' S* [ namespace_prefix ]? IDENT S* + [ [ PREFIXMATCH | + SUFFIXMATCH | + SUBSTRINGMATCH | + '=' | + INCLUDES | + DASHMATCH ] S* [ IDENT | STRING ] S* + ]? ']' + ; + + pseudo + /* '::' starts a pseudo-element, ':' a pseudo-class */ + /* Exceptions: :first-line, :first-letter, :before and :after. */ + /* Note that pseudo-elements are restricted to one per selector and */ + /* occur only in the last simple_selector_sequence. */ + : ':' ':'? 
[ IDENT | functional_pseudo ] + ; + + functional_pseudo + : FUNCTION S* expression ')' + ; + + expression + /* In CSS3, the expressions are identifiers, strings, */ + /* or of the form "an+b" */ + : [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ + ; + + negation + : NOT S* negation_arg S* ')' + ; + + negation_arg + : type_selector | universal | HASH | class | attrib | pseudo + ; + + """ + def __init__(self, selectorText=None, parentList=None, + readonly=False): + """ + :Parameters: + selectorText + initial value of this selector + parentList + a SelectorList + readonly + default to False + """ + super(Selector, self).__init__() + + self.__namespaces = _SimpleNamespaces(log=self._log) + self._element = None + self._parent = parentList + self._specificity = (0, 0, 0, 0) + + if selectorText: + self.selectorText = selectorText + + self._readonly = readonly + + def __getNamespaces(self): + "uses own namespaces if not attached to a sheet, else the sheet's ones" + try: + return self._parent.parentRule.parentStyleSheet.namespaces + except AttributeError: + return self.__namespaces + + _namespaces = property(__getNamespaces, doc="""if this Selector is attached + to a CSSStyleSheet the namespaces of that sheet are mirrored here. + While the Selector (or parent SelectorList or parentRule(s) of that are + not attached a own dict of {prefix: namespaceURI} is used.""") + + + element = property(lambda self: self._element, + doc=u"Effective element target of this selector.") + + parentList = property(lambda self: self._parent, + doc="(DOM) The SelectorList that contains this Selector or\ + None if this Selector is not attached to a SelectorList.") + + def _getSelectorText(self): + """ + returns serialized format + """ + return cssutils.ser.do_css_Selector(self) + + def _setSelectorText(self, selectorText): + """ + :param selectorText: + parsable string or a tuple of (selectorText, dict-of-namespaces). 
+ Given namespaces are ignored if this object is attached to a + CSSStyleSheet! + + :Exceptions: + - `NAMESPACE_ERR`: (self) + Raised if the specified selector uses an unknown namespace + prefix. + - `SYNTAX_ERR`: (self) + Raised if the specified CSS string value has a syntax error + and is unparsable. + - `NO_MODIFICATION_ALLOWED_ERR`: (self) + Raised if this rule is readonly. + """ + self._checkReadonly() + + # might be (selectorText, namespaces) + selectorText, namespaces = self._splitNamespacesOff(selectorText) + + try: + # uses parent stylesheets namespaces if available, otherwise given ones + namespaces = self.parentList.parentRule.parentStyleSheet.namespaces + except AttributeError: + pass + tokenizer = self._tokenize2(selectorText) + if not tokenizer: + self._log.error(u'Selector: No selectorText given.') + else: + # prepare tokenlist: + # "*" -> type "universal" + # "*"|IDENT + "|" -> combined to "namespace_prefix" + # "|" -> type "namespace_prefix" + # "." + IDENT -> combined to "class" + # ":" + IDENT, ":" + FUNCTION -> pseudo-class + # FUNCTION "not(" -> negation + # "::" + IDENT, "::" + FUNCTION -> pseudo-element + tokens = [] + for t in tokenizer: + typ, val, lin, col = t + if val == u':' and tokens and\ + self._tokenvalue(tokens[-1]) == ':': + # combine ":" and ":" + tokens[-1] = (typ, u'::', lin, col) + + elif typ == 'IDENT' and tokens\ + and self._tokenvalue(tokens[-1]) == u'.': + # class: combine to .IDENT + tokens[-1] = ('class', u'.'+val, lin, col) + elif typ == 'IDENT' and tokens and \ + self._tokenvalue(tokens[-1]).startswith(u':') and\ + not self._tokenvalue(tokens[-1]).endswith(u'('): + # pseudo-X: combine to :IDENT or ::IDENT but not ":a(" + "b" + if self._tokenvalue(tokens[-1]).startswith(u'::'): + t = 'pseudo-element' + else: + t = 'pseudo-class' + tokens[-1] = (t, self._tokenvalue(tokens[-1])+val, lin, col) + + elif typ == 'FUNCTION' and val == u'not(' and tokens and \ + u':' == self._tokenvalue(tokens[-1]): + tokens[-1] = ('negation', 
u':' + val, lin, tokens[-1][3]) + elif typ == 'FUNCTION' and tokens\ + and self._tokenvalue(tokens[-1]).startswith(u':'): + # pseudo-X: combine to :FUNCTION( or ::FUNCTION( + if self._tokenvalue(tokens[-1]).startswith(u'::'): + t = 'pseudo-element' + else: + t = 'pseudo-class' + tokens[-1] = (t, self._tokenvalue(tokens[-1])+val, lin, col) + + elif val == u'*' and tokens and\ + self._type(tokens[-1]) == 'namespace_prefix' and\ + self._tokenvalue(tokens[-1]).endswith(u'|'): + # combine prefix|* + tokens[-1] = ('universal', self._tokenvalue(tokens[-1])+val, + lin, col) + elif val == u'*': + # universal: "*" + tokens.append(('universal', val, lin, col)) + + elif val == u'|' and tokens and\ + self._type(tokens[-1]) in (self._prods.IDENT, 'universal') and\ + self._tokenvalue(tokens[-1]).find(u'|') == -1: + # namespace_prefix: "IDENT|" or "*|" + tokens[-1] = ('namespace_prefix', + self._tokenvalue(tokens[-1])+u'|', lin, col) + elif val == u'|': + # namespace_prefix: "|" + tokens.append(('namespace_prefix', val, lin, col)) + + else: + tokens.append(t) + + # TODO: back to generator but not elegant at all! + tokenizer = (t for t in tokens) + + # for closures: must be a mutable + new = {'context': [''], # stack of: 'attrib', 'negation', 'pseudo' + 'element': None, + '_PREFIX': None, + 'specificity': [0, 0, 0, 0], # mutable, finally a tuple! + 'wellformed': True + } + # used for equality checks and setting of a space combinator + S = u' ' + + def append(seq, val, typ=None, token=None): + """ + appends to seq + + namespace_prefix, IDENT will be combined to a tuple + (prefix, name) where prefix might be None, the empty string + or a prefix. 
+ + Saved are also: + - specificity definition: style, id, class/att, type + - element: the element this Selector is for + """ + context = new['context'][-1] + if token: + line, col = token[2], token[3] + else: + line, col = None, None + + if typ == '_PREFIX': + # SPECIAL TYPE: save prefix for combination with next + new['_PREFIX'] = val[:-1] + # handle next time + return + + if new['_PREFIX'] is not None: + # as saved from before and reset to None + prefix, new['_PREFIX'] = new['_PREFIX'], None + elif typ == 'universal' and '|' in val: + # val == *|* or prefix|* + prefix, val = val.split('|') + else: + prefix = None + + # namespace + if (typ.endswith('-selector') or typ == 'universal') and not ( + 'attribute-selector' == typ and not prefix): + # att **IS NOT** in default ns + if prefix == u'*': + # *|name: in ANY_NS + namespaceURI = cssutils._ANYNS + elif prefix is None: + # e or *: default namespace with prefix u'' or local-name() + namespaceURI = namespaces.get(u'', None) + elif prefix == u'': + # |name or |*: in no (or the empty) namespace + namespaceURI = u'' + else: + # explicit namespace prefix + # does not raise KeyError, see _SimpleNamespaces + namespaceURI = namespaces[prefix] + + if namespaceURI is None: + new['wellformed'] = False + self._log.error( + u'Selector: No namespaceURI found for prefix %r' % + prefix, token=token, error=xml.dom.NamespaceErr) + return + + # val is now (namespaceprefix, name) tuple + val = (namespaceURI, val) + + # specificity + if not context or context == 'negation': + if 'id' == typ: + new['specificity'][1] += 1 + elif 'class' == typ or '[' == val: + new['specificity'][2] += 1 + elif typ in ('type-selector', 'negation-type-selector', + 'pseudo-element'): + new['specificity'][3] += 1 + if not context and typ in ('type-selector', 'universal'): + # define element + new['element'] = val + + seq.append(val, typ, line=line, col=col) + + # expected constants + simple_selector_sequence = 'type_selector universal HASH class attrib 
pseudo negation ' + simple_selector_sequence2 = 'HASH class attrib pseudo negation ' + + element_name = 'element_name' + + negation_arg = 'type_selector universal HASH class attrib pseudo' + negationend = ')' + + attname = 'prefix attribute' + attname2 = 'attribute' + attcombinator = 'combinator ]' # optional + attvalue = 'value' # optional + attend = ']' + + expressionstart = 'PLUS - DIMENSION NUMBER STRING IDENT' + expression = expressionstart + ' )' + + combinator = ' combinator' + + def _COMMENT(expected, seq, token, tokenizer=None): + "special implementation for comment token" + append(seq, cssutils.css.CSSComment([token]), 'COMMENT', + token=token) + return expected + + def _S(expected, seq, token, tokenizer=None): + # S + context = new['context'][-1] + if context.startswith('pseudo-'): + if seq and seq[-1].value not in u'+-': + # e.g. x:func(a + b) + append(seq, S, 'S', token=token) + return expected + + elif context != 'attrib' and 'combinator' in expected: + append(seq, S, 'descendant', token=token) + return simple_selector_sequence + combinator + + else: + return expected + + def _universal(expected, seq, token, tokenizer=None): + # *|* or prefix|* + context = new['context'][-1] + val = self._tokenvalue(token) + if 'universal' in expected: + append(seq, val, 'universal', token=token) + + if 'negation' == context: + return negationend + else: + return simple_selector_sequence2 + combinator + + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected universal.', token=token) + return expected + + def _namespace_prefix(expected, seq, token, tokenizer=None): + # prefix| => element_name + # or prefix| => attribute_name if attrib + context = new['context'][-1] + val = self._tokenvalue(token) + if 'attrib' == context and 'prefix' in expected: + # [PREFIX|att] + append(seq, val, '_PREFIX', token=token) + return attname2 + elif 'type_selector' in expected: + # PREFIX|* + append(seq, val, '_PREFIX', token=token) + return element_name + else: + 
new['wellformed'] = False + self._log.error( + u'Selector: Unexpected namespace prefix.', token=token) + return expected + + def _pseudo(expected, seq, token, tokenizer=None): + # pseudo-class or pseudo-element :a ::a :a( ::a( + """ + /* '::' starts a pseudo-element, ':' a pseudo-class */ + /* Exceptions: :first-line, :first-letter, :before and :after. */ + /* Note that pseudo-elements are restricted to one per selector and */ + /* occur only in the last simple_selector_sequence. */ + """ + context = new['context'][-1] + val, typ = self._tokenvalue(token, normalize=True), self._type(token) + if 'pseudo' in expected: + if val in (':first-line', ':first-letter', ':before', ':after'): + # always pseudo-element ??? + typ = 'pseudo-element' + append(seq, val, typ, token=token) + + if val.endswith(u'('): + # function + new['context'].append(typ) # "pseudo-" "class" or "element" + return expressionstart + elif 'negation' == context: + return negationend + elif 'pseudo-element' == typ: + # only one per element, check at ) also! + return combinator + else: + return simple_selector_sequence2 + combinator + + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected start of pseudo.', token=token) + return expected + + def _expression(expected, seq, token, tokenizer=None): + # [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ + context = new['context'][-1] + val, typ = self._tokenvalue(token), self._type(token) + if context.startswith('pseudo-'): + append(seq, val, typ, token=token) + return expression + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected %s.' 
% typ, token=token) + return expected + + def _attcombinator(expected, seq, token, tokenizer=None): + # context: attrib + # PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES | + # DASHMATCH + context = new['context'][-1] + val, typ = self._tokenvalue(token), self._type(token) + if 'attrib' == context and 'combinator' in expected: + # combinator in attrib + append(seq, val, typ.lower(), token=token) + return attvalue + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected %s.' % typ, token=token) + return expected + + def _string(expected, seq, token, tokenizer=None): + # identifier + context = new['context'][-1] + typ, val = self._type(token), self._stringtokenvalue(token) + + # context: attrib + if 'attrib' == context and 'value' in expected: + # attrib: [...=VALUE] + append(seq, val, typ, token=token) + return attend + + # context: pseudo + elif context.startswith('pseudo-'): + # :func(...) + append(seq, val, typ, token=token) + return expression + + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected STRING.', token=token) + return expected + + def _ident(expected, seq, token, tokenizer=None): + # identifier + context = new['context'][-1] + val, typ = self._tokenvalue(token), self._type(token) + + # context: attrib + if 'attrib' == context and 'attribute' in expected: + # attrib: [...|ATT...] + append(seq, val, 'attribute-selector', token=token) + return attcombinator + + elif 'attrib' == context and 'value' in expected: + # attrib: [...=VALUE] + append(seq, val, 'attribute-value', token=token) + return attend + + # context: negation + elif 'negation' == context: + # negation: (prefix|IDENT) + append(seq, val, 'negation-type-selector', token=token) + return negationend + + # context: pseudo + elif context.startswith('pseudo-'): + # :func(...) 
+ append(seq, val, typ, token=token) + return expression + + elif 'type_selector' in expected or element_name == expected: + # element name after ns or complete type_selector + append(seq, val, 'type-selector', token=token) + return simple_selector_sequence2 + combinator + + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected IDENT.', + token=token) + return expected + + def _class(expected, seq, token, tokenizer=None): + # .IDENT + context = new['context'][-1] + val = self._tokenvalue(token) + if 'class' in expected: + append(seq, val, 'class', token=token) + + if 'negation' == context: + return negationend + else: + return simple_selector_sequence2 + combinator + + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected class.', token=token) + return expected + + def _hash(expected, seq, token, tokenizer=None): + # #IDENT + context = new['context'][-1] + val = self._tokenvalue(token) + if 'HASH' in expected: + append(seq, val, 'id', token=token) + + if 'negation' == context: + return negationend + else: + return simple_selector_sequence2 + combinator + + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected HASH.', token=token) + return expected + + def _char(expected, seq, token, tokenizer=None): + # + > ~ ) [ ] + - + context = new['context'][-1] + val = self._tokenvalue(token) + + # context: attrib + if u']' == val and 'attrib' == context and ']' in expected: + # end of attrib + append(seq, val, 'attribute-end', token=token) + context = new['context'].pop() # attrib is done + context = new['context'][-1] + if 'negation' == context: + return negationend + else: + return simple_selector_sequence2 + combinator + + elif u'=' == val and 'attrib' == context and 'combinator' in expected: + # combinator in attrib + append(seq, val, 'equals', token=token) + return attvalue + + # context: negation + elif u')' == val and 'negation' == context and u')' in expected: + # not(negation_arg)" + append(seq, 
val, 'negation-end', token=token) + new['context'].pop() # negation is done + context = new['context'][-1] + return simple_selector_sequence + combinator + + # context: pseudo (at least one expression) + elif val in u'+-' and context.startswith('pseudo-'): + # :func(+ -)" + _names = {'+': 'plus', '-': 'minus'} + if val == u'+' and seq and seq[-1].value == S: + seq.replace(-1, val, _names[val]) + else: + append(seq, val, _names[val], + token=token) + return expression + + elif u')' == val and context.startswith('pseudo-') and\ + expression == expected: + # :func(expression)" + append(seq, val, 'function-end', token=token) + new['context'].pop() # pseudo is done + if 'pseudo-element' == context: + return combinator + else: + return simple_selector_sequence + combinator + + # context: ROOT + elif u'[' == val and 'attrib' in expected: + # start of [attrib] + append(seq, val, 'attribute-start', token=token) + new['context'].append('attrib') + return attname + + elif val in u'+>~' and 'combinator' in expected: + # no other combinator except S may be following + _names = { + '>': 'child', + '+': 'adjacent-sibling', + '~': 'following-sibling'} + if seq and seq[-1].value == S: + seq.replace(-1, val, _names[val]) + else: + append(seq, val, _names[val], token=token) + return simple_selector_sequence + + elif u',' == val: + # not a selectorlist + new['wellformed'] = False + self._log.error( + u'Selector: Single selector only.', + error=xml.dom.InvalidModificationErr, + token=token) + return expected + + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected CHAR.', token=token) + return expected + + def _negation(expected, seq, token, tokenizer=None): + # not( + context = new['context'][-1] + val = self._tokenvalue(token, normalize=True) + if 'negation' in expected: + new['context'].append('negation') + append(seq, val, 'negation-start', token=token) + return negation_arg + else: + new['wellformed'] = False + self._log.error( + u'Selector: Unexpected 
negation.', token=token) + return expected + + # expected: only|not or mediatype, mediatype, feature, and + newseq = self._tempSeq() + + wellformed, expected = self._parse(expected=simple_selector_sequence, + seq=newseq, tokenizer=tokenizer, + productions={'CHAR': _char, + 'class': _class, + 'HASH': _hash, + 'STRING': _string, + 'IDENT': _ident, + 'namespace_prefix': _namespace_prefix, + 'negation': _negation, + 'pseudo-class': _pseudo, + 'pseudo-element': _pseudo, + 'universal': _universal, + # pseudo + 'NUMBER': _expression, + 'DIMENSION': _expression, + # attribute + 'PREFIXMATCH': _attcombinator, + 'SUFFIXMATCH': _attcombinator, + 'SUBSTRINGMATCH': _attcombinator, + 'DASHMATCH': _attcombinator, + 'INCLUDES': _attcombinator, + + 'S': _S, + 'COMMENT': _COMMENT}) + wellformed = wellformed and new['wellformed'] + + # post condition + if len(new['context']) > 1 or not newseq: + wellformed = False + self._log.error(u'Selector: Invalid or incomplete selector: %s' % + self._valuestr(selectorText)) + + if expected == 'element_name': + wellformed = False + self._log.error(u'Selector: No element name found: %s' % + self._valuestr(selectorText)) + + if expected == simple_selector_sequence and newseq: + wellformed = False + self._log.error(u'Selector: Cannot end with combinator: %s' % + self._valuestr(selectorText)) + + if newseq and hasattr(newseq[-1].value, 'strip') and \ + newseq[-1].value.strip() == u'': + del newseq[-1] + + # set + if wellformed: + self.__namespaces = namespaces + self._element = new['element'] + self._specificity = tuple(new['specificity']) + self._setSeq(newseq) + # filter that only used ones are kept + self.__namespaces = self._getUsedNamespaces() + + selectorText = property(_getSelectorText, _setSelectorText, + doc="(DOM) The parsable textual representation of the selector.") + + + specificity = property(lambda self: self._specificity, + doc="Specificity of this selector (READONLY).") + + wellformed = property(lambda self: bool(len(self.seq))) + + 
def __repr__(self): + if self.__getNamespaces(): + st = (self.selectorText, self._getUsedNamespaces()) + else: + st = self.selectorText + return u"cssutils.css.%s(selectorText=%r)" % ( + self.__class__.__name__, st) + + def __str__(self): + return u"<cssutils.css.%s object selectorText=%r specificity=%r _namespaces=%r at 0x%x>" % ( + self.__class__.__name__, self.selectorText, self.specificity, + self._getUsedNamespaces(), id(self)) + + def _getUsedUris(self): + "returns list of actually used URIs in this Selector" + uris = set() + for item in self.seq: + type_, val = item.type, item.value + if (type_.endswith(u'-selector') or type_ == u'universal') and \ + isinstance(val, tuple) and val[0] not in (None, u'*'): + uris.add(val[0]) + return uris + + def _getUsedNamespaces(self): + "returns actually used namespaces only" + useduris = self._getUsedUris() + namespaces = _SimpleNamespaces(log=self._log) + for p, uri in self._namespaces.items(): + if uri in useduris: + namespaces[p] = uri + return namespaces diff --git a/src/calibre/utils/cssutils/css/selectorlist.py b/src/calibre/utils/cssutils/css/selectorlist.py new file mode 100644 index 0000000000..22c75395c8 --- /dev/null +++ b/src/calibre/utils/cssutils/css/selectorlist.py @@ -0,0 +1,249 @@ +"""SelectorList is a list of CSS Selector objects. + +TODO + - remove duplicate Selectors. -> CSSOM canonicalize + + - ??? CSS2 gives a special meaning to the comma (,) in selectors. + However, since it is not known if the comma may acquire other + meanings in future versions of CSS, the whole statement should be + ignored if there is an error anywhere in the selector, even though + the rest of the selector may look reasonable in CSS2. 
+ + Illegal example(s): + + For example, since the "&" is not a valid token in a CSS2 selector, + a CSS2 user agent must ignore the whole second line, and not set + the color of H3 to red: +""" +__all__ = ['SelectorList'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: selectorlist.py 1174 2008-03-20 17:43:07Z cthedot $' + +import xml.dom +import cssutils +from selector import Selector + +class SelectorList(cssutils.util.Base, cssutils.util.ListSeq): + """ + (cssutils) a list of Selectors of a CSSStyleRule + + Properties + ========== + length: of type unsigned long, readonly + The number of Selector elements in the list. + parentRule: of type CSSRule, readonly + The CSS rule that contains this selector list or None if this + list is not attached to a CSSRule. + selectorText: of type DOMString + The textual representation of the selector for the rule set. The + implementation may have stripped out insignificant whitespace while + parsing the selector. + seq: (internal use!) + A list of Selector objects + wellformed + if this selectorlist is wellformed regarding the Selector spec + """ + def __init__(self, selectorText=None, parentRule=None, + readonly=False): + """ + initializes SelectorList with optional selectorText + + :Parameters: + selectorText + parsable list of Selectors + parentRule + the parent CSSRule if available + """ + super(SelectorList, self).__init__() + + self._parentRule = parentRule + + if selectorText: + self.selectorText = selectorText + + self._readonly = readonly + + def __prepareset(self, newSelector, namespaces=None): + "used by appendSelector and __setitem__" + if not namespaces: + namespaces = {} + self._checkReadonly() + if not isinstance(newSelector, Selector): + newSelector = Selector((newSelector, namespaces), + parentList=self) + if newSelector.wellformed: + newSelector._parent = self # maybe set twice but must be! 
+ return newSelector + + def __setitem__(self, index, newSelector): + """ + overwrites ListSeq.__setitem__ + + Any duplicate Selectors are **not** removed. + """ + newSelector = self.__prepareset(newSelector) + if newSelector: + self.seq[index] = newSelector + + def append(self, newSelector): + "same as appendSelector(newSelector)" + self.appendSelector(newSelector) + + length = property(lambda self: len(self), + doc="The number of Selector elements in the list.") + + + def __getNamespaces(self): + "uses children namespaces if not attached to a sheet, else the sheet's ones" + try: + return self.parentRule.parentStyleSheet.namespaces + except AttributeError: + namespaces = {} + for selector in self.seq: + namespaces.update(selector._namespaces) + return namespaces + + _namespaces = property(__getNamespaces, doc="""if this SelectorList is + attached to a CSSStyleSheet the namespaces of that sheet are mirrored + here. While the SelectorList (or parentRule(s) are + not attached the namespaces of all children Selectors are used.""") + + parentRule = property(lambda self: self._parentRule, + doc="(DOM) The CSS rule that contains this SelectorList or\ + None if this SelectorList is not attached to a CSSRule.") + + def _getSelectorText(self): + "returns serialized format" + return cssutils.ser.do_css_SelectorList(self) + + def _setSelectorText(self, selectorText): + """ + :param selectorText: + comma-separated list of selectors or a tuple of + (selectorText, dict-of-namespaces) + :Exceptions: + - `NAMESPACE_ERR`: (Selector) + Raised if the specified selector uses an unknown namespace + prefix. + - `SYNTAX_ERR`: (self) + Raised if the specified CSS string value has a syntax error + and is unparsable. + - `NO_MODIFICATION_ALLOWED_ERR`: (self) + Raised if this rule is readonly. 
+ """ + self._checkReadonly() + + # might be (selectorText, namespaces) + selectorText, namespaces = self._splitNamespacesOff(selectorText) + try: + # use parent's only if available + namespaces = self.parentRule.parentStyleSheet.namespaces + except AttributeError: + pass + + wellformed = True + tokenizer = self._tokenize2(selectorText) + newseq = [] + + expected = True + while True: + # find all upto and including next ",", EOF or nothing + selectortokens = self._tokensupto2(tokenizer, listseponly=True) + if selectortokens: + if self._tokenvalue(selectortokens[-1]) == ',': + expected = selectortokens.pop() + else: + expected = None + + selector = Selector((selectortokens, namespaces), + parentList=self) + if selector.wellformed: + newseq.append(selector) + else: + wellformed = False + self._log.error(u'SelectorList: Invalid Selector: %s' % + self._valuestr(selectortokens)) + else: + break + + # post condition + if u',' == expected: + wellformed = False + self._log.error(u'SelectorList: Cannot end with ",": %r' % + self._valuestr(selectorText)) + elif expected: + wellformed = False + self._log.error(u'SelectorList: Unknown Syntax: %r' % + self._valuestr(selectorText)) + if wellformed: + self.seq = newseq +# for selector in newseq: +# self.appendSelector(selector) + + selectorText = property(_getSelectorText, _setSelectorText, + doc="""(cssutils) The textual representation of the selector for + a rule set.""") + + wellformed = property(lambda self: bool(len(self.seq))) + + def appendSelector(self, newSelector): + """ + Append newSelector (a string will be converted to a new + Selector). + + :param newSelector: + comma-separated list of selectors or a tuple of + (selectorText, dict-of-namespaces) + :returns: New Selector or None if newSelector is not wellformed. + :Exceptions: + - `NAMESPACE_ERR`: (self) + Raised if the specified selector uses an unknown namespace + prefix. 
+ - `SYNTAX_ERR`: (self) + Raised if the specified CSS string value has a syntax error + and is unparsable. + - `NO_MODIFICATION_ALLOWED_ERR`: (self) + Raised if this rule is readonly. + """ + self._checkReadonly() + + # might be (selectorText, namespaces) + newSelector, namespaces = self._splitNamespacesOff(newSelector) + try: + # use parent's only if available + namespaces = self.parentRule.parentStyleSheet.namespaces + except AttributeError: + # use already present namespaces plus new given ones + _namespaces = self._namespaces + _namespaces.update(namespaces) + namespaces = _namespaces + + newSelector = self.__prepareset(newSelector, namespaces) + if newSelector: + seq = self.seq[:] + del self.seq[:] + for s in seq: + if s.selectorText != newSelector.selectorText: + self.seq.append(s) + self.seq.append(newSelector) + return newSelector + + def __repr__(self): + if self._namespaces: + st = (self.selectorText, self._namespaces) + else: + st = self.selectorText + return "cssutils.css.%s(selectorText=%r)" % ( + self.__class__.__name__, st) + + def __str__(self): + return "<cssutils.css.%s object selectorText=%r _namespaces=%r at 0x%x>" % ( + self.__class__.__name__, self.selectorText, self._namespaces, + id(self)) + + def _getUsedUris(self): + "used by CSSStyleSheet to check if @namespace rules are needed" + uris = set() + for s in self: + uris.update(s._getUsedUris()) + return uris diff --git a/src/calibre/utils/cssutils/css2productions.py b/src/calibre/utils/cssutils/css2productions.py new file mode 100644 index 0000000000..a836df3f93 --- /dev/null +++ b/src/calibre/utils/cssutils/css2productions.py @@ -0,0 +1,131 @@ +"""productions for CSS 2.1 + +CSS2_1_MACROS and CSS2_1_PRODUCTIONS are from both +http://www.w3.org/TR/CSS21/grammar.html and +http://www.w3.org/TR/css3-syntax/#grammar0 + + +""" +__all__ = ['CSSProductions', 'MACROS', 'PRODUCTIONS'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: css2productions.py 1394 2008-07-27 13:29:22Z cthedot $' + +# option case-insensitive +MACROS = { + 'h': r'[0-9a-f]', + 
#'nonascii': r'[\200-\377]', + 'nonascii': r'[^\0-\177]', # CSS3 + 'unicode': r'\\{h}{1,6}(\r\n|[ \t\r\n\f])?', + + 'escape': r'{unicode}|\\[^\r\n\f0-9a-f]', + 'nmstart': r'[_a-zA-Z]|{nonascii}|{escape}', + 'nmchar': r'[_a-zA-Z0-9-]|{nonascii}|{escape}', + 'string1': r'\"([^\n\r\f\\"]|\\{nl}|{escape})*\"', + 'string2': r"\'([^\n\r\f\\']|\\{nl}|{escape})*\'", + 'invalid1': r'\"([^\n\r\f\\"]|\\{nl}|{escape})*', + 'invalid2': r"\'([^\n\r\f\\']|\\{nl}|{escape})*", + 'comment': r'\/\*[^*]*\*+([^/*][^*]*\*+)*\/', + # CSS list 080725 19:43 + # \/\*([^*\\]|{escape})*\*+(([^/*\\]|{escape})[^*]*\*+)*\/ + + 'ident': r'[-]?{nmstart}{nmchar}*', + 'name': r'{nmchar}+', + # CHANGED TO SPEC: added "-?" + 'num': r'-?[0-9]*\.[0-9]+|[0-9]+', + 'string': r'{string1}|{string2}', + 'invalid': r'{invalid1}|{invalid2}', + 'url': r'([!#$%&*-~]|{nonascii}|{escape})*', + 's': r'[ \t\r\n\f]+', + 'w': r'{s}?', + 'nl': r'\n|\r\n|\r|\f', + 'range': r'\?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))', + + 'A': r'a|\\0{0,4}(41|61)(\r\n|[ \t\r\n\f])?', + 'C': r'c|\\0{0,4}(43|63)(\r\n|[ \t\r\n\f])?', + 'D': r'd|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?', + 'E': r'e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?', + 'F': r'f|\\0{0,4}(46|66)(\r\n|[ \t\r\n\f])?', + 'G': r'g|\\0{0,4}(47|67)(\r\n|[ \t\r\n\f])?|\\g', + 'H': r'h|\\0{0,4}(48|68)(\r\n|[ \t\r\n\f])?|\\h', + 'I': r'i|\\0{0,4}(49|69)(\r\n|[ \t\r\n\f])?|\\i', + 'K': r'k|\\0{0,4}(4b|6b)(\r\n|[ \t\r\n\f])?|\\k', + 'M': r'm|\\0{0,4}(4d|6d)(\r\n|[ \t\r\n\f])?|\\m', + 'N': r'n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n', + 'O': r'o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o', + 'P': r'p|\\0{0,4}(50|70)(\r\n|[ \t\r\n\f])?|\\p', + 'R': r'r|\\0{0,4}(52|72)(\r\n|[ \t\r\n\f])?|\\r', + 'S': r's|\\0{0,4}(53|73)(\r\n|[ \t\r\n\f])?|\\s', + 'T': r't|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t', + 'X': r'x|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\x', + 'Z': r'z|\\0{0,4}(5a|7a)(\r\n|[ \t\r\n\f])?|\\z', + } + +PRODUCTIONS = [ + ('URI', r'url\({w}{string}{w}\)'), 
#"url("{w}{string}{w}")" {return URI;} + ('URI', r'url\({w}{url}{w}\)'), #"url("{w}{url}{w}")" {return URI;} + ('FUNCTION', r'{ident}\('), #{ident}"(" {return FUNCTION;} + + ('IMPORT_SYM', r'@{I}{M}{P}{O}{R}{T}'), #"@import" {return IMPORT_SYM;} + ('PAGE_SYM', r'@{P}{A}{G}{E}'), #"@page" {return PAGE_SYM;} + ('MEDIA_SYM', r'@{M}{E}{D}{I}{A}'), #"@media" {return MEDIA_SYM;} + ('FONT_FACE_SYM', r'@{F}{O}{N}{T}\-{F}{A}{C}{E}'), #"@font-face" {return FONT_FACE_SYM;} + + # CHANGED TO SPEC: only @charset + ('CHARSET_SYM', r'@charset '), #"@charset " {return CHARSET_SYM;} + + ('NAMESPACE_SYM', r'@{N}{A}{M}{E}{S}{P}{A}{C}{E}'), #"@namespace" {return NAMESPACE_SYM;} + + # CHANGED TO SPEC: ATKEYWORD + ('ATKEYWORD', r'\@{ident}'), + + ('IDENT', r'{ident}'), #{ident} {return IDENT;} + ('STRING', r'{string}'), #{string} {return STRING;} + ('INVALID', r'{invalid}'), # {return INVALID; /* unclosed string */} + ('HASH', r'\#{name}'), #"#"{name} {return HASH;} + ('PERCENTAGE', r'{num}%'), #{num}% {return PERCENTAGE;} + ('LENGTH', r'{num}{E}{M}'), #{num}em {return EMS;} + ('LENGTH', r'{num}{E}{X}'), #{num}ex {return EXS;} + ('LENGTH', r'{num}{P}{X}'), #{num}px {return LENGTH;} + ('LENGTH', r'{num}{C}{M}'), #{num}cm {return LENGTH;} + ('LENGTH', r'{num}{M}{M}'), #{num}mm {return LENGTH;} + ('LENGTH', r'{num}{I}{N}'), #{num}in {return LENGTH;} + ('LENGTH', r'{num}{P}{T}'), #{num}pt {return LENGTH;} + ('LENGTH', r'{num}{P}{C}'), #{num}pc {return LENGTH;} + ('ANGLE', r'{num}{D}{E}{G}'), #{num}deg {return ANGLE;} + ('ANGLE', r'{num}{R}{A}{D}'), #{num}rad {return ANGLE;} + ('ANGLE', r'{num}{G}{R}{A}{D}'), #{num}grad {return ANGLE;} + ('TIME', r'{num}{M}{S}'), #{num}ms {return TIME;} + ('TIME', r'{num}{S}'), #{num}s {return TIME;} + ('FREQ', r'{num}{H}{Z}'), #{num}Hz {return FREQ;} + ('FREQ', r'{num}{K}{H}{Z}'), #{num}kHz {return FREQ;} + ('DIMEN', r'{num}{ident}'), #{num}{ident} {return DIMEN;} + ('NUMBER', r'{num}'), #{num} {return NUMBER;} + #('UNICODERANGE', r'U\+{range}'), #U\+{range} 
{return UNICODERANGE;} + #('UNICODERANGE', r'U\+{h}{1,6}-{h}{1,6}'), #U\+{h}{1,6}-{h}{1,6} {return UNICODERANGE;} + # --- CSS3 --- + ('UNICODE-RANGE', r'[0-9A-F?]{1,6}(\-[0-9A-F]{1,6})?'), + ('CDO', r'\<\!\-\-'), #"" {return CDC;} + ('S', r'{s}'),# {return S;} + + # \/\*[^*]*\*+([^/*][^*]*\*+)*\/ /* ignore comments */ + # {s}+\/\*[^*]*\*+([^/*][^*]*\*+)*\/ {unput(' '); /*replace by space*/} + + ('INCLUDES', r'\~\='), #"~=" {return INCLUDES;} + ('DASHMATCH', r'\|\='), #"|=" {return DASHMATCH;} + ('LBRACE', r'\{'), #{w}"{" {return LBRACE;} + ('PLUS', r'\+'), #{w}"+" {return PLUS;} + ('GREATER', r'\>'), #{w}">" {return GREATER;} + ('COMMA', r'\,'), #{w}"," {return COMMA;} + ('IMPORTANT_SYM', r'\!({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T}'), #"!{w}important" {return IMPORTANT_SYM;} + ('COMMENT', '\/\*[^*]*\*+([^/][^*]*\*+)*\/'), # /* ignore comments */ + ('CLASS', r'\.'), #. {return *yytext;} + + # --- CSS3! --- + ('CHAR', r'[^"\']'), + ] + +class CSSProductions(object): + pass +for i, t in enumerate(PRODUCTIONS): + setattr(CSSProductions, t[0].replace('-', '_'), t[0]) \ No newline at end of file diff --git a/src/calibre/utils/cssutils/css3productions.py b/src/calibre/utils/cssutils/css3productions.py new file mode 100644 index 0000000000..bfd02fa8ac --- /dev/null +++ b/src/calibre/utils/cssutils/css3productions.py @@ -0,0 +1,62 @@ +"""productions for CSS 3 + +CSS3_MACROS and CSS3_PRODUCTIONS are from http://www.w3.org/TR/css3-syntax +""" +__all__ = ['CSSProductions', 'MACROS', 'PRODUCTIONS'] +__docformat__ = 'restructuredtext' +__version__ = '$Id: css3productions.py 1116 2008-03-05 13:52:23Z cthedot $' + +# a complete list of css3 macros +MACROS = { + 'ident': r'[-]?{nmstart}{nmchar}*', + 'name': r'{nmchar}+', + 'nmstart': r'[_a-zA-Z]|{nonascii}|{escape}', + 'nonascii': r'[^\0-\177]', + 'unicode': r'\\[0-9a-f]{1,6}{wc}?', + 'escape': r'{unicode}|\\[ -~\200-\777]', + # 'escape': r'{unicode}|\\[ -~\200-\4177777]', + 'nmchar': r'[-_a-zA-Z0-9]|{nonascii}|{escape}', + + # 
CHANGED TO SPEC: added "-?" + 'num': r'-?[0-9]*\.[0-9]+|[0-9]+', #r'[-]?\d+|[-]?\d*\.\d+', + 'string': r'''\'({stringchar}|\")*\'|\"({stringchar}|\')*\"''', + 'stringchar': r'{urlchar}| |\\{nl}', + 'urlchar': r'[\x09\x21\x23-\x26\x27-\x7E]|{nonascii}|{escape}', + # what if \r\n, \n matches first? + 'nl': r'\n|\r\n|\r|\f', + 'w': r'{wc}*', + 'wc': r'\t|\r|\n|\f|\x20' + } + +# The following productions are the complete list of tokens in CSS3, the productions are **ordered**: +PRODUCTIONS = [ + ('BOM', r'\xFEFF'), + ('URI', r'url\({w}({string}|{urlchar}*){w}\)'), + ('FUNCTION', r'{ident}\('), + ('ATKEYWORD', r'\@{ident}'), + ('IDENT', r'{ident}'), + ('STRING', r'{string}'), + ('HASH', r'\#{name}'), + ('PERCENTAGE', r'{num}\%'), + ('DIMENSION', r'{num}{ident}'), + ('NUMBER', r'{num}'), + #??? + ('UNICODE-RANGE', ur'[0-9A-F?]{1,6}(\-[0-9A-F]{1,6})?'), + ('CDO', r'\<\!\-\-'), + ('CDC', r'\-\-\>'), + ('S', r'{wc}+'), + ('INCLUDES', '\~\='), + ('DASHMATCH', r'\|\='), + ('PREFIXMATCH', r'\^\='), + ('SUFFIXMATCH', r'\$\='), + ('SUBSTRINGMATCH', r'\*\='), + ('COMMENT', r'\/\*[^*]*\*+([^/][^*]*\*+)*\/'), + ('CHAR', r'[^"\']'), + ] + +class CSSProductions(object): + "has attributes for all PRODUCTIONS" + pass + +for i, t in enumerate(PRODUCTIONS): + setattr(CSSProductions, t[0].replace('-', '_'), t[0]) diff --git a/src/calibre/utils/cssutils/cssproductions.py b/src/calibre/utils/cssutils/cssproductions.py new file mode 100644 index 0000000000..63f856442b --- /dev/null +++ b/src/calibre/utils/cssutils/cssproductions.py @@ -0,0 +1,121 @@ +"""productions for cssutils based on a mix of CSS 2.1 and CSS 3 Syntax +productions + +- http://www.w3.org/TR/css3-syntax +- http://www.w3.org/TR/css3-syntax/#grammar0 + +open issues + - numbers contain "-" if present + - HASH: #aaa is, #000 is not anymore, + CSS2.1: 'nmchar': r'[_a-z0-9-]|{nonascii}|{escape}', + CSS3: 'nmchar': r'[_a-z-]|{nonascii}|{escape}', +""" +__all__ = ['CSSProductions', 'MACROS', 'PRODUCTIONS'] +__docformat__ = 
'restructuredtext' +__version__ = '$Id: cssproductions.py 1378 2008-07-15 20:02:19Z cthedot $' + +# a complete list of css3 macros +MACROS = { + 'ident': r'[-]?{nmstart}{nmchar}*', + 'name': r'{nmchar}+', + 'nmstart': r'[_a-zA-Z]|{nonascii}|{escape}', + 'nonascii': r'[^\0-\177]', + 'unicode': r'\\[0-9a-f]{1,6}(?:{nl}|{wc})?', + 'escape': r'{unicode}|\\[ -~\200-\777]', + # 'escape': r'{unicode}|\\[ -~\200-\4177777]', + 'nmchar': r'[-_a-zA-Z0-9]|{nonascii}|{escape}', + + 'num': r'[0-9]*\.[0-9]+|[0-9]+', #r'[-]?\d+|[-]?\d*\.\d+', + 'string': r"""\'({stringesc1}|{stringchar}|")*\'""" + "|" + '''\"({stringesc2}|{stringchar}|')*\"''', + # seems an error in CSS 3 but is allowed in CSS 2.1 + 'stringesc1' : r"\\'", + 'stringesc2' : r'\\"', + + 'stringchar': r'{urlchar}| |\\{nl}', + + # urlchar ::= [#x9#x21#x23-#x26#x27-#x7E] | nonascii | escape + # 0x27 is "'" which should not be in here..., should ) be in here??? + 'urlchar': r'[\x09\x21\x23-\x26\x28-\x7E]|{nonascii}|{escape}', + + # from CSS2.1 + 'invalid': r'{invalid1}|{invalid2}', + 'invalid1': r'\"([^\n\r\f\\"]|\\{nl}|{escape})*', + 'invalid2': r"\'([^\n\r\f\\']|\\{nl}|{escape})*", + + # \r\n should be counted as one char see unicode above + 'nl': r'\n|\r\n|\r|\f', + 'w': r'{wc}*', + 'wc': r'\t|\r|\n|\f|\x20', + + 'comment': r'\/\*[^*]*\*+([^/][^*]*\*+)*\/', + + 'A': r'A|a|\\0{0,4}(?:41|61)(?:\r\n|[ \t\r\n\f])?', + 'C': r'C|c|\\0{0,4}(?:43|63)(?:\r\n|[ \t\r\n\f])?', + 'D': r'D|d|\\0{0,4}(?:44|64)(?:\r\n|[ \t\r\n\f])?', + 'E': r'E|e|\\0{0,4}(?:45|65)(?:\r\n|[ \t\r\n\f])?', + 'F': r'F|f|\\0{0,4}(?:46|66)(?:\r\n|[ \t\r\n\f])?', + 'G': r'G|g|\\0{0,4}(?:47|67)(?:\r\n|[ \t\r\n\f])?|\\G|\\g', + 'H': r'H|h|\\0{0,4}(?:48|68)(?:\r\n|[ \t\r\n\f])?|\\H|\\h', + 'I': r'I|i|\\0{0,4}(?:49|69)(?:\r\n|[ \t\r\n\f])?|\\I|\\i', + 'K': r'K|k|\\0{0,4}(?:4b|6b)(?:\r\n|[ \t\r\n\f])?|\\K|\\k', + 'L': r'L|l|\\0{0,4}(?:4c|6c)(?:\r\n|[ \t\r\n\f])?|\\L|\\l', + 'M': r'M|m|\\0{0,4}(?:4d|6d)(?:\r\n|[ \t\r\n\f])?|\\M|\\m', + 'N': 
r'N|n|\\0{0,4}(?:4e|6e)(?:\r\n|[ \t\r\n\f])?|\\N|\\n', + 'O': r'O|o|\\0{0,4}(?:4f|6f)(?:\r\n|[ \t\r\n\f])?|\\O|\\o', + 'P': r'P|p|\\0{0,4}(?:50|70)(?:\r\n|[ \t\r\n\f])?|\\P|\\p', + 'R': r'R|r|\\0{0,4}(?:52|72)(?:\r\n|[ \t\r\n\f])?|\\R|\\r', + 'S': r'S|s|\\0{0,4}(?:53|73)(?:\r\n|[ \t\r\n\f])?|\\S|\\s', + 'T': r'T|t|\\0{0,4}(?:54|74)(?:\r\n|[ \t\r\n\f])?|\\T|\\t', + 'U': r'U|u|\\0{0,4}(?:55|75)(?:\r\n|[ \t\r\n\f])?|\\U|\\u', + 'X': r'X|x|\\0{0,4}(?:58|78)(?:\r\n|[ \t\r\n\f])?|\\X|\\x', + 'Z': r'Z|z|\\0{0,4}(?:5a|7a)(?:\r\n|[ \t\r\n\f])?|\\Z|\\z', + } + +# The following productions are the complete list of tokens +# used by cssutils, a mix of CSS3 and some CSS2.1 productions. +# The productions are **ordered**: +PRODUCTIONS = [ + ('BOM', r'\xFEFF'), # will only be checked at beginning of CSS + + ('S', r'{wc}+'), # 1st in list of general productions + ('URI', r'{U}{R}{L}\({w}({string}|{urlchar}*){w}\)'), + ('FUNCTION', r'{ident}\('), + ('IDENT', r'{ident}'), + ('STRING', r'{string}'), + ('INVALID', r'{invalid}'), # from CSS2.1 + ('HASH', r'\#{name}'), + ('PERCENTAGE', r'{num}\%'), + ('DIMENSION', r'{num}{ident}'), + ('NUMBER', r'{num}'), + # valid ony at start so not checked everytime + #('CHARSET_SYM', r'@charset '), # from Errata includes ending space! + ('ATKEYWORD', r'@{ident}'), # other keywords are done in the tokenizer + #('UNICODE-RANGE', r'[0-9A-F?]{1,6}(\-[0-9A-F]{1,6})?'), #??? 
class _ErrorHandler(object):
    """
    Central error and log message handler.

    Calls to ``debug``, ``info``, ``warn``, ``error``, ``critical`` and
    ``fatal`` either raise the given error or write the message to the
    wrapped log, depending on ``raiseExceptions``.
    """
    def __init__(self, log, defaultloglevel=logging.INFO,
                 raiseExceptions=True):
        """
        inits log if none given

        log
            log object used for all messages; if none is given a logger
            named ``CSSUTILS`` writing to sys.stderr is set up
        defaultloglevel
            level of the automatically created log, default is
            logging.INFO
        raiseExceptions
            - True: Errors will be raised e.g. during building
            - False: Errors will be written to the log, this is the
              default behaviour when parsing
        """
        if log:
            self._log = log
        else:
            import sys
            self._log = logging.getLogger('CSSUTILS')
            hdlr = logging.StreamHandler(sys.stderr)
            formatter = logging.Formatter('%(levelname)s\t%(message)s')
            hdlr.setFormatter(formatter)
            self._log.addHandler(hdlr)
            # NOTE(review): level is only applied to the log created here,
            # a log passed in keeps its own level -- confirm intended
            self._log.setLevel(defaultloglevel)

        self.raiseExceptions = raiseExceptions

    def __getattr__(self, name):
        """
        Delegate to the wrapped log.

        Log calls return a handler bound to the requested level, a few
        administrative methods are passed through unchanged, anything
        else raises AttributeError.
        """
        calls = ('debug', 'info', 'warn', 'error', 'critical', 'fatal')
        other = ('setLevel', 'getEffectiveLevel', 'addHandler', 'removeHandler')

        if name in calls:
            # bind the log method to the returned callable instead of
            # parking it on the instance: instance state is shared by all
            # ErrorHandler objects, so a later lookup of another level
            # would otherwise redirect handlers fetched earlier
            logcall = getattr(self._log, name)

            def handle(msg=u'', token=None, error=xml.dom.SyntaxErr,
                       neverraise=False, args=None):
                return self.__handle(logcall, msg, token, error,
                                     neverraise, args)
            return handle
        elif name in other:
            return getattr(self._log, name)
        else:
            raise AttributeError(
                '(errorhandler) No Attribute %r found' % name)

    def __handle(self, logcall, msg=u'', token=None,
                 error=xml.dom.SyntaxErr, neverraise=False, args=None):
        """
        Handle a single call: log ``msg`` or raise ``error``.

        logcall
            method of the wrapped log used if nothing is raised
        msg
            message text
        token
            optional token, its position and value are appended to
            ``msg``; either a tuple (presumably type, value, line, col)
            or an object with ``line``, ``col`` and ``value``
        error
            exception class which is instantiated with ``msg``, or an
            exception instance (e.g. urllib2.URLError) which is raised
            unchanged
        neverraise
            if True the message is always logged
        args
            not used here
        """
        if token:
            if isinstance(token, tuple):
                msg = u'%s [%s:%s: %s]' % (
                    msg, token[2], token[3], token[1])
            else:
                msg = u'%s [%s:%s: %s]' % (
                    msg, token.line, token.col, token.value)

        if error and self.raiseExceptions and not neverraise:
            if isinstance(error, Exception):
                # already an instance (covers urllib2.HTTPError and
                # urllib2.URLError), calling it would fail
                raise error
            else:
                raise error(msg)
        else:
            logcall(msg)

    def setLog(self, log):
        """set log of errorhandler's log"""
        self._log = log

    @Deprecated('Use setLog() instead.')
    def setlog(self, log):
        self.setLog(log)

    @Deprecated('Use setLevel() instead.')
    def setloglevel(self, level):
        self.setLevel(level)


class ErrorHandler(_ErrorHandler):
    """
    Singleton, see _ErrorHandler

    The parameters are only used by the first instantiation, all later
    instances share the state of that first handler.
    """
    instance = None

    def __init__(self,
            log=None, defaultloglevel=logging.INFO, raiseExceptions=True):

        if ErrorHandler.instance is None:
            ErrorHandler.instance = _ErrorHandler(log=log,
                                        defaultloglevel=defaultloglevel,
                                        raiseExceptions=raiseExceptions)
        # share state with the one real handler
        self.__dict__ = ErrorHandler.instance.__dict__
class CSSParser(object):
    """
    parses a CSS StyleSheet string or file and
    returns a DOM Level 2 CSS StyleSheet object

    Usage::

        parser = CSSParser()

        # optionally
        parser.setFetcher(fetcher)

        sheet = parser.parseFile('test1.css', 'ascii')

        print sheet.cssText
    """
    def __init__(self, log=None, loglevel=None, raiseExceptions=None,
                 fetcher=None):
        """
        log
            logging object
        loglevel
            logging loglevel
        raiseExceptions
            if log should simply log (default) or raise errors during
            parsing. Later while working with the resulting sheets
            the setting used in cssutils.log.raiseExceptions is used
        fetcher
            see ``setFetcher(fetcher)``
        """
        if log is not None:
            cssutils.log.setLog(log)
        if loglevel is not None:
            cssutils.log.setLevel(loglevel)

        # remember global setting
        self.__globalRaising = cssutils.log.raiseExceptions
        if raiseExceptions:
            self.__parseRaising = raiseExceptions
        else:
            # DEFAULT during parse
            self.__parseRaising = False

        self.__tokenizer = tokenize2.Tokenizer()
        self.setFetcher(fetcher)

    def __parseSetting(self, parse):
        """during parse exceptions may be handled differently depending on
        init parameter ``raiseExceptions``
        """
        if parse:
            cssutils.log.raiseExceptions = self.__parseRaising
        else:
            cssutils.log.raiseExceptions = self.__globalRaising

    def parseString(self, cssText, encoding=None, href=None, media=None,
                    title=None):
        """Return parsed CSSStyleSheet from given string cssText.
        Raises errors during retrieving (e.g. UnicodeDecodeError).

        cssText
            CSS string to parse
        encoding
            If ``None`` the encoding will be read from BOM or an @charset
            rule or defaults to UTF-8.
            If given overrides any found encoding including the ones for
            imported sheets.
            It also will be used to decode ``cssText`` if given as a (byte)
            string.
        href
            The href attribute to assign to the parsed style sheet.
            Used to resolve other urls in the parsed sheet like @import hrefs
        media
            The media attribute to assign to the parsed style sheet
            (may be a MediaList, list or a string)
        title
            The title attribute to assign to the parsed style sheet
        """
        self.__parseSetting(True)
        try:
            if isinstance(cssText, str):
                cssText = codecs.getdecoder('css')(cssText,
                                                   encoding=encoding)[0]

            sheet = cssutils.css.CSSStyleSheet(href=href,
                                 media=cssutils.stylesheets.MediaList(media),
                                 title=title)
            sheet._setFetcher(self.__fetcher)
            # tokenizing this ways closes open constructs and adds EOF
            sheet._setCssTextWithEncodingOverride(
                self.__tokenizer.tokenize(cssText, fullsheet=True),
                encodingOverride=encoding)
        finally:
            # always restore the global setting, also if decoding or
            # parsing raised, else cssutils.log would keep the parse
            # time behaviour
            self.__parseSetting(False)
        return sheet

    def parseFile(self, filename, encoding=None,
                  href=None, media=None, title=None):
        """Retrieve and return a CSSStyleSheet from given filename.
        Raises errors during retrieving (e.g. IOError).

        filename
            of the CSS file to parse, if no ``href`` is given filename is
            converted to a (file:) URL and set as ``href`` of resulting
            stylesheet.
            If href is given it is set as ``sheet.href``. Either way
            ``sheet.href`` is used to resolve e.g. stylesheet imports via
            @import rules.
        encoding
            Value ``None`` defaults to encoding detection via BOM or an
            @charset rule.
            Other values override detected encoding for the sheet at
            ``filename`` including any imported sheets.

        for other parameters see ``parseString``
        """
        if not href:
            # prepend // for file URL, urllib does not do this?
            href = u'file:' + urllib.pathname2url(os.path.abspath(filename))

        # close the file explicitly instead of relying on garbage
        # collection (try/finally keeps this usable without ``with``)
        f = open(filename, 'rb')
        try:
            cssText = f.read()  # read returns a str
        finally:
            f.close()

        return self.parseString(cssText, encoding=encoding,
                                href=href, media=media, title=title)

    def parseUrl(self, href, encoding=None, media=None, title=None):
        """Retrieve and return a CSSStyleSheet from given href (an URL).
        In case of any errors while reading the URL returns None.

        href
            URL of the CSS file to parse, will also be set as ``href`` of
            resulting stylesheet
        encoding
            Value ``None`` defaults to encoding detection via HTTP, BOM or an
            @charset rule.
            A value overrides detected encoding for the sheet at ``href``
            including any imported sheets.

        for other parameters see ``parseString``
        """
        encoding, enctype, text = cssutils.util._readUrl(href,
                                                overrideEncoding=encoding)
        if enctype == 5:
            # do not use the encoding if it is only the UTF-8 default
            encoding = None

        if text is not None:
            return self.parseString(text, encoding=encoding,
                                    href=href, media=media, title=title)
        return None

    def setFetcher(self, fetcher=None):
        """Replace the default URL fetch function with a custom one.
        The fetcher function gets a single parameter

        ``url``
            the URL to read

        and returns ``(encoding, content)`` where ``encoding`` is the HTTP
        charset normally given via the Content-Type header (which may simply
        omit the charset) and ``content`` being the (byte) string content.
        The Mimetype should be 'text/css' but this has to be checked by the
        fetcher itself (the default fetcher emits a warning if encountering
        a different mimetype).

        Calling ``setFetcher`` with ``fetcher=None`` resets cssutils
        to use its default function.
        """
        self.__fetcher = fetcher

    @Deprecated('Use cssutils.CSSParser().parseFile() instead.')
    def parse(self, filename, encoding=None,
              href=None, media=None, title=None):
        """Deprecated alias of ``parseFile``, returns the parsed sheet."""
        # the result must be handed back, without ``return`` callers of
        # the deprecated name always got None
        return self.parseFile(filename, encoding, href, media, title)