calibre/src/cssutils/prodparser.py
Kovid Goyal fc934f1f66 IGN:...
2008-09-19 10:01:15 -07:00

403 lines
13 KiB
Python

# -*- coding: utf-8 -*-
"""Productions parser used by css and stylesheets classes to parse
text into a cssutils.util.Seq and at the same time retrieving
additional specific cssutils.util.Item objects for later use.
TODO:
- ProdsParser
- handle EOF or STOP?
- handle unknown @rules
- handle S: maybe save to Seq? parameterized?
- store['_raw']: always?
- Sequence:
- opt first(), naive impl for now
"""
# Names exported by ``from <module> import *``.
__all__ = ['ProdParser', 'Sequence', 'Choice', 'Prod', 'PreDef']
# Markup language used by the docstrings of this module.
__docformat__ = 'restructuredtext'
# Version-control keyword string.
# NOTE(review): the keyword names ``parse.py`` -- presumably this module's
# earlier file name; confirm before relying on it.
__version__ = '$Id: parse.py 1418 2008-08-09 19:27:50Z cthedot $'
import cssutils
class ParseError(Exception):
    """Root of the exceptions raised while running productions.

    Used internally by ProdParser; the other exceptions of this module
    derive from it.
    """
class Exhausted(ParseError):
    """Signals that a Sequence or Choice is already done."""
class NoMatch(ParseError):
    """Signals that a Sequence or Choice does not match the token."""
class MissingToken(ParseError):
    """Signals that a Sequence or Choice is not exhausted yet.

    Raised when no token is available for a production that is not
    optional.
    """
class Choice(object):
    """A Choice of productions (Sequence or single Prod).

    The alternatives are tried in the given order; as soon as one of them
    matches, the Choice is marked exhausted.
    """

    def __init__(self, prods):
        """
        prods
            Prod or Sequence objects
        """
        self._prods = prods
        self._exhausted = False

    def nextProd(self, token):
        """
        Return the first alternative matching ``token``.

        Return:

        - next matching Prod or Sequence
        - raises Exhausted if choice already done
        - raises MissingToken if ``token`` is None while no alternative
          has been chosen yet
        - raises ParseError if nothing matches

        ``token`` may be None but this occurs when no tokens left.
        """
        if self._exhausted:
            raise Exhausted(u'Extra token')

        if token is None:
            # No tokens left although one alternative is still required.
            # Without this guard the None token would reach Prod.matches
            # which unpacks it and fails with a TypeError, i.e. not with
            # a ParseError.
            raise MissingToken(u'Missing token for choice')

        for x in self._prods:
            if isinstance(x, Prod):
                test = x
            else:
                # nested Sequence matches if 1st prod matches
                test = x.first()
                if test is None:
                    # Sequence.first() found no non-optional Prod, so
                    # there is nothing this alternative can be tested with
                    continue
            try:
                if test.matches(token):
                    self._exhausted = True
                    return x
            except ParseError:
                # do not raise if another alternative may match
                continue

        # None matched
        raise ParseError(u'No match in choice')
class Sequence(object):
    """A Sequence of productions (Choice or single Prod).

    Keeps a position inside the list of productions and a 1 based round
    counter; the sequence may be run up to ``max`` rounds.
    """

    def __init__(self, prods, minmax=None):
        """
        prods
            Prod or Sequence objects
        minmax = lambda: (1, 1)
            callback returning number of times this sequence may run
        """
        self._prods = prods
        # without a callback the sequence runs exactly once
        self._min, self._max = minmax() if minmax else (1, 1)
        self._number = len(self._prods)
        self._round = 1  # 1 based!
        self._pos = 0

    def first(self):
        """Return 1st non-optional element of Sequence, used by Choice.

        Returns None if every element is optional.
        """
        # TODO: current impl first only if 1st if an prod!
        required = (candidate for candidate in self._prods
                    if not candidate.optional)
        for candidate in required:
            return candidate
        return None

    def _currentName(self):
        """Return name of the next non-optional element at or after the
        current position, ``'Unknown'`` if there is none."""
        # TODO: current impl first only if 1st if an prod!
        for candidate in self._prods[self._pos:]:
            if candidate.optional:
                continue
            return candidate.name
        return 'Unknown'

    name = property(_currentName, doc='Used for Error reporting')

    def nextProd(self, token):
        """Return

        - next matching Prod, or the nested Sequence or Choice found at
          the current position
        - raises NoMatch if a non-optional Prod does not match ``token``
        - raises MissingToken if there is no token for a non-optional
          Prod
        - raises Exhausted if sequence already done or there is no token
          and nothing more is required
        """
        while self._pos < self._number:
            current = self._prods[self._pos]
            # round this element belongs to, the counter may advance below
            roundOfCurrent = self._round

            self._pos += 1
            if self._pos == self._number and self._round < self._max:
                # new round?
                self._pos = 0
                self._round += 1

            if not isinstance(current, Prod):
                # nested Sequence or Choice
                return current

            if not token:
                if current.optional or roundOfCurrent > self._min:
                    # token is None if nothing expected
                    raise Exhausted()
                raise MissingToken(u'Missing token for production %s'
                                   % current.name)

            if current.matches(token):
                return current

            if not current.optional:
                # should have matched
                raise NoMatch(u'No matching production for token')
            # optional Prod which did not match: try next

        # Sequence is exhausted
        if self._round >= self._max:
            raise Exhausted(u'Extra token')
class Prod(object):
    """Single Prod in Sequence or Choice."""

    def __init__(self, name, match, toSeq=None, toStore=None,
                 optional=False):
        """
        name
            name used for error reporting
        match callback
            function called with parameters tokentype and tokenvalue
            returning True, False or raising ParseError
        toSeq callback (optional)
            if given calling toSeq(value) will be appended to seq
            else simply the value itself
        toStore (optional)
            key to save util.Item to store or callback(store, util.Item)
        optional = False
            whether Prod is optional or not
        """
        self.name = name
        self.match = match
        self.optional = optional

        def makeToStore(key):
            "Return a function used by toStore."
            def toStore(store, item):
                "Set or append store item."
                if key not in store:
                    store[key] = item
                    return
                current = store[key]
                if not hasattr(current, 'append'):
                    # The key holds a single item set by an earlier call:
                    # promote it to a list so the new item can be added
                    # instead of failing on ``item.append``.
                    current = store[key] = [current]
                current.append(item)
            return toStore

        if toSeq:
            # called: seq.append(toSeq(value))
            self.toSeq = toSeq
        else:
            # identity: the value is used unchanged
            self.toSeq = lambda val: val

        if callable(toStore):
            self.toStore = toStore
        elif toStore:
            # a key was given, build the set-or-append callback for it
            self.toStore = makeToStore(toStore)
        else:
            # always set!
            self.toStore = None

    def matches(self, token):
        """Return if token matches.

        ``token`` is a tuple ``(type, value, line, col)``; only type and
        value are handed to the ``match`` callback. ``None`` (no tokens
        left) never matches.
        """
        if token is None:
            return False
        type_, val, line, col = token
        return self.match(type_, val)

    def __repr__(self):
        return "<cssutils.prodsparser.%s object name=%r at 0x%x>" % (
                self.__class__.__name__, self.name, id(self))
class ProdParser(object):
    """Productions parser.

    Runs a stack of productions (Sequence, Choice, Prod) over a stream of
    tokens and appends the matched tokens to a cssutils.util.Seq.
    """

    def __init__(self):
        self.types = cssutils.cssproductions.CSSProductions
        self._log = cssutils.log
        self._tokenizer = cssutils.tokenize2.Tokenizer()

    def parse(self, text, name, productions, store=None):
        """
        text (or token generator)
            to parse, will be tokenized if not a generator yet

            may be:
            - a string to be tokenized
            - a single token, a tuple
            - a tuple of (token, tokensGenerator)
            - already tokenized so a tokens generator

        name
            used for logging
        productions
            used to parse tokens
        store UPDATED
            If a Prod defines ``toStore`` the key defined there
            is a key in store to be set or if store[key] is a list
            the next Item is appended here.

            A dict given by the caller is updated in place (also if it
            is empty); a new dict is only created if ``store`` is None.

            TODO: NEEDED? :
            Key ``raw`` is always added and holds all unprocessed
            values found

        returns
            :wellformed: True or False
            :seq: a filled cssutils.util.Seq object which is NOT readonly yet
            :store: filled keys defined by Prod.toStore
            :unusedtokens: token generator containing tokens not used yet
        """
        if isinstance(text, basestring):
            # to tokenize
            tokens = self._tokenizer.tokenize(text)
        elif isinstance(text, tuple):
            # (token, tokens) or a single token
            if len(text) == 2:
                # (token, tokens)
                def gen(token, tokens):
                    "new generator yielding token and then all of tokens"
                    yield token
                    for t in tokens:
                        yield t

                tokens = gen(*text)
            else:
                # single token
                tokens = [text]
        else:
            # already tokenized, assume generator
            tokens = text

        # a new seq to append all Items to
        seq = cssutils.util.Seq(readonly=False)

        # store for specific values
        if store is None:
            store = {}
        # TODO: store['_raw'] = [] and collect every val there?

        # stack of productions
        prods = [productions]

        wellformed = True
        for token in tokens:
            type_, val, line, col = token

            # default productions
            if type_ == self.types.S:
                # always append S?
                seq.append(val, type_, line, col)
            elif type_ == self.types.COMMENT:
                # always append COMMENT
                seq.append(val, type_, line, col)
            # TODO: handle ATKEYWORD, i.e. unknown @rules
            elif type_ == self.types.EOF:
                # do nothing
                pass
            else:
                # check prods
                try:
                    while True:
                        # find next matching production
                        try:
                            prod = prods[-1].nextProd(token)
                        except (NoMatch, Exhausted):
                            # try next
                            prod = None

                        if isinstance(prod, Prod):
                            break
                        elif not prod:
                            if len(prods) > 1:
                                # nested exhausted, next in parent
                                prods.pop()
                            else:
                                raise Exhausted('Extra token')
                        else:
                            # nested Sequence, Choice
                            prods.append(prod)

                except ParseError as e:
                    wellformed = False
                    self._log.error(u'%s: %s: %r' % (name, e, token))

                else:
                    # process prod
                    if prod.toSeq:
                        seq.append(prod.toSeq(val), type_, line, col)
                    else:
                        seq.append(val, type_, line, col)

                    if prod.toStore:
                        prod.toStore(store, seq[-1])

            # TODO: stop at a 'STOP' marker (EOF?) and ignore following
            # tokens?

        while True:
            # all productions exhausted?
            try:
                prod = prods[-1].nextProd(token=None)
            except Exhausted:
                prod = None  # ok
            except (MissingToken, NoMatch) as e:
                wellformed = False
                self._log.error(u'%s: %s'
                                % (name, e))
                # Nothing was returned: reset ``prod`` so the check below
                # neither reports a stale production left over from the
                # token loop nor hits an unbound name when no production
                # has been requested before (e.g. empty input).
                prod = None
            else:
                if getattr(prod, 'optional', False):
                    # ignore optional ones
                    continue

            if prod:
                wellformed = False
                # nested productions may not provide a usable ``name``
                # (Choice defines none), fall back to the object itself
                self._log.error(u'%s: Missing token for production %r'
                                % (name, getattr(prod, 'name', prod)))
            elif len(prods) > 1:
                # nested exhausted, next in parent
                prods.pop()
            else:
                break

        # bool, Seq, None or generator
        return wellformed, seq, store, tokens
class PreDef(object):
    """Predefined Prod definition for use in productions definition
    for ProdParser instances.

    Every method returns a new Prod; the ``match`` callbacks only look at
    the token value and ignore the token type.
    """

    @staticmethod
    def comma():
        """Return a Prod matching the value ``,``."""
        return Prod(name=u'comma', match=lambda t, v: v == u',')

    @staticmethod
    def funcEnd():
        """Return a Prod matching the value ``)``, the end of a function."""
        return Prod(name=u'end FUNC ")"', match=lambda t, v: v == u')')

    @staticmethod
    def unary():
        """Return an optional Prod matching the value ``+`` or ``-``."""
        # Compare against the two signs explicitly: ``v in u'+-'`` is a
        # substring test which also accepts u'' and u'+-'.
        return Prod(name=u'unary +-', match=lambda t, v: v in (u'+', u'-'),
                    optional=True)