Update pyparsing to 1.5.6

This commit is contained in:
Kovid Goyal 2011-09-15 00:29:32 -06:00
parent 2e994081ed
commit 388968cd88

View File

@ -1,6 +1,6 @@
# module pyparsing.py # module pyparsing.py
# #
# Copyright (c) 2003-2010 Paul T. McGuire # Copyright (c) 2003-2011 Paul T. McGuire
# #
# Permission is hereby granted, free of charge, to any person obtaining # Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the # a copy of this software and associated documentation files (the
@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments - embedded comments
""" """
__version__ = "1.5.5" __version__ = "1.5.6"
__versionTime__ = "12 Aug 2010 03:56" __versionTime__ = "26 June 2011 10:53"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string import string
@ -101,11 +101,12 @@ if _PY3K:
basestring = str basestring = str
unichr = chr unichr = chr
_ustr = str _ustr = str
_str2dict = set
alphas = string.ascii_lowercase + string.ascii_uppercase alphas = string.ascii_lowercase + string.ascii_uppercase
else: else:
_MAX_INT = sys.maxint _MAX_INT = sys.maxint
range = xrange range = xrange
set = lambda s : dict( [(c,0) for c in s] )
alphas = string.lowercase + string.uppercase
def _ustr(obj): def _ustr(obj):
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
@ -134,9 +135,6 @@ else:
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace') #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
# ... # ...
def _str2dict(strg):
return dict( [(c,0) for c in strg] )
alphas = string.lowercase + string.uppercase alphas = string.lowercase + string.uppercase
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
@ -606,10 +604,10 @@ class ParseResults(object):
def __setstate__(self,state): def __setstate__(self,state):
self.__toklist = state[0] self.__toklist = state[0]
self.__tokdict, \ (self.__tokdict,
par, \ par,
inAccumNames, \ inAccumNames,
self.__name = state[1] self.__name) = state[1]
self.__accumNames = {} self.__accumNames = {}
self.__accumNames.update(inAccumNames) self.__accumNames.update(inAccumNames)
if par is not None: if par is not None:
@ -667,6 +665,35 @@ def nullDebugAction(*args):
"""'Do-nothing' debug action, to suppress debugging output during parsing.""" """'Do-nothing' debug action, to suppress debugging output during parsing."""
pass pass
'decorator to trim function calls to match the arity of the target'
if not _PY3K:
def _trim_arity(func, maxargs=2):
limit = [0]
def wrapper(*args):
while 1:
try:
return func(*args[limit[0]:])
except TypeError:
if limit[0] <= maxargs:
limit[0] += 1
continue
raise
return wrapper
else:
def _trim_arity(func, maxargs=2):
limit = maxargs
def wrapper(*args):
#~ nonlocal limit
while 1:
try:
return func(*args[limit:])
except TypeError:
if limit:
limit -= 1
continue
raise
return wrapper
class ParserElement(object): class ParserElement(object):
"""Abstract base level parser element class.""" """Abstract base level parser element class."""
DEFAULT_WHITE_CHARS = " \n\t\r" DEFAULT_WHITE_CHARS = " \n\t\r"
@ -731,6 +758,9 @@ class ParserElement(object):
see L{I{__call__}<__call__>}. see L{I{__call__}<__call__>}.
""" """
newself = self.copy() newself = self.copy()
if name.endswith("*"):
name = name[:-1]
listAllMatches=True
newself.resultsName = name newself.resultsName = name
newself.modalResults = not listAllMatches newself.modalResults = not listAllMatches
return newself return newself
@ -753,104 +783,6 @@ class ParserElement(object):
self._parse = self._parse._originalParseMethod self._parse = self._parse._originalParseMethod
return self return self
def _normalizeParseActionArgs( f ):
"""Internal method used to decorate parse actions that take fewer than 3 arguments,
so that all parse actions can be called as C{f(s,l,t)}."""
STAR_ARGS = 4
# special handling for single-argument builtins
if (f in singleArgBuiltins):
numargs = 1
else:
try:
restore = None
if isinstance(f,type):
restore = f
f = f.__init__
if not _PY3K:
codeObj = f.func_code
else:
codeObj = f.code
if codeObj.co_flags & STAR_ARGS:
return f
numargs = codeObj.co_argcount
if not _PY3K:
if hasattr(f,"im_self"):
numargs -= 1
else:
if hasattr(f,"__self__"):
numargs -= 1
if restore:
f = restore
except AttributeError:
try:
if not _PY3K:
call_im_func_code = f.__call__.im_func.func_code
else:
call_im_func_code = f.__code__
# not a function, must be a callable object, get info from the
# im_func binding of its bound __call__ method
if call_im_func_code.co_flags & STAR_ARGS:
return f
numargs = call_im_func_code.co_argcount
if not _PY3K:
if hasattr(f.__call__,"im_self"):
numargs -= 1
else:
if hasattr(f.__call__,"__self__"):
numargs -= 0
except AttributeError:
if not _PY3K:
call_func_code = f.__call__.func_code
else:
call_func_code = f.__call__.__code__
# not a bound method, get info directly from __call__ method
if call_func_code.co_flags & STAR_ARGS:
return f
numargs = call_func_code.co_argcount
if not _PY3K:
if hasattr(f.__call__,"im_self"):
numargs -= 1
else:
if hasattr(f.__call__,"__self__"):
numargs -= 1
#~ print ("adding function %s with %d args" % (f.func_name,numargs))
if numargs == 3:
return f
else:
if numargs > 3:
def tmp(s,l,t):
return f(f.__call__.__self__, s,l,t)
if numargs == 2:
def tmp(s,l,t):
return f(l,t)
elif numargs == 1:
def tmp(s,l,t):
return f(t)
else: #~ numargs == 0:
def tmp(s,l,t):
return f()
try:
tmp.__name__ = f.__name__
except (AttributeError,TypeError):
# no need for special handling if attribute doesnt exist
pass
try:
tmp.__doc__ = f.__doc__
except (AttributeError,TypeError):
# no need for special handling if attribute doesnt exist
pass
try:
tmp.__dict__.update(f.__dict__)
except (AttributeError,TypeError):
# no need for special handling if attribute doesnt exist
pass
return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
def setParseAction( self, *fns, **kwargs ): def setParseAction( self, *fns, **kwargs ):
"""Define action to perform when successfully matching parse element definition. """Define action to perform when successfully matching parse element definition.
Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
@ -868,13 +800,13 @@ class ParserElement(object):
consistent view of the parsed string, the parse location, and line and column consistent view of the parsed string, the parse location, and line and column
positions within the parsed string. positions within the parsed string.
""" """
self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) self.parseAction = list(map(_trim_arity, list(fns)))
self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
return self return self
def addParseAction( self, *fns, **kwargs ): def addParseAction( self, *fns, **kwargs ):
"""Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) self.parseAction += list(map(_trim_arity, list(fns)))
self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
return self return self
@ -1012,9 +944,9 @@ class ParserElement(object):
lookup = (self,instring,loc,callPreParse,doActions) lookup = (self,instring,loc,callPreParse,doActions)
if lookup in ParserElement._exprArgCache: if lookup in ParserElement._exprArgCache:
value = ParserElement._exprArgCache[ lookup ] value = ParserElement._exprArgCache[ lookup ]
if isinstance(value,Exception): if isinstance(value, Exception):
raise value raise value
return value return (value[0],value[1].copy())
else: else:
try: try:
value = self._parseNoCache( instring, loc, doActions, callPreParse ) value = self._parseNoCache( instring, loc, doActions, callPreParse )
@ -1088,8 +1020,8 @@ class ParserElement(object):
try: try:
loc, tokens = self._parse( instring, 0 ) loc, tokens = self._parse( instring, 0 )
if parseAll: if parseAll:
#loc = self.preParse( instring, loc ) loc = self.preParse( instring, loc )
se = StringEnd() se = Empty() + StringEnd()
se._parse( instring, loc ) se._parse( instring, loc )
except ParseBaseException: except ParseBaseException:
if ParserElement.verbose_stacktrace: if ParserElement.verbose_stacktrace:
@ -1101,10 +1033,11 @@ class ParserElement(object):
else: else:
return tokens return tokens
def scanString( self, instring, maxMatches=_MAX_INT ): def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
"""Scan the input string for expression matches. Each match will return the """Scan the input string for expression matches. Each match will return the
matching tokens, start location, and end location. May be called with optional matching tokens, start location, and end location. May be called with optional
C{maxMatches} argument, to clip scanning after 'n' matches are found. C{maxMatches} argument, to clip scanning after 'n' matches are found. If
C{overlap} is specified, then overlapping matches will be reported.
Note that the start and end locations are reported relative to the string Note that the start and end locations are reported relative to the string
being parsed. See L{I{parseString}<parseString>} for more information on parsing being parsed. See L{I{parseString}<parseString>} for more information on parsing
@ -1133,7 +1066,14 @@ class ParserElement(object):
if nextLoc > loc: if nextLoc > loc:
matches += 1 matches += 1
yield tokens, preloc, nextLoc yield tokens, preloc, nextLoc
loc = nextLoc if overlap:
nextloc = preparseFn( instring, loc )
if nextloc > loc:
loc = nextLoc
else:
loc += 1
else:
loc = nextLoc
else: else:
loc = preloc+1 loc = preloc+1
except ParseBaseException: except ParseBaseException:
@ -1168,6 +1108,7 @@ class ParserElement(object):
out.append(t) out.append(t)
lastE = e lastE = e
out.append(instring[lastE:]) out.append(instring[lastE:])
out = [o for o in out if o]
return "".join(map(_ustr,_flatten(out))) return "".join(map(_ustr,_flatten(out)))
except ParseBaseException: except ParseBaseException:
if ParserElement.verbose_stacktrace: if ParserElement.verbose_stacktrace:
@ -1372,6 +1313,9 @@ class ParserElement(object):
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
could be written as:: could be written as::
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
passed as C{True}.
""" """
return self.setResultsName(name) return self.setResultsName(name)
@ -1398,9 +1342,9 @@ class ParserElement(object):
return self return self
def parseWithTabs( self ): def parseWithTabs( self ):
"""Overrides default behavior to expand <TAB>s to spaces before parsing the input string. """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
Must be called before C{parseString} when the input grammar contains elements that Must be called before C{parseString} when the input grammar contains elements that
match <TAB> characters.""" match C{<TAB>} characters."""
self.keepTabs = True self.keepTabs = True
return self return self
@ -1508,12 +1452,10 @@ class Token(ParserElement):
"""Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
def __init__( self ): def __init__( self ):
super(Token,self).__init__( savelist=False ) super(Token,self).__init__( savelist=False )
#self.myException = ParseException("",0,"",self)
def setName(self, name): def setName(self, name):
s = super(Token,self).setName(name) s = super(Token,self).setName(name)
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
#s.myException.msg = self.errmsg
return s return s
@ -1534,7 +1476,6 @@ class NoMatch(Token):
self.mayReturnEmpty = True self.mayReturnEmpty = True
self.mayIndexError = False self.mayIndexError = False
self.errmsg = "Unmatchable token" self.errmsg = "Unmatchable token"
#self.myException.msg = self.errmsg
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
exc = self.myException exc = self.myException
@ -1558,7 +1499,6 @@ class Literal(Token):
self.name = '"%s"' % _ustr(self.match) self.name = '"%s"' % _ustr(self.match)
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
self.mayReturnEmpty = False self.mayReturnEmpty = False
#self.myException.msg = self.errmsg
self.mayIndexError = False self.mayIndexError = False
# Performance tuning: this routine gets called a *lot* # Performance tuning: this routine gets called a *lot*
@ -1579,12 +1519,12 @@ _L = Literal
class Keyword(Token): class Keyword(Token):
"""Token to exactly match a specified string as a keyword, that is, it must be """Token to exactly match a specified string as a keyword, that is, it must be
immediately followed by a non-keyword character. Compare with C{Literal}:: immediately followed by a non-keyword character. Compare with C{Literal}::
Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
Accepts two optional constructor arguments in addition to the keyword string: Accepts two optional constructor arguments in addition to the keyword string:
C{identChars} is a string of characters that would be valid identifier characters, C{identChars} is a string of characters that would be valid identifier characters,
defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
matching, default is False. matching, default is C{False}.
""" """
DEFAULT_KEYWORD_CHARS = alphanums+"_$" DEFAULT_KEYWORD_CHARS = alphanums+"_$"
@ -1600,13 +1540,12 @@ class Keyword(Token):
self.name = '"%s"' % self.match self.name = '"%s"' % self.match
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
self.mayReturnEmpty = False self.mayReturnEmpty = False
#self.myException.msg = self.errmsg
self.mayIndexError = False self.mayIndexError = False
self.caseless = caseless self.caseless = caseless
if caseless: if caseless:
self.caselessmatch = matchString.upper() self.caselessmatch = matchString.upper()
identChars = identChars.upper() identChars = identChars.upper()
self.identChars = _str2dict(identChars) self.identChars = set(identChars)
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
if self.caseless: if self.caseless:
@ -1648,7 +1587,6 @@ class CaselessLiteral(Literal):
self.returnString = matchString self.returnString = matchString
self.name = "'%s'" % self.returnString self.name = "'%s'" % self.returnString
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
#self.myException.msg = self.errmsg
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
if instring[ loc:loc+self.matchLen ].upper() == self.match: if instring[ loc:loc+self.matchLen ].upper() == self.match:
@ -1680,18 +1618,25 @@ class Word(Token):
defaults to the initial character set), and an optional minimum, defaults to the initial character set), and an optional minimum,
maximum, and/or exact length. The default value for C{min} is 1 (a maximum, and/or exact length. The default value for C{min} is 1 (a
minimum value < 1 is not valid); the default values for C{max} and C{exact} minimum value < 1 is not valid); the default values for C{max} and C{exact}
are 0, meaning no maximum or exact length restriction. are 0, meaning no maximum or exact length restriction. An optional
C{exclude} parameter can list characters that might be found in
the input C{bodyChars} string; useful to define a word of all printables
except for one or two characters, for instance.
""" """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
super(Word,self).__init__() super(Word,self).__init__()
if excludeChars:
initChars = ''.join([c for c in initChars if c not in excludeChars])
if bodyChars:
bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
self.initCharsOrig = initChars self.initCharsOrig = initChars
self.initChars = _str2dict(initChars) self.initChars = set(initChars)
if bodyChars : if bodyChars :
self.bodyCharsOrig = bodyChars self.bodyCharsOrig = bodyChars
self.bodyChars = _str2dict(bodyChars) self.bodyChars = set(bodyChars)
else: else:
self.bodyCharsOrig = initChars self.bodyCharsOrig = initChars
self.bodyChars = _str2dict(initChars) self.bodyChars = set(initChars)
self.maxSpecified = max > 0 self.maxSpecified = max > 0
@ -1711,7 +1656,6 @@ class Word(Token):
self.name = _ustr(self) self.name = _ustr(self)
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
#self.myException.msg = self.errmsg
self.mayIndexError = False self.mayIndexError = False
self.asKeyword = asKeyword self.asKeyword = asKeyword
@ -1743,7 +1687,7 @@ class Word(Token):
raise exc raise exc
loc = result.end() loc = result.end()
return loc,result.group() return loc, result.group()
if not(instring[ loc ] in self.initChars): if not(instring[ loc ] in self.initChars):
#~ raise ParseException( instring, loc, self.errmsg ) #~ raise ParseException( instring, loc, self.errmsg )
@ -1807,24 +1751,24 @@ class Regex(Token):
""" """
compiledREtype = type(re.compile("[A-Z]")) compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0): def __init__( self, pattern, flags=0):
"""The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
super(Regex,self).__init__() super(Regex,self).__init__()
if isinstance(pattern, basestring): if isinstance(pattern, basestring):
if len(pattern) == 0: if len(pattern) == 0:
warnings.warn("null string passed to Regex; use Empty() instead", warnings.warn("null string passed to Regex; use Empty() instead",
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
self.pattern = pattern self.pattern = pattern
self.flags = flags self.flags = flags
try: try:
self.re = re.compile(self.pattern, self.flags) self.re = re.compile(self.pattern, self.flags)
self.reString = self.pattern self.reString = self.pattern
except sre_constants.error: except sre_constants.error:
warnings.warn("invalid pattern (%s) passed to Regex" % pattern, warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
raise raise
elif isinstance(pattern, Regex.compiledREtype): elif isinstance(pattern, Regex.compiledREtype):
self.re = pattern self.re = pattern
@ -1837,7 +1781,6 @@ class Regex(Token):
self.name = _ustr(self) self.name = _ustr(self)
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
#self.myException.msg = self.errmsg
self.mayIndexError = False self.mayIndexError = False
self.mayReturnEmpty = True self.mayReturnEmpty = True
@ -1929,7 +1872,8 @@ class QuotedString(Token):
self.pattern += (r'|(?:%s)' % re.escape(escQuote)) self.pattern += (r'|(?:%s)' % re.escape(escQuote))
if escChar: if escChar:
self.pattern += (r'|(?:%s.)' % re.escape(escChar)) self.pattern += (r'|(?:%s.)' % re.escape(escChar))
self.escCharReplacePattern = re.escape(self.escChar)+"(.)" charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
try: try:
@ -1942,7 +1886,6 @@ class QuotedString(Token):
self.name = _ustr(self) self.name = _ustr(self)
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
#self.myException.msg = self.errmsg
self.mayIndexError = False self.mayIndexError = False
self.mayReturnEmpty = True self.mayReturnEmpty = True
@ -2014,7 +1957,6 @@ class CharsNotIn(Token):
self.name = _ustr(self) self.name = _ustr(self)
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
self.mayReturnEmpty = ( self.minLen == 0 ) self.mayReturnEmpty = ( self.minLen == 0 )
#self.myException.msg = self.errmsg
self.mayIndexError = False self.mayIndexError = False
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
@ -2077,7 +2019,6 @@ class White(Token):
self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
self.mayReturnEmpty = True self.mayReturnEmpty = True
self.errmsg = "Expected " + self.name self.errmsg = "Expected " + self.name
#self.myException.msg = self.errmsg
self.minLen = min self.minLen = min
@ -2150,7 +2091,6 @@ class LineStart(_PositionToken):
super(LineStart,self).__init__() super(LineStart,self).__init__()
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected start of line" self.errmsg = "Expected start of line"
#self.myException.msg = self.errmsg
def preParse( self, instring, loc ): def preParse( self, instring, loc ):
preloc = super(LineStart,self).preParse(instring,loc) preloc = super(LineStart,self).preParse(instring,loc)
@ -2175,7 +2115,6 @@ class LineEnd(_PositionToken):
super(LineEnd,self).__init__() super(LineEnd,self).__init__()
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected end of line" self.errmsg = "Expected end of line"
#self.myException.msg = self.errmsg
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
if loc<len(instring): if loc<len(instring):
@ -2200,7 +2139,6 @@ class StringStart(_PositionToken):
def __init__( self ): def __init__( self ):
super(StringStart,self).__init__() super(StringStart,self).__init__()
self.errmsg = "Expected start of text" self.errmsg = "Expected start of text"
#self.myException.msg = self.errmsg
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
if loc != 0: if loc != 0:
@ -2218,7 +2156,6 @@ class StringEnd(_PositionToken):
def __init__( self ): def __init__( self ):
super(StringEnd,self).__init__() super(StringEnd,self).__init__()
self.errmsg = "Expected end of text" self.errmsg = "Expected end of text"
#self.myException.msg = self.errmsg
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
if loc < len(instring): if loc < len(instring):
@ -2239,14 +2176,14 @@ class StringEnd(_PositionToken):
class WordStart(_PositionToken): class WordStart(_PositionToken):
"""Matches if the current position is at the beginning of a Word, and """Matches if the current position is at the beginning of a Word, and
is not preceded by any character in a given set of wordChars is not preceded by any character in a given set of C{wordChars}
(default=C{printables}). To emulate the C{\b} behavior of regular expressions, (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
the string being parsed, or at the beginning of a line. the string being parsed, or at the beginning of a line.
""" """
def __init__(self, wordChars = printables): def __init__(self, wordChars = printables):
super(WordStart,self).__init__() super(WordStart,self).__init__()
self.wordChars = _str2dict(wordChars) self.wordChars = set(wordChars)
self.errmsg = "Not at the start of a word" self.errmsg = "Not at the start of a word"
def parseImpl(self, instring, loc, doActions=True ): def parseImpl(self, instring, loc, doActions=True ):
@ -2261,14 +2198,14 @@ class WordStart(_PositionToken):
class WordEnd(_PositionToken): class WordEnd(_PositionToken):
"""Matches if the current position is at the end of a Word, and """Matches if the current position is at the end of a Word, and
is not followed by any character in a given set of wordChars is not followed by any character in a given set of C{wordChars}
(default=C{printables}). To emulate the C{\b} behavior of regular expressions, (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
the string being parsed, or at the end of a line. the string being parsed, or at the end of a line.
""" """
def __init__(self, wordChars = printables): def __init__(self, wordChars = printables):
super(WordEnd,self).__init__() super(WordEnd,self).__init__()
self.wordChars = _str2dict(wordChars) self.wordChars = set(wordChars)
self.skipWhitespace = False self.skipWhitespace = False
self.errmsg = "Not at the end of a word" self.errmsg = "Not at the end of a word"
@ -2309,7 +2246,7 @@ class ParseExpression(ParserElement):
return self return self
def leaveWhitespace( self ): def leaveWhitespace( self ):
"""Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
all contained expressions.""" all contained expressions."""
self.skipWhitespace = False self.skipWhitespace = False
self.exprs = [ e.copy() for e in self.exprs ] self.exprs = [ e.copy() for e in self.exprs ]
@ -2380,11 +2317,16 @@ class ParseExpression(ParserElement):
for e in self.exprs: for e in self.exprs:
e.validate(tmp) e.validate(tmp)
self.checkRecursion( [] ) self.checkRecursion( [] )
def copy(self):
ret = super(ParseExpression,self).copy()
ret.exprs = [e.copy() for e in self.exprs]
return ret
class And(ParseExpression): class And(ParseExpression):
"""Requires all given C{ParseExpressions} to be found in the given order. """Requires all given C{ParseExpression}s to be found in the given order.
Expressions may be separated by whitespace. Expressions may be separated by whitespace.
May be constructed using the '+' operator. May be constructed using the C{'+'} operator.
""" """
class _ErrorStop(Empty): class _ErrorStop(Empty):
@ -2453,7 +2395,7 @@ class And(ParseExpression):
class Or(ParseExpression): class Or(ParseExpression):
"""Requires that at least one C{ParseExpression} is found. """Requires that at least one C{ParseExpression} is found.
If two expressions match, the expression that matches the longest string will be used. If two expressions match, the expression that matches the longest string will be used.
May be constructed using the '^' operator. May be constructed using the C{'^'} operator.
""" """
def __init__( self, exprs, savelist = False ): def __init__( self, exprs, savelist = False ):
super(Or,self).__init__(exprs, savelist) super(Or,self).__init__(exprs, savelist)
@ -2515,7 +2457,7 @@ class Or(ParseExpression):
class MatchFirst(ParseExpression): class MatchFirst(ParseExpression):
"""Requires that at least one C{ParseExpression} is found. """Requires that at least one C{ParseExpression} is found.
If two expressions match, the first one listed is the one that will match. If two expressions match, the first one listed is the one that will match.
May be constructed using the '|' operator. May be constructed using the C{'|'} operator.
""" """
def __init__( self, exprs, savelist = False ): def __init__( self, exprs, savelist = False ):
super(MatchFirst,self).__init__(exprs, savelist) super(MatchFirst,self).__init__(exprs, savelist)
@ -2572,9 +2514,9 @@ class MatchFirst(ParseExpression):
class Each(ParseExpression): class Each(ParseExpression):
"""Requires all given C{ParseExpressions} to be found, but in any order. """Requires all given C{ParseExpression}s to be found, but in any order.
Expressions may be separated by whitespace. Expressions may be separated by whitespace.
May be constructed using the '&' operator. May be constructed using the C{'&'} operator.
""" """
def __init__( self, exprs, savelist = True ): def __init__( self, exprs, savelist = True ):
super(Each,self).__init__(exprs, savelist) super(Each,self).__init__(exprs, savelist)
@ -2757,7 +2699,6 @@ class NotAny(ParseElementEnhance):
self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
self.mayReturnEmpty = True self.mayReturnEmpty = True
self.errmsg = "Found unwanted token, "+_ustr(self.expr) self.errmsg = "Found unwanted token, "+_ustr(self.expr)
#self.myException = ParseException("",0,self.errmsg,self)
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
try: try:
@ -2916,7 +2857,6 @@ class SkipTo(ParseElementEnhance):
else: else:
self.failOn = failOn self.failOn = failOn
self.errmsg = "No match found for "+_ustr(self.expr) self.errmsg = "No match found for "+_ustr(self.expr)
#self.myException = ParseException("",0,self.errmsg,self)
def parseImpl( self, instring, loc, doActions=True ): def parseImpl( self, instring, loc, doActions=True ):
startLoc = loc startLoc = loc
@ -3040,7 +2980,7 @@ class _ForwardNoRecurse(Forward):
return "..." return "..."
class TokenConverter(ParseElementEnhance): class TokenConverter(ParseElementEnhance):
"""Abstract subclass of ParseExpression, for converting parsed results.""" """Abstract subclass of C{ParseExpression}, for converting parsed results."""
def __init__( self, expr, savelist=False ): def __init__( self, expr, savelist=False ):
super(TokenConverter,self).__init__( expr )#, savelist ) super(TokenConverter,self).__init__( expr )#, savelist )
self.saveAsList = False self.saveAsList = False
@ -3089,7 +3029,7 @@ class Combine(TokenConverter):
return retToks return retToks
class Group(TokenConverter): class Group(TokenConverter):
"""Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""
def __init__( self, expr ): def __init__( self, expr ):
super(Group,self).__init__( expr ) super(Group,self).__init__( expr )
self.saveAsList = True self.saveAsList = True
@ -3143,7 +3083,7 @@ class Suppress(TokenConverter):
class OnlyOnce(object): class OnlyOnce(object):
"""Wrapper for parse actions, to ensure they are only called once.""" """Wrapper for parse actions, to ensure they are only called once."""
def __init__(self, methodCall): def __init__(self, methodCall):
self.callable = ParserElement._normalizeParseActionArgs(methodCall) self.callable = _trim_arity(methodCall)
self.called = False self.called = False
def __call__(self,s,l,t): def __call__(self,s,l,t):
if not self.called: if not self.called:
@ -3156,7 +3096,7 @@ class OnlyOnce(object):
def traceParseAction(f): def traceParseAction(f):
"""Decorator for debugging parse actions.""" """Decorator for debugging parse actions."""
f = ParserElement._normalizeParseActionArgs(f) f = _trim_arity(f)
def z(*paArgs): def z(*paArgs):
thisFunc = f.func_name thisFunc = f.func_name
s,l,t = paArgs[-3:] s,l,t = paArgs[-3:]
@ -3194,7 +3134,7 @@ def delimitedList( expr, delim=",", combine=False ):
else: else:
return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def countedArray( expr ): def countedArray( expr, intExpr=None ):
"""Helper to define a counted list of expressions. """Helper to define a counted list of expressions.
This helper defines a pattern of the form:: This helper defines a pattern of the form::
integer expr expr expr... integer expr expr expr...
@ -3203,15 +3143,25 @@ def countedArray( expr ):
""" """
arrayExpr = Forward() arrayExpr = Forward()
def countFieldParseAction(s,l,t): def countFieldParseAction(s,l,t):
n = int(t[0]) n = t[0]
arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
return [] return []
return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr ) if intExpr is None:
intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
else:
intExpr = intExpr.copy()
intExpr.setName("arrayLen")
intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
return ( intExpr + arrayExpr )
def _flatten(L): def _flatten(L):
if type(L) is not list: return [L] ret = []
if L == []: return L for i in L:
return _flatten(L[0]) + _flatten(L[1:]) if isinstance(i,list):
ret.extend(_flatten(i))
else:
ret.append(i)
return ret
def matchPreviousLiteral(expr): def matchPreviousLiteral(expr):
"""Helper to define an expression that is indirectly defined from """Helper to define an expression that is indirectly defined from
@ -3346,15 +3296,15 @@ def originalTextFor(expr, asString=True):
"""Helper to return the original, untokenized text for a given expression. Useful to """Helper to return the original, untokenized text for a given expression. Useful to
restore the parsed fields of an HTML start tag into the raw tag text itself, or to restore the parsed fields of an HTML start tag into the raw tag text itself, or to
revert separate tokens with intervening whitespace back to the original matching revert separate tokens with intervening whitespace back to the original matching
input text. Simpler to use than the parse action C{keepOriginalText}, and does not input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
require the inspect module to chase up the call stack. By default, returns a require the inspect module to chase up the call stack. By default, returns a
string containing the original parsed text. string containing the original parsed text.
If the optional C{asString} argument is passed as False, then the return value is a If the optional C{asString} argument is passed as C{False}, then the return value is a
C{ParseResults} containing any results names that were originally matched, and a C{ParseResults} containing any results names that were originally matched, and a
single token containing the original matched text from the input string. So if single token containing the original matched text from the input string. So if
the expression passed to C{originalTextFor} contains expressions with defined the expression passed to C{L{originalTextFor}} contains expressions with defined
results names, you must set C{asString} to False if you want to preserve those results names, you must set C{asString} to C{False} if you want to preserve those
results name values.""" results name values."""
locMarker = Empty().setParseAction(lambda s,loc,t: loc) locMarker = Empty().setParseAction(lambda s,loc,t: loc)
endlocMarker = locMarker.copy() endlocMarker = locMarker.copy()
@ -3370,7 +3320,12 @@ def originalTextFor(expr, asString=True):
del t["_original_end"] del t["_original_end"]
matchExpr.setParseAction(extractText) matchExpr.setParseAction(extractText)
return matchExpr return matchExpr
def ungroup(expr):
"""Helper to undo pyparsing's default grouping of And expressions, even
if all but one are non-empty."""
return TokenConverter(expr).setParseAction(lambda t:t[0])
# convenience constants for positional expressions # convenience constants for positional expressions
empty = Empty().setName("empty") empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart") lineStart = LineStart().setName("lineStart")
@ -3380,8 +3335,8 @@ stringEnd = StringEnd().setName("stringEnd")
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16))) _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8))) _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar) _charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
@ -3399,7 +3354,8 @@ def srange(s):
The values enclosed in the []'s may be:: The values enclosed in the []'s may be::
a single character a single character
an escaped character with a leading backslash (such as \- or \]) an escaped character with a leading backslash (such as \- or \])
an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) an escaped hex character with a leading '\x' (\x21, which is a '!' character)
(\0x## is also supported for backwards compatibility)
an escaped octal character with a leading '\0' (\041, which is a '!' character) an escaped octal character with a leading '\0' (\041, which is a '!' character)
a range of any of the above, separated by a dash ('a-z', etc.) a range of any of the above, separated by a dash ('a-z', etc.)
any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
@ -3486,7 +3442,7 @@ def _makeTags(tagStr, xml):
else: else:
printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
openTag = Suppress("<") + tagStr + \ openTag = Suppress("<") + tagStr("tag") + \
Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
Optional( Suppress("=") + tagAttrValue ) ))) + \ Optional( Suppress("=") + tagAttrValue ) ))) + \
Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
@ -3508,19 +3464,21 @@ def makeXMLTags(tagStr):
def withAttribute(*args,**attrDict): def withAttribute(*args,**attrDict):
"""Helper to create a validating parse action to be used with start tags created """Helper to create a validating parse action to be used with start tags created
with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag
with a required attribute value, to avoid false matches on common tags such as with a required attribute value, to avoid false matches on common tags such as
<TD> or <DIV>. C{<TD>} or C{<DIV>}.
Call withAttribute with a series of attribute names and values. Specify the list Call C{withAttribute} with a series of attribute names and values. Specify the list
of filter attributes names and values as: of filter attributes names and values as:
- keyword arguments, as in (class="Customer",align="right"), or - keyword arguments, as in C{(align="right")}, or
- as an explicit dict with C{**} operator, when an attribute name is also a Python
reserved word, as in C{**{"class":"Customer", "align":"right"}}
- a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
For attribute names with a namespace prefix, you must use the second form. Attribute For attribute names with a namespace prefix, you must use the second form. Attribute
names are matched insensitive to upper/lower case. names are matched insensitive to upper/lower case.
To verify that the attribute exists, but without specifying a value, pass To verify that the attribute exists, but without specifying a value, pass
withAttribute.ANY_VALUE as the value. C{withAttribute.ANY_VALUE} as the value.
""" """
if args: if args:
attrs = args[:] attrs = args[:]
@ -3631,12 +3589,12 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop
expression will capture all whitespace-delimited content between delimiters expression will capture all whitespace-delimited content between delimiters
as a list of separate values. as a list of separate values.
Use the ignoreExpr argument to define expressions that may contain Use the C{ignoreExpr} argument to define expressions that may contain
opening or closing characters that should not be treated as opening opening or closing characters that should not be treated as opening
or closing characters for nesting, such as quotedString or a comment or closing characters for nesting, such as quotedString or a comment
expression. Specify multiple expressions using an Or or MatchFirst. expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
The default is quotedString, but if no expressions are to be ignored, The default is L{quotedString}, but if no expressions are to be ignored,
then pass None for this argument. then pass C{None} for this argument.
""" """
if opener == closer: if opener == closer:
raise ValueError("opening and closing strings cannot be the same") raise ValueError("opening and closing strings cannot be the same")
@ -3683,7 +3641,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
the current level; set to False for block of left-most statements the current level; set to False for block of left-most statements
(default=True) (default=True)
A valid block must contain at least one blockStatement. A valid block must contain at least one C{blockStatement}.
""" """
def checkPeerIndent(s,l,t): def checkPeerIndent(s,l,t):
if l >= len(s): return if l >= len(s): return