Update bundled pyparsing

This commit is contained in:
Kovid Goyal 2013-01-02 10:42:49 +05:30
parent 99551f1a6d
commit 2fe223bf97

View File

@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments - embedded comments
""" """
__version__ = "1.5.6" __version__ = "1.5.7"
__versionTime__ = "26 June 2011 10:53" __versionTime__ = "17 November 2012 16:18"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string import string
@ -81,66 +81,51 @@ __all__ = [
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation',
] ]
""" _MAX_INT = sys.maxint
Detect if we are running version 3.X and make appropriate changes range = xrange
Robert A. Clark set = lambda s : dict( [(c,0) for c in s] )
"""
_PY3K = sys.version_info[0] > 2
if _PY3K:
_MAX_INT = sys.maxsize
basestring = str
unichr = chr
_ustr = str
alphas = string.ascii_lowercase + string.ascii_uppercase
else:
_MAX_INT = sys.maxint
range = xrange
set = lambda s : dict( [(c,0) for c in s] )
alphas = string.lowercase + string.uppercase
def _ustr(obj): def _ustr(obj):
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
then < returns the unicode object | encodes it with the default encoding | ... >. then < returns the unicode object | encodes it with the default encoding | ... >.
""" """
if isinstance(obj,unicode): if isinstance(obj,unicode):
return obj return obj
try: try:
# If this works, then _ustr(obj) has the same behaviour as str(obj), so # If this works, then _ustr(obj) has the same behaviour as str(obj), so
# it won't break any existing code. # it won't break any existing code.
return str(obj) return str(obj)
except UnicodeEncodeError: except UnicodeEncodeError:
# The Python docs (http://docs.python.org/ref/customization.html#l2h-182) # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
# state that "The return value must be a string object". However, does a # state that "The return value must be a string object". However, does a
# unicode object (being a subclass of basestring) count as a "string # unicode object (being a subclass of basestring) count as a "string
# object"? # object"?
# If so, then return a unicode object: # If so, then return a unicode object:
return unicode(obj) return unicode(obj)
# Else encode it... but how? There are many choices... :) # Else encode it... but how? There are many choices... :)
# Replace unprintables with escape codes? # Replace unprintables with escape codes?
#return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
# Replace unprintables with question marks? # Replace unprintables with question marks?
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace') #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
# ... # ...
alphas = string.lowercase + string.uppercase
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = [] singleArgBuiltins = []
import __builtin__ import __builtin__
for fname in "sum len enumerate sorted reversed list tuple set any all".split(): for fname in "sum len sorted reversed list tuple set any all min max".split():
try: try:
singleArgBuiltins.append(getattr(__builtin__,fname)) singleArgBuiltins.append(getattr(__builtin__,fname))
except AttributeError: except AttributeError:
@ -159,7 +144,8 @@ def _xml_escape(data):
class _Constants(object): class _Constants(object):
pass pass
nums = string.digits alphas = string.ascii_lowercase + string.ascii_uppercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef" hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums alphanums = alphas + nums
_bslash = chr(92) _bslash = chr(92)
@ -211,7 +197,7 @@ class ParseBaseException(Exception):
return line_str.strip() return line_str.strip()
def __dir__(self): def __dir__(self):
return "loc msg pstr parserElement lineno col line " \ return "loc msg pstr parserElement lineno col line " \
"markInputLine __str__ __repr__".split() "markInputline __str__ __repr__".split()
class ParseException(ParseBaseException): class ParseException(ParseBaseException):
"""exception thrown when parse expressions don't match class; """exception thrown when parse expressions don't match class;
@ -228,8 +214,8 @@ class ParseFatalException(ParseBaseException):
pass pass
class ParseSyntaxException(ParseFatalException): class ParseSyntaxException(ParseFatalException):
"""just like C{ParseFatalException}, but thrown internally when an """just like C{L{ParseFatalException}}, but thrown internally when an
C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
an unbacktrackable syntax error has been found""" an unbacktrackable syntax error has been found"""
def __init__(self, pe): def __init__(self, pe):
super(ParseSyntaxException, self).__init__( super(ParseSyntaxException, self).__init__(
@ -444,16 +430,13 @@ class ParseResults(object):
return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
def __str__( self ): def __str__( self ):
out = "[" out = []
sep = ""
for i in self.__toklist: for i in self.__toklist:
if isinstance(i, ParseResults): if isinstance(i, ParseResults):
out += sep + _ustr(i) out.append(_ustr(i))
else: else:
out += sep + repr(i) out.append(repr(i))
sep = ", " return '[' + ', '.join(out) + ']'
out += "]"
return out
def _asStringList( self, sep='' ): def _asStringList( self, sep='' ):
out = [] out = []
@ -616,7 +599,7 @@ class ParseResults(object):
self.__parent = None self.__parent = None
def __dir__(self): def __dir__(self):
return dir(super(ParseResults,self)) + self.keys() return dir(super(ParseResults,self)) + list(self.keys())
def col (loc,strg): def col (loc,strg):
"""Returns current column within a string, counting newlines as line separators. """Returns current column within a string, counting newlines as line separators.
@ -624,7 +607,7 @@ def col (loc,strg):
Note: the default parsing behavior is to expand tabs in the input string Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column consistent view of the parsed string, the parse location, and line and column
positions within the parsed string. positions within the parsed string.
""" """
@ -636,7 +619,7 @@ def lineno(loc,strg):
Note: the default parsing behavior is to expand tabs in the input string Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column consistent view of the parsed string, the parse location, and line and column
positions within the parsed string. positions within the parsed string.
""" """
@ -666,33 +649,23 @@ def nullDebugAction(*args):
pass pass
'decorator to trim function calls to match the arity of the target' 'decorator to trim function calls to match the arity of the target'
if not _PY3K: def _trim_arity(func, maxargs=2):
def _trim_arity(func, maxargs=2): if func in singleArgBuiltins:
limit = [0] return lambda s,l,t: func(t)
def wrapper(*args): limit = [0]
while 1: foundArity = [False]
try: def wrapper(*args):
return func(*args[limit[0]:]) while 1:
except TypeError: try:
if limit[0] <= maxargs: ret = func(*args[limit[0]:])
limit[0] += 1 foundArity[0] = True
continue return ret
raise except TypeError:
return wrapper if limit[0] <= maxargs and not foundArity[0]:
else: limit[0] += 1
def _trim_arity(func, maxargs=2): continue
limit = maxargs raise
def wrapper(*args): return wrapper
#~ nonlocal limit
while 1:
try:
return func(*args[limit:])
except TypeError:
if limit:
limit -= 1
continue
raise
return wrapper
class ParserElement(object): class ParserElement(object):
"""Abstract base level parser element class.""" """Abstract base level parser element class."""
@ -705,6 +678,13 @@ class ParserElement(object):
ParserElement.DEFAULT_WHITE_CHARS = chars ParserElement.DEFAULT_WHITE_CHARS = chars
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
def inlineLiteralsUsing(cls):
"""
Set class to be used for inclusion of string literals into a parser.
"""
ParserElement.literalStringClass = cls
inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
def __init__( self, savelist=False ): def __init__( self, savelist=False ):
self.parseAction = list() self.parseAction = list()
self.failAction = None self.failAction = None
@ -789,14 +769,14 @@ class ParserElement(object):
C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- s = the original string being parsed (see note below) - s = the original string being parsed (see note below)
- loc = the location of the matching substring - loc = the location of the matching substring
- toks = a list of the matched tokens, packaged as a ParseResults object - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
If the functions in fns modify the tokens, they can return them as the return If the functions in fns modify the tokens, they can return them as the return
value from fn, and the modified list of tokens will replace the original. value from fn, and the modified list of tokens will replace the original.
Otherwise, fn does not need to return any value. Otherwise, fn does not need to return any value.
Note: the default parsing behavior is to expand tabs in the input string Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{parseString}<parseString>} for more information before starting the parsing process. See L{I{parseString}<parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column consistent view of the parsed string, the parse location, and line and column
positions within the parsed string. positions within the parsed string.
""" """
@ -818,7 +798,7 @@ class ParserElement(object):
- loc = location where expression match was attempted and failed - loc = location where expression match was attempted and failed
- expr = the parse expression that failed - expr = the parse expression that failed
- err = the exception thrown - err = the exception thrown
The function returns no value. It may throw C{ParseFatalException} The function returns no value. It may throw C{L{ParseFatalException}}
if it is desired to stop parsing immediately.""" if it is desired to stop parsing immediately."""
self.failAction = fn self.failAction = fn
return self return self
@ -872,15 +852,12 @@ class ParserElement(object):
loc,tokens = self.parseImpl( instring, preloc, doActions ) loc,tokens = self.parseImpl( instring, preloc, doActions )
except IndexError: except IndexError:
raise ParseException( instring, len(instring), self.errmsg, self ) raise ParseException( instring, len(instring), self.errmsg, self )
except ParseBaseException: except ParseBaseException, err:
#~ print ("Exception raised:", err) #~ print ("Exception raised:", err)
err = None err = None
if self.debugActions[2]: if self.debugActions[2]:
err = sys.exc_info()[1]
self.debugActions[2]( instring, tokensStart, self, err ) self.debugActions[2]( instring, tokensStart, self, err )
if self.failAction: if self.failAction:
if err is None:
err = sys.exc_info()[1]
self.failAction( instring, tokensStart, self, err ) self.failAction( instring, tokensStart, self, err )
raise raise
else: else:
@ -910,10 +887,9 @@ class ParserElement(object):
self.resultsName, self.resultsName,
asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
modal=self.modalResults ) modal=self.modalResults )
except ParseBaseException: except ParseBaseException, err:
#~ print "Exception raised in user parse action:", err #~ print "Exception raised in user parse action:", err
if (self.debugActions[2] ): if (self.debugActions[2] ):
err = sys.exc_info()[1]
self.debugActions[2]( instring, tokensStart, self, err ) self.debugActions[2]( instring, tokensStart, self, err )
raise raise
else: else:
@ -952,8 +928,7 @@ class ParserElement(object):
value = self._parseNoCache( instring, loc, doActions, callPreParse ) value = self._parseNoCache( instring, loc, doActions, callPreParse )
ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
return value return value
except ParseBaseException: except ParseBaseException, pe:
pe = sys.exc_info()[1]
ParserElement._exprArgCache[ lookup ] = pe ParserElement._exprArgCache[ lookup ] = pe
raise raise
@ -994,7 +969,7 @@ class ParserElement(object):
If you want the grammar to require that the entire input string be If you want the grammar to require that the entire input string be
successfully parsed, then set C{parseAll} to True (equivalent to ending successfully parsed, then set C{parseAll} to True (equivalent to ending
the grammar with C{StringEnd()}). the grammar with C{L{StringEnd()}}).
Note: C{parseString} implicitly calls C{expandtabs()} on the input string, Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
in order to report proper column numbers in parse actions. in order to report proper column numbers in parse actions.
@ -1023,12 +998,11 @@ class ParserElement(object):
loc = self.preParse( instring, loc ) loc = self.preParse( instring, loc )
se = Empty() + StringEnd() se = Empty() + StringEnd()
se._parse( instring, loc ) se._parse( instring, loc )
except ParseBaseException: except ParseBaseException, exc:
if ParserElement.verbose_stacktrace: if ParserElement.verbose_stacktrace:
raise raise
else: else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace # catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc raise exc
else: else:
return tokens return tokens
@ -1076,16 +1050,15 @@ class ParserElement(object):
loc = nextLoc loc = nextLoc
else: else:
loc = preloc+1 loc = preloc+1
except ParseBaseException: except ParseBaseException, exc:
if ParserElement.verbose_stacktrace: if ParserElement.verbose_stacktrace:
raise raise
else: else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace # catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc raise exc
def transformString( self, instring ): def transformString( self, instring ):
"""Extension to C{scanString}, to modify matching text with modified tokens that may """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
be returned from a parse action. To use C{transformString}, define a grammar and be returned from a parse action. To use C{transformString}, define a grammar and
attach a parse action to it that modifies the returned token list. attach a parse action to it that modifies the returned token list.
Invoking C{transformString()} on a target string will then scan for matches, Invoking C{transformString()} on a target string will then scan for matches,
@ -1110,33 +1083,31 @@ class ParserElement(object):
out.append(instring[lastE:]) out.append(instring[lastE:])
out = [o for o in out if o] out = [o for o in out if o]
return "".join(map(_ustr,_flatten(out))) return "".join(map(_ustr,_flatten(out)))
except ParseBaseException: except ParseBaseException, exc:
if ParserElement.verbose_stacktrace: if ParserElement.verbose_stacktrace:
raise raise
else: else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace # catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc raise exc
def searchString( self, instring, maxMatches=_MAX_INT ): def searchString( self, instring, maxMatches=_MAX_INT ):
"""Another extension to C{scanString}, simplifying the access to the tokens found """Another extension to C{L{scanString}}, simplifying the access to the tokens found
to match the given parse expression. May be called with optional to match the given parse expression. May be called with optional
C{maxMatches} argument, to clip searching after 'n' matches are found. C{maxMatches} argument, to clip searching after 'n' matches are found.
""" """
try: try:
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
except ParseBaseException: except ParseBaseException, exc:
if ParserElement.verbose_stacktrace: if ParserElement.verbose_stacktrace:
raise raise
else: else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace # catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc raise exc
def __add__(self, other ): def __add__(self, other ):
"""Implementation of + operator - returns And""" """Implementation of + operator - returns C{L{And}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1144,9 +1115,9 @@ class ParserElement(object):
return And( [ self, other ] ) return And( [ self, other ] )
def __radd__(self, other ): def __radd__(self, other ):
"""Implementation of + operator when left operand is not a C{ParserElement}""" """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1154,9 +1125,9 @@ class ParserElement(object):
return other + self return other + self
def __sub__(self, other): def __sub__(self, other):
"""Implementation of - operator, returns C{And} with error stop""" """Implementation of - operator, returns C{L{And}} with error stop"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1164,9 +1135,9 @@ class ParserElement(object):
return And( [ self, And._ErrorStop(), other ] ) return And( [ self, And._ErrorStop(), other ] )
def __rsub__(self, other ): def __rsub__(self, other ):
"""Implementation of - operator when left operand is not a C{ParserElement}""" """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1179,12 +1150,12 @@ class ParserElement(object):
tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
may also include C{None} as in: may also include C{None} as in:
- C{expr*(n,None)} or C{expr*(n,)} is equivalent - C{expr*(n,None)} or C{expr*(n,)} is equivalent
to C{expr*n + ZeroOrMore(expr)} to C{expr*n + L{ZeroOrMore}(expr)}
(read as "at least n instances of C{expr}") (read as "at least n instances of C{expr}")
- C{expr*(None,n)} is equivalent to C{expr*(0,n)} - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
(read as "0 to n instances of C{expr}") (read as "0 to n instances of C{expr}")
- C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
- C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
Note that C{expr*(None,n)} does not raise an exception if Note that C{expr*(None,n)} does not raise an exception if
more than n exprs exist in the input stream; that is, more than n exprs exist in the input stream; that is,
@ -1245,9 +1216,9 @@ class ParserElement(object):
return self.__mul__(other) return self.__mul__(other)
def __or__(self, other ): def __or__(self, other ):
"""Implementation of | operator - returns C{MatchFirst}""" """Implementation of | operator - returns C{L{MatchFirst}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1255,9 +1226,9 @@ class ParserElement(object):
return MatchFirst( [ self, other ] ) return MatchFirst( [ self, other ] )
def __ror__(self, other ): def __ror__(self, other ):
"""Implementation of | operator when left operand is not a C{ParserElement}""" """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1265,9 +1236,9 @@ class ParserElement(object):
return other | self return other | self
def __xor__(self, other ): def __xor__(self, other ):
"""Implementation of ^ operator - returns C{Or}""" """Implementation of ^ operator - returns C{L{Or}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1275,9 +1246,9 @@ class ParserElement(object):
return Or( [ self, other ] ) return Or( [ self, other ] )
def __rxor__(self, other ): def __rxor__(self, other ):
"""Implementation of ^ operator when left operand is not a C{ParserElement}""" """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1285,9 +1256,9 @@ class ParserElement(object):
return other ^ self return other ^ self
def __and__(self, other ): def __and__(self, other ):
"""Implementation of & operator - returns C{Each}""" """Implementation of & operator - returns C{L{Each}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1295,9 +1266,9 @@ class ParserElement(object):
return Each( [ self, other ] ) return Each( [ self, other ] )
def __rand__(self, other ): def __rand__(self, other ):
"""Implementation of & operator when left operand is not a C{ParserElement}""" """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ): if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2) SyntaxWarning, stacklevel=2)
@ -1305,11 +1276,11 @@ class ParserElement(object):
return other & self return other & self
def __invert__( self ): def __invert__( self ):
"""Implementation of ~ operator - returns C{NotAny}""" """Implementation of ~ operator - returns C{L{NotAny}}"""
return NotAny( self ) return NotAny( self )
def __call__(self, name): def __call__(self, name):
"""Shortcut for C{setResultsName}, with C{listAllMatches=default}:: """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
could be written as:: could be written as::
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
@ -1403,15 +1374,17 @@ class ParserElement(object):
try: try:
file_contents = file_or_filename.read() file_contents = file_or_filename.read()
except AttributeError: except AttributeError:
f = open(file_or_filename, "rb") f = open(file_or_filename, "r")
file_contents = f.read() file_contents = f.read()
f.close() f.close()
try: try:
return self.parseString(file_contents, parseAll) return self.parseString(file_contents, parseAll)
except ParseBaseException: except ParseBaseException, exc:
# catch and re-raise exception from here, clears out pyparsing internal stack trace if ParserElement.verbose_stacktrace:
exc = sys.exc_info()[1] raise
raise exc else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
raise exc
def getException(self): def getException(self):
return ParseException("",0,self.errmsg,self) return ParseException("",0,self.errmsg,self)
@ -1515,10 +1488,11 @@ class Literal(Token):
exc.pstr = instring exc.pstr = instring
raise exc raise exc
_L = Literal _L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token): class Keyword(Token):
"""Token to exactly match a specified string as a keyword, that is, it must be """Token to exactly match a specified string as a keyword, that is, it must be
immediately followed by a non-keyword character. Compare with C{Literal}:: immediately followed by a non-keyword character. Compare with C{L{Literal}}::
Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
Accepts two optional constructor arguments in addition to the keyword string: Accepts two optional constructor arguments in addition to the keyword string:
@ -1821,9 +1795,9 @@ class QuotedString(Token):
- quoteChar - string of one or more characters defining the quote delimiting string - quoteChar - string of one or more characters defining the quote delimiting string
- escChar - character to escape quotes, typically backslash (default=None) - escChar - character to escape quotes, typically backslash (default=None)
- escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
- multiline - boolean indicating whether quotes can span multiple lines (default=False) - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
- unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
""" """
super(QuotedString,self).__init__() super(QuotedString,self).__init__()
@ -2003,7 +1977,7 @@ class White(Token):
by pyparsing grammars. This class is included when some whitespace structures by pyparsing grammars. This class is included when some whitespace structures
are significant. Define with a string containing the whitespace characters to be are significant. Define with a string containing the whitespace characters to be
matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
as defined for the C{Word} class.""" as defined for the C{L{Word}} class."""
whiteStrs = { whiteStrs = {
" " : "<SPC>", " " : "<SPC>",
"\t": "<TAB>", "\t": "<TAB>",
@ -2331,7 +2305,8 @@ class And(ParseExpression):
class _ErrorStop(Empty): class _ErrorStop(Empty):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(Empty,self).__init__(*args, **kwargs) super(And._ErrorStop,self).__init__(*args, **kwargs)
self.name = '-'
self.leaveWhitespace() self.leaveWhitespace()
def __init__( self, exprs, savelist = True ): def __init__( self, exprs, savelist = True ):
@ -2359,8 +2334,7 @@ class And(ParseExpression):
loc, exprtokens = e._parse( instring, loc, doActions ) loc, exprtokens = e._parse( instring, loc, doActions )
except ParseSyntaxException: except ParseSyntaxException:
raise raise
except ParseBaseException: except ParseBaseException, pe:
pe = sys.exc_info()[1]
raise ParseSyntaxException(pe) raise ParseSyntaxException(pe)
except IndexError: except IndexError:
raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
@ -2412,8 +2386,7 @@ class Or(ParseExpression):
for e in self.exprs: for e in self.exprs:
try: try:
loc2 = e.tryParse( instring, loc ) loc2 = e.tryParse( instring, loc )
except ParseException: except ParseException, err:
err = sys.exc_info()[1]
if err.loc > maxExcLoc: if err.loc > maxExcLoc:
maxException = err maxException = err
maxExcLoc = err.loc maxExcLoc = err.loc
@ -2436,7 +2409,7 @@ class Or(ParseExpression):
def __ixor__(self, other ): def __ixor__(self, other ):
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
return self.append( other ) #Or( [ self, other ] ) return self.append( other ) #Or( [ self, other ] )
def __str__( self ): def __str__( self ):
@ -2495,7 +2468,7 @@ class MatchFirst(ParseExpression):
def __ior__(self, other ): def __ior__(self, other ):
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal( other ) other = ParserElement.literalStringClass( other )
return self.append( other ) #MatchFirst( [ self, other ] ) return self.append( other ) #MatchFirst( [ self, other ] )
def __str__( self ): def __str__( self ):
@ -2916,13 +2889,14 @@ class Forward(ParseElementEnhance):
thereby leaving b and c out as parseable alternatives. It is recommended that you thereby leaving b and c out as parseable alternatives. It is recommended that you
explicitly group the values inserted into the C{Forward}:: explicitly group the values inserted into the C{Forward}::
fwdExpr << (a | b | c) fwdExpr << (a | b | c)
Converting to use the '<<=' operator instead will avoid this problem.
""" """
def __init__( self, other=None ): def __init__( self, other=None ):
super(Forward,self).__init__( other, savelist=False ) super(Forward,self).__init__( other, savelist=False )
def __lshift__( self, other ): def __lshift__( self, other ):
if isinstance( other, basestring ): if isinstance( other, basestring ):
other = Literal(other) other = ParserElement.literalStringClass(other)
self.expr = other self.expr = other
self.mayReturnEmpty = other.mayReturnEmpty self.mayReturnEmpty = other.mayReturnEmpty
self.strRepr = None self.strRepr = None
@ -2933,7 +2907,8 @@ class Forward(ParseElementEnhance):
self.saveAsList = self.expr.saveAsList self.saveAsList = self.expr.saveAsList
self.ignoreExprs.extend(self.expr.ignoreExprs) self.ignoreExprs.extend(self.expr.ignoreExprs)
return None return None
__ilshift__ = __lshift__
def leaveWhitespace( self ): def leaveWhitespace( self ):
self.skipWhitespace = False self.skipWhitespace = False
return self return self
@ -2993,7 +2968,7 @@ class Upcase(TokenConverter):
DeprecationWarning,stacklevel=2) DeprecationWarning,stacklevel=2)
def postParse( self, instring, loc, tokenlist ): def postParse( self, instring, loc, tokenlist ):
return list(map( string.upper, tokenlist )) return list(map( str.upper, tokenlist ))
class Combine(TokenConverter): class Combine(TokenConverter):
@ -3029,7 +3004,7 @@ class Combine(TokenConverter):
return retToks return retToks
class Group(TokenConverter): class Group(TokenConverter):
"""Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
def __init__( self, expr ): def __init__( self, expr ):
super(Group,self).__init__( expr ) super(Group,self).__init__( expr )
self.saveAsList = True self.saveAsList = True
@ -3105,8 +3080,7 @@ def traceParseAction(f):
sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
try: try:
ret = f(*paArgs) ret = f(*paArgs)
except Exception: except Exception, exc:
exc = sys.exc_info()[1]
sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
raise raise
sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) ) sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
@ -3124,7 +3098,7 @@ def delimitedList( expr, delim=",", combine=False ):
"""Helper to define a delimited list of expressions - the delimiter defaults to ','. """Helper to define a delimited list of expressions - the delimiter defaults to ','.
By default, the list elements and delimiters can have intervening whitespace, and By default, the list elements and delimiters can have intervening whitespace, and
comments, but this can be overridden by passing C{combine=True} in the constructor. comments, but this can be overridden by passing C{combine=True} in the constructor.
If C{combine} is set to True, the matching tokens are returned as a single token If C{combine} is set to C{True}, the matching tokens are returned as a single token
string, with the delimiters included; otherwise, the matching tokens are returned string, with the delimiters included; otherwise, the matching tokens are returned
as a list of tokens, with the delimiters suppressed. as a list of tokens, with the delimiters suppressed.
""" """
@ -3226,7 +3200,7 @@ def _escapeRegexRangeChars(s):
def oneOf( strs, caseless=False, useRegex=True ): def oneOf( strs, caseless=False, useRegex=True ):
"""Helper to quickly define a set of alternative Literals, and makes sure to do """Helper to quickly define a set of alternative Literals, and makes sure to do
longest-first testing when there is a conflict, regardless of the input order, longest-first testing when there is a conflict, regardless of the input order,
but returns a C{MatchFirst} for best performance. but returns a C{L{MatchFirst}} for best performance.
Parameters: Parameters:
- strs - a string of space-delimited literals, or a list of string literals - strs - a string of space-delimited literals, or a list of string literals
@ -3284,7 +3258,7 @@ def oneOf( strs, caseless=False, useRegex=True ):
def dictOf( key, value ): def dictOf( key, value ):
"""Helper to easily and clearly define a dictionary by specifying the respective patterns """Helper to easily and clearly define a dictionary by specifying the respective patterns
for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
in the proper order. The key pattern can include delimiting markers or punctuation, in the proper order. The key pattern can include delimiting markers or punctuation,
as long as they are suppressed, thereby leaving the significant key text. The value as long as they are suppressed, thereby leaving the significant key text. The value
pattern can include named results, so that the C{Dict} results can include named token pattern can include named results, so that the C{Dict} results can include named token
@ -3301,7 +3275,7 @@ def originalTextFor(expr, asString=True):
string containing the original parsed text. string containing the original parsed text.
If the optional C{asString} argument is passed as C{False}, then the return value is a If the optional C{asString} argument is passed as C{False}, then the return value is a
C{ParseResults} containing any results names that were originally matched, and a C{L{ParseResults}} containing any results names that were originally matched, and a
single token containing the original matched text from the input string. So if single token containing the original matched text from the input string. So if
the expression passed to C{L{originalTextFor}} contains expressions with defined the expression passed to C{L{originalTextFor}} contains expressions with defined
results names, you must set C{asString} to C{False} if you want to preserve those results names, you must set C{asString} to C{False} if you want to preserve those
@ -3335,7 +3309,7 @@ stringEnd = StringEnd().setName("stringEnd")
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16))) _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8))) _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar) _charRange = Group(_singleChar + Suppress("-") + _singleChar)
@ -3376,7 +3350,7 @@ def matchOnlyAtCol(n):
def replaceWith(replStr): def replaceWith(replStr):
"""Helper method for common parse actions that simply return a literal value. Especially """Helper method for common parse actions that simply return a literal value. Especially
useful when used with C{transformString()}. useful when used with C{L{transformString<ParserElement.transformString>}()}.
""" """
def _replFunc(*args): def _replFunc(*args):
return [replStr] return [replStr]
@ -3398,7 +3372,7 @@ def downcaseTokens(s,l,t):
return [ tt.lower() for tt in map(_ustr,t) ] return [ tt.lower() for tt in map(_ustr,t) ]
def keepOriginalText(s,startLoc,t): def keepOriginalText(s,startLoc,t):
"""DEPRECATED - use new helper method C{originalTextFor}. """DEPRECATED - use new helper method C{L{originalTextFor}}.
Helper parse action to preserve original parsed text, Helper parse action to preserve original parsed text,
overriding any nested parse actions.""" overriding any nested parse actions."""
try: try:
@ -3464,7 +3438,7 @@ def makeXMLTags(tagStr):
def withAttribute(*args,**attrDict): def withAttribute(*args,**attrDict):
"""Helper to create a validating parse action to be used with start tags created """Helper to create a validating parse action to be used with start tags created
with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
with a required attribute value, to avoid false matches on common tags such as with a required attribute value, to avoid false matches on common tags such as
C{<TD>} or C{<DIV>}. C{<TD>} or C{<DIV>}.
@ -3499,7 +3473,7 @@ opAssoc = _Constants()
opAssoc.LEFT = object() opAssoc.LEFT = object()
opAssoc.RIGHT = object() opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ): def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
"""Helper method for constructing grammars of expressions made up of """Helper method for constructing grammars of expressions made up of
operators working in a precedence hierarchy. Operators may be unary or operators working in a precedence hierarchy. Operators may be unary or
binary, left- or right-associative. Parse actions can also be attached binary, left- or right-associative. Parse actions can also be attached
@ -3518,13 +3492,15 @@ def operatorPrecedence( baseExpr, opList ):
be 1, 2, or 3) be 1, 2, or 3)
- rightLeftAssoc is the indicator whether the operator is - rightLeftAssoc is the indicator whether the operator is
right or left associative, using the pyparsing-defined right or left associative, using the pyparsing-defined
constants opAssoc.RIGHT and opAssoc.LEFT. constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
- parseAction is the parse action to be associated with - parseAction is the parse action to be associated with
expressions matching this operator expression (the expressions matching this operator expression (the
parse action tuple member may be omitted) parse action tuple member may be omitted)
- lpar - expression for matching left-parentheses (default=Suppress('('))
- rpar - expression for matching right-parentheses (default=Suppress(')'))
""" """
ret = Forward() ret = Forward()
lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) lastExpr = baseExpr | ( lpar + ret + rpar )
for i,operDef in enumerate(opList): for i,operDef in enumerate(opList):
opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
if arity == 3: if arity == 3:
@ -3569,6 +3545,7 @@ def operatorPrecedence( baseExpr, opList ):
lastExpr = thisExpr lastExpr = thisExpr
ret << lastExpr ret << lastExpr
return ret return ret
operatorPrecedence = infixNotation
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
@ -3715,8 +3692,7 @@ if __name__ == "__main__":
print ("tokens.columns = " + str(tokens.columns)) print ("tokens.columns = " + str(tokens.columns))
print ("tokens.tables = " + str(tokens.tables)) print ("tokens.tables = " + str(tokens.tables))
print (tokens.asXML("SQL",True)) print (tokens.asXML("SQL",True))
except ParseBaseException: except ParseBaseException, err:
err = sys.exc_info()[1]
print (teststring + "->") print (teststring + "->")
print (err.line) print (err.line)
print (" "*(err.column-1) + "^") print (" "*(err.column-1) + "^")