Update bundled pyparsing

This commit is contained in:
Kovid Goyal 2013-01-02 10:42:49 +05:30
parent 99551f1a6d
commit 2fe223bf97

View File

@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments
"""
__version__ = "1.5.6"
__versionTime__ = "26 June 2011 10:53"
__version__ = "1.5.7"
__versionTime__ = "17 November 2012 16:18"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@ -81,66 +81,51 @@ __all__ = [
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation',
]
"""
Detect if we are running version 3.X and make appropriate changes
Robert A. Clark
"""
_PY3K = sys.version_info[0] > 2
if _PY3K:
_MAX_INT = sys.maxsize
basestring = str
unichr = chr
_ustr = str
alphas = string.ascii_lowercase + string.ascii_uppercase
else:
_MAX_INT = sys.maxint
range = xrange
set = lambda s : dict( [(c,0) for c in s] )
alphas = string.lowercase + string.uppercase
_MAX_INT = sys.maxint
range = xrange
set = lambda s : dict( [(c,0) for c in s] )
def _ustr(obj):
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
then < returns the unicode object | encodes it with the default encoding | ... >.
"""
if isinstance(obj,unicode):
return obj
def _ustr(obj):
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
then < returns the unicode object | encodes it with the default encoding | ... >.
"""
if isinstance(obj,unicode):
return obj
try:
# If this works, then _ustr(obj) has the same behaviour as str(obj), so
# it won't break any existing code.
return str(obj)
try:
# If this works, then _ustr(obj) has the same behaviour as str(obj), so
# it won't break any existing code.
return str(obj)
except UnicodeEncodeError:
# The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
# state that "The return value must be a string object". However, does a
# unicode object (being a subclass of basestring) count as a "string
# object"?
# If so, then return a unicode object:
return unicode(obj)
# Else encode it... but how? There are many choices... :)
# Replace unprintables with escape codes?
#return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
# Replace unprintables with question marks?
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
# ...
alphas = string.lowercase + string.uppercase
except UnicodeEncodeError:
# The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
# state that "The return value must be a string object". However, does a
# unicode object (being a subclass of basestring) count as a "string
# object"?
# If so, then return a unicode object:
return unicode(obj)
# Else encode it... but how? There are many choices... :)
# Replace unprintables with escape codes?
#return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
# Replace unprintables with question marks?
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
# ...
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
import __builtin__
for fname in "sum len enumerate sorted reversed list tuple set any all".split():
for fname in "sum len sorted reversed list tuple set any all min max".split():
try:
singleArgBuiltins.append(getattr(__builtin__,fname))
except AttributeError:
@ -159,7 +144,8 @@ def _xml_escape(data):
class _Constants(object):
pass
nums = string.digits
alphas = string.ascii_lowercase + string.ascii_uppercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
@ -211,7 +197,7 @@ class ParseBaseException(Exception):
return line_str.strip()
def __dir__(self):
return "loc msg pstr parserElement lineno col line " \
"markInputLine __str__ __repr__".split()
"markInputline __str__ __repr__".split()
class ParseException(ParseBaseException):
"""exception thrown when parse expressions don't match class;
@ -228,8 +214,8 @@ class ParseFatalException(ParseBaseException):
pass
class ParseSyntaxException(ParseFatalException):
"""just like C{ParseFatalException}, but thrown internally when an
C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because
"""just like C{L{ParseFatalException}}, but thrown internally when an
C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
an unbacktrackable syntax error has been found"""
def __init__(self, pe):
super(ParseSyntaxException, self).__init__(
@ -444,16 +430,13 @@ class ParseResults(object):
return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
def __str__( self ):
out = "["
sep = ""
out = []
for i in self.__toklist:
if isinstance(i, ParseResults):
out += sep + _ustr(i)
out.append(_ustr(i))
else:
out += sep + repr(i)
sep = ", "
out += "]"
return out
out.append(repr(i))
return '[' + ', '.join(out) + ']'
def _asStringList( self, sep='' ):
out = []
@ -616,7 +599,7 @@ class ParseResults(object):
self.__parent = None
def __dir__(self):
return dir(super(ParseResults,self)) + self.keys()
return dir(super(ParseResults,self)) + list(self.keys())
def col (loc,strg):
"""Returns current column within a string, counting newlines as line separators.
@ -624,7 +607,7 @@ def col (loc,strg):
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column
positions within the parsed string.
"""
@ -636,7 +619,7 @@ def lineno(loc,strg):
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column
positions within the parsed string.
"""
@ -666,33 +649,23 @@ def nullDebugAction(*args):
pass
'decorator to trim function calls to match the arity of the target'
if not _PY3K:
def _trim_arity(func, maxargs=2):
limit = [0]
def wrapper(*args):
while 1:
try:
return func(*args[limit[0]:])
except TypeError:
if limit[0] <= maxargs:
limit[0] += 1
continue
raise
return wrapper
else:
def _trim_arity(func, maxargs=2):
limit = maxargs
def wrapper(*args):
#~ nonlocal limit
while 1:
try:
return func(*args[limit:])
except TypeError:
if limit:
limit -= 1
continue
raise
return wrapper
def _trim_arity(func, maxargs=2):
if func in singleArgBuiltins:
return lambda s,l,t: func(t)
limit = [0]
foundArity = [False]
def wrapper(*args):
while 1:
try:
ret = func(*args[limit[0]:])
foundArity[0] = True
return ret
except TypeError:
if limit[0] <= maxargs and not foundArity[0]:
limit[0] += 1
continue
raise
return wrapper
class ParserElement(object):
"""Abstract base level parser element class."""
@ -705,6 +678,13 @@ class ParserElement(object):
ParserElement.DEFAULT_WHITE_CHARS = chars
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
def inlineLiteralsUsing(cls):
"""
Set class to be used for inclusion of string literals into a parser.
"""
ParserElement.literalStringClass = cls
inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
def __init__( self, savelist=False ):
self.parseAction = list()
self.failAction = None
@ -789,14 +769,14 @@ class ParserElement(object):
C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- s = the original string being parsed (see note below)
- loc = the location of the matching substring
- toks = a list of the matched tokens, packaged as a ParseResults object
- toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
If the functions in fns modify the tokens, they can return them as the return
value from fn, and the modified list of tokens will replace the original.
Otherwise, fn does not need to return any value.
Note: the default parsing behavior is to expand tabs in the input string
before starting the parsing process. See L{I{parseString}<parseString>} for more information
on parsing strings containing <TAB>s, and suggested methods to maintain a
on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
consistent view of the parsed string, the parse location, and line and column
positions within the parsed string.
"""
@ -818,7 +798,7 @@ class ParserElement(object):
- loc = location where expression match was attempted and failed
- expr = the parse expression that failed
- err = the exception thrown
The function returns no value. It may throw C{ParseFatalException}
The function returns no value. It may throw C{L{ParseFatalException}}
if it is desired to stop parsing immediately."""
self.failAction = fn
return self
@ -872,15 +852,12 @@ class ParserElement(object):
loc,tokens = self.parseImpl( instring, preloc, doActions )
except IndexError:
raise ParseException( instring, len(instring), self.errmsg, self )
except ParseBaseException:
except ParseBaseException, err:
#~ print ("Exception raised:", err)
err = None
if self.debugActions[2]:
err = sys.exc_info()[1]
self.debugActions[2]( instring, tokensStart, self, err )
if self.failAction:
if err is None:
err = sys.exc_info()[1]
self.failAction( instring, tokensStart, self, err )
raise
else:
@ -910,10 +887,9 @@ class ParserElement(object):
self.resultsName,
asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
modal=self.modalResults )
except ParseBaseException:
except ParseBaseException, err:
#~ print "Exception raised in user parse action:", err
if (self.debugActions[2] ):
err = sys.exc_info()[1]
self.debugActions[2]( instring, tokensStart, self, err )
raise
else:
@ -952,8 +928,7 @@ class ParserElement(object):
value = self._parseNoCache( instring, loc, doActions, callPreParse )
ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
return value
except ParseBaseException:
pe = sys.exc_info()[1]
except ParseBaseException, pe:
ParserElement._exprArgCache[ lookup ] = pe
raise
@ -994,7 +969,7 @@ class ParserElement(object):
If you want the grammar to require that the entire input string be
successfully parsed, then set C{parseAll} to True (equivalent to ending
the grammar with C{StringEnd()}).
the grammar with C{L{StringEnd()}}).
Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
in order to report proper column numbers in parse actions.
@ -1023,12 +998,11 @@ class ParserElement(object):
loc = self.preParse( instring, loc )
se = Empty() + StringEnd()
se._parse( instring, loc )
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
else:
return tokens
@ -1076,16 +1050,15 @@ class ParserElement(object):
loc = nextLoc
else:
loc = preloc+1
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
def transformString( self, instring ):
"""Extension to C{scanString}, to modify matching text with modified tokens that may
"""Extension to C{L{scanString}}, to modify matching text with modified tokens that may
be returned from a parse action. To use C{transformString}, define a grammar and
attach a parse action to it that modifies the returned token list.
Invoking C{transformString()} on a target string will then scan for matches,
@ -1110,33 +1083,31 @@ class ParserElement(object):
out.append(instring[lastE:])
out = [o for o in out if o]
return "".join(map(_ustr,_flatten(out)))
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
def searchString( self, instring, maxMatches=_MAX_INT ):
"""Another extension to C{scanString}, simplifying the access to the tokens found
"""Another extension to C{L{scanString}}, simplifying the access to the tokens found
to match the given parse expression. May be called with optional
C{maxMatches} argument, to clip searching after 'n' matches are found.
"""
try:
return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
except ParseBaseException:
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
def __add__(self, other ):
"""Implementation of + operator - returns And"""
"""Implementation of + operator - returns C{L{And}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1144,9 +1115,9 @@ class ParserElement(object):
return And( [ self, other ] )
def __radd__(self, other ):
"""Implementation of + operator when left operand is not a C{ParserElement}"""
"""Implementation of + operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1154,9 +1125,9 @@ class ParserElement(object):
return other + self
def __sub__(self, other):
"""Implementation of - operator, returns C{And} with error stop"""
"""Implementation of - operator, returns C{L{And}} with error stop"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1164,9 +1135,9 @@ class ParserElement(object):
return And( [ self, And._ErrorStop(), other ] )
def __rsub__(self, other ):
"""Implementation of - operator when left operand is not a C{ParserElement}"""
"""Implementation of - operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1179,12 +1150,12 @@ class ParserElement(object):
tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
may also include C{None} as in:
- C{expr*(n,None)} or C{expr*(n,)} is equivalent
to C{expr*n + ZeroOrMore(expr)}
to C{expr*n + L{ZeroOrMore}(expr)}
(read as "at least n instances of C{expr}")
- C{expr*(None,n)} is equivalent to C{expr*(0,n)}
(read as "0 to n instances of C{expr}")
- C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
- C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}
- C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
- C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
Note that C{expr*(None,n)} does not raise an exception if
more than n exprs exist in the input stream; that is,
@ -1245,9 +1216,9 @@ class ParserElement(object):
return self.__mul__(other)
def __or__(self, other ):
"""Implementation of | operator - returns C{MatchFirst}"""
"""Implementation of | operator - returns C{L{MatchFirst}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1255,9 +1226,9 @@ class ParserElement(object):
return MatchFirst( [ self, other ] )
def __ror__(self, other ):
"""Implementation of | operator when left operand is not a C{ParserElement}"""
"""Implementation of | operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1265,9 +1236,9 @@ class ParserElement(object):
return other | self
def __xor__(self, other ):
"""Implementation of ^ operator - returns C{Or}"""
"""Implementation of ^ operator - returns C{L{Or}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1275,9 +1246,9 @@ class ParserElement(object):
return Or( [ self, other ] )
def __rxor__(self, other ):
"""Implementation of ^ operator when left operand is not a C{ParserElement}"""
"""Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1285,9 +1256,9 @@ class ParserElement(object):
return other ^ self
def __and__(self, other ):
"""Implementation of & operator - returns C{Each}"""
"""Implementation of & operator - returns C{L{Each}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1295,9 +1266,9 @@ class ParserElement(object):
return Each( [ self, other ] )
def __rand__(self, other ):
"""Implementation of & operator when left operand is not a C{ParserElement}"""
"""Implementation of & operator when left operand is not a C{L{ParserElement}}"""
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
if not isinstance( other, ParserElement ):
warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
SyntaxWarning, stacklevel=2)
@ -1305,11 +1276,11 @@ class ParserElement(object):
return other & self
def __invert__( self ):
"""Implementation of ~ operator - returns C{NotAny}"""
"""Implementation of ~ operator - returns C{L{NotAny}}"""
return NotAny( self )
def __call__(self, name):
"""Shortcut for C{setResultsName}, with C{listAllMatches=default}::
"""Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
could be written as::
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
@ -1403,15 +1374,17 @@ class ParserElement(object):
try:
file_contents = file_or_filename.read()
except AttributeError:
f = open(file_or_filename, "rb")
f = open(file_or_filename, "r")
file_contents = f.read()
f.close()
try:
return self.parseString(file_contents, parseAll)
except ParseBaseException:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
exc = sys.exc_info()[1]
raise exc
except ParseBaseException, exc:
if ParserElement.verbose_stacktrace:
raise
else:
# catch and re-raise exception from here, clears out pyparsing internal stack trace
raise exc
def getException(self):
return ParseException("",0,self.errmsg,self)
@ -1515,10 +1488,11 @@ class Literal(Token):
exc.pstr = instring
raise exc
_L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token):
"""Token to exactly match a specified string as a keyword, that is, it must be
immediately followed by a non-keyword character. Compare with C{Literal}::
immediately followed by a non-keyword character. Compare with C{L{Literal}}::
Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
Accepts two optional constructor arguments in addition to the keyword string:
@ -1821,9 +1795,9 @@ class QuotedString(Token):
- quoteChar - string of one or more characters defining the quote delimiting string
- escChar - character to escape quotes, typically backslash (default=None)
- escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
- multiline - boolean indicating whether quotes can span multiple lines (default=False)
- unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
- multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
- unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
- endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
"""
super(QuotedString,self).__init__()
@ -2003,7 +1977,7 @@ class White(Token):
by pyparsing grammars. This class is included when some whitespace structures
are significant. Define with a string containing the whitespace characters to be
matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
as defined for the C{Word} class."""
as defined for the C{L{Word}} class."""
whiteStrs = {
" " : "<SPC>",
"\t": "<TAB>",
@ -2331,7 +2305,8 @@ class And(ParseExpression):
class _ErrorStop(Empty):
def __init__(self, *args, **kwargs):
super(Empty,self).__init__(*args, **kwargs)
super(And._ErrorStop,self).__init__(*args, **kwargs)
self.name = '-'
self.leaveWhitespace()
def __init__( self, exprs, savelist = True ):
@ -2359,8 +2334,7 @@ class And(ParseExpression):
loc, exprtokens = e._parse( instring, loc, doActions )
except ParseSyntaxException:
raise
except ParseBaseException:
pe = sys.exc_info()[1]
except ParseBaseException, pe:
raise ParseSyntaxException(pe)
except IndexError:
raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
@ -2412,8 +2386,7 @@ class Or(ParseExpression):
for e in self.exprs:
try:
loc2 = e.tryParse( instring, loc )
except ParseException:
err = sys.exc_info()[1]
except ParseException, err:
if err.loc > maxExcLoc:
maxException = err
maxExcLoc = err.loc
@ -2436,7 +2409,7 @@ class Or(ParseExpression):
def __ixor__(self, other ):
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
return self.append( other ) #Or( [ self, other ] )
def __str__( self ):
@ -2495,7 +2468,7 @@ class MatchFirst(ParseExpression):
def __ior__(self, other ):
if isinstance( other, basestring ):
other = Literal( other )
other = ParserElement.literalStringClass( other )
return self.append( other ) #MatchFirst( [ self, other ] )
def __str__( self ):
@ -2916,13 +2889,14 @@ class Forward(ParseElementEnhance):
thereby leaving b and c out as parseable alternatives. It is recommended that you
explicitly group the values inserted into the C{Forward}::
fwdExpr << (a | b | c)
Converting to use the '<<=' operator instead will avoid this problem.
"""
def __init__( self, other=None ):
super(Forward,self).__init__( other, savelist=False )
def __lshift__( self, other ):
if isinstance( other, basestring ):
other = Literal(other)
other = ParserElement.literalStringClass(other)
self.expr = other
self.mayReturnEmpty = other.mayReturnEmpty
self.strRepr = None
@ -2933,7 +2907,8 @@ class Forward(ParseElementEnhance):
self.saveAsList = self.expr.saveAsList
self.ignoreExprs.extend(self.expr.ignoreExprs)
return None
__ilshift__ = __lshift__
def leaveWhitespace( self ):
self.skipWhitespace = False
return self
@ -2993,7 +2968,7 @@ class Upcase(TokenConverter):
DeprecationWarning,stacklevel=2)
def postParse( self, instring, loc, tokenlist ):
return list(map( string.upper, tokenlist ))
return list(map( str.upper, tokenlist ))
class Combine(TokenConverter):
@ -3029,7 +3004,7 @@ class Combine(TokenConverter):
return retToks
class Group(TokenConverter):
"""Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""
"""Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
def __init__( self, expr ):
super(Group,self).__init__( expr )
self.saveAsList = True
@ -3105,8 +3080,7 @@ def traceParseAction(f):
sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
try:
ret = f(*paArgs)
except Exception:
exc = sys.exc_info()[1]
except Exception, exc:
sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
raise
sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
@ -3124,7 +3098,7 @@ def delimitedList( expr, delim=",", combine=False ):
"""Helper to define a delimited list of expressions - the delimiter defaults to ','.
By default, the list elements and delimiters can have intervening whitespace, and
comments, but this can be overridden by passing C{combine=True} in the constructor.
If C{combine} is set to True, the matching tokens are returned as a single token
If C{combine} is set to C{True}, the matching tokens are returned as a single token
string, with the delimiters included; otherwise, the matching tokens are returned
as a list of tokens, with the delimiters suppressed.
"""
@ -3226,7 +3200,7 @@ def _escapeRegexRangeChars(s):
def oneOf( strs, caseless=False, useRegex=True ):
"""Helper to quickly define a set of alternative Literals, and makes sure to do
longest-first testing when there is a conflict, regardless of the input order,
but returns a C{MatchFirst} for best performance.
but returns a C{L{MatchFirst}} for best performance.
Parameters:
- strs - a string of space-delimited literals, or a list of string literals
@ -3284,7 +3258,7 @@ def oneOf( strs, caseless=False, useRegex=True ):
def dictOf( key, value ):
"""Helper to easily and clearly define a dictionary by specifying the respective patterns
for the key and value. Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens
for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
in the proper order. The key pattern can include delimiting markers or punctuation,
as long as they are suppressed, thereby leaving the significant key text. The value
pattern can include named results, so that the C{Dict} results can include named token
@ -3301,7 +3275,7 @@ def originalTextFor(expr, asString=True):
string containing the original parsed text.
If the optional C{asString} argument is passed as C{False}, then the return value is a
C{ParseResults} containing any results names that were originally matched, and a
C{L{ParseResults}} containing any results names that were originally matched, and a
single token containing the original matched text from the input string. So if
the expression passed to C{L{originalTextFor}} contains expressions with defined
results names, you must set C{asString} to C{False} if you want to preserve those
@ -3335,7 +3309,7 @@ stringEnd = StringEnd().setName("stringEnd")
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16)))
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
@ -3376,7 +3350,7 @@ def matchOnlyAtCol(n):
def replaceWith(replStr):
"""Helper method for common parse actions that simply return a literal value. Especially
useful when used with C{transformString()}.
useful when used with C{L{transformString<ParserElement.transformString>}()}.
"""
def _replFunc(*args):
return [replStr]
@ -3398,7 +3372,7 @@ def downcaseTokens(s,l,t):
return [ tt.lower() for tt in map(_ustr,t) ]
def keepOriginalText(s,startLoc,t):
"""DEPRECATED - use new helper method C{originalTextFor}.
"""DEPRECATED - use new helper method C{L{originalTextFor}}.
Helper parse action to preserve original parsed text,
overriding any nested parse actions."""
try:
@ -3464,7 +3438,7 @@ def makeXMLTags(tagStr):
def withAttribute(*args,**attrDict):
"""Helper to create a validating parse action to be used with start tags created
with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag
with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
with a required attribute value, to avoid false matches on common tags such as
C{<TD>} or C{<DIV>}.
@ -3499,7 +3473,7 @@ opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
"""Helper method for constructing grammars of expressions made up of
operators working in a precedence hierarchy. Operators may be unary or
binary, left- or right-associative. Parse actions can also be attached
@ -3518,13 +3492,15 @@ def operatorPrecedence( baseExpr, opList ):
be 1, 2, or 3)
- rightLeftAssoc is the indicator whether the operator is
right or left associative, using the pyparsing-defined
constants opAssoc.RIGHT and opAssoc.LEFT.
constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
- parseAction is the parse action to be associated with
expressions matching this operator expression (the
parse action tuple member may be omitted)
- lpar - expression for matching left-parentheses (default=Suppress('('))
- rpar - expression for matching right-parentheses (default=Suppress(')'))
"""
ret = Forward()
lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
lastExpr = baseExpr | ( lpar + ret + rpar )
for i,operDef in enumerate(opList):
opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
if arity == 3:
@ -3569,6 +3545,7 @@ def operatorPrecedence( baseExpr, opList ):
lastExpr = thisExpr
ret << lastExpr
return ret
operatorPrecedence = infixNotation
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
@ -3715,8 +3692,7 @@ if __name__ == "__main__":
print ("tokens.columns = " + str(tokens.columns))
print ("tokens.tables = " + str(tokens.tables))
print (tokens.asXML("SQL",True))
except ParseBaseException:
err = sys.exc_info()[1]
except ParseBaseException, err:
print (teststring + "->")
print (err.line)
print (" "*(err.column-1) + "^")