mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update pyparsing to 1.5.6
This commit is contained in:
parent
2e994081ed
commit
388968cd88
@ -1,6 +1,6 @@
|
|||||||
# module pyparsing.py
|
# module pyparsing.py
|
||||||
#
|
#
|
||||||
# Copyright (c) 2003-2010 Paul T. McGuire
|
# Copyright (c) 2003-2011 Paul T. McGuire
|
||||||
#
|
#
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||||||
# a copy of this software and associated documentation files (the
|
# a copy of this software and associated documentation files (the
|
||||||
@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when
|
|||||||
- embedded comments
|
- embedded comments
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = "1.5.5"
|
__version__ = "1.5.6"
|
||||||
__versionTime__ = "12 Aug 2010 03:56"
|
__versionTime__ = "26 June 2011 10:53"
|
||||||
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
|
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
|
||||||
|
|
||||||
import string
|
import string
|
||||||
@ -101,11 +101,12 @@ if _PY3K:
|
|||||||
basestring = str
|
basestring = str
|
||||||
unichr = chr
|
unichr = chr
|
||||||
_ustr = str
|
_ustr = str
|
||||||
_str2dict = set
|
|
||||||
alphas = string.ascii_lowercase + string.ascii_uppercase
|
alphas = string.ascii_lowercase + string.ascii_uppercase
|
||||||
else:
|
else:
|
||||||
_MAX_INT = sys.maxint
|
_MAX_INT = sys.maxint
|
||||||
range = xrange
|
range = xrange
|
||||||
|
set = lambda s : dict( [(c,0) for c in s] )
|
||||||
|
alphas = string.lowercase + string.uppercase
|
||||||
|
|
||||||
def _ustr(obj):
|
def _ustr(obj):
|
||||||
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
|
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
|
||||||
@ -134,9 +135,6 @@ else:
|
|||||||
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
|
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
|
||||||
# ...
|
# ...
|
||||||
|
|
||||||
def _str2dict(strg):
|
|
||||||
return dict( [(c,0) for c in strg] )
|
|
||||||
|
|
||||||
alphas = string.lowercase + string.uppercase
|
alphas = string.lowercase + string.uppercase
|
||||||
|
|
||||||
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
|
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
|
||||||
@ -606,10 +604,10 @@ class ParseResults(object):
|
|||||||
|
|
||||||
def __setstate__(self,state):
|
def __setstate__(self,state):
|
||||||
self.__toklist = state[0]
|
self.__toklist = state[0]
|
||||||
self.__tokdict, \
|
(self.__tokdict,
|
||||||
par, \
|
par,
|
||||||
inAccumNames, \
|
inAccumNames,
|
||||||
self.__name = state[1]
|
self.__name) = state[1]
|
||||||
self.__accumNames = {}
|
self.__accumNames = {}
|
||||||
self.__accumNames.update(inAccumNames)
|
self.__accumNames.update(inAccumNames)
|
||||||
if par is not None:
|
if par is not None:
|
||||||
@ -667,6 +665,35 @@ def nullDebugAction(*args):
|
|||||||
"""'Do-nothing' debug action, to suppress debugging output during parsing."""
|
"""'Do-nothing' debug action, to suppress debugging output during parsing."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
'decorator to trim function calls to match the arity of the target'
|
||||||
|
if not _PY3K:
|
||||||
|
def _trim_arity(func, maxargs=2):
|
||||||
|
limit = [0]
|
||||||
|
def wrapper(*args):
|
||||||
|
while 1:
|
||||||
|
try:
|
||||||
|
return func(*args[limit[0]:])
|
||||||
|
except TypeError:
|
||||||
|
if limit[0] <= maxargs:
|
||||||
|
limit[0] += 1
|
||||||
|
continue
|
||||||
|
raise
|
||||||
|
return wrapper
|
||||||
|
else:
|
||||||
|
def _trim_arity(func, maxargs=2):
|
||||||
|
limit = maxargs
|
||||||
|
def wrapper(*args):
|
||||||
|
#~ nonlocal limit
|
||||||
|
while 1:
|
||||||
|
try:
|
||||||
|
return func(*args[limit:])
|
||||||
|
except TypeError:
|
||||||
|
if limit:
|
||||||
|
limit -= 1
|
||||||
|
continue
|
||||||
|
raise
|
||||||
|
return wrapper
|
||||||
|
|
||||||
class ParserElement(object):
|
class ParserElement(object):
|
||||||
"""Abstract base level parser element class."""
|
"""Abstract base level parser element class."""
|
||||||
DEFAULT_WHITE_CHARS = " \n\t\r"
|
DEFAULT_WHITE_CHARS = " \n\t\r"
|
||||||
@ -731,6 +758,9 @@ class ParserElement(object):
|
|||||||
see L{I{__call__}<__call__>}.
|
see L{I{__call__}<__call__>}.
|
||||||
"""
|
"""
|
||||||
newself = self.copy()
|
newself = self.copy()
|
||||||
|
if name.endswith("*"):
|
||||||
|
name = name[:-1]
|
||||||
|
listAllMatches=True
|
||||||
newself.resultsName = name
|
newself.resultsName = name
|
||||||
newself.modalResults = not listAllMatches
|
newself.modalResults = not listAllMatches
|
||||||
return newself
|
return newself
|
||||||
@ -753,104 +783,6 @@ class ParserElement(object):
|
|||||||
self._parse = self._parse._originalParseMethod
|
self._parse = self._parse._originalParseMethod
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def _normalizeParseActionArgs( f ):
|
|
||||||
"""Internal method used to decorate parse actions that take fewer than 3 arguments,
|
|
||||||
so that all parse actions can be called as C{f(s,l,t)}."""
|
|
||||||
STAR_ARGS = 4
|
|
||||||
|
|
||||||
# special handling for single-argument builtins
|
|
||||||
if (f in singleArgBuiltins):
|
|
||||||
numargs = 1
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
restore = None
|
|
||||||
if isinstance(f,type):
|
|
||||||
restore = f
|
|
||||||
f = f.__init__
|
|
||||||
if not _PY3K:
|
|
||||||
codeObj = f.func_code
|
|
||||||
else:
|
|
||||||
codeObj = f.code
|
|
||||||
if codeObj.co_flags & STAR_ARGS:
|
|
||||||
return f
|
|
||||||
numargs = codeObj.co_argcount
|
|
||||||
if not _PY3K:
|
|
||||||
if hasattr(f,"im_self"):
|
|
||||||
numargs -= 1
|
|
||||||
else:
|
|
||||||
if hasattr(f,"__self__"):
|
|
||||||
numargs -= 1
|
|
||||||
if restore:
|
|
||||||
f = restore
|
|
||||||
except AttributeError:
|
|
||||||
try:
|
|
||||||
if not _PY3K:
|
|
||||||
call_im_func_code = f.__call__.im_func.func_code
|
|
||||||
else:
|
|
||||||
call_im_func_code = f.__code__
|
|
||||||
|
|
||||||
# not a function, must be a callable object, get info from the
|
|
||||||
# im_func binding of its bound __call__ method
|
|
||||||
if call_im_func_code.co_flags & STAR_ARGS:
|
|
||||||
return f
|
|
||||||
numargs = call_im_func_code.co_argcount
|
|
||||||
if not _PY3K:
|
|
||||||
if hasattr(f.__call__,"im_self"):
|
|
||||||
numargs -= 1
|
|
||||||
else:
|
|
||||||
if hasattr(f.__call__,"__self__"):
|
|
||||||
numargs -= 0
|
|
||||||
except AttributeError:
|
|
||||||
if not _PY3K:
|
|
||||||
call_func_code = f.__call__.func_code
|
|
||||||
else:
|
|
||||||
call_func_code = f.__call__.__code__
|
|
||||||
# not a bound method, get info directly from __call__ method
|
|
||||||
if call_func_code.co_flags & STAR_ARGS:
|
|
||||||
return f
|
|
||||||
numargs = call_func_code.co_argcount
|
|
||||||
if not _PY3K:
|
|
||||||
if hasattr(f.__call__,"im_self"):
|
|
||||||
numargs -= 1
|
|
||||||
else:
|
|
||||||
if hasattr(f.__call__,"__self__"):
|
|
||||||
numargs -= 1
|
|
||||||
|
|
||||||
|
|
||||||
#~ print ("adding function %s with %d args" % (f.func_name,numargs))
|
|
||||||
if numargs == 3:
|
|
||||||
return f
|
|
||||||
else:
|
|
||||||
if numargs > 3:
|
|
||||||
def tmp(s,l,t):
|
|
||||||
return f(f.__call__.__self__, s,l,t)
|
|
||||||
if numargs == 2:
|
|
||||||
def tmp(s,l,t):
|
|
||||||
return f(l,t)
|
|
||||||
elif numargs == 1:
|
|
||||||
def tmp(s,l,t):
|
|
||||||
return f(t)
|
|
||||||
else: #~ numargs == 0:
|
|
||||||
def tmp(s,l,t):
|
|
||||||
return f()
|
|
||||||
try:
|
|
||||||
tmp.__name__ = f.__name__
|
|
||||||
except (AttributeError,TypeError):
|
|
||||||
# no need for special handling if attribute doesnt exist
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
tmp.__doc__ = f.__doc__
|
|
||||||
except (AttributeError,TypeError):
|
|
||||||
# no need for special handling if attribute doesnt exist
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
tmp.__dict__.update(f.__dict__)
|
|
||||||
except (AttributeError,TypeError):
|
|
||||||
# no need for special handling if attribute doesnt exist
|
|
||||||
pass
|
|
||||||
return tmp
|
|
||||||
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
|
|
||||||
|
|
||||||
def setParseAction( self, *fns, **kwargs ):
|
def setParseAction( self, *fns, **kwargs ):
|
||||||
"""Define action to perform when successfully matching parse element definition.
|
"""Define action to perform when successfully matching parse element definition.
|
||||||
Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
|
Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
|
||||||
@ -868,13 +800,13 @@ class ParserElement(object):
|
|||||||
consistent view of the parsed string, the parse location, and line and column
|
consistent view of the parsed string, the parse location, and line and column
|
||||||
positions within the parsed string.
|
positions within the parsed string.
|
||||||
"""
|
"""
|
||||||
self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
|
self.parseAction = list(map(_trim_arity, list(fns)))
|
||||||
self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
|
self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def addParseAction( self, *fns, **kwargs ):
|
def addParseAction( self, *fns, **kwargs ):
|
||||||
"""Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
|
"""Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
|
||||||
self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
|
self.parseAction += list(map(_trim_arity, list(fns)))
|
||||||
self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
|
self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@ -1012,9 +944,9 @@ class ParserElement(object):
|
|||||||
lookup = (self,instring,loc,callPreParse,doActions)
|
lookup = (self,instring,loc,callPreParse,doActions)
|
||||||
if lookup in ParserElement._exprArgCache:
|
if lookup in ParserElement._exprArgCache:
|
||||||
value = ParserElement._exprArgCache[ lookup ]
|
value = ParserElement._exprArgCache[ lookup ]
|
||||||
if isinstance(value,Exception):
|
if isinstance(value, Exception):
|
||||||
raise value
|
raise value
|
||||||
return value
|
return (value[0],value[1].copy())
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
value = self._parseNoCache( instring, loc, doActions, callPreParse )
|
value = self._parseNoCache( instring, loc, doActions, callPreParse )
|
||||||
@ -1088,8 +1020,8 @@ class ParserElement(object):
|
|||||||
try:
|
try:
|
||||||
loc, tokens = self._parse( instring, 0 )
|
loc, tokens = self._parse( instring, 0 )
|
||||||
if parseAll:
|
if parseAll:
|
||||||
#loc = self.preParse( instring, loc )
|
loc = self.preParse( instring, loc )
|
||||||
se = StringEnd()
|
se = Empty() + StringEnd()
|
||||||
se._parse( instring, loc )
|
se._parse( instring, loc )
|
||||||
except ParseBaseException:
|
except ParseBaseException:
|
||||||
if ParserElement.verbose_stacktrace:
|
if ParserElement.verbose_stacktrace:
|
||||||
@ -1101,10 +1033,11 @@ class ParserElement(object):
|
|||||||
else:
|
else:
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
def scanString( self, instring, maxMatches=_MAX_INT ):
|
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
|
||||||
"""Scan the input string for expression matches. Each match will return the
|
"""Scan the input string for expression matches. Each match will return the
|
||||||
matching tokens, start location, and end location. May be called with optional
|
matching tokens, start location, and end location. May be called with optional
|
||||||
C{maxMatches} argument, to clip scanning after 'n' matches are found.
|
C{maxMatches} argument, to clip scanning after 'n' matches are found. If
|
||||||
|
C{overlap} is specified, then overlapping matches will be reported.
|
||||||
|
|
||||||
Note that the start and end locations are reported relative to the string
|
Note that the start and end locations are reported relative to the string
|
||||||
being parsed. See L{I{parseString}<parseString>} for more information on parsing
|
being parsed. See L{I{parseString}<parseString>} for more information on parsing
|
||||||
@ -1133,7 +1066,14 @@ class ParserElement(object):
|
|||||||
if nextLoc > loc:
|
if nextLoc > loc:
|
||||||
matches += 1
|
matches += 1
|
||||||
yield tokens, preloc, nextLoc
|
yield tokens, preloc, nextLoc
|
||||||
loc = nextLoc
|
if overlap:
|
||||||
|
nextloc = preparseFn( instring, loc )
|
||||||
|
if nextloc > loc:
|
||||||
|
loc = nextLoc
|
||||||
|
else:
|
||||||
|
loc += 1
|
||||||
|
else:
|
||||||
|
loc = nextLoc
|
||||||
else:
|
else:
|
||||||
loc = preloc+1
|
loc = preloc+1
|
||||||
except ParseBaseException:
|
except ParseBaseException:
|
||||||
@ -1168,6 +1108,7 @@ class ParserElement(object):
|
|||||||
out.append(t)
|
out.append(t)
|
||||||
lastE = e
|
lastE = e
|
||||||
out.append(instring[lastE:])
|
out.append(instring[lastE:])
|
||||||
|
out = [o for o in out if o]
|
||||||
return "".join(map(_ustr,_flatten(out)))
|
return "".join(map(_ustr,_flatten(out)))
|
||||||
except ParseBaseException:
|
except ParseBaseException:
|
||||||
if ParserElement.verbose_stacktrace:
|
if ParserElement.verbose_stacktrace:
|
||||||
@ -1372,6 +1313,9 @@ class ParserElement(object):
|
|||||||
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
|
userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
|
||||||
could be written as::
|
could be written as::
|
||||||
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
|
userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
|
||||||
|
|
||||||
|
If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
|
||||||
|
passed as C{True}.
|
||||||
"""
|
"""
|
||||||
return self.setResultsName(name)
|
return self.setResultsName(name)
|
||||||
|
|
||||||
@ -1398,9 +1342,9 @@ class ParserElement(object):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def parseWithTabs( self ):
|
def parseWithTabs( self ):
|
||||||
"""Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
|
"""Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
|
||||||
Must be called before C{parseString} when the input grammar contains elements that
|
Must be called before C{parseString} when the input grammar contains elements that
|
||||||
match <TAB> characters."""
|
match C{<TAB>} characters."""
|
||||||
self.keepTabs = True
|
self.keepTabs = True
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@ -1508,12 +1452,10 @@ class Token(ParserElement):
|
|||||||
"""Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
|
"""Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
|
||||||
def __init__( self ):
|
def __init__( self ):
|
||||||
super(Token,self).__init__( savelist=False )
|
super(Token,self).__init__( savelist=False )
|
||||||
#self.myException = ParseException("",0,"",self)
|
|
||||||
|
|
||||||
def setName(self, name):
|
def setName(self, name):
|
||||||
s = super(Token,self).setName(name)
|
s = super(Token,self).setName(name)
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
#s.myException.msg = self.errmsg
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
@ -1534,7 +1476,6 @@ class NoMatch(Token):
|
|||||||
self.mayReturnEmpty = True
|
self.mayReturnEmpty = True
|
||||||
self.mayIndexError = False
|
self.mayIndexError = False
|
||||||
self.errmsg = "Unmatchable token"
|
self.errmsg = "Unmatchable token"
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
exc = self.myException
|
exc = self.myException
|
||||||
@ -1558,7 +1499,6 @@ class Literal(Token):
|
|||||||
self.name = '"%s"' % _ustr(self.match)
|
self.name = '"%s"' % _ustr(self.match)
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
self.mayReturnEmpty = False
|
self.mayReturnEmpty = False
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
self.mayIndexError = False
|
self.mayIndexError = False
|
||||||
|
|
||||||
# Performance tuning: this routine gets called a *lot*
|
# Performance tuning: this routine gets called a *lot*
|
||||||
@ -1579,12 +1519,12 @@ _L = Literal
|
|||||||
class Keyword(Token):
|
class Keyword(Token):
|
||||||
"""Token to exactly match a specified string as a keyword, that is, it must be
|
"""Token to exactly match a specified string as a keyword, that is, it must be
|
||||||
immediately followed by a non-keyword character. Compare with C{Literal}::
|
immediately followed by a non-keyword character. Compare with C{Literal}::
|
||||||
Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
|
Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
|
||||||
Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
|
Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
|
||||||
Accepts two optional constructor arguments in addition to the keyword string:
|
Accepts two optional constructor arguments in addition to the keyword string:
|
||||||
C{identChars} is a string of characters that would be valid identifier characters,
|
C{identChars} is a string of characters that would be valid identifier characters,
|
||||||
defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
|
defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
|
||||||
matching, default is False.
|
matching, default is C{False}.
|
||||||
"""
|
"""
|
||||||
DEFAULT_KEYWORD_CHARS = alphanums+"_$"
|
DEFAULT_KEYWORD_CHARS = alphanums+"_$"
|
||||||
|
|
||||||
@ -1600,13 +1540,12 @@ class Keyword(Token):
|
|||||||
self.name = '"%s"' % self.match
|
self.name = '"%s"' % self.match
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
self.mayReturnEmpty = False
|
self.mayReturnEmpty = False
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
self.mayIndexError = False
|
self.mayIndexError = False
|
||||||
self.caseless = caseless
|
self.caseless = caseless
|
||||||
if caseless:
|
if caseless:
|
||||||
self.caselessmatch = matchString.upper()
|
self.caselessmatch = matchString.upper()
|
||||||
identChars = identChars.upper()
|
identChars = identChars.upper()
|
||||||
self.identChars = _str2dict(identChars)
|
self.identChars = set(identChars)
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
if self.caseless:
|
if self.caseless:
|
||||||
@ -1648,7 +1587,6 @@ class CaselessLiteral(Literal):
|
|||||||
self.returnString = matchString
|
self.returnString = matchString
|
||||||
self.name = "'%s'" % self.returnString
|
self.name = "'%s'" % self.returnString
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
if instring[ loc:loc+self.matchLen ].upper() == self.match:
|
if instring[ loc:loc+self.matchLen ].upper() == self.match:
|
||||||
@ -1680,18 +1618,25 @@ class Word(Token):
|
|||||||
defaults to the initial character set), and an optional minimum,
|
defaults to the initial character set), and an optional minimum,
|
||||||
maximum, and/or exact length. The default value for C{min} is 1 (a
|
maximum, and/or exact length. The default value for C{min} is 1 (a
|
||||||
minimum value < 1 is not valid); the default values for C{max} and C{exact}
|
minimum value < 1 is not valid); the default values for C{max} and C{exact}
|
||||||
are 0, meaning no maximum or exact length restriction.
|
are 0, meaning no maximum or exact length restriction. An optional
|
||||||
|
C{exclude} parameter can list characters that might be found in
|
||||||
|
the input C{bodyChars} string; useful to define a word of all printables
|
||||||
|
except for one or two characters, for instance.
|
||||||
"""
|
"""
|
||||||
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
|
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
|
||||||
super(Word,self).__init__()
|
super(Word,self).__init__()
|
||||||
|
if excludeChars:
|
||||||
|
initChars = ''.join([c for c in initChars if c not in excludeChars])
|
||||||
|
if bodyChars:
|
||||||
|
bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
|
||||||
self.initCharsOrig = initChars
|
self.initCharsOrig = initChars
|
||||||
self.initChars = _str2dict(initChars)
|
self.initChars = set(initChars)
|
||||||
if bodyChars :
|
if bodyChars :
|
||||||
self.bodyCharsOrig = bodyChars
|
self.bodyCharsOrig = bodyChars
|
||||||
self.bodyChars = _str2dict(bodyChars)
|
self.bodyChars = set(bodyChars)
|
||||||
else:
|
else:
|
||||||
self.bodyCharsOrig = initChars
|
self.bodyCharsOrig = initChars
|
||||||
self.bodyChars = _str2dict(initChars)
|
self.bodyChars = set(initChars)
|
||||||
|
|
||||||
self.maxSpecified = max > 0
|
self.maxSpecified = max > 0
|
||||||
|
|
||||||
@ -1711,7 +1656,6 @@ class Word(Token):
|
|||||||
|
|
||||||
self.name = _ustr(self)
|
self.name = _ustr(self)
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
self.mayIndexError = False
|
self.mayIndexError = False
|
||||||
self.asKeyword = asKeyword
|
self.asKeyword = asKeyword
|
||||||
|
|
||||||
@ -1743,7 +1687,7 @@ class Word(Token):
|
|||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
loc = result.end()
|
loc = result.end()
|
||||||
return loc,result.group()
|
return loc, result.group()
|
||||||
|
|
||||||
if not(instring[ loc ] in self.initChars):
|
if not(instring[ loc ] in self.initChars):
|
||||||
#~ raise ParseException( instring, loc, self.errmsg )
|
#~ raise ParseException( instring, loc, self.errmsg )
|
||||||
@ -1807,24 +1751,24 @@ class Regex(Token):
|
|||||||
"""
|
"""
|
||||||
compiledREtype = type(re.compile("[A-Z]"))
|
compiledREtype = type(re.compile("[A-Z]"))
|
||||||
def __init__( self, pattern, flags=0):
|
def __init__( self, pattern, flags=0):
|
||||||
"""The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
|
"""The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
|
||||||
super(Regex,self).__init__()
|
super(Regex,self).__init__()
|
||||||
|
|
||||||
if isinstance(pattern, basestring):
|
if isinstance(pattern, basestring):
|
||||||
if len(pattern) == 0:
|
if len(pattern) == 0:
|
||||||
warnings.warn("null string passed to Regex; use Empty() instead",
|
warnings.warn("null string passed to Regex; use Empty() instead",
|
||||||
SyntaxWarning, stacklevel=2)
|
SyntaxWarning, stacklevel=2)
|
||||||
|
|
||||||
self.pattern = pattern
|
self.pattern = pattern
|
||||||
self.flags = flags
|
self.flags = flags
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.re = re.compile(self.pattern, self.flags)
|
self.re = re.compile(self.pattern, self.flags)
|
||||||
self.reString = self.pattern
|
self.reString = self.pattern
|
||||||
except sre_constants.error:
|
except sre_constants.error:
|
||||||
warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
|
warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
|
||||||
SyntaxWarning, stacklevel=2)
|
SyntaxWarning, stacklevel=2)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
elif isinstance(pattern, Regex.compiledREtype):
|
elif isinstance(pattern, Regex.compiledREtype):
|
||||||
self.re = pattern
|
self.re = pattern
|
||||||
@ -1837,7 +1781,6 @@ class Regex(Token):
|
|||||||
|
|
||||||
self.name = _ustr(self)
|
self.name = _ustr(self)
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
self.mayIndexError = False
|
self.mayIndexError = False
|
||||||
self.mayReturnEmpty = True
|
self.mayReturnEmpty = True
|
||||||
|
|
||||||
@ -1929,7 +1872,8 @@ class QuotedString(Token):
|
|||||||
self.pattern += (r'|(?:%s)' % re.escape(escQuote))
|
self.pattern += (r'|(?:%s)' % re.escape(escQuote))
|
||||||
if escChar:
|
if escChar:
|
||||||
self.pattern += (r'|(?:%s.)' % re.escape(escChar))
|
self.pattern += (r'|(?:%s.)' % re.escape(escChar))
|
||||||
self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
|
charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
|
||||||
|
self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
|
||||||
self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
|
self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -1942,7 +1886,6 @@ class QuotedString(Token):
|
|||||||
|
|
||||||
self.name = _ustr(self)
|
self.name = _ustr(self)
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
self.mayIndexError = False
|
self.mayIndexError = False
|
||||||
self.mayReturnEmpty = True
|
self.mayReturnEmpty = True
|
||||||
|
|
||||||
@ -2014,7 +1957,6 @@ class CharsNotIn(Token):
|
|||||||
self.name = _ustr(self)
|
self.name = _ustr(self)
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
self.mayReturnEmpty = ( self.minLen == 0 )
|
self.mayReturnEmpty = ( self.minLen == 0 )
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
self.mayIndexError = False
|
self.mayIndexError = False
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
@ -2077,7 +2019,6 @@ class White(Token):
|
|||||||
self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
|
self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
|
||||||
self.mayReturnEmpty = True
|
self.mayReturnEmpty = True
|
||||||
self.errmsg = "Expected " + self.name
|
self.errmsg = "Expected " + self.name
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
|
|
||||||
self.minLen = min
|
self.minLen = min
|
||||||
|
|
||||||
@ -2150,7 +2091,6 @@ class LineStart(_PositionToken):
|
|||||||
super(LineStart,self).__init__()
|
super(LineStart,self).__init__()
|
||||||
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
|
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
|
||||||
self.errmsg = "Expected start of line"
|
self.errmsg = "Expected start of line"
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
|
|
||||||
def preParse( self, instring, loc ):
|
def preParse( self, instring, loc ):
|
||||||
preloc = super(LineStart,self).preParse(instring,loc)
|
preloc = super(LineStart,self).preParse(instring,loc)
|
||||||
@ -2175,7 +2115,6 @@ class LineEnd(_PositionToken):
|
|||||||
super(LineEnd,self).__init__()
|
super(LineEnd,self).__init__()
|
||||||
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
|
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
|
||||||
self.errmsg = "Expected end of line"
|
self.errmsg = "Expected end of line"
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
if loc<len(instring):
|
if loc<len(instring):
|
||||||
@ -2200,7 +2139,6 @@ class StringStart(_PositionToken):
|
|||||||
def __init__( self ):
|
def __init__( self ):
|
||||||
super(StringStart,self).__init__()
|
super(StringStart,self).__init__()
|
||||||
self.errmsg = "Expected start of text"
|
self.errmsg = "Expected start of text"
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
if loc != 0:
|
if loc != 0:
|
||||||
@ -2218,7 +2156,6 @@ class StringEnd(_PositionToken):
|
|||||||
def __init__( self ):
|
def __init__( self ):
|
||||||
super(StringEnd,self).__init__()
|
super(StringEnd,self).__init__()
|
||||||
self.errmsg = "Expected end of text"
|
self.errmsg = "Expected end of text"
|
||||||
#self.myException.msg = self.errmsg
|
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
if loc < len(instring):
|
if loc < len(instring):
|
||||||
@ -2239,14 +2176,14 @@ class StringEnd(_PositionToken):
|
|||||||
|
|
||||||
class WordStart(_PositionToken):
|
class WordStart(_PositionToken):
|
||||||
"""Matches if the current position is at the beginning of a Word, and
|
"""Matches if the current position is at the beginning of a Word, and
|
||||||
is not preceded by any character in a given set of wordChars
|
is not preceded by any character in a given set of C{wordChars}
|
||||||
(default=C{printables}). To emulate the C{\b} behavior of regular expressions,
|
(default=C{printables}). To emulate the C{\b} behavior of regular expressions,
|
||||||
use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
|
use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
|
||||||
the string being parsed, or at the beginning of a line.
|
the string being parsed, or at the beginning of a line.
|
||||||
"""
|
"""
|
||||||
def __init__(self, wordChars = printables):
|
def __init__(self, wordChars = printables):
|
||||||
super(WordStart,self).__init__()
|
super(WordStart,self).__init__()
|
||||||
self.wordChars = _str2dict(wordChars)
|
self.wordChars = set(wordChars)
|
||||||
self.errmsg = "Not at the start of a word"
|
self.errmsg = "Not at the start of a word"
|
||||||
|
|
||||||
def parseImpl(self, instring, loc, doActions=True ):
|
def parseImpl(self, instring, loc, doActions=True ):
|
||||||
@ -2261,14 +2198,14 @@ class WordStart(_PositionToken):
|
|||||||
|
|
||||||
class WordEnd(_PositionToken):
|
class WordEnd(_PositionToken):
|
||||||
"""Matches if the current position is at the end of a Word, and
|
"""Matches if the current position is at the end of a Word, and
|
||||||
is not followed by any character in a given set of wordChars
|
is not followed by any character in a given set of C{wordChars}
|
||||||
(default=C{printables}). To emulate the C{\b} behavior of regular expressions,
|
(default=C{printables}). To emulate the C{\b} behavior of regular expressions,
|
||||||
use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
|
use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
|
||||||
the string being parsed, or at the end of a line.
|
the string being parsed, or at the end of a line.
|
||||||
"""
|
"""
|
||||||
def __init__(self, wordChars = printables):
|
def __init__(self, wordChars = printables):
|
||||||
super(WordEnd,self).__init__()
|
super(WordEnd,self).__init__()
|
||||||
self.wordChars = _str2dict(wordChars)
|
self.wordChars = set(wordChars)
|
||||||
self.skipWhitespace = False
|
self.skipWhitespace = False
|
||||||
self.errmsg = "Not at the end of a word"
|
self.errmsg = "Not at the end of a word"
|
||||||
|
|
||||||
@ -2309,7 +2246,7 @@ class ParseExpression(ParserElement):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def leaveWhitespace( self ):
|
def leaveWhitespace( self ):
|
||||||
"""Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
|
"""Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
|
||||||
all contained expressions."""
|
all contained expressions."""
|
||||||
self.skipWhitespace = False
|
self.skipWhitespace = False
|
||||||
self.exprs = [ e.copy() for e in self.exprs ]
|
self.exprs = [ e.copy() for e in self.exprs ]
|
||||||
@ -2380,11 +2317,16 @@ class ParseExpression(ParserElement):
|
|||||||
for e in self.exprs:
|
for e in self.exprs:
|
||||||
e.validate(tmp)
|
e.validate(tmp)
|
||||||
self.checkRecursion( [] )
|
self.checkRecursion( [] )
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
ret = super(ParseExpression,self).copy()
|
||||||
|
ret.exprs = [e.copy() for e in self.exprs]
|
||||||
|
return ret
|
||||||
|
|
||||||
class And(ParseExpression):
|
class And(ParseExpression):
|
||||||
"""Requires all given C{ParseExpressions} to be found in the given order.
|
"""Requires all given C{ParseExpression}s to be found in the given order.
|
||||||
Expressions may be separated by whitespace.
|
Expressions may be separated by whitespace.
|
||||||
May be constructed using the '+' operator.
|
May be constructed using the C{'+'} operator.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class _ErrorStop(Empty):
|
class _ErrorStop(Empty):
|
||||||
@ -2453,7 +2395,7 @@ class And(ParseExpression):
|
|||||||
class Or(ParseExpression):
|
class Or(ParseExpression):
|
||||||
"""Requires that at least one C{ParseExpression} is found.
|
"""Requires that at least one C{ParseExpression} is found.
|
||||||
If two expressions match, the expression that matches the longest string will be used.
|
If two expressions match, the expression that matches the longest string will be used.
|
||||||
May be constructed using the '^' operator.
|
May be constructed using the C{'^'} operator.
|
||||||
"""
|
"""
|
||||||
def __init__( self, exprs, savelist = False ):
|
def __init__( self, exprs, savelist = False ):
|
||||||
super(Or,self).__init__(exprs, savelist)
|
super(Or,self).__init__(exprs, savelist)
|
||||||
@ -2515,7 +2457,7 @@ class Or(ParseExpression):
|
|||||||
class MatchFirst(ParseExpression):
|
class MatchFirst(ParseExpression):
|
||||||
"""Requires that at least one C{ParseExpression} is found.
|
"""Requires that at least one C{ParseExpression} is found.
|
||||||
If two expressions match, the first one listed is the one that will match.
|
If two expressions match, the first one listed is the one that will match.
|
||||||
May be constructed using the '|' operator.
|
May be constructed using the C{'|'} operator.
|
||||||
"""
|
"""
|
||||||
def __init__( self, exprs, savelist = False ):
|
def __init__( self, exprs, savelist = False ):
|
||||||
super(MatchFirst,self).__init__(exprs, savelist)
|
super(MatchFirst,self).__init__(exprs, savelist)
|
||||||
@ -2572,9 +2514,9 @@ class MatchFirst(ParseExpression):
|
|||||||
|
|
||||||
|
|
||||||
class Each(ParseExpression):
|
class Each(ParseExpression):
|
||||||
"""Requires all given C{ParseExpressions} to be found, but in any order.
|
"""Requires all given C{ParseExpression}s to be found, but in any order.
|
||||||
Expressions may be separated by whitespace.
|
Expressions may be separated by whitespace.
|
||||||
May be constructed using the '&' operator.
|
May be constructed using the C{'&'} operator.
|
||||||
"""
|
"""
|
||||||
def __init__( self, exprs, savelist = True ):
|
def __init__( self, exprs, savelist = True ):
|
||||||
super(Each,self).__init__(exprs, savelist)
|
super(Each,self).__init__(exprs, savelist)
|
||||||
@ -2757,7 +2699,6 @@ class NotAny(ParseElementEnhance):
|
|||||||
self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
|
self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
|
||||||
self.mayReturnEmpty = True
|
self.mayReturnEmpty = True
|
||||||
self.errmsg = "Found unwanted token, "+_ustr(self.expr)
|
self.errmsg = "Found unwanted token, "+_ustr(self.expr)
|
||||||
#self.myException = ParseException("",0,self.errmsg,self)
|
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
try:
|
try:
|
||||||
@ -2916,7 +2857,6 @@ class SkipTo(ParseElementEnhance):
|
|||||||
else:
|
else:
|
||||||
self.failOn = failOn
|
self.failOn = failOn
|
||||||
self.errmsg = "No match found for "+_ustr(self.expr)
|
self.errmsg = "No match found for "+_ustr(self.expr)
|
||||||
#self.myException = ParseException("",0,self.errmsg,self)
|
|
||||||
|
|
||||||
def parseImpl( self, instring, loc, doActions=True ):
|
def parseImpl( self, instring, loc, doActions=True ):
|
||||||
startLoc = loc
|
startLoc = loc
|
||||||
@ -3040,7 +2980,7 @@ class _ForwardNoRecurse(Forward):
|
|||||||
return "..."
|
return "..."
|
||||||
|
|
||||||
class TokenConverter(ParseElementEnhance):
|
class TokenConverter(ParseElementEnhance):
|
||||||
"""Abstract subclass of ParseExpression, for converting parsed results."""
|
"""Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
|
||||||
def __init__( self, expr, savelist=False ):
|
def __init__( self, expr, savelist=False ):
|
||||||
super(TokenConverter,self).__init__( expr )#, savelist )
|
super(TokenConverter,self).__init__( expr )#, savelist )
|
||||||
self.saveAsList = False
|
self.saveAsList = False
|
||||||
@ -3089,7 +3029,7 @@ class Combine(TokenConverter):
|
|||||||
return retToks
|
return retToks
|
||||||
|
|
||||||
class Group(TokenConverter):
|
class Group(TokenConverter):
|
||||||
"""Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
|
"""Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions."""
|
||||||
def __init__( self, expr ):
|
def __init__( self, expr ):
|
||||||
super(Group,self).__init__( expr )
|
super(Group,self).__init__( expr )
|
||||||
self.saveAsList = True
|
self.saveAsList = True
|
||||||
@ -3143,7 +3083,7 @@ class Suppress(TokenConverter):
|
|||||||
class OnlyOnce(object):
|
class OnlyOnce(object):
|
||||||
"""Wrapper for parse actions, to ensure they are only called once."""
|
"""Wrapper for parse actions, to ensure they are only called once."""
|
||||||
def __init__(self, methodCall):
|
def __init__(self, methodCall):
|
||||||
self.callable = ParserElement._normalizeParseActionArgs(methodCall)
|
self.callable = _trim_arity(methodCall)
|
||||||
self.called = False
|
self.called = False
|
||||||
def __call__(self,s,l,t):
|
def __call__(self,s,l,t):
|
||||||
if not self.called:
|
if not self.called:
|
||||||
@ -3156,7 +3096,7 @@ class OnlyOnce(object):
|
|||||||
|
|
||||||
def traceParseAction(f):
|
def traceParseAction(f):
|
||||||
"""Decorator for debugging parse actions."""
|
"""Decorator for debugging parse actions."""
|
||||||
f = ParserElement._normalizeParseActionArgs(f)
|
f = _trim_arity(f)
|
||||||
def z(*paArgs):
|
def z(*paArgs):
|
||||||
thisFunc = f.func_name
|
thisFunc = f.func_name
|
||||||
s,l,t = paArgs[-3:]
|
s,l,t = paArgs[-3:]
|
||||||
@ -3194,7 +3134,7 @@ def delimitedList( expr, delim=",", combine=False ):
|
|||||||
else:
|
else:
|
||||||
return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
|
return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
|
||||||
|
|
||||||
def countedArray( expr ):
|
def countedArray( expr, intExpr=None ):
|
||||||
"""Helper to define a counted list of expressions.
|
"""Helper to define a counted list of expressions.
|
||||||
This helper defines a pattern of the form::
|
This helper defines a pattern of the form::
|
||||||
integer expr expr expr...
|
integer expr expr expr...
|
||||||
@ -3203,15 +3143,25 @@ def countedArray( expr ):
|
|||||||
"""
|
"""
|
||||||
arrayExpr = Forward()
|
arrayExpr = Forward()
|
||||||
def countFieldParseAction(s,l,t):
|
def countFieldParseAction(s,l,t):
|
||||||
n = int(t[0])
|
n = t[0]
|
||||||
arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
|
arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
|
||||||
return []
|
return []
|
||||||
return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr )
|
if intExpr is None:
|
||||||
|
intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
|
||||||
|
else:
|
||||||
|
intExpr = intExpr.copy()
|
||||||
|
intExpr.setName("arrayLen")
|
||||||
|
intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
|
||||||
|
return ( intExpr + arrayExpr )
|
||||||
|
|
||||||
def _flatten(L):
|
def _flatten(L):
|
||||||
if type(L) is not list: return [L]
|
ret = []
|
||||||
if L == []: return L
|
for i in L:
|
||||||
return _flatten(L[0]) + _flatten(L[1:])
|
if isinstance(i,list):
|
||||||
|
ret.extend(_flatten(i))
|
||||||
|
else:
|
||||||
|
ret.append(i)
|
||||||
|
return ret
|
||||||
|
|
||||||
def matchPreviousLiteral(expr):
|
def matchPreviousLiteral(expr):
|
||||||
"""Helper to define an expression that is indirectly defined from
|
"""Helper to define an expression that is indirectly defined from
|
||||||
@ -3346,15 +3296,15 @@ def originalTextFor(expr, asString=True):
|
|||||||
"""Helper to return the original, untokenized text for a given expression. Useful to
|
"""Helper to return the original, untokenized text for a given expression. Useful to
|
||||||
restore the parsed fields of an HTML start tag into the raw tag text itself, or to
|
restore the parsed fields of an HTML start tag into the raw tag text itself, or to
|
||||||
revert separate tokens with intervening whitespace back to the original matching
|
revert separate tokens with intervening whitespace back to the original matching
|
||||||
input text. Simpler to use than the parse action C{keepOriginalText}, and does not
|
input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
|
||||||
require the inspect module to chase up the call stack. By default, returns a
|
require the inspect module to chase up the call stack. By default, returns a
|
||||||
string containing the original parsed text.
|
string containing the original parsed text.
|
||||||
|
|
||||||
If the optional C{asString} argument is passed as False, then the return value is a
|
If the optional C{asString} argument is passed as C{False}, then the return value is a
|
||||||
C{ParseResults} containing any results names that were originally matched, and a
|
C{ParseResults} containing any results names that were originally matched, and a
|
||||||
single token containing the original matched text from the input string. So if
|
single token containing the original matched text from the input string. So if
|
||||||
the expression passed to C{originalTextFor} contains expressions with defined
|
the expression passed to C{L{originalTextFor}} contains expressions with defined
|
||||||
results names, you must set C{asString} to False if you want to preserve those
|
results names, you must set C{asString} to C{False} if you want to preserve those
|
||||||
results name values."""
|
results name values."""
|
||||||
locMarker = Empty().setParseAction(lambda s,loc,t: loc)
|
locMarker = Empty().setParseAction(lambda s,loc,t: loc)
|
||||||
endlocMarker = locMarker.copy()
|
endlocMarker = locMarker.copy()
|
||||||
@ -3370,7 +3320,12 @@ def originalTextFor(expr, asString=True):
|
|||||||
del t["_original_end"]
|
del t["_original_end"]
|
||||||
matchExpr.setParseAction(extractText)
|
matchExpr.setParseAction(extractText)
|
||||||
return matchExpr
|
return matchExpr
|
||||||
|
|
||||||
|
def ungroup(expr):
|
||||||
|
"""Helper to undo pyparsing's default grouping of And expressions, even
|
||||||
|
if all but one are non-empty."""
|
||||||
|
return TokenConverter(expr).setParseAction(lambda t:t[0])
|
||||||
|
|
||||||
# convenience constants for positional expressions
|
# convenience constants for positional expressions
|
||||||
empty = Empty().setName("empty")
|
empty = Empty().setName("empty")
|
||||||
lineStart = LineStart().setName("lineStart")
|
lineStart = LineStart().setName("lineStart")
|
||||||
@ -3380,8 +3335,8 @@ stringEnd = StringEnd().setName("stringEnd")
|
|||||||
|
|
||||||
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
|
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
|
||||||
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
|
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
|
||||||
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
|
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],16)))
|
||||||
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
|
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
|
||||||
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
|
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
|
||||||
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
|
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
|
||||||
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
|
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
|
||||||
@ -3399,7 +3354,8 @@ def srange(s):
|
|||||||
The values enclosed in the []'s may be::
|
The values enclosed in the []'s may be::
|
||||||
a single character
|
a single character
|
||||||
an escaped character with a leading backslash (such as \- or \])
|
an escaped character with a leading backslash (such as \- or \])
|
||||||
an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
|
an escaped hex character with a leading '\x' (\x21, which is a '!' character)
|
||||||
|
(\0x## is also supported for backwards compatibility)
|
||||||
an escaped octal character with a leading '\0' (\041, which is a '!' character)
|
an escaped octal character with a leading '\0' (\041, which is a '!' character)
|
||||||
a range of any of the above, separated by a dash ('a-z', etc.)
|
a range of any of the above, separated by a dash ('a-z', etc.)
|
||||||
any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
|
any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
|
||||||
@ -3486,7 +3442,7 @@ def _makeTags(tagStr, xml):
|
|||||||
else:
|
else:
|
||||||
printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
|
printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
|
||||||
tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
|
tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
|
||||||
openTag = Suppress("<") + tagStr + \
|
openTag = Suppress("<") + tagStr("tag") + \
|
||||||
Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
|
Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
|
||||||
Optional( Suppress("=") + tagAttrValue ) ))) + \
|
Optional( Suppress("=") + tagAttrValue ) ))) + \
|
||||||
Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
|
Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
|
||||||
@ -3508,19 +3464,21 @@ def makeXMLTags(tagStr):
|
|||||||
|
|
||||||
def withAttribute(*args,**attrDict):
|
def withAttribute(*args,**attrDict):
|
||||||
"""Helper to create a validating parse action to be used with start tags created
|
"""Helper to create a validating parse action to be used with start tags created
|
||||||
with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
|
with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag
|
||||||
with a required attribute value, to avoid false matches on common tags such as
|
with a required attribute value, to avoid false matches on common tags such as
|
||||||
<TD> or <DIV>.
|
C{<TD>} or C{<DIV>}.
|
||||||
|
|
||||||
Call withAttribute with a series of attribute names and values. Specify the list
|
Call C{withAttribute} with a series of attribute names and values. Specify the list
|
||||||
of filter attributes names and values as:
|
of filter attributes names and values as:
|
||||||
- keyword arguments, as in (class="Customer",align="right"), or
|
- keyword arguments, as in C{(align="right")}, or
|
||||||
|
- as an explicit dict with C{**} operator, when an attribute name is also a Python
|
||||||
|
reserved word, as in C{**{"class":"Customer", "align":"right"}}
|
||||||
- a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
|
- a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
|
||||||
For attribute names with a namespace prefix, you must use the second form. Attribute
|
For attribute names with a namespace prefix, you must use the name-value tuple form. Attribute
|
||||||
names are matched insensitive to upper/lower case.
|
names are matched insensitive to upper/lower case.
|
||||||
|
|
||||||
To verify that the attribute exists, but without specifying a value, pass
|
To verify that the attribute exists, but without specifying a value, pass
|
||||||
withAttribute.ANY_VALUE as the value.
|
C{withAttribute.ANY_VALUE} as the value.
|
||||||
"""
|
"""
|
||||||
if args:
|
if args:
|
||||||
attrs = args[:]
|
attrs = args[:]
|
||||||
@ -3631,12 +3589,12 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop
|
|||||||
expression will capture all whitespace-delimited content between delimiters
|
expression will capture all whitespace-delimited content between delimiters
|
||||||
as a list of separate values.
|
as a list of separate values.
|
||||||
|
|
||||||
Use the ignoreExpr argument to define expressions that may contain
|
Use the C{ignoreExpr} argument to define expressions that may contain
|
||||||
opening or closing characters that should not be treated as opening
|
opening or closing characters that should not be treated as opening
|
||||||
or closing characters for nesting, such as quotedString or a comment
|
or closing characters for nesting, such as quotedString or a comment
|
||||||
expression. Specify multiple expressions using an Or or MatchFirst.
|
expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
|
||||||
The default is quotedString, but if no expressions are to be ignored,
|
The default is L{quotedString}, but if no expressions are to be ignored,
|
||||||
then pass None for this argument.
|
then pass C{None} for this argument.
|
||||||
"""
|
"""
|
||||||
if opener == closer:
|
if opener == closer:
|
||||||
raise ValueError("opening and closing strings cannot be the same")
|
raise ValueError("opening and closing strings cannot be the same")
|
||||||
@ -3683,7 +3641,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
|
|||||||
the current level; set to False for block of left-most statements
|
the current level; set to False for block of left-most statements
|
||||||
(default=True)
|
(default=True)
|
||||||
|
|
||||||
A valid block must contain at least one blockStatement.
|
A valid block must contain at least one C{blockStatement}.
|
||||||
"""
|
"""
|
||||||
def checkPeerIndent(s,l,t):
|
def checkPeerIndent(s,l,t):
|
||||||
if l >= len(s): return
|
if l >= len(s): return
|
||||||
|
Loading…
x
Reference in New Issue
Block a user