IGN:Update pyparsing to 1.5.1

This commit is contained in:
Kovid Goyal 2009-01-16 09:52:12 -08:00
parent e3bc28106e
commit 4e9dc3e87f

View File

@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments - embedded comments
""" """
__version__ = "1.5.0" __version__ = "1.5.1"
__versionTime__ = "28 May 2008 10:05" __versionTime__ = "2 October 2008 00:44"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string import string
from weakref import ref as wkref from weakref import ref as wkref
import copy,sys import copy
import sys
import warnings import warnings
import re import re
import sre_constants import sre_constants
import xml.sax.saxutils
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
__all__ = [ __all__ = [
@ -88,7 +88,7 @@ __all__ = [
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'indentedBlock', 'originalTextFor',
] ]
@ -130,11 +130,22 @@ if not _PY3K:
# ... # ...
else: else:
_ustr = str _ustr = str
unichr = chr
def _str2dict(strg): def _str2dict(strg):
return dict( [(c,0) for c in strg] ) return dict( [(c,0) for c in strg] )
#~ return set( [c for c in strg] ) #~ return set( [c for c in strg] )
def _xml_escape(data):
    """Return data with the XML special characters replaced by entities.

    The characters & > < " ' become &amp; &gt; &lt; &quot; &apos;
    respectively.
    """
    # '&' has to be handled first: the entities substituted for the other
    # characters contain ampersands that must not be escaped again
    replacements = (
        ('&', 'amp'),
        ('>', 'gt'),
        ('<', 'lt'),
        ('"', 'quot'),
        ("'", 'apos'),
    )
    escaped = data
    for symbol, entity in replacements:
        escaped = escaped.replace(symbol, '&' + entity + ';')
    return escaped
class _Constants(object): class _Constants(object):
pass pass
@ -145,7 +156,7 @@ else:
nums = string.digits nums = string.digits
hexnums = nums + "ABCDEFabcdef" hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums alphanums = alphas + nums
_bslash = "\\" _bslash = chr(92)
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
class ParseBaseException(Exception): class ParseBaseException(Exception):
@ -193,6 +204,9 @@ class ParseBaseException(Exception):
line_str = "".join( [line_str[:line_column], line_str = "".join( [line_str[:line_column],
markerString, line_str[line_column:]]) markerString, line_str[line_column:]])
return line_str.strip() return line_str.strip()
def __dir__(self):
    """Return the fixed list of attribute names reported for this exception."""
    attr_names = ("loc msg pstr parserElement lineno col line "
                  "markInputLine __str__ __repr__")
    return attr_names.split()
class ParseException(ParseBaseException): class ParseException(ParseBaseException):
"""exception thrown when parse expressions don't match class; """exception thrown when parse expressions don't match class;
@ -213,7 +227,8 @@ class ParseSyntaxException(ParseFatalException):
ErrorStop indicates that parsing is to stop immediately because ErrorStop indicates that parsing is to stop immediately because
an unbacktrackable syntax error has been found""" an unbacktrackable syntax error has been found"""
def __init__(self, pe): def __init__(self, pe):
ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement) super(ParseSyntaxException, self).__init__(
pe.pstr, pe.loc, pe.msg, pe.parserElement)
#~ class ReparseException(ParseBaseException): #~ class ReparseException(ParseBaseException):
#~ """Experimental class - parse actions can raise this exception to cause #~ """Experimental class - parse actions can raise this exception to cause
@ -243,6 +258,8 @@ class _ParseResultsWithOffset(object):
return self.tup[i] return self.tup[i]
def __repr__(self): def __repr__(self):
return repr(self.tup) return repr(self.tup)
def setOffset(self,i):
    """Replace the second element of self.tup with i, keeping the first."""
    value = self.tup[0]
    self.tup = (value, i)
class ParseResults(object): class ParseResults(object):
"""Structured parse results, to provide multiple means of access to the parsed data: """Structured parse results, to provide multiple means of access to the parsed data:
@ -272,9 +289,6 @@ class ParseResults(object):
self.__toklist = [toklist] self.__toklist = [toklist]
self.__tokdict = dict() self.__tokdict = dict()
# this line is related to debugging the asXML bug
#~ asList = False
if name: if name:
if not modal: if not modal:
self.__accumNames[name] = 0 self.__accumNames[name] = 0
@ -286,9 +300,9 @@ class ParseResults(object):
toklist = [ toklist ] toklist = [ toklist ]
if asList: if asList:
if isinstance(toklist,ParseResults): if isinstance(toklist,ParseResults):
self[name] = _ParseResultsWithOffset(toklist.copy(),-1) self[name] = _ParseResultsWithOffset(toklist.copy(),0)
else: else:
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1) self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
self[name].__name = name self[name].__name = name
else: else:
try: try:
@ -374,7 +388,7 @@ class ParseResults(object):
for name in self.__tokdict: for name in self.__tokdict:
occurrences = self.__tokdict[name] occurrences = self.__tokdict[name]
for k, (value, position) in enumerate(occurrences): for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > j)) occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def items( self ): def items( self ):
"""Returns all named result keys and values as a list of tuples.""" """Returns all named result keys and values as a list of tuples."""
@ -411,6 +425,7 @@ class ParseResults(object):
self[k] = v self[k] = v
if isinstance(v[0],ParseResults): if isinstance(v[0],ParseResults):
v[0].__parent = wkref(self) v[0].__parent = wkref(self)
self.__toklist += other.__toklist self.__toklist += other.__toklist
self.__accumNames.update( other.__accumNames ) self.__accumNames.update( other.__accumNames )
del other del other
@ -517,7 +532,7 @@ class ParseResults(object):
continue continue
else: else:
resTag = "ITEM" resTag = "ITEM"
xmlBodyText = xml.sax.saxutils.escape(_ustr(res)) xmlBodyText = _xml_escape(_ustr(res))
out += [ nl, nextLevelIndent, "<", resTag, ">", out += [ nl, nextLevelIndent, "<", resTag, ">",
xmlBodyText, xmlBodyText,
"</", resTag, ">" ] "</", resTag, ">" ]
@ -594,6 +609,8 @@ class ParseResults(object):
else: else:
self.__parent = None self.__parent = None
def __dir__(self):
    """Return the names from dir(super(ParseResults,self)) followed by
    the names returned by self.keys()."""
    # list() guards against keys() returning a non-list iterable (such as
    # a dict view on Python 3), which cannot be concatenated onto a list
    return dir(super(ParseResults,self)) + list(self.keys())
def col (loc,strg): def col (loc,strg):
"""Returns current column within a string, counting newlines as line separators. """Returns current column within a string, counting newlines as line separators.
@ -715,7 +732,7 @@ class ParserElement(object):
def breaker(instring, loc, doActions=True, callPreParse=True): def breaker(instring, loc, doActions=True, callPreParse=True):
import pdb import pdb
pdb.set_trace() pdb.set_trace()
_parseMethod( instring, loc, doActions, callPreParse ) return _parseMethod( instring, loc, doActions, callPreParse )
breaker._originalParseMethod = _parseMethod breaker._originalParseMethod = _parseMethod
self._parse = breaker self._parse = breaker
else: else:
@ -1047,6 +1064,7 @@ class ParserElement(object):
instring = instring.expandtabs() instring = instring.expandtabs()
loc, tokens = self._parse( instring, 0 ) loc, tokens = self._parse( instring, 0 )
if parseAll: if parseAll:
loc = self.preParse( instring, loc )
StringEnd()._parse( instring, loc ) StringEnd()._parse( instring, loc )
return tokens return tokens
@ -1158,11 +1176,7 @@ class ParserElement(object):
if isinstance(other,int): if isinstance(other,int):
minElements, optElements = other,0 minElements, optElements = other,0
elif isinstance(other,tuple): elif isinstance(other,tuple):
if len(other)==0: other = (other + (None, None))[:2]
other = (None,None)
elif len(other)==1:
other = (other[0],None)
if len(other)==2:
if other[0] is None: if other[0] is None:
other = (0, other[1]) other = (0, other[1])
if isinstance(other[0],int) and other[1] is None: if isinstance(other[0],int) and other[1] is None:
@ -1177,8 +1191,6 @@ class ParserElement(object):
optElements -= minElements optElements -= minElements
else: else:
raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
else:
raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
else: else:
raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
@ -1360,7 +1372,7 @@ class ParserElement(object):
"""Check defined expressions for valid structure, check for infinite recursive definitions.""" """Check defined expressions for valid structure, check for infinite recursive definitions."""
self.checkRecursion( [] ) self.checkRecursion( [] )
def parseFile( self, file_or_filename ): def parseFile( self, file_or_filename, parseAll=False ):
"""Execute the parse expression on the given file or filename. """Execute the parse expression on the given file or filename.
If a filename is specified (instead of a file object), If a filename is specified (instead of a file object),
the entire file is opened, read, and closed before parsing. the entire file is opened, read, and closed before parsing.
@ -1371,7 +1383,7 @@ class ParserElement(object):
f = open(file_or_filename, "rb") f = open(file_or_filename, "rb")
file_contents = f.read() file_contents = f.read()
f.close() f.close()
return self.parseString(file_contents) return self.parseString(file_contents, parseAll)
def getException(self): def getException(self):
return ParseException("",0,self.errmsg,self) return ParseException("",0,self.errmsg,self)
@ -1393,12 +1405,18 @@ class ParserElement(object):
else: else:
return super(ParserElement,self)==other return super(ParserElement,self)==other
def __ne__(self,other):
    """Return the negation of (self == other)."""
    is_equal = (self == other)
    return not is_equal
def __hash__(self): def __hash__(self):
return hash(id(self)) return hash(id(self))
def __req__(self,other): def __req__(self,other):
return self == other return self == other
def __rne__(self,other):
    """Return the negation of (self == other)."""
    is_equal = (self == other)
    return not is_equal
class Token(ParserElement): class Token(ParserElement):
"""Abstract ParserElement subclass, for defining atomic matching patterns.""" """Abstract ParserElement subclass, for defining atomic matching patterns."""
@ -1533,7 +1551,6 @@ class Keyword(Token):
Keyword.DEFAULT_KEYWORD_CHARS = chars Keyword.DEFAULT_KEYWORD_CHARS = chars
setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessLiteral(Literal): class CaselessLiteral(Literal):
"""Token to match a specified string, ignoring case of letters. """Token to match a specified string, ignoring case of letters.
Note: the matched results will always be in the case of the given Note: the matched results will always be in the case of the given
@ -2034,7 +2051,7 @@ class LineStart(_PositionToken):
"""Matches if current position is at the beginning of a line within the parse string""" """Matches if current position is at the beginning of a line within the parse string"""
def __init__( self ): def __init__( self ):
super(LineStart,self).__init__() super(LineStart,self).__init__()
self.setWhitespaceChars( " \t" ) self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected start of line" self.errmsg = "Expected start of line"
#self.myException.msg = self.errmsg #self.myException.msg = self.errmsg
@ -2059,7 +2076,7 @@ class LineEnd(_PositionToken):
"""Matches if current position is at the end of a line within the parse string""" """Matches if current position is at the end of a line within the parse string"""
def __init__( self ): def __init__( self ):
super(LineEnd,self).__init__() super(LineEnd,self).__init__()
self.setWhitespaceChars( " \t" ) self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected end of line" self.errmsg = "Expected end of line"
#self.myException.msg = self.errmsg #self.myException.msg = self.errmsg
@ -2271,10 +2288,9 @@ class And(ParseExpression):
""" """
class _ErrorStop(Empty): class _ErrorStop(Empty):
def __new__(cls,*args,**kwargs): def __init__(self, *args, **kwargs):
return And._ErrorStop.instance super(Empty,self).__init__(*args, **kwargs)
_ErrorStop.instance = Empty() self.leaveWhitespace()
_ErrorStop.instance.leaveWhitespace()
def __init__( self, exprs, savelist = True ): def __init__( self, exprs, savelist = True ):
super(And,self).__init__(exprs, savelist) super(And,self).__init__(exprs, savelist)
@ -2293,12 +2309,14 @@ class And(ParseExpression):
loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
errorStop = False errorStop = False
for e in self.exprs[1:]: for e in self.exprs[1:]:
if e is And._ErrorStop.instance: if isinstance(e, And._ErrorStop):
errorStop = True errorStop = True
continue continue
if errorStop: if errorStop:
try: try:
loc, exprtokens = e._parse( instring, loc, doActions ) loc, exprtokens = e._parse( instring, loc, doActions )
except ParseSyntaxException:
raise
except ParseBaseException, pe: except ParseBaseException, pe:
raise ParseSyntaxException(pe) raise ParseSyntaxException(pe)
except IndexError, ie: except IndexError, ie:
@ -2502,7 +2520,7 @@ class Each(ParseExpression):
raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
# add any unmatched Optionals, in case they have default values defined # add any unmatched Optionals, in case they have default values defined
matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt) matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ]
resultlist = [] resultlist = []
for e in matchOrder: for e in matchOrder:
@ -2781,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
argument is used to define grammars (typically quoted strings and comments) that argument is used to define grammars (typically quoted strings and comments) that
might contain false matches. might contain false matches.
""" """
def __init__( self, other, include=False, ignore=None ): def __init__( self, other, include=False, ignore=None, failOn=None ):
super( SkipTo, self ).__init__( other ) super( SkipTo, self ).__init__( other )
if ignore is not None: if ignore is not None:
self.expr = self.expr.copy() self.expr = self.expr.copy()
@ -2790,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
self.mayIndexError = False self.mayIndexError = False
self.includeMatch = include self.includeMatch = include
self.asList = False self.asList = False
if failOn is not None and isinstance(failOn, basestring):
self.failOn = Literal(failOn)
else:
self.failOn = failOn
self.errmsg = "No match found for "+_ustr(self.expr) self.errmsg = "No match found for "+_ustr(self.expr)
#self.myException = ParseException("",0,self.errmsg,self) #self.myException = ParseException("",0,self.errmsg,self)
@ -2797,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
startLoc = loc startLoc = loc
instrlen = len(instring) instrlen = len(instring)
expr = self.expr expr = self.expr
failParse = False
while loc <= instrlen: while loc <= instrlen:
try: try:
if self.failOn:
failParse = True
self.failOn.tryParse(instring, loc)
failParse = False
loc = expr._skipIgnorables( instring, loc ) loc = expr._skipIgnorables( instring, loc )
expr._parse( instring, loc, doActions=False, callPreParse=False ) expr._parse( instring, loc, doActions=False, callPreParse=False )
if self.includeMatch:
skipText = instring[startLoc:loc] skipText = instring[startLoc:loc]
if self.includeMatch:
loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
if mat: if mat:
skipRes = ParseResults( skipText ) skipRes = ParseResults( skipText )
@ -2811,8 +2838,11 @@ class SkipTo(ParseElementEnhance):
else: else:
return loc, [ skipText ] return loc, [ skipText ]
else: else:
return loc, [ instring[startLoc:loc] ] return loc, [ skipText ]
except (ParseException,IndexError): except (ParseException,IndexError):
if failParse:
raise
else:
loc += 1 loc += 1
exc = self.myException exc = self.myException
exc.loc = loc exc.loc = loc
@ -2872,6 +2902,7 @@ class Forward(ParseElementEnhance):
if hasattr(self,"name"): if hasattr(self,"name"):
return self.name return self.name
self._revertClass = self.__class__
self.__class__ = _ForwardNoRecurse self.__class__ = _ForwardNoRecurse
try: try:
if self.expr is not None: if self.expr is not None:
@ -2879,8 +2910,8 @@ class Forward(ParseElementEnhance):
else: else:
retString = "None" retString = "None"
finally: finally:
self.__class__ = Forward self.__class__ = self._revertClass
return "Forward: "+retString return self.__class__.__name__ + ": " + retString
def copy(self): def copy(self):
if self.expr is not None: if self.expr is not None:
@ -3121,7 +3152,7 @@ def matchPreviousExpr(expr):
def _escapeRegexRangeChars(s): def _escapeRegexRangeChars(s):
#~ escape these chars: ^-] #~ escape these chars: ^-]
for c in r"\^-]": for c in r"\^-]":
s = s.replace(c,"\\"+c) s = s.replace(c,_bslash+c)
s = s.replace("\n",r"\n") s = s.replace("\n",r"\n")
s = s.replace("\t",r"\t") s = s.replace("\t",r"\t")
return _ustr(s) return _ustr(s)
@ -3195,6 +3226,33 @@ def dictOf( key, value ):
""" """
return Dict( ZeroOrMore( Group ( key + value ) ) ) return Dict( ZeroOrMore( Group ( key + value ) ) )
def originalTextFor(expr, asString=True):
    """Helper that makes an expression return the original, untokenized
       input text that it matched.  Useful to turn the parsed fields of an
       HTML start tag back into the raw tag text, or to rejoin separate
       tokens with the whitespace that lay between them in the input.
       Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.

       By default (asString=True) the result is a string containing the
       original matched text.  If asString is passed as False, the result
       is a ParseResults holding a single token (the original matched text)
       together with any results names that were matched inside expr.  So
       if expr contains expressions with defined results names, pass
       asString=False to preserve those results name values."""
    # an Empty whose parse action returns the current parse location; one
    # named use on each side of expr brackets the matched region
    def recordLoc(s, loc, t):
        return loc
    marker = Empty().setParseAction(recordLoc)
    bracketed = marker("_original_start") + expr + marker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # drop every token, put the raw text in their place, then
            # remove the two bookkeeping names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    bracketed.setParseAction(extractText)
    return bracketed
# convenience constants for positional expressions # convenience constants for positional expressions
empty = Empty().setName("empty") empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart") lineStart = LineStart().setName("lineStart")
@ -3464,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
raise ValueError("opening and closing strings cannot be the same") raise ValueError("opening and closing strings cannot be the same")
if content is None: if content is None:
if isinstance(opener,basestring) and isinstance(closer,basestring): if isinstance(opener,basestring) and isinstance(closer,basestring):
if len(opener) == 1 and len(closer)==1:
if ignoreExpr is not None: if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr + content = (Combine(OneOrMore(~ignoreExpr +
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip())) ).setParseAction(lambda t:t[0].strip()))
else: else:
content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip())) content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
).setParseAction(lambda t:t[0].strip()))
else:
if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr +
~Literal(opener) + ~Literal(closer) +
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else:
content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else: else:
raise ValueError("opening and closing arguments must be strings if no content expression is given") raise ValueError("opening and closing arguments must be strings if no content expression is given")
ret = Forward() ret = Forward()
@ -3528,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
else: else:
smExpr = Group( Optional(NL) + smExpr = Group( Optional(NL) +
(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
blockStatementExpr.ignore("\\" + LineEnd()) blockStatementExpr.ignore(_bslash + LineEnd())
return smExpr return smExpr
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@ -3536,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";") commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '")) _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
# it's easy to get these comment structures wrong - they're very common, so may as well make them available # it's easy to get these comment structures wrong - they're very common, so may as well make them available