IGN:Update pyparsing to 1.5.1

This commit is contained in:
Kovid Goyal 2009-01-16 09:52:12 -08:00
parent e3bc28106e
commit 4e9dc3e87f

View File

@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
- embedded comments
"""
__version__ = "1.5.0"
__versionTime__ = "28 May 2008 10:05"
__version__ = "1.5.1"
__versionTime__ = "2 October 2008 00:44"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
from weakref import ref as wkref
import copy,sys
import copy
import sys
import warnings
import re
import sre_constants
import xml.sax.saxutils
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
__all__ = [
@ -88,7 +88,7 @@ __all__ = [
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock',
'indentedBlock', 'originalTextFor',
]
@ -130,11 +130,22 @@ if not _PY3K:
# ...
else:
_ustr = str
unichr = chr
def _str2dict(strg):
return dict( [(c,0) for c in strg] )
#~ return set( [c for c in strg] )
def _xml_escape(data):
"""Escape &, <, >, ", ', etc. in a string of data."""
# ampersand must be replaced first
from_symbols = '&><"\''
to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
for from_,to_ in zip(from_symbols, to_symbols):
data = data.replace(from_, to_)
return data
class _Constants(object):
pass
@ -145,7 +156,7 @@ else:
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
_bslash = chr(92)
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
class ParseBaseException(Exception):
@ -193,6 +204,9 @@ class ParseBaseException(Exception):
line_str = "".join( [line_str[:line_column],
markerString, line_str[line_column:]])
return line_str.strip()
def __dir__(self):
return "loc msg pstr parserElement lineno col line " \
"markInputLine __str__ __repr__".split()
class ParseException(ParseBaseException):
"""exception thrown when parse expressions don't match class;
@ -213,7 +227,8 @@ class ParseSyntaxException(ParseFatalException):
ErrorStop indicates that parsing is to stop immediately because
an unbacktrackable syntax error has been found"""
def __init__(self, pe):
ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement)
super(ParseSyntaxException, self).__init__(
pe.pstr, pe.loc, pe.msg, pe.parserElement)
#~ class ReparseException(ParseBaseException):
#~ """Experimental class - parse actions can raise this exception to cause
@ -243,6 +258,8 @@ class _ParseResultsWithOffset(object):
return self.tup[i]
def __repr__(self):
return repr(self.tup)
def setOffset(self,i):
self.tup = (self.tup[0],i)
class ParseResults(object):
"""Structured parse results, to provide multiple means of access to the parsed data:
@ -272,9 +289,6 @@ class ParseResults(object):
self.__toklist = [toklist]
self.__tokdict = dict()
# this line is related to debugging the asXML bug
#~ asList = False
if name:
if not modal:
self.__accumNames[name] = 0
@ -286,9 +300,9 @@ class ParseResults(object):
toklist = [ toklist ]
if asList:
if isinstance(toklist,ParseResults):
self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
self[name] = _ParseResultsWithOffset(toklist.copy(),0)
else:
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
self[name].__name = name
else:
try:
@ -374,7 +388,7 @@ class ParseResults(object):
for name in self.__tokdict:
occurrences = self.__tokdict[name]
for k, (value, position) in enumerate(occurrences):
occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
def items( self ):
"""Returns all named result keys and values as a list of tuples."""
@ -411,6 +425,7 @@ class ParseResults(object):
self[k] = v
if isinstance(v[0],ParseResults):
v[0].__parent = wkref(self)
self.__toklist += other.__toklist
self.__accumNames.update( other.__accumNames )
del other
@ -517,7 +532,7 @@ class ParseResults(object):
continue
else:
resTag = "ITEM"
xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
xmlBodyText = _xml_escape(_ustr(res))
out += [ nl, nextLevelIndent, "<", resTag, ">",
xmlBodyText,
"</", resTag, ">" ]
@ -594,6 +609,8 @@ class ParseResults(object):
else:
self.__parent = None
def __dir__(self):
return dir(super(ParseResults,self)) + self.keys()
def col (loc,strg):
"""Returns current column within a string, counting newlines as line separators.
@ -715,7 +732,7 @@ class ParserElement(object):
def breaker(instring, loc, doActions=True, callPreParse=True):
import pdb
pdb.set_trace()
_parseMethod( instring, loc, doActions, callPreParse )
return _parseMethod( instring, loc, doActions, callPreParse )
breaker._originalParseMethod = _parseMethod
self._parse = breaker
else:
@ -1047,6 +1064,7 @@ class ParserElement(object):
instring = instring.expandtabs()
loc, tokens = self._parse( instring, 0 )
if parseAll:
loc = self.preParse( instring, loc )
StringEnd()._parse( instring, loc )
return tokens
@ -1158,27 +1176,21 @@ class ParserElement(object):
if isinstance(other,int):
minElements, optElements = other,0
elif isinstance(other,tuple):
if len(other)==0:
other = (None,None)
elif len(other)==1:
other = (other[0],None)
if len(other)==2:
if other[0] is None:
other = (0, other[1])
if isinstance(other[0],int) and other[1] is None:
if other[0] == 0:
return ZeroOrMore(self)
if other[0] == 1:
return OneOrMore(self)
else:
return self*other[0] + ZeroOrMore(self)
elif isinstance(other[0],int) and isinstance(other[1],int):
minElements, optElements = other
optElements -= minElements
other = (other + (None, None))[:2]
if other[0] is None:
other = (0, other[1])
if isinstance(other[0],int) and other[1] is None:
if other[0] == 0:
return ZeroOrMore(self)
if other[0] == 1:
return OneOrMore(self)
else:
raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
return self*other[0] + ZeroOrMore(self)
elif isinstance(other[0],int) and isinstance(other[1],int):
minElements, optElements = other
optElements -= minElements
else:
raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
else:
raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
@ -1360,7 +1372,7 @@ class ParserElement(object):
"""Check defined expressions for valid structure, check for infinite recursive definitions."""
self.checkRecursion( [] )
def parseFile( self, file_or_filename ):
def parseFile( self, file_or_filename, parseAll=False ):
"""Execute the parse expression on the given file or filename.
If a filename is specified (instead of a file object),
the entire file is opened, read, and closed before parsing.
@ -1371,7 +1383,7 @@ class ParserElement(object):
f = open(file_or_filename, "rb")
file_contents = f.read()
f.close()
return self.parseString(file_contents)
return self.parseString(file_contents, parseAll)
def getException(self):
return ParseException("",0,self.errmsg,self)
@ -1393,12 +1405,18 @@ class ParserElement(object):
else:
return super(ParserElement,self)==other
def __ne__(self,other):
return not (self == other)
def __hash__(self):
return hash(id(self))
def __req__(self,other):
return self == other
def __rne__(self,other):
return not (self == other)
class Token(ParserElement):
"""Abstract ParserElement subclass, for defining atomic matching patterns."""
@ -1533,7 +1551,6 @@ class Keyword(Token):
Keyword.DEFAULT_KEYWORD_CHARS = chars
setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessLiteral(Literal):
"""Token to match a specified string, ignoring case of letters.
Note: the matched results will always be in the case of the given
@ -2034,7 +2051,7 @@ class LineStart(_PositionToken):
"""Matches if current position is at the beginning of a line within the parse string"""
def __init__( self ):
super(LineStart,self).__init__()
self.setWhitespaceChars( " \t" )
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected start of line"
#self.myException.msg = self.errmsg
@ -2059,7 +2076,7 @@ class LineEnd(_PositionToken):
"""Matches if current position is at the end of a line within the parse string"""
def __init__( self ):
super(LineEnd,self).__init__()
self.setWhitespaceChars( " \t" )
self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
self.errmsg = "Expected end of line"
#self.myException.msg = self.errmsg
@ -2271,10 +2288,9 @@ class And(ParseExpression):
"""
class _ErrorStop(Empty):
def __new__(cls,*args,**kwargs):
return And._ErrorStop.instance
_ErrorStop.instance = Empty()
_ErrorStop.instance.leaveWhitespace()
def __init__(self, *args, **kwargs):
super(Empty,self).__init__(*args, **kwargs)
self.leaveWhitespace()
def __init__( self, exprs, savelist = True ):
super(And,self).__init__(exprs, savelist)
@ -2293,12 +2309,14 @@ class And(ParseExpression):
loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
errorStop = False
for e in self.exprs[1:]:
if e is And._ErrorStop.instance:
if isinstance(e, And._ErrorStop):
errorStop = True
continue
if errorStop:
try:
loc, exprtokens = e._parse( instring, loc, doActions )
except ParseSyntaxException:
raise
except ParseBaseException, pe:
raise ParseSyntaxException(pe)
except IndexError, ie:
@ -2502,7 +2520,7 @@ class Each(ParseExpression):
raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
# add any unmatched Optionals, in case they have default values defined
matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt)
matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ]
resultlist = []
for e in matchOrder:
@ -2781,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
argument is used to define grammars (typically quoted strings and comments) that
might contain false matches.
"""
def __init__( self, other, include=False, ignore=None ):
def __init__( self, other, include=False, ignore=None, failOn=None ):
super( SkipTo, self ).__init__( other )
if ignore is not None:
self.expr = self.expr.copy()
@ -2790,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
self.mayIndexError = False
self.includeMatch = include
self.asList = False
if failOn is not None and isinstance(failOn, basestring):
self.failOn = Literal(failOn)
else:
self.failOn = failOn
self.errmsg = "No match found for "+_ustr(self.expr)
#self.myException = ParseException("",0,self.errmsg,self)
@ -2797,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
startLoc = loc
instrlen = len(instring)
expr = self.expr
failParse = False
while loc <= instrlen:
try:
if self.failOn:
failParse = True
self.failOn.tryParse(instring, loc)
failParse = False
loc = expr._skipIgnorables( instring, loc )
expr._parse( instring, loc, doActions=False, callPreParse=False )
skipText = instring[startLoc:loc]
if self.includeMatch:
skipText = instring[startLoc:loc]
loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
if mat:
skipRes = ParseResults( skipText )
@ -2811,9 +2838,12 @@ class SkipTo(ParseElementEnhance):
else:
return loc, [ skipText ]
else:
return loc, [ instring[startLoc:loc] ]
return loc, [ skipText ]
except (ParseException,IndexError):
loc += 1
if failParse:
raise
else:
loc += 1
exc = self.myException
exc.loc = loc
exc.pstr = instring
@ -2872,6 +2902,7 @@ class Forward(ParseElementEnhance):
if hasattr(self,"name"):
return self.name
self._revertClass = self.__class__
self.__class__ = _ForwardNoRecurse
try:
if self.expr is not None:
@ -2879,8 +2910,8 @@ class Forward(ParseElementEnhance):
else:
retString = "None"
finally:
self.__class__ = Forward
return "Forward: "+retString
self.__class__ = self._revertClass
return self.__class__.__name__ + ": " + retString
def copy(self):
if self.expr is not None:
@ -3121,7 +3152,7 @@ def matchPreviousExpr(expr):
def _escapeRegexRangeChars(s):
#~ escape these chars: ^-]
for c in r"\^-]":
s = s.replace(c,"\\"+c)
s = s.replace(c,_bslash+c)
s = s.replace("\n",r"\n")
s = s.replace("\t",r"\t")
return _ustr(s)
@ -3195,6 +3226,33 @@ def dictOf( key, value ):
"""
return Dict( ZeroOrMore( Group ( key + value ) ) )
def originalTextFor(expr, asString=True):
"""Helper to return the original, untokenized text for a given expression. Useful to
restore the parsed fields of an HTML start tag into the raw tag text itself, or to
revert separate tokens with intervening whitespace back to the original matching
input text. Simpler to use than the parse action keepOriginalText, and does not
require the inspect module to chase up the call stack. By default, returns a
string containing the original parsed text.
If the optional asString argument is passed as False, then the return value is a
ParseResults containing any results names that were originally matched, and a
single token containing the original matched text from the input string. So if
the expression passed to originalTextFor contains expressions with defined
results names, you must set asString to False if you want to preserve those
results name values."""
locMarker = Empty().setParseAction(lambda s,loc,t: loc)
matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
if asString:
extractText = lambda s,l,t: s[t._original_start:t._original_end]
else:
def extractText(s,l,t):
del t[:]
t.insert(0, s[t._original_start:t._original_end])
del t["_original_start"]
del t["_original_end"]
matchExpr.setParseAction(extractText)
return matchExpr
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
@ -3464,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
raise ValueError("opening and closing strings cannot be the same")
if content is None:
if isinstance(opener,basestring) and isinstance(closer,basestring):
if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr +
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
if len(opener) == 1 and len(closer)==1:
if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr +
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else:
content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
).setParseAction(lambda t:t[0].strip()))
else:
content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
if ignoreExpr is not None:
content = (Combine(OneOrMore(~ignoreExpr +
~Literal(opener) + ~Literal(closer) +
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else:
content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
).setParseAction(lambda t:t[0].strip()))
else:
raise ValueError("opening and closing arguments must be strings if no content expression is given")
ret = Forward()
@ -3528,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
else:
smExpr = Group( Optional(NL) +
(OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
blockStatementExpr.ignore("\\" + LineEnd())
blockStatementExpr.ignore(_bslash + LineEnd())
return smExpr
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@ -3536,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
# it's easy to get these comment structures wrong - they're very common, so may as well make them available