IGN:Update pyparsing to 1.5.1

2025-12-08 06:05:04 -05:00 · 2009-01-16 09:52:12 -08:00 · 2009-01-16 09:52:12 -08:00 · 4e9dc3e87f
commit 4e9dc3e87f
parent e3bc28106e
1 changed files with 3670 additions and 3600 deletions
--- a/src/calibre/utils/pyparsing.py
+++ b/src/calibre/utils/pyparsing.py
@ -58,17 +58,17 @@ The pyparsing module handles some of the problems that are typically vexing when
 - embedded comments
 """
-__version__ = "1.5.0"
+__version__ = "1.5.1"
-__versionTime__ = "28 May 2008 10:05"
+__versionTime__ = "2 October 2008 00:44"
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 import string
 from weakref import ref as wkref
-import copy,sys
+import copy
 import sys
 import warnings
 import re
 import sre_constants
 import xml.sax.saxutils
 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
 __all__ = [
@ -88,7 +88,7 @@ __all__ = [
 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock',
+'indentedBlock', 'originalTextFor',
 ]
@ -130,11 +130,22 @@ if not _PY3K:
            # ...
 else:
    _ustr = str
    unichr = chr
 def _str2dict(strg):
    return dict( [(c,0) for c in strg] )
    #~ return set( [c for c in strg] )
 def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data."""
    # ampersand must be replaced first
    from_symbols = '&><"\''
    to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
    for from_,to_ in zip(from_symbols, to_symbols):
        data = data.replace(from_, to_)
    return data
 class _Constants(object):
    pass
@ -145,7 +156,7 @@ else:
 nums       = string.digits
 hexnums    = nums + "ABCDEFabcdef"
 alphanums  = alphas + nums
-_bslash = "\\"
+_bslash = chr(92)
 printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
 class ParseBaseException(Exception):
@ -193,6 +204,9 @@ class ParseBaseException(Exception):
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        return "loc msg pstr parserElement lineno col line " \
               "markInputLine __str__ __repr__".split()
 class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
@ -213,7 +227,8 @@ class ParseSyntaxException(ParseFatalException):
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
-        ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement)
+        super(ParseSyntaxException, self).__init__(
                                    pe.pstr, pe.loc, pe.msg, pe.parserElement)
 #~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
@ -243,6 +258,8 @@ class _ParseResultsWithOffset(object):
        return self.tup[i]
    def __repr__(self):
        return repr(self.tup)
    def setOffset(self,i):
        """Rebuild self.tup with its first element kept and its second element set to i."""
        first = self.tup[0]
        self.tup = (first, i)
 class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
@ -272,9 +289,6 @@ class ParseResults(object):
                self.__toklist = [toklist]
            self.__tokdict = dict()
        # this line is related to debugging the asXML bug
        #~ asList = False
        if name:
            if not modal:
                self.__accumNames[name] = 0
@ -286,9 +300,9 @@ class ParseResults(object):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
-                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
-                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
@ -374,7 +388,7 @@ class ParseResults(object):
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
-                occurrences[k] = _ParseResultsWithOffset(value, position + (position > j))
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
@ -411,6 +425,7 @@ class ParseResults(object):
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        del other
@ -517,7 +532,7 @@ class ParseResults(object):
                        continue
                    else:
                        resTag = "ITEM"
-                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
+                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]
@ -594,6 +609,8 @@ class ParseResults(object):
        else:
            self.__parent = None
    def __dir__(self):
        """Return what dir() reports for the superclass proxy, followed by the
           entries of self.keys()."""
        inherited = dir(super(ParseResults,self))
        return inherited + self.keys()
 def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
@ -715,7 +732,7 @@ class ParserElement(object):
            def breaker(instring, loc, doActions=True, callPreParse=True):
                import pdb
                pdb.set_trace()
-                _parseMethod( instring, loc, doActions, callPreParse )
+                return _parseMethod( instring, loc, doActions, callPreParse )
            breaker._originalParseMethod = _parseMethod
            self._parse = breaker
        else:
@ -1047,6 +1064,7 @@ class ParserElement(object):
            instring = instring.expandtabs()
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            loc = self.preParse( instring, loc )
            StringEnd()._parse( instring, loc )
        return tokens
@ -1158,11 +1176,7 @@ class ParserElement(object):
        if isinstance(other,int):
            minElements, optElements = other,0
        elif isinstance(other,tuple):
-            if len(other)==0:
+            other = (other + (None, None))[:2]
                other = (None,None)
            elif len(other)==1:
                other = (other[0],None)
            if len(other)==2:
            if other[0] is None:
                other = (0, other[1])
            if isinstance(other[0],int) and other[1] is None:
@ -1177,8 +1191,6 @@ class ParserElement(object):
                optElements -= minElements
            else:
                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
            else:
                raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
        else:
            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
@ -1360,7 +1372,7 @@ class ParserElement(object):
        """Check defined expressions for valid structure, check for infinite recursive definitions."""
        self.checkRecursion( [] )
-    def parseFile( self, file_or_filename ):
+    def parseFile( self, file_or_filename, parseAll=False ):
        """Execute the parse expression on the given file or filename.
           If a filename is specified (instead of a file object),
           the entire file is opened, read, and closed before parsing.
@ -1371,7 +1383,7 @@ class ParserElement(object):
            f = open(file_or_filename, "rb")
            file_contents = f.read()
            f.close()
-        return self.parseString(file_contents)
+        return self.parseString(file_contents, parseAll)
    def getException(self):
        """Create a ParseException with an empty parse string, location 0,
           this element's errmsg as the message, and this element attached."""
        message = self.errmsg
        return ParseException("", 0, message, self)
@ -1393,12 +1405,18 @@ class ParserElement(object):
        else:
            return super(ParserElement,self)==other
    def __ne__(self,other):
        return not (self == other)
    def __hash__(self):
        return hash(id(self))
    def __req__(self,other):
        return self == other
    def __rne__(self,other):
        return not (self == other)
 class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
@ -1533,7 +1551,6 @@ class Keyword(Token):
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
 class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
@ -2034,7 +2051,7 @@ class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg
@ -2059,7 +2076,7 @@ class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
-        self.setWhitespaceChars( " \t" )
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg
@ -2271,10 +2288,9 @@ class And(ParseExpression):
    """
    class _ErrorStop(Empty):
-        def __new__(cls,*args,**kwargs):
+        def __init__(self, *args, **kwargs):
-            return And._ErrorStop.instance
+            super(Empty,self).__init__(*args, **kwargs)
-    _ErrorStop.instance = Empty()
+            self.leaveWhitespace()
    _ErrorStop.instance.leaveWhitespace()
    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
@ -2293,12 +2309,14 @@ class And(ParseExpression):
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
-            if e is And._ErrorStop.instance:
+            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException, pe:
                    raise ParseSyntaxException(pe)
                except IndexError, ie:
@ -2502,7 +2520,7 @@ class Each(ParseExpression):
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
        # add any unmatched Optionals, in case they have default values defined
-        matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt)
+        matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ]
        resultlist = []
        for e in matchOrder:
@ -2781,7 +2799,7 @@ class SkipTo(ParseElementEnhance):
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
-    def __init__( self, other, include=False, ignore=None ):
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            self.expr = self.expr.copy()
@ -2790,6 +2808,10 @@ class SkipTo(ParseElementEnhance):
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)
@ -2797,12 +2819,17 @@ class SkipTo(ParseElementEnhance):
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    failParse = True
                    self.failOn.tryParse(instring, loc)
                    failParse = False
                loc = expr._skipIgnorables( instring, loc )
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                if self.includeMatch:
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
@ -2811,8 +2838,11 @@ class SkipTo(ParseElementEnhance):
                    else:
                        return loc, [ skipText ]
                else:
-                    return loc, [ instring[startLoc:loc] ]
+                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    loc += 1
        exc = self.myException
        exc.loc = loc
@ -2872,6 +2902,7 @@ class Forward(ParseElementEnhance):
        if hasattr(self,"name"):
            return self.name
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
@ -2879,8 +2910,8 @@ class Forward(ParseElementEnhance):
            else:
                retString = "None"
        finally:
-            self.__class__ = Forward
+            self.__class__ = self._revertClass
-        return "Forward: "+retString
+        return self.__class__.__name__ + ": " + retString
    def copy(self):
        if self.expr is not None:
@ -3121,7 +3152,7 @@ def matchPreviousExpr(expr):
 def _escapeRegexRangeChars(s):
    #~  escape these chars: ^-]
    for c in r"\^-]":
-        s = s.replace(c,"\\"+c)
+        s = s.replace(c,_bslash+c)
    s = s.replace("\n",r"\n")
    s = s.replace("\t",r"\t")
    return _ustr(s)
@ -3195,6 +3226,33 @@ def dictOf( key, value ):
    """
    return Dict( ZeroOrMore( Group ( key + value ) ) )
 def originalTextFor(expr, asString=True):
    """Helper that wraps expr so a match yields the original, untokenized input text.

       Useful to restore the parsed fields of an HTML start tag into the raw tag text
       itself, or to revert separate tokens with intervening whitespace back to the
       original matching input text.  Simpler to use than the parse action
       keepOriginalText, and does not require the inspect module to chase up the
       call stack.

       With asString left at its default of True, the result is a string containing
       the original parsed text.  With asString=False, the result is a ParseResults
       containing any results names that were originally matched, and a single token
       containing the original matched text from the input string.  So if expr
       contains expressions with defined results names, pass asString=False to
       preserve those results name values."""
    def currentLoc(s, loc, t):
        # parse action that returns the current parse location
        return loc

    def textOnly(s, l, t):
        # slice the input string between the two recorded marker locations
        return s[t._original_start:t._original_end]

    def textKeepingNames(s, l, t):
        # statement order matters: the marker names are read to slice the input
        # before they are deleted from the results
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    locMarker = Empty().setParseAction(currentLoc)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")
    if asString:
        extractText = textOnly
    else:
        extractText = textKeepingNames
    matchExpr.setParseAction(extractText)
    return matchExpr
 # convenience constants for positional expressions
 empty       = Empty().setName("empty")
 lineStart   = LineStart().setName("lineStart")
@ -3464,12 +3522,24 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
-                content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
+                    content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr + 
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    ret = Forward()
@ -3528,7 +3598,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True):
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
-    blockStatementExpr.ignore("\\" + LineEnd())
+    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
@ -3536,7 +3606,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
 commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
-_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
 # it's easy to get these comment structures wrong - they're very common, so may as well make them available