From 388968cd88c9efe171050e45324fa93514861aea Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 15 Sep 2011 00:29:32 -0600 Subject: [PATCH] Update pyparsing to 1.5.6 --- src/calibre/utils/pyparsing.py | 364 +++++++++++++++------------------ 1 file changed, 161 insertions(+), 203 deletions(-) diff --git a/src/calibre/utils/pyparsing.py b/src/calibre/utils/pyparsing.py index bc5571ea5f..9be97dc287 100644 --- a/src/calibre/utils/pyparsing.py +++ b/src/calibre/utils/pyparsing.py @@ -1,6 +1,6 @@ # module pyparsing.py # -# Copyright (c) 2003-2010 Paul T. McGuire +# Copyright (c) 2003-2011 Paul T. McGuire # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when - embedded comments """ -__version__ = "1.5.5" -__versionTime__ = "12 Aug 2010 03:56" +__version__ = "1.5.6" +__versionTime__ = "26 June 2011 10:53" __author__ = "Paul McGuire " import string @@ -101,11 +101,12 @@ if _PY3K: basestring = str unichr = chr _ustr = str - _str2dict = set alphas = string.ascii_lowercase + string.ascii_uppercase else: _MAX_INT = sys.maxint range = xrange + set = lambda s : dict( [(c,0) for c in s] ) + alphas = string.lowercase + string.uppercase def _ustr(obj): """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries @@ -134,9 +135,6 @@ else: #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') # ... - def _str2dict(strg): - return dict( [(c,0) for c in strg] ) - alphas = string.lowercase + string.uppercase # build list of single arg builtins, tolerant of Python version, that can be used as parse actions @@ -606,10 +604,10 @@ class ParseResults(object): def __setstate__(self,state): self.__toklist = state[0] - self.__tokdict, \ - par, \ - inAccumNames, \ - self.__name = state[1] + (self.__tokdict, + par, + inAccumNames, + self.__name) = state[1] self.__accumNames = {} self.__accumNames.update(inAccumNames) if par is not None: @@ -667,6 +665,35 @@ def nullDebugAction(*args): """'Do-nothing' debug action, to suppress debugging output during parsing.""" pass +'decorator to trim function calls to match the arity of the target' +if not _PY3K: + def _trim_arity(func, maxargs=2): + limit = [0] + def wrapper(*args): + while 1: + try: + return func(*args[limit[0]:]) + except TypeError: + if limit[0] <= maxargs: + limit[0] += 1 + continue + raise + return wrapper +else: + def _trim_arity(func, maxargs=2): + limit = maxargs + def wrapper(*args): + #~ nonlocal limit + while 1: + try: + return func(*args[limit:]) + except TypeError: + if limit: + limit -= 1 + continue + raise + return wrapper + class ParserElement(object): """Abstract base level parser element class.""" DEFAULT_WHITE_CHARS = " \n\t\r" @@ -731,6 +758,9 @@ class ParserElement(object): see L{I{__call__}<__call__>}. """ newself = self.copy() + if name.endswith("*"): + name = name[:-1] + listAllMatches=True newself.resultsName = name newself.modalResults = not listAllMatches return newself @@ -753,104 +783,6 @@ class ParserElement(object): self._parse = self._parse._originalParseMethod return self - def _normalizeParseActionArgs( f ): - """Internal method used to decorate parse actions that take fewer than 3 arguments, - so that all parse actions can be called as C{f(s,l,t)}.""" - STAR_ARGS = 4 - - # special handling for single-argument builtins - if (f in singleArgBuiltins): - numargs = 1 - else: - try: - restore = None - if isinstance(f,type): - restore = f - f = f.__init__ - if not _PY3K: - codeObj = f.func_code - else: - codeObj = f.code - if codeObj.co_flags & STAR_ARGS: - return f - numargs = codeObj.co_argcount - if not _PY3K: - if hasattr(f,"im_self"): - numargs -= 1 - else: - if hasattr(f,"__self__"): - numargs -= 1 - if restore: - f = restore - except AttributeError: - try: - if not _PY3K: - call_im_func_code = f.__call__.im_func.func_code - else: - call_im_func_code = f.__code__ - - # not a function, must be a callable object, get info from the - # im_func binding of its bound __call__ method - if call_im_func_code.co_flags & STAR_ARGS: - return f - numargs = call_im_func_code.co_argcount - if not _PY3K: - if hasattr(f.__call__,"im_self"): - numargs -= 1 - else: - if hasattr(f.__call__,"__self__"): - numargs -= 0 - except AttributeError: - if not _PY3K: - call_func_code = f.__call__.func_code - else: - call_func_code = f.__call__.__code__ - # not a bound method, get info directly from __call__ method - if call_func_code.co_flags & STAR_ARGS: - return f - numargs = call_func_code.co_argcount - if not _PY3K: - if hasattr(f.__call__,"im_self"): - numargs -= 1 - else: - if hasattr(f.__call__,"__self__"): - numargs -= 1 - - - #~ print ("adding function %s with %d args" % (f.func_name,numargs)) - if numargs == 3: - return f - else: - if numargs > 3: - def tmp(s,l,t): - return f(f.__call__.__self__, s,l,t) - if numargs == 2: - def tmp(s,l,t): - return f(l,t) - elif numargs == 1: - def tmp(s,l,t): - return f(t) - else: #~ numargs == 0: - def tmp(s,l,t): - return f() - try: - tmp.__name__ = f.__name__ - except (AttributeError,TypeError): - # no need for special handling if attribute doesnt exist - pass - try: - tmp.__doc__ = f.__doc__ - except (AttributeError,TypeError): - # no need for special handling if attribute doesnt exist - pass - try: - tmp.__dict__.update(f.__dict__) - except (AttributeError,TypeError): - # no need for special handling if attribute doesnt exist - pass - return tmp - _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) - def setParseAction( self, *fns, **kwargs ): """Define action to perform when successfully matching parse element definition. Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, @@ -868,13 +800,13 @@ class ParserElement(object): consistent view of the parsed string, the parse location, and line and column positions within the parsed string. """ - self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) + self.parseAction = list(map(_trim_arity, list(fns))) self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) return self def addParseAction( self, *fns, **kwargs ): """Add parse action to expression's list of parse actions. See L{I{setParseAction}}.""" - self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) + self.parseAction += list(map(_trim_arity, list(fns))) self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) return self @@ -1012,9 +944,9 @@ class ParserElement(object): lookup = (self,instring,loc,callPreParse,doActions) if lookup in ParserElement._exprArgCache: value = ParserElement._exprArgCache[ lookup ] - if isinstance(value,Exception): + if isinstance(value, Exception): raise value - return value + return (value[0],value[1].copy()) else: try: value = self._parseNoCache( instring, loc, doActions, callPreParse ) @@ -1088,8 +1020,8 @@ class ParserElement(object): try: loc, tokens = self._parse( instring, 0 ) if parseAll: - #loc = self.preParse( instring, loc ) - se = StringEnd() + loc = self.preParse( instring, loc ) + se = Empty() + StringEnd() se._parse( instring, loc ) except ParseBaseException: if ParserElement.verbose_stacktrace: @@ -1101,10 +1033,11 @@ class ParserElement(object): else: return tokens - def scanString( self, instring, maxMatches=_MAX_INT ): + def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ): """Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location. May be called with optional - C{maxMatches} argument, to clip scanning after 'n' matches are found. + C{maxMatches} argument, to clip scanning after 'n' matches are found. If + C{overlap} is specified, then overlapping matches will be reported. Note that the start and end locations are reported relative to the string being parsed. See L{I{parseString}} for more information on parsing @@ -1133,7 +1066,14 @@ class ParserElement(object): if nextLoc > loc: matches += 1 yield tokens, preloc, nextLoc - loc = nextLoc + if overlap: + nextloc = preparseFn( instring, loc ) + if nextloc > loc: + loc = nextLoc + else: + loc += 1 + else: + loc = nextLoc else: loc = preloc+1 except ParseBaseException: @@ -1168,6 +1108,7 @@ class ParserElement(object): out.append(t) lastE = e out.append(instring[lastE:]) + out = [o for o in out if o] return "".join(map(_ustr,_flatten(out))) except ParseBaseException: if ParserElement.verbose_stacktrace: @@ -1372,6 +1313,9 @@ class ParserElement(object): userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") could be written as:: userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + + If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be + passed as C{True}. """ return self.setResultsName(name) @@ -1398,9 +1342,9 @@ class ParserElement(object): return self def parseWithTabs( self ): - """Overrides default behavior to expand s to spaces before parsing the input string. + """Overrides default behavior to expand C{}s to spaces before parsing the input string. Must be called before C{parseString} when the input grammar contains elements that - match characters.""" + match C{} characters.""" self.keepTabs = True return self @@ -1508,12 +1452,10 @@ class Token(ParserElement): """Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" def __init__( self ): super(Token,self).__init__( savelist=False ) - #self.myException = ParseException("",0,"",self) def setName(self, name): s = super(Token,self).setName(name) self.errmsg = "Expected " + self.name - #s.myException.msg = self.errmsg return s @@ -1534,7 +1476,6 @@ class NoMatch(Token): self.mayReturnEmpty = True self.mayIndexError = False self.errmsg = "Unmatchable token" - #self.myException.msg = self.errmsg def parseImpl( self, instring, loc, doActions=True ): exc = self.myException @@ -1558,7 +1499,6 @@ class Literal(Token): self.name = '"%s"' % _ustr(self.match) self.errmsg = "Expected " + self.name self.mayReturnEmpty = False - #self.myException.msg = self.errmsg self.mayIndexError = False # Performance tuning: this routine gets called a *lot* @@ -1579,12 +1519,12 @@ _L = Literal class Keyword(Token): """Token to exactly match a specified string as a keyword, that is, it must be immediately followed by a non-keyword character. Compare with C{Literal}:: - Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. - Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' + Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. + Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} Accepts two optional constructor arguments in addition to the keyword string: C{identChars} is a string of characters that would be valid identifier characters, defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive - matching, default is False. + matching, default is C{False}. """ DEFAULT_KEYWORD_CHARS = alphanums+"_$" @@ -1600,13 +1540,12 @@ class Keyword(Token): self.name = '"%s"' % self.match self.errmsg = "Expected " + self.name self.mayReturnEmpty = False - #self.myException.msg = self.errmsg self.mayIndexError = False self.caseless = caseless if caseless: self.caselessmatch = matchString.upper() identChars = identChars.upper() - self.identChars = _str2dict(identChars) + self.identChars = set(identChars) def parseImpl( self, instring, loc, doActions=True ): if self.caseless: @@ -1648,7 +1587,6 @@ class CaselessLiteral(Literal): self.returnString = matchString self.name = "'%s'" % self.returnString self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg def parseImpl( self, instring, loc, doActions=True ): if instring[ loc:loc+self.matchLen ].upper() == self.match: @@ -1680,18 +1618,25 @@ class Word(Token): defaults to the initial character set), and an optional minimum, maximum, and/or exact length. The default value for C{min} is 1 (a minimum value < 1 is not valid); the default values for C{max} and C{exact} - are 0, meaning no maximum or exact length restriction. + are 0, meaning no maximum or exact length restriction. An optional + C{exclude} parameter can list characters that might be found in + the input C{bodyChars} string; useful to define a word of all printables + except for one or two characters, for instance. """ - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): + def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): super(Word,self).__init__() + if excludeChars: + initChars = ''.join([c for c in initChars if c not in excludeChars]) + if bodyChars: + bodyChars = ''.join([c for c in bodyChars if c not in excludeChars]) self.initCharsOrig = initChars - self.initChars = _str2dict(initChars) + self.initChars = set(initChars) if bodyChars : self.bodyCharsOrig = bodyChars - self.bodyChars = _str2dict(bodyChars) + self.bodyChars = set(bodyChars) else: self.bodyCharsOrig = initChars - self.bodyChars = _str2dict(initChars) + self.bodyChars = set(initChars) self.maxSpecified = max > 0 @@ -1711,7 +1656,6 @@ class Word(Token): self.name = _ustr(self) self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg self.mayIndexError = False self.asKeyword = asKeyword @@ -1743,7 +1687,7 @@ class Word(Token): raise exc loc = result.end() - return loc,result.group() + return loc, result.group() if not(instring[ loc ] in self.initChars): #~ raise ParseException( instring, loc, self.errmsg ) @@ -1807,24 +1751,24 @@ class Regex(Token): """ compiledREtype = type(re.compile("[A-Z]")) def __init__( self, pattern, flags=0): - """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" + """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" super(Regex,self).__init__() if isinstance(pattern, basestring): - if len(pattern) == 0: - warnings.warn("null string passed to Regex; use Empty() instead", - SyntaxWarning, stacklevel=2) - - self.pattern = pattern - self.flags = flags - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % pattern, - SyntaxWarning, stacklevel=2) - raise + if len(pattern) == 0: + warnings.warn("null string passed to Regex; use Empty() instead", + SyntaxWarning, stacklevel=2) + + self.pattern = pattern + self.flags = flags + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % pattern, + SyntaxWarning, stacklevel=2) + raise elif isinstance(pattern, Regex.compiledREtype): self.re = pattern @@ -1837,7 +1781,6 @@ class Regex(Token): self.name = _ustr(self) self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg self.mayIndexError = False self.mayReturnEmpty = True @@ -1929,7 +1872,8 @@ class QuotedString(Token): self.pattern += (r'|(?:%s)' % re.escape(escQuote)) if escChar: self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar)+"(.)" + charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-') + self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset) self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) try: @@ -1942,7 +1886,6 @@ class QuotedString(Token): self.name = _ustr(self) self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg self.mayIndexError = False self.mayReturnEmpty = True @@ -2014,7 +1957,6 @@ class CharsNotIn(Token): self.name = _ustr(self) self.errmsg = "Expected " + self.name self.mayReturnEmpty = ( self.minLen == 0 ) - #self.myException.msg = self.errmsg self.mayIndexError = False def parseImpl( self, instring, loc, doActions=True ): @@ -2077,7 +2019,6 @@ class White(Token): self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) self.mayReturnEmpty = True self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg self.minLen = min @@ -2150,7 +2091,6 @@ class LineStart(_PositionToken): super(LineStart,self).__init__() self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) self.errmsg = "Expected start of line" - #self.myException.msg = self.errmsg def preParse( self, instring, loc ): preloc = super(LineStart,self).preParse(instring,loc) @@ -2175,7 +2115,6 @@ class LineEnd(_PositionToken): super(LineEnd,self).__init__() self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) self.errmsg = "Expected end of line" - #self.myException.msg = self.errmsg def parseImpl( self, instring, loc, doActions=True ): if loc" ] ) tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) - openTag = Suppress("<") + tagStr + \ + openTag = Suppress("<") + tagStr("tag") + \ Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ Optional( Suppress("=") + tagAttrValue ) ))) + \ Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") @@ -3508,19 +3464,21 @@ def makeXMLTags(tagStr): def withAttribute(*args,**attrDict): """Helper to create a validating parse action to be used with start tags created - with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag + with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag with a required attribute value, to avoid false matches on common tags such as - or
. + C{} or C{
}. - Call withAttribute with a series of attribute names and values. Specify the list + Call C{withAttribute} with a series of attribute names and values. Specify the list of filter attributes names and values as: - - keyword arguments, as in (class="Customer",align="right"), or + - keyword arguments, as in C{(align="right")}, or + - as an explicit dict with C{**} operator, when an attribute name is also a Python + reserved word, as in C{**{"class":"Customer", "align":"right"}} - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) For attribute names with a namespace prefix, you must use the second form. Attribute names are matched insensitive to upper/lower case. To verify that the attribute exists, but without specifying a value, pass - withAttribute.ANY_VALUE as the value. + C{withAttribute.ANY_VALUE} as the value. """ if args: attrs = args[:] @@ -3631,12 +3589,12 @@ def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.cop expression will capture all whitespace-delimited content between delimiters as a list of separate values. - Use the ignoreExpr argument to define expressions that may contain + Use the C{ignoreExpr} argument to define expressions that may contain opening or closing characters that should not be treated as opening or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an Or or MatchFirst. - The default is quotedString, but if no expressions are to be ignored, - then pass None for this argument. + expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}. + The default is L{quotedString}, but if no expressions are to be ignored, + then pass C{None} for this argument. """ if opener == closer: raise ValueError("opening and closing strings cannot be the same") @@ -3683,7 +3641,7 @@ def indentedBlock(blockStatementExpr, indentStack, indent=True): the current level; set to False for block of left-most statements (default=True) - A valid block must contain at least one blockStatement. + A valid block must contain at least one C{blockStatement}. """ def checkPeerIndent(s,l,t): if l >= len(s): return