diff --git a/src/calibre/utils/pyparsing.py b/src/calibre/utils/pyparsing.py index 9be97dc287..149ccaf1b0 100644 --- a/src/calibre/utils/pyparsing.py +++ b/src/calibre/utils/pyparsing.py @@ -58,8 +58,8 @@ The pyparsing module handles some of the problems that are typically vexing when - embedded comments """ -__version__ = "1.5.6" -__versionTime__ = "26 June 2011 10:53" +__version__ = "1.5.7" +__versionTime__ = "17 November 2012 16:18" __author__ = "Paul McGuire " import string @@ -81,66 +81,51 @@ __all__ = [ 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', +'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', 'originalTextFor', +'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', ] -""" -Detect if we are running version 3.X and make appropriate changes -Robert A. Clark -""" -_PY3K = sys.version_info[0] > 2 -if _PY3K: - _MAX_INT = sys.maxsize - basestring = str - unichr = chr - _ustr = str - alphas = string.ascii_lowercase + string.ascii_uppercase -else: - _MAX_INT = sys.maxint - range = xrange - set = lambda s : dict( [(c,0) for c in s] ) - alphas = string.lowercase + string.uppercase +_MAX_INT = sys.maxint +range = xrange +set = lambda s : dict( [(c,0) for c in s] ) - def _ustr(obj): - """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. - """ - if isinstance(obj,unicode): - return obj +def _ustr(obj): + """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries + str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It + then < returns the unicode object | encodes it with the default encoding | ... >. + """ + if isinstance(obj,unicode): + return obj - try: - # If this works, then _ustr(obj) has the same behaviour as str(obj), so - # it won't break any existing code. - return str(obj) + try: + # If this works, then _ustr(obj) has the same behaviour as str(obj), so + # it won't break any existing code. + return str(obj) - except UnicodeEncodeError: - # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) - # state that "The return value must be a string object". However, does a - # unicode object (being a subclass of basestring) count as a "string - # object"? - # If so, then return a unicode object: - return unicode(obj) - # Else encode it... but how? There are many choices... :) - # Replace unprintables with escape codes? - #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') - # Replace unprintables with question marks? - #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') - # ... - - alphas = string.lowercase + string.uppercase + except UnicodeEncodeError: + # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) + # state that "The return value must be a string object". However, does a + # unicode object (being a subclass of basestring) count as a "string + # object"? + # If so, then return a unicode object: + return unicode(obj) + # Else encode it... but how? There are many choices... :) + # Replace unprintables with escape codes? + #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') + # Replace unprintables with question marks? + #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') + # ... # build list of single arg builtins, tolerant of Python version, that can be used as parse actions singleArgBuiltins = [] import __builtin__ -for fname in "sum len enumerate sorted reversed list tuple set any all".split(): +for fname in "sum len sorted reversed list tuple set any all min max".split(): try: singleArgBuiltins.append(getattr(__builtin__,fname)) except AttributeError: @@ -159,7 +144,8 @@ def _xml_escape(data): class _Constants(object): pass -nums = string.digits +alphas = string.ascii_lowercase + string.ascii_uppercase +nums = "0123456789" hexnums = nums + "ABCDEFabcdef" alphanums = alphas + nums _bslash = chr(92) @@ -211,7 +197,7 @@ class ParseBaseException(Exception): return line_str.strip() def __dir__(self): return "loc msg pstr parserElement lineno col line " \ - "markInputLine __str__ __repr__".split() + "markInputline __str__ __repr__".split() class ParseException(ParseBaseException): """exception thrown when parse expressions don't match class; @@ -228,8 +214,8 @@ class ParseFatalException(ParseBaseException): pass class ParseSyntaxException(ParseFatalException): - """just like C{ParseFatalException}, but thrown internally when an - C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because + """just like C{L{ParseFatalException}}, but thrown internally when an + C{L{ErrorStop}} ('-' operator) indicates that parsing is to stop immediately because an unbacktrackable syntax error has been found""" def __init__(self, pe): super(ParseSyntaxException, self).__init__( @@ -444,16 +430,13 @@ class ParseResults(object): return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) def __str__( self ): - out = "[" - sep = "" + out = [] for i in self.__toklist: if isinstance(i, ParseResults): - out += sep + _ustr(i) + out.append(_ustr(i)) else: - out += sep + repr(i) - sep = ", " - out += "]" - return out + out.append(repr(i)) + return '[' + ', '.join(out) + ']' def _asStringList( self, sep='' ): out = [] @@ -616,7 +599,7 @@ class ParseResults(object): self.__parent = None def __dir__(self): - return dir(super(ParseResults,self)) + self.keys() + return dir(super(ParseResults,self)) + list(self.keys()) def col (loc,strg): """Returns current column within a string, counting newlines as line separators. @@ -624,7 +607,7 @@ def col (loc,strg): Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a + on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. """ @@ -636,7 +619,7 @@ def lineno(loc,strg): Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a + on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. """ @@ -666,33 +649,23 @@ def nullDebugAction(*args): pass 'decorator to trim function calls to match the arity of the target' -if not _PY3K: - def _trim_arity(func, maxargs=2): - limit = [0] - def wrapper(*args): - while 1: - try: - return func(*args[limit[0]:]) - except TypeError: - if limit[0] <= maxargs: - limit[0] += 1 - continue - raise - return wrapper -else: - def _trim_arity(func, maxargs=2): - limit = maxargs - def wrapper(*args): - #~ nonlocal limit - while 1: - try: - return func(*args[limit:]) - except TypeError: - if limit: - limit -= 1 - continue - raise - return wrapper +def _trim_arity(func, maxargs=2): + if func in singleArgBuiltins: + return lambda s,l,t: func(t) + limit = [0] + foundArity = [False] + def wrapper(*args): + while 1: + try: + ret = func(*args[limit[0]:]) + foundArity[0] = True + return ret + except TypeError: + if limit[0] <= maxargs and not foundArity[0]: + limit[0] += 1 + continue + raise + return wrapper class ParserElement(object): """Abstract base level parser element class.""" @@ -705,6 +678,13 @@ class ParserElement(object): ParserElement.DEFAULT_WHITE_CHARS = chars setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) + def inlineLiteralsUsing(cls): + """ + Set class to be used for inclusion of string literals into a parser. + """ + ParserElement.literalStringClass = cls + inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) + def __init__( self, savelist=False ): self.parseAction = list() self.failAction = None @@ -789,14 +769,14 @@ class ParserElement(object): C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: - s = the original string being parsed (see note below) - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a ParseResults object + - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object If the functions in fns modify the tokens, they can return them as the return value from fn, and the modified list of tokens will replace the original. Otherwise, fn does not need to return any value. Note: the default parsing behavior is to expand tabs in the input string before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a + on parsing strings containing C{}s, and suggested methods to maintain a consistent view of the parsed string, the parse location, and line and column positions within the parsed string. """ @@ -818,7 +798,7 @@ class ParserElement(object): - loc = location where expression match was attempted and failed - expr = the parse expression that failed - err = the exception thrown - The function returns no value. It may throw C{ParseFatalException} + The function returns no value. It may throw C{L{ParseFatalException}} if it is desired to stop parsing immediately.""" self.failAction = fn return self @@ -872,15 +852,12 @@ class ParserElement(object): loc,tokens = self.parseImpl( instring, preloc, doActions ) except IndexError: raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException: + except ParseBaseException, err: #~ print ("Exception raised:", err) err = None if self.debugActions[2]: - err = sys.exc_info()[1] self.debugActions[2]( instring, tokensStart, self, err ) if self.failAction: - if err is None: - err = sys.exc_info()[1] self.failAction( instring, tokensStart, self, err ) raise else: @@ -910,10 +887,9 @@ class ParserElement(object): self.resultsName, asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), modal=self.modalResults ) - except ParseBaseException: + except ParseBaseException, err: #~ print "Exception raised in user parse action:", err if (self.debugActions[2] ): - err = sys.exc_info()[1] self.debugActions[2]( instring, tokensStart, self, err ) raise else: @@ -952,8 +928,7 @@ class ParserElement(object): value = self._parseNoCache( instring, loc, doActions, callPreParse ) ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) return value - except ParseBaseException: - pe = sys.exc_info()[1] + except ParseBaseException, pe: ParserElement._exprArgCache[ lookup ] = pe raise @@ -994,7 +969,7 @@ class ParserElement(object): If you want the grammar to require that the entire input string be successfully parsed, then set C{parseAll} to True (equivalent to ending - the grammar with C{StringEnd()}). + the grammar with C{L{StringEnd()}}). Note: C{parseString} implicitly calls C{expandtabs()} on the input string, in order to report proper column numbers in parse actions. @@ -1023,12 +998,11 @@ class ParserElement(object): loc = self.preParse( instring, loc ) se = Empty() + StringEnd() se._parse( instring, loc ) - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc else: return tokens @@ -1076,16 +1050,15 @@ class ParserElement(object): loc = nextLoc else: loc = preloc+1 - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc def transformString( self, instring ): - """Extension to C{scanString}, to modify matching text with modified tokens that may + """Extension to C{L{scanString}}, to modify matching text with modified tokens that may be returned from a parse action. To use C{transformString}, define a grammar and attach a parse action to it that modifies the returned token list. Invoking C{transformString()} on a target string will then scan for matches, @@ -1110,33 +1083,31 @@ class ParserElement(object): out.append(instring[lastE:]) out = [o for o in out if o] return "".join(map(_ustr,_flatten(out))) - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc def searchString( self, instring, maxMatches=_MAX_INT ): - """Another extension to C{scanString}, simplifying the access to the tokens found + """Another extension to C{L{scanString}}, simplifying the access to the tokens found to match the given parse expression. May be called with optional C{maxMatches} argument, to clip searching after 'n' matches are found. """ try: return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - except ParseBaseException: + except ParseBaseException, exc: if ParserElement.verbose_stacktrace: raise else: # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] raise exc def __add__(self, other ): - """Implementation of + operator - returns And""" + """Implementation of + operator - returns C{L{And}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1144,9 +1115,9 @@ class ParserElement(object): return And( [ self, other ] ) def __radd__(self, other ): - """Implementation of + operator when left operand is not a C{ParserElement}""" + """Implementation of + operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1154,9 +1125,9 @@ class ParserElement(object): return other + self def __sub__(self, other): - """Implementation of - operator, returns C{And} with error stop""" + """Implementation of - operator, returns C{L{And}} with error stop""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1164,9 +1135,9 @@ class ParserElement(object): return And( [ self, And._ErrorStop(), other ] ) def __rsub__(self, other ): - """Implementation of - operator when left operand is not a C{ParserElement}""" + """Implementation of - operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1179,12 +1150,12 @@ class ParserElement(object): tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples may also include C{None} as in: - C{expr*(n,None)} or C{expr*(n,)} is equivalent - to C{expr*n + ZeroOrMore(expr)} + to C{expr*n + L{ZeroOrMore}(expr)} (read as "at least n instances of C{expr}") - C{expr*(None,n)} is equivalent to C{expr*(0,n)} (read as "0 to n instances of C{expr}") - - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} - - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} + - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} + - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} Note that C{expr*(None,n)} does not raise an exception if more than n exprs exist in the input stream; that is, @@ -1245,9 +1216,9 @@ class ParserElement(object): return self.__mul__(other) def __or__(self, other ): - """Implementation of | operator - returns C{MatchFirst}""" + """Implementation of | operator - returns C{L{MatchFirst}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1255,9 +1226,9 @@ class ParserElement(object): return MatchFirst( [ self, other ] ) def __ror__(self, other ): - """Implementation of | operator when left operand is not a C{ParserElement}""" + """Implementation of | operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1265,9 +1236,9 @@ class ParserElement(object): return other | self def __xor__(self, other ): - """Implementation of ^ operator - returns C{Or}""" + """Implementation of ^ operator - returns C{L{Or}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1275,9 +1246,9 @@ class ParserElement(object): return Or( [ self, other ] ) def __rxor__(self, other ): - """Implementation of ^ operator when left operand is not a C{ParserElement}""" + """Implementation of ^ operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1285,9 +1256,9 @@ class ParserElement(object): return other ^ self def __and__(self, other ): - """Implementation of & operator - returns C{Each}""" + """Implementation of & operator - returns C{L{Each}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1295,9 +1266,9 @@ class ParserElement(object): return Each( [ self, other ] ) def __rand__(self, other ): - """Implementation of & operator when left operand is not a C{ParserElement}""" + """Implementation of & operator when left operand is not a C{L{ParserElement}}""" if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) if not isinstance( other, ParserElement ): warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), SyntaxWarning, stacklevel=2) @@ -1305,11 +1276,11 @@ class ParserElement(object): return other & self def __invert__( self ): - """Implementation of ~ operator - returns C{NotAny}""" + """Implementation of ~ operator - returns C{L{NotAny}}""" return NotAny( self ) def __call__(self, name): - """Shortcut for C{setResultsName}, with C{listAllMatches=default}:: + """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") could be written as:: userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") @@ -1403,15 +1374,17 @@ class ParserElement(object): try: file_contents = file_or_filename.read() except AttributeError: - f = open(file_or_filename, "rb") + f = open(file_or_filename, "r") file_contents = f.read() f.close() try: return self.parseString(file_contents, parseAll) - except ParseBaseException: - # catch and re-raise exception from here, clears out pyparsing internal stack trace - exc = sys.exc_info()[1] - raise exc + except ParseBaseException, exc: + if ParserElement.verbose_stacktrace: + raise + else: + # catch and re-raise exception from here, clears out pyparsing internal stack trace + raise exc def getException(self): return ParseException("",0,self.errmsg,self) @@ -1515,10 +1488,11 @@ class Literal(Token): exc.pstr = instring raise exc _L = Literal +ParserElement.literalStringClass = Literal class Keyword(Token): """Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with C{Literal}:: + immediately followed by a non-keyword character. Compare with C{L{Literal}}:: Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} Accepts two optional constructor arguments in addition to the keyword string: @@ -1821,9 +1795,9 @@ class QuotedString(Token): - quoteChar - string of one or more characters defining the quote delimiting string - escChar - character to escape quotes, typically backslash (default=None) - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) - - multiline - boolean indicating whether quotes can span multiple lines (default=False) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) + - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) """ super(QuotedString,self).__init__() @@ -2003,7 +1977,7 @@ class White(Token): by pyparsing grammars. This class is included when some whitespace structures are significant. Define with a string containing the whitespace characters to be matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments, - as defined for the C{Word} class.""" + as defined for the C{L{Word}} class.""" whiteStrs = { " " : "", "\t": "", @@ -2331,7 +2305,8 @@ class And(ParseExpression): class _ErrorStop(Empty): def __init__(self, *args, **kwargs): - super(Empty,self).__init__(*args, **kwargs) + super(And._ErrorStop,self).__init__(*args, **kwargs) + self.name = '-' self.leaveWhitespace() def __init__( self, exprs, savelist = True ): @@ -2359,8 +2334,7 @@ class And(ParseExpression): loc, exprtokens = e._parse( instring, loc, doActions ) except ParseSyntaxException: raise - except ParseBaseException: - pe = sys.exc_info()[1] + except ParseBaseException, pe: raise ParseSyntaxException(pe) except IndexError: raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) @@ -2412,8 +2386,7 @@ class Or(ParseExpression): for e in self.exprs: try: loc2 = e.tryParse( instring, loc ) - except ParseException: - err = sys.exc_info()[1] + except ParseException, err: if err.loc > maxExcLoc: maxException = err maxExcLoc = err.loc @@ -2436,7 +2409,7 @@ class Or(ParseExpression): def __ixor__(self, other ): if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) return self.append( other ) #Or( [ self, other ] ) def __str__( self ): @@ -2495,7 +2468,7 @@ class MatchFirst(ParseExpression): def __ior__(self, other ): if isinstance( other, basestring ): - other = Literal( other ) + other = ParserElement.literalStringClass( other ) return self.append( other ) #MatchFirst( [ self, other ] ) def __str__( self ): @@ -2916,13 +2889,14 @@ class Forward(ParseElementEnhance): thereby leaving b and c out as parseable alternatives. It is recommended that you explicitly group the values inserted into the C{Forward}:: fwdExpr << (a | b | c) + Converting to use the '<<=' operator instead will avoid this problem. """ def __init__( self, other=None ): super(Forward,self).__init__( other, savelist=False ) def __lshift__( self, other ): if isinstance( other, basestring ): - other = Literal(other) + other = ParserElement.literalStringClass(other) self.expr = other self.mayReturnEmpty = other.mayReturnEmpty self.strRepr = None @@ -2933,7 +2907,8 @@ class Forward(ParseElementEnhance): self.saveAsList = self.expr.saveAsList self.ignoreExprs.extend(self.expr.ignoreExprs) return None - + __ilshift__ = __lshift__ + def leaveWhitespace( self ): self.skipWhitespace = False return self @@ -2993,7 +2968,7 @@ class Upcase(TokenConverter): DeprecationWarning,stacklevel=2) def postParse( self, instring, loc, tokenlist ): - return list(map( string.upper, tokenlist )) + return list(map( str.upper, tokenlist )) class Combine(TokenConverter): @@ -3029,7 +3004,7 @@ class Combine(TokenConverter): return retToks class Group(TokenConverter): - """Converter to return the matched tokens as a list - useful for returning tokens of C{ZeroOrMore} and C{OneOrMore} expressions.""" + """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.""" def __init__( self, expr ): super(Group,self).__init__( expr ) self.saveAsList = True @@ -3105,8 +3080,7 @@ def traceParseAction(f): sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) try: ret = f(*paArgs) - except Exception: - exc = sys.exc_info()[1] + except Exception, exc: sys.stderr.write( "<}()}. """ def _replFunc(*args): return [replStr] @@ -3398,7 +3372,7 @@ def downcaseTokens(s,l,t): return [ tt.lower() for tt in map(_ustr,t) ] def keepOriginalText(s,startLoc,t): - """DEPRECATED - use new helper method C{originalTextFor}. + """DEPRECATED - use new helper method C{L{originalTextFor}}. Helper parse action to preserve original parsed text, overriding any nested parse actions.""" try: @@ -3464,7 +3438,7 @@ def makeXMLTags(tagStr): def withAttribute(*args,**attrDict): """Helper to create a validating parse action to be used with start tags created - with C{makeXMLTags} or C{makeHTMLTags}. Use C{withAttribute} to qualify a starting tag + with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag with a required attribute value, to avoid false matches on common tags such as C{} or C{
}. @@ -3499,7 +3473,7 @@ opAssoc = _Constants() opAssoc.LEFT = object() opAssoc.RIGHT = object() -def operatorPrecedence( baseExpr, opList ): +def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ): """Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached @@ -3518,13 +3492,15 @@ def operatorPrecedence( baseExpr, opList ): be 1, 2, or 3) - rightLeftAssoc is the indicator whether the operator is right or left associative, using the pyparsing-defined - constants opAssoc.RIGHT and opAssoc.LEFT. + constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. - parseAction is the parse action to be associated with expressions matching this operator expression (the parse action tuple member may be omitted) + - lpar - expression for matching left-parentheses (default=Suppress('(')) + - rpar - expression for matching right-parentheses (default=Suppress(')')) """ ret = Forward() - lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) + lastExpr = baseExpr | ( lpar + ret + rpar ) for i,operDef in enumerate(opList): opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] if arity == 3: @@ -3569,6 +3545,7 @@ def operatorPrecedence( baseExpr, opList ): lastExpr = thisExpr ret << lastExpr return ret +operatorPrecedence = infixNotation dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") @@ -3715,8 +3692,7 @@ if __name__ == "__main__": print ("tokens.columns = " + str(tokens.columns)) print ("tokens.tables = " + str(tokens.tables)) print (tokens.asXML("SQL",True)) - except ParseBaseException: - err = sys.exc_info()[1] + except ParseBaseException, err: print (teststring + "->") print (err.line) print (" "*(err.column-1) + "^")