diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 12b17b7f0a..180433532d 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -2,7 +2,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' __appname__ = 'calibre' -__version__ = '0.4.127' +__version__ = '0.4.128' __author__ = "Kovid Goyal " ''' Various run time constants. diff --git a/src/calibre/devices/cybookg3/driver.py b/src/calibre/devices/cybookg3/driver.py index 3737de3d7d..6c026c8669 100644 --- a/src/calibre/devices/cybookg3/driver.py +++ b/src/calibre/devices/cybookg3/driver.py @@ -28,6 +28,7 @@ class CYBOOKG3(USBMS): STORAGE_CARD_VOLUME_LABEL = 'Cybook Gen 3 Storage Card' EBOOK_DIR_MAIN = "eBooks" + SUPPORTS_SUB_DIRS = True def delete_books(self, paths, end_session=True): for path in paths: @@ -46,3 +47,8 @@ class CYBOOKG3(USBMS): for filen in fnmatch.filter(files, filename + "*.t2b"): os.unlink(os.path.join(p, filen)) + try: + os.removedirs(os.path.dirname(path)) + except: + pass + diff --git a/src/calibre/devices/scanner.py b/src/calibre/devices/scanner.py index 26ea67ea44..f0cd9a66eb 100644 --- a/src/calibre/devices/scanner.py +++ b/src/calibre/devices/scanner.py @@ -60,8 +60,9 @@ class DeviceScanner(object): def is_device_connected(self, device): if iswindows: vid, pid = 'vid_%4.4x'%device.VENDOR_ID, 'pid_%4.4x'%device.PRODUCT_ID + vidd, pidd = 'vid_%i'%device.VENDOR_ID, 'pid_%i'%device.PRODUCT_ID for device_id in self.devices: - if vid in device_id and pid in device_id: + if (vid in device_id or vidd in device_id) and (pid in device_id or pidd in device_id): if self.test_bcd_windows(device_id, getattr(device, 'BCD', None)): if device.can_handle(device_id): return True diff --git a/src/calibre/devices/usbms/driver.py b/src/calibre/devices/usbms/driver.py index 234277ffaf..187a4c1a50 100644 --- a/src/calibre/devices/usbms/driver.py +++ b/src/calibre/devices/usbms/driver.py @@ -15,9 +15,10 @@ from calibre.devices.errors import FreeSpaceError from calibre.devices.mime import MIME_MAP class USBMS(Device): + FORMATS = [] EBOOK_DIR_MAIN = '' EBOOK_DIR_CARD = '' - FORMATS = [] + SUPPORTS_SUB_DIRS = False def __init__(self, key='-1', log_packets=False, report_progress=None): pass @@ -57,10 +58,18 @@ class USBMS(Device): path = os.path.join(self._main_prefix, self.EBOOK_DIR_MAIN) else: path = os.path.join(self._card_prefix, self.EBOOK_DIR_CARD) - - sizes = map(os.path.getsize, files) + + def get_size(obj): + if hasattr(obj, 'seek'): + obj.seek(0, os.SEEK_END) + size = obj.tell() + obj.seek(0) + return size + return os.path.getsize(obj) + + sizes = map(get_size, files) size = sum(sizes) - + if on_card and size > self.free_space()[2] - 1024*1024: raise FreeSpaceError(_("There is insufficient free space on the storage card")) if not on_card and size > self.free_space()[0] - 2*1024*1024: @@ -68,17 +77,42 @@ class USBMS(Device): paths = [] names = iter(names) + metadata = iter(metadata) for infile in files: - filepath = os.path.join(path, names.next()) + newpath = path + + if self.SUPPORTS_SUB_DIRS: + mdata = metadata.next() + + if 'tags' in mdata.keys(): + for tag in mdata['tags']: + if tag.startswith('/'): + newpath += tag + newpath = os.path.normpath(newpath) + break + + if not os.path.exists(newpath): + os.makedirs(newpath) + + filepath = os.path.join(newpath, names.next()) paths.append(filepath) - shutil.copy2(infile, filepath) + if hasattr(infile, 'read'): + infile.seek(0) + + dest = open(filepath, 'wb') + shutil.copyfileobj(infile, dest, 10*1024*1024) + + dest.flush() + dest.close() + else: + shutil.copy2(infile, filepath) return zip(paths, cycle([on_card])) @classmethod - def add_books_to_metadata(cls, locations, metadata, booklists): + def add_books_to_metadata(cls, locations, metadata, booklists): for location in locations: path = location[0] on_card = 1 if location[1] else 0 @@ -91,6 +125,10 @@ class USBMS(Device): if os.path.exists(path): # Delete the ebook os.unlink(path) + try: + os.removedirs(os.path.dirname(path)) + except: + pass @classmethod def remove_books_from_metadata(cls, paths, booklists): @@ -99,7 +137,6 @@ class USBMS(Device): for book in bl: if path.endswith(book.path): bl.remove(book) - break def sync_booklists(self, booklists, end_session=True): # There is no meta data on the device to update. The device is treated diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index 9165e99313..413bea4801 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -77,6 +77,8 @@ def check_links(opf_path, pretty_print): html_files.append(os.path.abspath(content(f))) for path in html_files: + if not os.access(path, os.R_OK): + continue base = os.path.dirname(path) root = html.fromstring(open(content(path), 'rb').read(), parser=parser) for element, attribute, link, pos in list(root.iterlinks()): diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index ad5b040e68..05b624f811 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -249,7 +249,7 @@ class MetaInformation(object): ans = u'' ans += u'Title : ' + unicode(self.title) + u'\n' if self.authors: - ans += u'Author : ' + (', '.join(self.authors) if self.authors is not None else u'None') + ans += u'Author : ' + (' & '.join(self.authors) if self.authors is not None else _('Unknown')) ans += ((' [' + self.author_sort + ']') if self.author_sort else '') + u'\n' if self.publisher: ans += u'Publisher: '+ unicode(self.publisher) + u'\n' diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index 31b9ef99b3..63ef7b0b14 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -33,7 +33,7 @@ class EXTHHeader(object): self.length, self.num_items = struct.unpack('>LL', raw[4:12]) raw = raw[12:] pos = 0 - self.mi = MetaInformation('Unknown', ['Unknown']) + self.mi = MetaInformation(_('Unknown'), [_('Unknown')]) self.has_fake_cover = True for i in range(self.num_items): @@ -63,7 +63,9 @@ class EXTHHeader(object): def process_metadata(self, id, content, codec): if id == 100: - self.mi.authors = [content.decode(codec, 'ignore').strip()] + if self.mi.authors == [_('Unknown')]: + self.mi.authors = [] + self.mi.authors.append(content.decode(codec, 'ignore').strip()) elif id == 101: self.mi.publisher = content.decode(codec, 'ignore').strip() elif id == 103: diff --git a/src/calibre/gui2/dialogs/job_view.ui b/src/calibre/gui2/dialogs/job_view.ui index 953887ce7a..ffd4419da9 100644 --- a/src/calibre/gui2/dialogs/job_view.ui +++ b/src/calibre/gui2/dialogs/job_view.ui @@ -28,9 +28,6 @@ true - - 400 - diff --git a/src/calibre/gui2/images/news/tomshardware_de.png b/src/calibre/gui2/images/news/tomshardware_de.png new file mode 100644 index 0000000000..c31412474c Binary files /dev/null and b/src/calibre/gui2/images/news/tomshardware_de.png differ diff --git a/src/calibre/library/static/gui.js b/src/calibre/library/static/gui.js index b0a88db7f9..17f6f7f8fd 100644 --- a/src/calibre/library/static/gui.js +++ b/src/calibre/library/static/gui.js @@ -7,8 +7,8 @@ var column_titles = { 'rating' : 'Rating', 'date' : 'Date', 'tags' : 'Tags', - 'series' : 'Series', -} + 'series' : 'Series' +}; String.prototype.format = function() { var pattern = /\{\d+\}/g; @@ -47,7 +47,7 @@ function render_book(book) { // Render title cell var title = '{0}'.format(book.attr("title")) + '
'; var id = book.attr("id"); - var comments = $.trim(book.text()).replace(/\n\n/, '
'); + var comments = $.trim(book.text()).replace(/\n\n/, '
'); var formats = new Array(); var size = (parseFloat(book.attr('size'))/(1024*1024)).toFixed(1); var tags = book.attr('tags').replace(/,/g, ', '); @@ -70,22 +70,22 @@ function render_book(book) { authors += jQuery.trim(_authors[i]).replace(/ /g, ' ')+'
'; } if (authors) { authors = authors.slice(0, authors.length-6); } - + // Render rating cell var _rating = parseFloat(book.attr('rating'))/2.; var rating = ''; for (i = 0; i < _rating; i++) { rating += '★'} - + // Render date cell var _date = Date.parseExact(book.attr('timestamp'), 'yyyy/MM/dd HH:mm:ss'); var date = _date.toString('d MMM yyyy').replace(/ /g, ' '); - + // Render series cell var series = book.attr("series") if (series) { series += ' [{0}]'.format(book.attr('series_index')); } - + var cells = { 'title' : title, 'authors' : authors, @@ -93,12 +93,12 @@ function render_book(book) { 'date' : date, 'series' : series }; - + var row = ''; for (i = 0; i < cmap.length; i++) { row += '{1}'.format(cmap[i], cells[cmap[i]]); } - return '{1}'.format(id, row); + return '{1}'.format(id, row); } function fetch_library_books(start, num, timeout, sort, order, search) { @@ -112,15 +112,15 @@ function fetch_library_books(start, num, timeout, sort, order, search) { last_search = search; last_sort = sort; last_sort_order = order; - + if (current_library_request != null) { current_library_request.abort(); current_library_request = null; } - + $('#cover_pane').css('visibility', 'hidden'); $('#loading').css('visibility', 'visible'); - + current_library_request = $.ajax({ type: "GET", url: "library", @@ -128,18 +128,18 @@ function fetch_library_books(start, num, timeout, sort, order, search) { cache: false, timeout: timeout, //milliseconds dataType: "xml", - + error : function(XMLHttpRequest, textStatus, errorThrown) { - alert('Error: '+textStatus+'\n\n'+errorThrown); + alert('Error: '+textStatus+'\n\n'+errorThrown); }, - + success : function(xml, textStatus) { var library = $(xml).find('library'); total = parseInt(library.attr('total')); var num = parseInt(library.attr('num')); var start = parseInt(library.attr('start')); update_count_bar(start, num, total); - var display = ''; + var display = ''; library.find('book').each( function() { var book = $(this); var row = render_book(book); @@ -170,18 +170,18 @@ function fetch_library_books(start, num, timeout, sort, order, search) { $('#cover_pane').css('visibility', 'visible'); } }); - - + + layout(); $('#book_list tbody tr:even()').css('background-color', '#eeeeee'); }, - + complete : function(XMLHttpRequest, textStatus) { current_library_request = null; document.getElementById('main').scrollTop = 0; $('#loading').css('visibility', 'hidden'); } - + }); } @@ -196,7 +196,7 @@ function update_count_bar(start, num, total) { left.css('opacity', (start <= 0) ? 0.3 : 1); var right = cb.find('#right'); right.css('opacity', (start + num >= total) ? 0.3 : 1); - + } function setup_count_bar() { @@ -205,7 +205,7 @@ function setup_count_bar() { fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search); } }); - + $('#count_bar * img:eq(1)').click(function(){ if (last_start > 0) { var new_start = last_start - last_num; @@ -215,14 +215,14 @@ function setup_count_bar() { fetch_library_books(new_start, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search); } }); - + $('#count_bar * img:eq(2)').click(function(){ if (last_start + last_num < total) { var new_start = last_start + last_num; fetch_library_books(new_start, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search); } }); - + $('#count_bar * img:eq(3)').click(function(){ if (total - last_num > 0) { fetch_library_books(total - last_num, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, last_search); @@ -234,7 +234,7 @@ function setup_count_bar() { function search() { var search = $.trim($('#search_box * #s').val()); - fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT, + fetch_library_books(0, last_num, LIBRARY_FETCH_TIMEOUT, last_sort, last_sort_order, search); } @@ -245,11 +245,11 @@ function setup_sorting() { $('table#book_list thead tr td').mouseover(function() { this.style.backgroundColor = "#fff2a8"; }); - + $('table#book_list thead tr td').mouseout(function() { this.style.backgroundColor = "inherit"; }); - + for (i = 0; i < cmap.length; i++) { $('table#book_list span#{0}_sort'.format(cmap[i])).parent().click(function() { var sort_indicator = $($(this).find('span')); @@ -258,7 +258,7 @@ function setup_sorting() { var col = id.slice(0, id.indexOf("_")); var order = 'ascending'; var html = '↑'; - + if (sort_indicator.html() == '↑') { order = 'descending'; html = '↓'; } @@ -291,13 +291,13 @@ function layout() { $(function() { // document is ready create_table_headers(); - + // Setup widgets setup_sorting(); setup_count_bar(); $('#search_box * #s').val(''); $(window).resize(layout); - + $($('#book_list * span#date_sort').parent()).click(); }); diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 4b3c217698..0fb1bc8996 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -102,7 +102,7 @@ Device Integration What devices does |app| support? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -At the moment |app| has full support for the SONY PRS 500/505/700 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. +At the moment |app| has full support for the SONY PRS 500/505/700, Cybook Gen 3 as well as the iPhone. In addition, using the :guilabel:`Save to disk` function you can use it with any ebook reader that exports itself as a USB disk. I used |app| to transfer some books to my reader, and now the SONY software hangs every time I connect the reader? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/calibre/parallel.py b/src/calibre/parallel.py index 45ceff5132..6cbe1c96e4 100644 --- a/src/calibre/parallel.py +++ b/src/calibre/parallel.py @@ -286,7 +286,7 @@ def write(socket, msg, timeout=5): def read(socket, timeout=5): ''' Read a message from `socket`. The message must have been sent with the :function:`write` - function. Raises a `RuntimeError` if the message is corrpted. Can return an + function. Raises a `RuntimeError` if the message is corrupted. Can return an empty string. ''' if isworker: @@ -299,7 +299,12 @@ def read(socket, timeout=5): if not msg: break if length is None: - length, msg = int(msg[:12]), msg[12:] + try: + length, msg = int(msg[:12]), msg[12:] + except ValueError: + if DEBUG: + print >>sys.__stdout__, 'read(%s):'%('worker' if isworker else 'overseer'), 'no length in', msg + return '' buf.write(msg) if buf.tell() >= length: break diff --git a/src/calibre/utils/pyparsing.py b/src/calibre/utils/pyparsing.py index 85e8c873dd..5404758186 100644 --- a/src/calibre/utils/pyparsing.py +++ b/src/calibre/utils/pyparsing.py @@ -1,3600 +1,3670 @@ -# module pyparsing.py -# -# Copyright (c) 2003-2008 Paul T. McGuire -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -#from __future__ import generators - -__doc__ = \ -""" -pyparsing module - Classes and methods to define and execute parsing grammars - -The pyparsing module is an alternative approach to creating and executing simple grammars, -vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you -don't need to learn a new syntax for defining grammars or matching expressions - the parsing module -provides a library of classes that you use to construct the grammar directly in Python. - -Here is a program to parse "Hello, World!" (or any greeting of the form ", !"):: - - from pyparsing import Word, alphas - - # define grammar of a greeting - greet = Word( alphas ) + "," + Word( alphas ) + "!" - - hello = "Hello, World!" - print hello, "->", greet.parseString( hello ) - -The program outputs the following:: - - Hello, World! -> ['Hello', ',', 'World', '!'] - -The Python representation of the grammar is quite readable, owing to the self-explanatory -class names, and the use of '+', '|' and '^' operators. - -The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an -object with named attributes. - -The pyparsing module handles some of the problems that are typically vexing when writing text parsers: - - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) - - quoted strings - - embedded comments -""" - -__version__ = "1.5.0" -__versionTime__ = "28 May 2008 10:05" -__author__ = "Paul McGuire " - -import string -from weakref import ref as wkref -import copy,sys -import warnings -import re -import sre_constants -import xml.sax.saxutils -#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) - -__all__ = [ -'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', -'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', -'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', -'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', -'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', -'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', -'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', -'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', -'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', -'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', -'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', -'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', -'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', -'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', -'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', -'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', -'indentedBlock', -] - - -""" -Detect if we are running version 3.X and make appropriate changes -Robert A. Clark -""" -if sys.version_info[0] > 2: - _PY3K = True - _MAX_INT = sys.maxsize - basestring = str -else: - _PY3K = False - _MAX_INT = sys.maxint - -if not _PY3K: - def _ustr(obj): - """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries - str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It - then < returns the unicode object | encodes it with the default encoding | ... >. - """ - try: - # If this works, then _ustr(obj) has the same behaviour as str(obj), so - # it won't break any existing code. - return str(obj) - - except UnicodeEncodeError: - # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) - # state that "The return value must be a string object". However, does a - # unicode object (being a subclass of basestring) count as a "string - # object"? - # If so, then return a unicode object: - return unicode(obj) - # Else encode it... but how? There are many choices... :) - # Replace unprintables with escape codes? - #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') - # Replace unprintables with question marks? - #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') - # ... -else: - _ustr = str - -def _str2dict(strg): - return dict( [(c,0) for c in strg] ) - #~ return set( [c for c in strg] ) - -class _Constants(object): - pass - -if not _PY3K: - alphas = string.lowercase + string.uppercase -else: - alphas = string.ascii_lowercase + string.ascii_uppercase -nums = string.digits -hexnums = nums + "ABCDEFabcdef" -alphanums = alphas + nums -_bslash = "\\" -printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) - -class ParseBaseException(Exception): - """base exception class for all parsing runtime exceptions""" - __slots__ = ( "loc","msg","pstr","parserElement" ) - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, pstr, loc=0, msg=None, elem=None ): - self.loc = loc - if msg is None: - self.msg = pstr - self.pstr = "" - else: - self.msg = msg - self.pstr = pstr - self.parserElement = elem - - def __getattr__( self, aname ): - """supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - """ - if( aname == "lineno" ): - return lineno( self.loc, self.pstr ) - elif( aname in ("col", "column") ): - return col( self.loc, self.pstr ) - elif( aname == "line" ): - return line( self.loc, self.pstr ) - else: - raise AttributeError(aname) - - def __str__( self ): - return "%s (at char %d), (line:%d, col:%d)" % \ - ( self.msg, self.loc, self.lineno, self.column ) - def __repr__( self ): - return _ustr(self) - def markInputline( self, markerString = ">!<" ): - """Extracts the exception line from the input string, and marks - the location of the exception with a special symbol. - """ - line_str = self.line - line_column = self.column - 1 - if markerString: - line_str = "".join( [line_str[:line_column], - markerString, line_str[line_column:]]) - return line_str.strip() - -class ParseException(ParseBaseException): - """exception thrown when parse expressions don't match class; - supported attributes by name are: - - lineno - returns the line number of the exception text - - col - returns the column number of the exception text - - line - returns the line containing the exception text - """ - pass - -class ParseFatalException(ParseBaseException): - """user-throwable exception thrown when inconsistent parse content - is found; stops all parsing immediately""" - pass - -class ParseSyntaxException(ParseFatalException): - """just like ParseFatalException, but thrown internally when an - ErrorStop indicates that parsing is to stop immediately because - an unbacktrackable syntax error has been found""" - def __init__(self, pe): - ParseFatalException.__init__(self, pe.pstr, pe.loc, pe.msg, pe.parserElement) - -#~ class ReparseException(ParseBaseException): - #~ """Experimental class - parse actions can raise this exception to cause - #~ pyparsing to reparse the input string: - #~ - with a modified input string, and/or - #~ - with a modified start location - #~ Set the values of the ReparseException in the constructor, and raise the - #~ exception in a parse action to cause pyparsing to use the new string/location. - #~ Setting the values as None causes no change to be made. - #~ """ - #~ def __init_( self, newstring, restartLoc ): - #~ self.newParseText = newstring - #~ self.reparseLoc = restartLoc - -class RecursiveGrammarException(Exception): - """exception thrown by validate() if the grammar could be improperly recursive""" - def __init__( self, parseElementList ): - self.parseElementTrace = parseElementList - - def __str__( self ): - return "RecursiveGrammarException: %s" % self.parseElementTrace - -class _ParseResultsWithOffset(object): - def __init__(self,p1,p2): - self.tup = (p1,p2) - def __getitem__(self,i): - return self.tup[i] - def __repr__(self): - return repr(self.tup) - -class ParseResults(object): - """Structured parse results, to provide multiple means of access to the parsed data: - - as a list (len(results)) - - by list index (results[0], results[1], etc.) - - by attribute (results.) - """ - __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) - def __new__(cls, toklist, name=None, asList=True, modal=True ): - if isinstance(toklist, cls): - return toklist - retobj = object.__new__(cls) - retobj.__doinit = True - return retobj - - # Performance tuning: we construct a *lot* of these, so keep this - # constructor as small and fast as possible - def __init__( self, toklist, name=None, asList=True, modal=True ): - if self.__doinit: - self.__doinit = False - self.__name = None - self.__parent = None - self.__accumNames = {} - if isinstance(toklist, list): - self.__toklist = toklist[:] - else: - self.__toklist = [toklist] - self.__tokdict = dict() - - # this line is related to debugging the asXML bug - #~ asList = False - - if name: - if not modal: - self.__accumNames[name] = 0 - if isinstance(name,int): - name = _ustr(name) # will always return a str, but use _ustr for consistency - self.__name = name - if not toklist in (None,'',[]): - if isinstance(toklist,basestring): - toklist = [ toklist ] - if asList: - if isinstance(toklist,ParseResults): - self[name] = _ParseResultsWithOffset(toklist.copy(),-1) - else: - self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1) - self[name].__name = name - else: - try: - self[name] = toklist[0] - except (KeyError,TypeError): - self[name] = toklist - - def __getitem__( self, i ): - if isinstance( i, (int,slice) ): - return self.__toklist[i] - else: - if i not in self.__accumNames: - return self.__tokdict[i][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[i] ]) - - def __setitem__( self, k, v ): - if isinstance(v,_ParseResultsWithOffset): - self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] - sub = v[0] - elif isinstance(k,int): - self.__toklist[k] = v - sub = v - else: - self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] - sub = v - if isinstance(sub,ParseResults): - sub.__parent = wkref(self) - - def __delitem__( self, i ): - if isinstance(i,(int,slice)): - mylen = len( self.__toklist ) - del self.__toklist[i] - - # convert int to slice - if isinstance(i, int): - if i < 0: - i += mylen - i = slice(i, i+1) - # get removed indices - removed = list(range(*i.indices(mylen))) - removed.reverse() - # fixup indices in token dictionary - for name in self.__tokdict: - occurrences = self.__tokdict[name] - for j in removed: - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) - else: - del self.__tokdict[i] - - def __contains__( self, k ): - return k in self.__tokdict - - def __len__( self ): return len( self.__toklist ) - def __bool__(self): return len( self.__toklist ) > 0 - __nonzero__ = __bool__ - def __iter__( self ): return iter( self.__toklist ) - def __reversed__( self ): return iter( reversed(self.__toklist) ) - def keys( self ): - """Returns all named result keys.""" - return self.__tokdict.keys() - - def pop( self, index=-1 ): - """Removes and returns item at specified index (default=last). - Will work with either numeric indices or dict-key indicies.""" - ret = self[index] - del self[index] - return ret - - def get(self, key, defaultValue=None): - """Returns named result matching the given key, or if there is no - such name, then returns the given defaultValue or None if no - defaultValue is specified.""" - if key in self: - return self[key] - else: - return defaultValue - - def insert( self, index, insStr ): - self.__toklist.insert(index, insStr) - # fixup indices in token dictionary - for name in self.__tokdict: - occurrences = self.__tokdict[name] - for k, (value, position) in enumerate(occurrences): - occurrences[k] = _ParseResultsWithOffset(value, position + (position > j)) - - def items( self ): - """Returns all named result keys and values as a list of tuples.""" - return [(k,self[k]) for k in self.__tokdict] - - def values( self ): - """Returns all named result values.""" - return [ v[-1][0] for v in self.__tokdict.values() ] - - def __getattr__( self, name ): - if name not in self.__slots__: - if name in self.__tokdict: - if name not in self.__accumNames: - return self.__tokdict[name][-1][0] - else: - return ParseResults([ v[0] for v in self.__tokdict[name] ]) - else: - return "" - return None - - def __add__( self, other ): - ret = self.copy() - ret += other - return ret - - def __iadd__( self, other ): - if other.__tokdict: - offset = len(self.__toklist) - addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) - otheritems = other.__tokdict.items() - otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) - for (k,vlist) in otheritems for v in vlist] - for k,v in otherdictitems: - self[k] = v - if isinstance(v[0],ParseResults): - v[0].__parent = wkref(self) - self.__toklist += other.__toklist - self.__accumNames.update( other.__accumNames ) - del other - return self - - def __repr__( self ): - return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) - - def __str__( self ): - out = "[" - sep = "" - for i in self.__toklist: - if isinstance(i, ParseResults): - out += sep + _ustr(i) - else: - out += sep + repr(i) - sep = ", " - out += "]" - return out - - def _asStringList( self, sep='' ): - out = [] - for item in self.__toklist: - if out and sep: - out.append(sep) - if isinstance( item, ParseResults ): - out += item._asStringList() - else: - out.append( _ustr(item) ) - return out - - def asList( self ): - """Returns the parse results as a nested list of matching tokens, all converted to strings.""" - out = [] - for res in self.__toklist: - if isinstance(res,ParseResults): - out.append( res.asList() ) - else: - out.append( res ) - return out - - def asDict( self ): - """Returns the named parse results as dictionary.""" - return dict( self.items() ) - - def copy( self ): - """Returns a new copy of a ParseResults object.""" - ret = ParseResults( self.__toklist ) - ret.__tokdict = self.__tokdict.copy() - ret.__parent = self.__parent - ret.__accumNames.update( self.__accumNames ) - ret.__name = self.__name - return ret - - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): - """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" - nl = "\n" - out = [] - namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() - for v in vlist ] ) - nextLevelIndent = indent + " " - - # collapse out indents if formatting is not desired - if not formatted: - indent = "" - nextLevelIndent = "" - nl = "" - - selfTag = None - if doctag is not None: - selfTag = doctag - else: - if self.__name: - selfTag = self.__name - - if not selfTag: - if namedItemsOnly: - return "" - else: - selfTag = "ITEM" - - out += [ nl, indent, "<", selfTag, ">" ] - - worklist = self.__toklist - for i,res in enumerate(worklist): - if isinstance(res,ParseResults): - if i in namedItems: - out += [ res.asXML(namedItems[i], - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - out += [ res.asXML(None, - namedItemsOnly and doctag is None, - nextLevelIndent, - formatted)] - else: - # individual token, see if there is a name for it - resTag = None - if i in namedItems: - resTag = namedItems[i] - if not resTag: - if namedItemsOnly: - continue - else: - resTag = "ITEM" - xmlBodyText = xml.sax.saxutils.escape(_ustr(res)) - out += [ nl, nextLevelIndent, "<", resTag, ">", - xmlBodyText, - "" ] - - out += [ nl, indent, "" ] - return "".join(out) - - def __lookup(self,sub): - for k,vlist in self.__tokdict.items(): - for v,loc in vlist: - if sub is v: - return k - return None - - def getName(self): - """Returns the results name for this token expression.""" - if self.__name: - return self.__name - elif self.__parent: - par = self.__parent() - if par: - return par.__lookup(self) - else: - return None - elif (len(self) == 1 and - len(self.__tokdict) == 1 and - self.__tokdict.values()[0][0][1] in (0,-1)): - return self.__tokdict.keys()[0] - else: - return None - - def dump(self,indent='',depth=0): - """Diagnostic method for listing out the contents of a ParseResults. - Accepts an optional indent argument so that this string can be embedded - in a nested display of other data.""" - out = [] - out.append( indent+_ustr(self.asList()) ) - keys = self.items() - keys.sort() - for k,v in keys: - if out: - out.append('\n') - out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) - if isinstance(v,ParseResults): - if v.keys(): - #~ out.append('\n') - out.append( v.dump(indent,depth+1) ) - #~ out.append('\n') - else: - out.append(_ustr(v)) - else: - out.append(_ustr(v)) - #~ out.append('\n') - return "".join(out) - - # add support for pickle protocol - def __getstate__(self): - return ( self.__toklist, - ( self.__tokdict.copy(), - self.__parent is not None and self.__parent() or None, - self.__accumNames, - self.__name ) ) - - def __setstate__(self,state): - self.__toklist = state[0] - self.__tokdict, \ - par, \ - inAccumNames, \ - self.__name = state[1] - self.__accumNames = {} - self.__accumNames.update(inAccumNames) - if par is not None: - self.__parent = wkref(par) - else: - self.__parent = None - - -def col (loc,strg): - """Returns current column within a string, counting newlines as line separators. - The first column is number 1. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{ParserElement.parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return (loc} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - return strg.count("\n",0,loc) + 1 - -def line( loc, strg ): - """Returns the line of text containing loc within a string, counting newlines as line separators. - """ - lastCR = strg.rfind("\n", 0, loc) - nextCR = strg.find("\n", loc) - if nextCR > 0: - return strg[lastCR+1:nextCR] - else: - return strg[lastCR+1:] - -def _defaultStartDebugAction( instring, loc, expr ): - print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - -def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): - print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) - -def _defaultExceptionDebugAction( instring, loc, expr, exc ): - print ("Exception raised:" + _ustr(exc)) - -def nullDebugAction(*args): - """'Do-nothing' debug action, to suppress debugging output during parsing.""" - pass - -class ParserElement(object): - """Abstract base level parser element class.""" - DEFAULT_WHITE_CHARS = " \n\t\r" - - def setDefaultWhitespaceChars( chars ): - """Overrides the default whitespace chars - """ - ParserElement.DEFAULT_WHITE_CHARS = chars - setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) - - def __init__( self, savelist=False ): - self.parseAction = list() - self.failAction = None - #~ self.name = "" # don't define self.name, let subclasses try/except upcall - self.strRepr = None - self.resultsName = None - self.saveAsList = savelist - self.skipWhitespace = True - self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - self.copyDefaultWhiteChars = True - self.mayReturnEmpty = False # used when checking for left-recursion - self.keepTabs = False - self.ignoreExprs = list() - self.debug = False - self.streamlined = False - self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index - self.errmsg = "" - self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) - self.debugActions = ( None, None, None ) #custom debug actions - self.re = None - self.callPreparse = True # used to avoid redundant calls to preParse - self.callDuringTry = False - - def copy( self ): - """Make a copy of this ParserElement. Useful for defining different parse actions - for the same parsing pattern, using copies of the original parse element.""" - cpy = copy.copy( self ) - cpy.parseAction = self.parseAction[:] - cpy.ignoreExprs = self.ignoreExprs[:] - if self.copyDefaultWhiteChars: - cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS - return cpy - - def setName( self, name ): - """Define name for this expression, for use in debugging.""" - self.name = name - self.errmsg = "Expected " + self.name - if hasattr(self,"exception"): - self.exception.msg = self.errmsg - return self - - def setResultsName( self, name, listAllMatches=False ): - """Define name for referencing matching tokens as a nested attribute - of the returned parse results. - NOTE: this returns a *copy* of the original ParserElement object; - this is so that the client can define a basic element, such as an - integer, and reference it in multiple places with different names. - """ - newself = self.copy() - newself.resultsName = name - newself.modalResults = not listAllMatches - return newself - - def setBreak(self,breakFlag = True): - """Method to invoke the Python pdb debugger when this element is - about to be parsed. Set breakFlag to True to enable, False to - disable. - """ - if breakFlag: - _parseMethod = self._parse - def breaker(instring, loc, doActions=True, callPreParse=True): - import pdb - pdb.set_trace() - _parseMethod( instring, loc, doActions, callPreParse ) - breaker._originalParseMethod = _parseMethod - self._parse = breaker - else: - if hasattr(self._parse,"_originalParseMethod"): - self._parse = self._parse._originalParseMethod - return self - - def _normalizeParseActionArgs( f ): - """Internal method used to decorate parse actions that take fewer than 3 arguments, - so that all parse actions can be called as f(s,l,t).""" - STAR_ARGS = 4 - - try: - restore = None - if isinstance(f,type): - restore = f - f = f.__init__ - if not _PY3K: - codeObj = f.func_code - else: - codeObj = f.code - if codeObj.co_flags & STAR_ARGS: - return f - numargs = codeObj.co_argcount - if not _PY3K: - if hasattr(f,"im_self"): - numargs -= 1 - else: - if hasattr(f,"__self__"): - numargs -= 1 - if restore: - f = restore - except AttributeError: - try: - if not _PY3K: - call_im_func_code = f.__call__.im_func.func_code - else: - call_im_func_code = f.__code__ - - # not a function, must be a callable object, get info from the - # im_func binding of its bound __call__ method - if call_im_func_code.co_flags & STAR_ARGS: - return f - numargs = call_im_func_code.co_argcount - if not _PY3K: - if hasattr(f.__call__,"im_self"): - numargs -= 1 - else: - if hasattr(f.__call__,"__self__"): - numargs -= 0 - except AttributeError: - if not _PY3K: - call_func_code = f.__call__.func_code - else: - call_func_code = f.__call__.__code__ - # not a bound method, get info directly from __call__ method - if call_func_code.co_flags & STAR_ARGS: - return f - numargs = call_func_code.co_argcount - if not _PY3K: - if hasattr(f.__call__,"im_self"): - numargs -= 1 - else: - if hasattr(f.__call__,"__self__"): - numargs -= 1 - - - #~ print ("adding function %s with %d args" % (f.func_name,numargs)) - if numargs == 3: - return f - else: - if numargs > 3: - def tmp(s,l,t): - return f(f.__call__.__self__, s,l,t) - if numargs == 2: - def tmp(s,l,t): - return f(l,t) - elif numargs == 1: - def tmp(s,l,t): - return f(t) - else: #~ numargs == 0: - def tmp(s,l,t): - return f() - try: - tmp.__name__ = f.__name__ - except (AttributeError,TypeError): - # no need for special handling if attribute doesnt exist - pass - try: - tmp.__doc__ = f.__doc__ - except (AttributeError,TypeError): - # no need for special handling if attribute doesnt exist - pass - try: - tmp.__dict__.update(f.__dict__) - except (AttributeError,TypeError): - # no need for special handling if attribute doesnt exist - pass - return tmp - _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) - - def setParseAction( self, *fns, **kwargs ): - """Define action to perform when successfully matching parse element definition. - Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), - fn(loc,toks), fn(toks), or just fn(), where: - - s = the original string being parsed (see note below) - - loc = the location of the matching substring - - toks = a list of the matched tokens, packaged as a ParseResults object - If the functions in fns modify the tokens, they can return them as the return - value from fn, and the modified list of tokens will replace the original. - Otherwise, fn does not need to return any value. - - Note: the default parsing behavior is to expand tabs in the input string - before starting the parsing process. See L{I{parseString}} for more information - on parsing strings containing s, and suggested methods to maintain a - consistent view of the parsed string, the parse location, and line and column - positions within the parsed string. - """ - self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) - self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) - return self - - def addParseAction( self, *fns, **kwargs ): - """Add parse action to expression's list of parse actions. See L{I{setParseAction}}.""" - self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) - self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) - return self - - def setFailAction( self, fn ): - """Define action to perform if parsing fails at this expression. - Fail acton fn is a callable function that takes the arguments - fn(s,loc,expr,err) where: - - s = string being parsed - - loc = location where expression match was attempted and failed - - expr = the parse expression that failed - - err = the exception thrown - The function returns no value. It may throw ParseFatalException - if it is desired to stop parsing immediately.""" - self.failAction = fn - return self - - def _skipIgnorables( self, instring, loc ): - exprsFound = True - while exprsFound: - exprsFound = False - for e in self.ignoreExprs: - try: - while 1: - loc,dummy = e._parse( instring, loc ) - exprsFound = True - except ParseException: - pass - return loc - - def preParse( self, instring, loc ): - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - - if self.skipWhitespace: - wt = self.whiteChars - instrlen = len(instring) - while loc < instrlen and instring[loc] in wt: - loc += 1 - - return loc - - def parseImpl( self, instring, loc, doActions=True ): - return loc, [] - - def postParse( self, instring, loc, tokenlist ): - return tokenlist - - #~ @profile - def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): - debugging = ( self.debug ) #and doActions ) - - if debugging or self.failAction: - #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) - if (self.debugActions[0] ): - self.debugActions[0]( instring, loc, self ) - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = loc - try: - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - except ParseBaseException, err: - #~ print ("Exception raised:", err) - if self.debugActions[2]: - self.debugActions[2]( instring, tokensStart, self, err ) - if self.failAction: - self.failAction( instring, tokensStart, self, err ) - raise - else: - if callPreParse and self.callPreparse: - preloc = self.preParse( instring, loc ) - else: - preloc = loc - tokensStart = loc - if self.mayIndexError or loc >= len(instring): - try: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - except IndexError: - raise ParseException( instring, len(instring), self.errmsg, self ) - else: - loc,tokens = self.parseImpl( instring, preloc, doActions ) - - tokens = self.postParse( instring, loc, tokens ) - - retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) - if self.parseAction and (doActions or self.callDuringTry): - if debugging: - try: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - except ParseBaseException, err: - #~ print "Exception raised in user parse action:", err - if (self.debugActions[2] ): - self.debugActions[2]( instring, tokensStart, self, err ) - raise - else: - for fn in self.parseAction: - tokens = fn( instring, tokensStart, retTokens ) - if tokens is not None: - retTokens = ParseResults( tokens, - self.resultsName, - asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), - modal=self.modalResults ) - - if debugging: - #~ print ("Matched",self,"->",retTokens.asList()) - if (self.debugActions[1] ): - self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) - - return loc, retTokens - - def tryParse( self, instring, loc ): - try: - return self._parse( instring, loc, doActions=False )[0] - except ParseFatalException: - raise ParseException( instring, loc, self.errmsg, self) - - # this method gets repeatedly called during backtracking with the same arguments - - # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression - def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): - lookup = (self,instring,loc,callPreParse,doActions) - if lookup in ParserElement._exprArgCache: - value = ParserElement._exprArgCache[ lookup ] - if isinstance(value,Exception): - raise value - return value - else: - try: - value = self._parseNoCache( instring, loc, doActions, callPreParse ) - ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) - return value - except ParseBaseException, pe: - ParserElement._exprArgCache[ lookup ] = pe - raise - - _parse = _parseNoCache - - # argument cache for optimizing repeated calls when backtracking through recursive expressions - _exprArgCache = {} - def resetCache(): - ParserElement._exprArgCache.clear() - resetCache = staticmethod(resetCache) - - _packratEnabled = False - def enablePackrat(): - """Enables "packrat" parsing, which adds memoizing to the parsing logic. - Repeated parse attempts at the same string location (which happens - often in many complex grammars) can immediately return a cached value, - instead of re-executing parsing/validating code. Memoizing is done of - both valid results and parsing exceptions. - - This speedup may break existing programs that use parse actions that - have side-effects. For this reason, packrat parsing is disabled when - you first import pyparsing. To activate the packrat feature, your - program must call the class method ParserElement.enablePackrat(). If - your program uses psyco to "compile as you go", you must call - enablePackrat before calling psyco.full(). If you do not do this, - Python will crash. For best results, call enablePackrat() immediately - after importing pyparsing. - """ - if not ParserElement._packratEnabled: - ParserElement._packratEnabled = True - ParserElement._parse = ParserElement._parseCache - enablePackrat = staticmethod(enablePackrat) - - def parseString( self, instring, parseAll=False ): - """Execute the parse expression with the given string. - This is the main interface to the client code, once the complete - expression has been built. - - If you want the grammar to require that the entire input string be - successfully parsed, then set parseAll to True (equivalent to ending - the grammar with StringEnd()). - - Note: parseString implicitly calls expandtabs() on the input string, - in order to report proper column numbers in parse actions. - If the input string contains tabs and - the grammar uses parse actions that use the loc argument to index into the - string being parsed, you can ensure you have a consistent view of the input - string by: - - calling parseWithTabs on your grammar before calling parseString - (see L{I{parseWithTabs}}) - - define your parse action using the full (s,loc,toks) signature, and - reference the input string using the parse action's s argument - - explictly expand the tabs in your input string before calling - parseString - """ - ParserElement.resetCache() - if not self.streamlined: - self.streamline() - #~ self.saveAsList = True - for e in self.ignoreExprs: - e.streamline() - if not self.keepTabs: - instring = instring.expandtabs() - loc, tokens = self._parse( instring, 0 ) - if parseAll: - StringEnd()._parse( instring, loc ) - return tokens - - def scanString( self, instring, maxMatches=_MAX_INT ): - """Scan the input string for expression matches. Each match will return the - matching tokens, start location, and end location. May be called with optional - maxMatches argument, to clip scanning after 'n' matches are found. - - Note that the start and end locations are reported relative to the string - being parsed. See L{I{parseString}} for more information on parsing - strings with embedded tabs.""" - if not self.streamlined: - self.streamline() - for e in self.ignoreExprs: - e.streamline() - - if not self.keepTabs: - instring = _ustr(instring).expandtabs() - instrlen = len(instring) - loc = 0 - preparseFn = self.preParse - parseFn = self._parse - ParserElement.resetCache() - matches = 0 - while loc <= instrlen and matches < maxMatches: - try: - preloc = preparseFn( instring, loc ) - nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) - except ParseException: - loc = preloc+1 - else: - matches += 1 - yield tokens, preloc, nextLoc - loc = nextLoc - - def transformString( self, instring ): - """Extension to scanString, to modify matching text with modified tokens that may - be returned from a parse action. To use transformString, define a grammar and - attach a parse action to it that modifies the returned token list. - Invoking transformString() on a target string will then scan for matches, - and replace the matched text patterns according to the logic in the parse - action. transformString() returns the resulting transformed string.""" - out = [] - lastE = 0 - # force preservation of s, to minimize unwanted transformation of string, and to - # keep string locs straight between transformString and scanString - self.keepTabs = True - for t,s,e in self.scanString( instring ): - out.append( instring[lastE:s] ) - if t: - if isinstance(t,ParseResults): - out += t.asList() - elif isinstance(t,list): - out += t - else: - out.append(t) - lastE = e - out.append(instring[lastE:]) - return "".join(map(_ustr,out)) - - def searchString( self, instring, maxMatches=_MAX_INT ): - """Another extension to scanString, simplifying the access to the tokens found - to match the given parse expression. May be called with optional - maxMatches argument, to clip searching after 'n' matches are found. - """ - return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) - - def __add__(self, other ): - """Implementation of + operator - returns And""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return And( [ self, other ] ) - - def __radd__(self, other ): - """Implementation of + operator when left operand is not a ParserElement""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other + self - - def __sub__(self, other): - """Implementation of - operator, returns And with error stop""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return And( [ self, And._ErrorStop(), other ] ) - - def __rsub__(self, other ): - """Implementation of - operator when left operand is not a ParserElement""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other - self - - def __mul__(self,other): - if isinstance(other,int): - minElements, optElements = other,0 - elif isinstance(other,tuple): - if len(other)==0: - other = (None,None) - elif len(other)==1: - other = (other[0],None) - if len(other)==2: - if other[0] is None: - other = (0, other[1]) - if isinstance(other[0],int) and other[1] is None: - if other[0] == 0: - return ZeroOrMore(self) - if other[0] == 1: - return OneOrMore(self) - else: - return self*other[0] + ZeroOrMore(self) - elif isinstance(other[0],int) and isinstance(other[1],int): - minElements, optElements = other - optElements -= minElements - else: - raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) - else: - raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects") - else: - raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) - - if minElements < 0: - raise ValueError("cannot multiply ParserElement by negative value") - if optElements < 0: - raise ValueError("second tuple value must be greater or equal to first tuple value") - if minElements == optElements == 0: - raise ValueError("cannot multiply ParserElement by 0 or (0,0)") - - if (optElements): - def makeOptionalList(n): - if n>1: - return Optional(self + makeOptionalList(n-1)) - else: - return Optional(self) - if minElements: - if minElements == 1: - ret = self + makeOptionalList(optElements) - else: - ret = And([self]*minElements) + makeOptionalList(optElements) - else: - ret = makeOptionalList(optElements) - else: - if minElements == 1: - ret = self - else: - ret = And([self]*minElements) - return ret - - def __rmul__(self, other): - return self.__mul__(other) - - def __or__(self, other ): - """Implementation of | operator - returns MatchFirst""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return MatchFirst( [ self, other ] ) - - def __ror__(self, other ): - """Implementation of | operator when left operand is not a ParserElement""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other | self - - def __xor__(self, other ): - """Implementation of ^ operator - returns Or""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Or( [ self, other ] ) - - def __rxor__(self, other ): - """Implementation of ^ operator when left operand is not a ParserElement""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other ^ self - - def __and__(self, other ): - """Implementation of & operator - returns Each""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return Each( [ self, other ] ) - - def __rand__(self, other ): - """Implementation of & operator when left operand is not a ParserElement""" - if isinstance( other, basestring ): - other = Literal( other ) - if not isinstance( other, ParserElement ): - warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), - SyntaxWarning, stacklevel=2) - return None - return other & self - - def __invert__( self ): - """Implementation of ~ operator - returns NotAny""" - return NotAny( self ) - - def __call__(self, name): - """Shortcut for setResultsName, with listAllMatches=default:: - userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") - could be written as:: - userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") - """ - return self.setResultsName(name) - - def suppress( self ): - """Suppresses the output of this ParserElement; useful to keep punctuation from - cluttering up returned output. - """ - return Suppress( self ) - - def leaveWhitespace( self ): - """Disables the skipping of whitespace before matching the characters in the - ParserElement's defined pattern. This is normally only used internally by - the pyparsing module, but may be needed in some whitespace-sensitive grammars. - """ - self.skipWhitespace = False - return self - - def setWhitespaceChars( self, chars ): - """Overrides the default whitespace chars - """ - self.skipWhitespace = True - self.whiteChars = chars - self.copyDefaultWhiteChars = False - return self - - def parseWithTabs( self ): - """Overrides default behavior to expand s to spaces before parsing the input string. - Must be called before parseString when the input grammar contains elements that - match characters.""" - self.keepTabs = True - return self - - def ignore( self, other ): - """Define expression to be ignored (e.g., comments) while doing pattern - matching; may be called repeatedly, to define multiple comment or other - ignorable patterns. - """ - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - self.ignoreExprs.append( other ) - else: - self.ignoreExprs.append( Suppress( other ) ) - return self - - def setDebugActions( self, startAction, successAction, exceptionAction ): - """Enable display of debugging messages while doing pattern matching.""" - self.debugActions = (startAction or _defaultStartDebugAction, - successAction or _defaultSuccessDebugAction, - exceptionAction or _defaultExceptionDebugAction) - self.debug = True - return self - - def setDebug( self, flag=True ): - """Enable display of debugging messages while doing pattern matching. - Set flag to True to enable, False to disable.""" - if flag: - self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) - else: - self.debug = False - return self - - def __str__( self ): - return self.name - - def __repr__( self ): - return _ustr(self) - - def streamline( self ): - self.streamlined = True - self.strRepr = None - return self - - def checkRecursion( self, parseElementList ): - pass - - def validate( self, validateTrace=[] ): - """Check defined expressions for valid structure, check for infinite recursive definitions.""" - self.checkRecursion( [] ) - - def parseFile( self, file_or_filename ): - """Execute the parse expression on the given file or filename. - If a filename is specified (instead of a file object), - the entire file is opened, read, and closed before parsing. - """ - try: - file_contents = file_or_filename.read() - except AttributeError: - f = open(file_or_filename, "rb") - file_contents = f.read() - f.close() - return self.parseString(file_contents) - - def getException(self): - return ParseException("",0,self.errmsg,self) - - def __getattr__(self,aname): - if aname == "myException": - self.myException = ret = self.getException(); - return ret; - else: - raise AttributeError("no such attribute " + aname) - - def __eq__(self,other): - if isinstance(other, basestring): - try: - (self + StringEnd()).parseString(_ustr(other)) - return True - except ParseBaseException: - return False - else: - return super(ParserElement,self)==other - - def __hash__(self): - return hash(id(self)) - - def __req__(self,other): - return self == other - - -class Token(ParserElement): - """Abstract ParserElement subclass, for defining atomic matching patterns.""" - def __init__( self ): - super(Token,self).__init__( savelist=False ) - #self.myException = ParseException("",0,"",self) - - def setName(self, name): - s = super(Token,self).setName(name) - self.errmsg = "Expected " + self.name - #s.myException.msg = self.errmsg - return s - - -class Empty(Token): - """An empty token, will always match.""" - def __init__( self ): - super(Empty,self).__init__() - self.name = "Empty" - self.mayReturnEmpty = True - self.mayIndexError = False - - -class NoMatch(Token): - """A token that will never match.""" - def __init__( self ): - super(NoMatch,self).__init__() - self.name = "NoMatch" - self.mayReturnEmpty = True - self.mayIndexError = False - self.errmsg = "Unmatchable token" - #self.myException.msg = self.errmsg - - def parseImpl( self, instring, loc, doActions=True ): - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - -class Literal(Token): - """Token to exactly match a specified string.""" - def __init__( self, matchString ): - super(Literal,self).__init__() - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Literal; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.__class__ = Empty - self.name = '"%s"' % _ustr(self.match) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - #self.myException.msg = self.errmsg - self.mayIndexError = False - - # Performance tuning: this routine gets called a *lot* - # if this is a single character match string and the first character matches, - # short-circuit as quickly as possible, and avoid calling startswith - #~ @profile - def parseImpl( self, instring, loc, doActions=True ): - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) ): - return loc+self.matchLen, self.match - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc -_L = Literal - -class Keyword(Token): - """Token to exactly match a specified string as a keyword, that is, it must be - immediately followed by a non-keyword character. Compare with Literal:: - Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. - Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' - Accepts two optional constructor arguments in addition to the keyword string: - identChars is a string of characters that would be valid identifier characters, - defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive - matching, default is False. - """ - DEFAULT_KEYWORD_CHARS = alphanums+"_$" - - def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): - super(Keyword,self).__init__() - self.match = matchString - self.matchLen = len(matchString) - try: - self.firstMatchChar = matchString[0] - except IndexError: - warnings.warn("null string passed to Keyword; use Empty() instead", - SyntaxWarning, stacklevel=2) - self.name = '"%s"' % self.match - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = False - #self.myException.msg = self.errmsg - self.mayIndexError = False - self.caseless = caseless - if caseless: - self.caselessmatch = matchString.upper() - identChars = identChars.upper() - self.identChars = _str2dict(identChars) - - def parseImpl( self, instring, loc, doActions=True ): - if self.caseless: - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and - (loc == 0 or instring[loc-1].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - else: - if (instring[loc] == self.firstMatchChar and - (self.matchLen==1 or instring.startswith(self.match,loc)) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and - (loc == 0 or instring[loc-1] not in self.identChars) ): - return loc+self.matchLen, self.match - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - def copy(self): - c = super(Keyword,self).copy() - c.identChars = Keyword.DEFAULT_KEYWORD_CHARS - return c - - def setDefaultKeywordChars( chars ): - """Overrides the default Keyword chars - """ - Keyword.DEFAULT_KEYWORD_CHARS = chars - setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) - - -class CaselessLiteral(Literal): - """Token to match a specified string, ignoring case of letters. - Note: the matched results will always be in the case of the given - match string, NOT the case of the input text. - """ - def __init__( self, matchString ): - super(CaselessLiteral,self).__init__( matchString.upper() ) - # Preserve the defining literal. - self.returnString = matchString - self.name = "'%s'" % self.returnString - self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg - - def parseImpl( self, instring, loc, doActions=True ): - if instring[ loc:loc+self.matchLen ].upper() == self.match: - return loc+self.matchLen, self.returnString - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class CaselessKeyword(Keyword): - def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): - super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) - - def parseImpl( self, instring, loc, doActions=True ): - if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and - (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): - return loc+self.matchLen, self.match - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class Word(Token): - """Token for matching words composed of allowed character sets. - Defined with string containing all allowed initial characters, - an optional string containing allowed body characters (if omitted, - defaults to the initial character set), and an optional minimum, - maximum, and/or exact length. The default value for min is 1 (a - minimum value < 1 is not valid); the default values for max and exact - are 0, meaning no maximum or exact length restriction. - """ - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): - super(Word,self).__init__() - self.initCharsOrig = initChars - self.initChars = _str2dict(initChars) - if bodyChars : - self.bodyCharsOrig = bodyChars - self.bodyChars = _str2dict(bodyChars) - else: - self.bodyCharsOrig = initChars - self.bodyChars = _str2dict(initChars) - - self.maxSpecified = max > 0 - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg - self.mayIndexError = False - self.asKeyword = asKeyword - - if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): - if self.bodyCharsOrig == self.initCharsOrig: - self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) - elif len(self.bodyCharsOrig) == 1: - self.reString = "%s[%s]*" % \ - (re.escape(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - else: - self.reString = "[%s][%s]*" % \ - (_escapeRegexRangeChars(self.initCharsOrig), - _escapeRegexRangeChars(self.bodyCharsOrig),) - if self.asKeyword: - self.reString = r"\b"+self.reString+r"\b" - try: - self.re = re.compile( self.reString ) - except: - self.re = None - - def parseImpl( self, instring, loc, doActions=True ): - if self.re: - result = self.re.match(instring,loc) - if not result: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - loc = result.end() - return loc,result.group() - - if not(instring[ loc ] in self.initChars): - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - start = loc - loc += 1 - instrlen = len(instring) - bodychars = self.bodyChars - maxloc = start + self.maxLen - maxloc = min( maxloc, instrlen ) - while loc < maxloc and instring[loc] in bodychars: - loc += 1 - - throwException = False - if loc - start < self.minLen: - throwException = True - if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: - throwException = True - if self.asKeyword: - if (start>0 and instring[start-1] in bodychars) or (loc4: - return s[:4]+"..." - else: - return s - - if ( self.initCharsOrig != self.bodyCharsOrig ): - self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) - else: - self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) - - return self.strRepr - - -class Regex(Token): - """Token for matching strings that match a given regular expression. - Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. - """ - def __init__( self, pattern, flags=0): - """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" - super(Regex,self).__init__() - - if len(pattern) == 0: - warnings.warn("null string passed to Regex; use Empty() instead", - SyntaxWarning, stacklevel=2) - - self.pattern = pattern - self.flags = flags - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % pattern, - SyntaxWarning, stacklevel=2) - raise - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = self.re.match(instring,loc) - if not result: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - loc = result.end() - d = result.groupdict() - ret = ParseResults(result.group()) - if d: - for k in d: - ret[k] = d[k] - return loc,ret - - def __str__( self ): - try: - return super(Regex,self).__str__() - except: - pass - - if self.strRepr is None: - self.strRepr = "Re:(%s)" % repr(self.pattern) - - return self.strRepr - - -class QuotedString(Token): - """Token for matching strings that are delimited by quoting characters. - """ - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): - """ - Defined with the following parameters: - - quoteChar - string of one or more characters defining the quote delimiting string - - escChar - character to escape quotes, typically backslash (default=None) - - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) - - multiline - boolean indicating whether quotes can span multiple lines (default=False) - - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) - - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) - """ - super(QuotedString,self).__init__() - - # remove white space from quote chars - wont work anyway - quoteChar = quoteChar.strip() - if len(quoteChar) == 0: - warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - if endQuoteChar is None: - endQuoteChar = quoteChar - else: - endQuoteChar = endQuoteChar.strip() - if len(endQuoteChar) == 0: - warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) - raise SyntaxError() - - self.quoteChar = quoteChar - self.quoteCharLen = len(quoteChar) - self.firstQuoteChar = quoteChar[0] - self.endQuoteChar = endQuoteChar - self.endQuoteCharLen = len(endQuoteChar) - self.escChar = escChar - self.escQuote = escQuote - self.unquoteResults = unquoteResults - - if multiline: - self.flags = re.MULTILINE | re.DOTALL - self.pattern = r'%s(?:[^%s%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - else: - self.flags = 0 - self.pattern = r'%s(?:[^%s\n\r%s]' % \ - ( re.escape(self.quoteChar), - _escapeRegexRangeChars(self.endQuoteChar[0]), - (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) - if len(self.endQuoteChar) > 1: - self.pattern += ( - '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), - _escapeRegexRangeChars(self.endQuoteChar[i])) - for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' - ) - if escQuote: - self.pattern += (r'|(?:%s)' % re.escape(escQuote)) - if escChar: - self.pattern += (r'|(?:%s.)' % re.escape(escChar)) - self.escCharReplacePattern = re.escape(self.escChar)+"(.)" - self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) - - try: - self.re = re.compile(self.pattern, self.flags) - self.reString = self.pattern - except sre_constants.error: - warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, - SyntaxWarning, stacklevel=2) - raise - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg - self.mayIndexError = False - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None - if not result: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - loc = result.end() - ret = result.group() - - if self.unquoteResults: - - # strip off quotes - ret = ret[self.quoteCharLen:-self.endQuoteCharLen] - - if isinstance(ret,basestring): - # replace escaped characters - if self.escChar: - ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) - - # replace escaped quotes - if self.escQuote: - ret = ret.replace(self.escQuote, self.endQuoteChar) - - return loc, ret - - def __str__( self ): - try: - return super(QuotedString,self).__str__() - except: - pass - - if self.strRepr is None: - self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) - - return self.strRepr - - -class CharsNotIn(Token): - """Token for matching words composed of characters *not* in a given set. - Defined with string containing all disallowed characters, and an optional - minimum, maximum, and/or exact length. The default value for min is 1 (a - minimum value < 1 is not valid); the default values for max and exact - are 0, meaning no maximum or exact length restriction. - """ - def __init__( self, notChars, min=1, max=0, exact=0 ): - super(CharsNotIn,self).__init__() - self.skipWhitespace = False - self.notChars = notChars - - if min < 1: - raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - self.name = _ustr(self) - self.errmsg = "Expected " + self.name - self.mayReturnEmpty = ( self.minLen == 0 ) - #self.myException.msg = self.errmsg - self.mayIndexError = False - - def parseImpl( self, instring, loc, doActions=True ): - if instring[loc] in self.notChars: - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - start = loc - loc += 1 - notchars = self.notChars - maxlen = min( start+self.maxLen, len(instring) ) - while loc < maxlen and \ - (instring[loc] not in notchars): - loc += 1 - - if loc - start < self.minLen: - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - return loc, instring[start:loc] - - def __str__( self ): - try: - return super(CharsNotIn, self).__str__() - except: - pass - - if self.strRepr is None: - if len(self.notChars) > 4: - self.strRepr = "!W:(%s...)" % self.notChars[:4] - else: - self.strRepr = "!W:(%s)" % self.notChars - - return self.strRepr - -class White(Token): - """Special matching class for matching whitespace. Normally, whitespace is ignored - by pyparsing grammars. This class is included when some whitespace structures - are significant. Define with a string containing the whitespace characters to be - matched; default is " \\t\\n". Also takes optional min, max, and exact arguments, - as defined for the Word class.""" - whiteStrs = { - " " : "", - "\t": "", - "\n": "", - "\r": "", - "\f": "", - } - def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): - super(White,self).__init__() - self.matchWhite = ws - self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) - #~ self.leaveWhitespace() - self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) - self.mayReturnEmpty = True - self.errmsg = "Expected " + self.name - #self.myException.msg = self.errmsg - - self.minLen = min - - if max > 0: - self.maxLen = max - else: - self.maxLen = _MAX_INT - - if exact > 0: - self.maxLen = exact - self.minLen = exact - - def parseImpl( self, instring, loc, doActions=True ): - if not(instring[ loc ] in self.matchWhite): - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - start = loc - loc += 1 - maxloc = start + self.maxLen - maxloc = min( maxloc, len(instring) ) - while loc < maxloc and instring[loc] in self.matchWhite: - loc += 1 - - if loc - start < self.minLen: - #~ raise ParseException( instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - - return loc, instring[start:loc] - - -class _PositionToken(Token): - def __init__( self ): - super(_PositionToken,self).__init__() - self.name=self.__class__.__name__ - self.mayReturnEmpty = True - self.mayIndexError = False - -class GoToColumn(_PositionToken): - """Token to advance to a specific column of input text; useful for tabular report scraping.""" - def __init__( self, colno ): - super(GoToColumn,self).__init__() - self.col = colno - - def preParse( self, instring, loc ): - if col(loc,instring) != self.col: - instrlen = len(instring) - if self.ignoreExprs: - loc = self._skipIgnorables( instring, loc ) - while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : - loc += 1 - return loc - - def parseImpl( self, instring, loc, doActions=True ): - thiscol = col( loc, instring ) - if thiscol > self.col: - raise ParseException( instring, loc, "Text not in expected column", self ) - newloc = loc + self.col - thiscol - ret = instring[ loc: newloc ] - return newloc, ret - -class LineStart(_PositionToken): - """Matches if current position is at the beginning of a line within the parse string""" - def __init__( self ): - super(LineStart,self).__init__() - self.setWhitespaceChars( " \t" ) - self.errmsg = "Expected start of line" - #self.myException.msg = self.errmsg - - def preParse( self, instring, loc ): - preloc = super(LineStart,self).preParse(instring,loc) - if instring[preloc] == "\n": - loc += 1 - return loc - - def parseImpl( self, instring, loc, doActions=True ): - if not( loc==0 or - (loc == self.preParse( instring, 0 )) or - (instring[loc-1] == "\n") ): #col(loc, instring) != 1: - #~ raise ParseException( instring, loc, "Expected start of line" ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - return loc, [] - -class LineEnd(_PositionToken): - """Matches if current position is at the end of a line within the parse string""" - def __init__( self ): - super(LineEnd,self).__init__() - self.setWhitespaceChars( " \t" ) - self.errmsg = "Expected end of line" - #self.myException.msg = self.errmsg - - def parseImpl( self, instring, loc, doActions=True ): - if loc len(instring): - return loc, [] - else: - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class WordStart(_PositionToken): - """Matches if the current position is at the beginning of a Word, and - is not preceded by any character in a given set of wordChars - (default=printables). To emulate the \b behavior of regular expressions, - use WordStart(alphanums). WordStart will also match at the beginning of - the string being parsed, or at the beginning of a line. - """ - def __init__(self, wordChars = printables): - super(WordStart,self).__init__() - self.wordChars = _str2dict(wordChars) - self.errmsg = "Not at the start of a word" - - def parseImpl(self, instring, loc, doActions=True ): - if loc != 0: - if (instring[loc-1] in self.wordChars or - instring[loc] not in self.wordChars): - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - return loc, [] - -class WordEnd(_PositionToken): - """Matches if the current position is at the end of a Word, and - is not followed by any character in a given set of wordChars - (default=printables). To emulate the \b behavior of regular expressions, - use WordEnd(alphanums). WordEnd will also match at the end of - the string being parsed, or at the end of a line. - """ - def __init__(self, wordChars = printables): - super(WordEnd,self).__init__() - self.wordChars = _str2dict(wordChars) - self.skipWhitespace = False - self.errmsg = "Not at the end of a word" - - def parseImpl(self, instring, loc, doActions=True ): - instrlen = len(instring) - if instrlen>0 and loc maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - else: - if loc2 > maxMatchLoc: - maxMatchLoc = loc2 - maxMatchExp = e - - if maxMatchLoc < 0: - if maxException is not None: - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - return maxMatchExp._parse( instring, loc, doActions ) - - def __ixor__(self, other ): - if isinstance( other, basestring ): - other = Literal( other ) - return self.append( other ) #Or( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class MatchFirst(ParseExpression): - """Requires that at least one ParseExpression is found. - If two expressions match, the first one listed is the one that will match. - May be constructed using the '|' operator. - """ - def __init__( self, exprs, savelist = False ): - super(MatchFirst,self).__init__(exprs, savelist) - if exprs: - self.mayReturnEmpty = False - for e in self.exprs: - if e.mayReturnEmpty: - self.mayReturnEmpty = True - break - else: - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - maxExcLoc = -1 - maxException = None - for e in self.exprs: - try: - ret = e._parse( instring, loc, doActions ) - return ret - except ParseException, err: - if err.loc > maxExcLoc: - maxException = err - maxExcLoc = err.loc - except IndexError: - if len(instring) > maxExcLoc: - maxException = ParseException(instring,len(instring),e.errmsg,self) - maxExcLoc = len(instring) - - # only got here if no expression matched, raise exception for match that made it the furthest - else: - if maxException is not None: - raise maxException - else: - raise ParseException(instring, loc, "no defined alternatives to match", self) - - def __ior__(self, other ): - if isinstance( other, basestring ): - other = Literal( other ) - return self.append( other ) #MatchFirst( [ self, other ] ) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class Each(ParseExpression): - """Requires all given ParseExpressions to be found, but in any order. - Expressions may be separated by whitespace. - May be constructed using the '&' operator. - """ - def __init__( self, exprs, savelist = True ): - super(Each,self).__init__(exprs, savelist) - self.mayReturnEmpty = True - for e in self.exprs: - if not e.mayReturnEmpty: - self.mayReturnEmpty = False - break - self.skipWhitespace = True - self.initExprGroups = True - - def parseImpl( self, instring, loc, doActions=True ): - if self.initExprGroups: - self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ] - self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] - self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] - self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] - self.required += self.multirequired - self.initExprGroups = False - tmpLoc = loc - tmpReqd = self.required[:] - tmpOpt = self.optionals[:] - matchOrder = [] - - keepMatching = True - while keepMatching: - tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired - failed = [] - for e in tmpExprs: - try: - tmpLoc = e.tryParse( instring, tmpLoc ) - except ParseException: - failed.append(e) - else: - matchOrder.append(e) - if e in tmpReqd: - tmpReqd.remove(e) - elif e in tmpOpt: - tmpOpt.remove(e) - if len(failed) == len(tmpExprs): - keepMatching = False - - if tmpReqd: - missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) - raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) - - # add any unmatched Optionals, in case they have default values defined - matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt) - - resultlist = [] - for e in matchOrder: - loc,results = e._parse(instring,loc,doActions) - resultlist.append(results) - - finalResults = ParseResults([]) - for r in resultlist: - dups = {} - for k in r.keys(): - if k in finalResults.keys(): - tmp = ParseResults(finalResults[k]) - tmp += ParseResults(r[k]) - dups[k] = tmp - finalResults += ParseResults(r) - for k,v in dups.items(): - finalResults[k] = v - return loc, finalResults - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" - - return self.strRepr - - def checkRecursion( self, parseElementList ): - subRecCheckList = parseElementList[:] + [ self ] - for e in self.exprs: - e.checkRecursion( subRecCheckList ) - - -class ParseElementEnhance(ParserElement): - """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" - def __init__( self, expr, savelist=False ): - super(ParseElementEnhance,self).__init__(savelist) - if isinstance( expr, basestring ): - expr = Literal(expr) - self.expr = expr - self.strRepr = None - if expr is not None: - self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty - self.setWhitespaceChars( expr.whiteChars ) - self.skipWhitespace = expr.skipWhitespace - self.saveAsList = expr.saveAsList - self.callPreparse = expr.callPreparse - self.ignoreExprs.extend(expr.ignoreExprs) - - def parseImpl( self, instring, loc, doActions=True ): - if self.expr is not None: - return self.expr._parse( instring, loc, doActions, callPreParse=False ) - else: - raise ParseException("",loc,self.errmsg,self) - - def leaveWhitespace( self ): - self.skipWhitespace = False - self.expr = self.expr.copy() - if self.expr is not None: - self.expr.leaveWhitespace() - return self - - def ignore( self, other ): - if isinstance( other, Suppress ): - if other not in self.ignoreExprs: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - else: - super( ParseElementEnhance, self).ignore( other ) - if self.expr is not None: - self.expr.ignore( self.ignoreExprs[-1] ) - return self - - def streamline( self ): - super(ParseElementEnhance,self).streamline() - if self.expr is not None: - self.expr.streamline() - return self - - def checkRecursion( self, parseElementList ): - if self in parseElementList: - raise RecursiveGrammarException( parseElementList+[self] ) - subRecCheckList = parseElementList[:] + [ self ] - if self.expr is not None: - self.expr.checkRecursion( subRecCheckList ) - - def validate( self, validateTrace=[] ): - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion( [] ) - - def __str__( self ): - try: - return super(ParseElementEnhance,self).__str__() - except: - pass - - if self.strRepr is None and self.expr is not None: - self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) - return self.strRepr - - -class FollowedBy(ParseElementEnhance): - """Lookahead matching of the given parse expression. FollowedBy - does *not* advance the parsing position within the input string, it only - verifies that the specified parse expression matches at the current - position. FollowedBy always returns a null token list.""" - def __init__( self, expr ): - super(FollowedBy,self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - self.expr.tryParse( instring, loc ) - return loc, [] - - -class NotAny(ParseElementEnhance): - """Lookahead to disallow matching with the given parse expression. NotAny - does *not* advance the parsing position within the input string, it only - verifies that the specified parse expression does *not* match at the current - position. Also, NotAny does *not* skip over leading whitespace. NotAny - always returns a null token list. May be constructed using the '~' operator.""" - def __init__( self, expr ): - super(NotAny,self).__init__(expr) - #~ self.leaveWhitespace() - self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs - self.mayReturnEmpty = True - self.errmsg = "Found unwanted token, "+_ustr(self.expr) - #self.myException = ParseException("",0,self.errmsg,self) - - def parseImpl( self, instring, loc, doActions=True ): - try: - self.expr.tryParse( instring, loc ) - except (ParseException,IndexError): - pass - else: - #~ raise ParseException(instring, loc, self.errmsg ) - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - return loc, [] - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "~{" + _ustr(self.expr) + "}" - - return self.strRepr - - -class ZeroOrMore(ParseElementEnhance): - """Optional repetition of zero or more of the given expression.""" - def __init__( self, expr ): - super(ZeroOrMore,self).__init__(expr) - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - tokens = [] - try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) - while 1: - if hasIgnoreExprs: - preloc = self._skipIgnorables( instring, loc ) - else: - preloc = loc - loc, tmptokens = self.expr._parse( instring, preloc, doActions ) - if tmptokens or tmptokens.keys(): - tokens += tmptokens - except (ParseException,IndexError): - pass - - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]..." - - return self.strRepr - - def setResultsName( self, name, listAllMatches=False ): - ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) - ret.saveAsList = True - return ret - - -class OneOrMore(ParseElementEnhance): - """Repetition of one or more of the given expression.""" - def parseImpl( self, instring, loc, doActions=True ): - # must be at least one - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - try: - hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) - while 1: - if hasIgnoreExprs: - preloc = self._skipIgnorables( instring, loc ) - else: - preloc = loc - loc, tmptokens = self.expr._parse( instring, preloc, doActions ) - if tmptokens or tmptokens.keys(): - tokens += tmptokens - except (ParseException,IndexError): - pass - - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "{" + _ustr(self.expr) + "}..." - - return self.strRepr - - def setResultsName( self, name, listAllMatches=False ): - ret = super(OneOrMore,self).setResultsName(name,listAllMatches) - ret.saveAsList = True - return ret - -class _NullToken(object): - def __bool__(self): - return False - __nonzero__ = __bool__ - def __str__(self): - return "" - -_optionalNotMatched = _NullToken() -class Optional(ParseElementEnhance): - """Optional matching of the given expression. - A default return string can also be specified, if the optional expression - is not found. - """ - def __init__( self, exprs, default=_optionalNotMatched ): - super(Optional,self).__init__( exprs, savelist=False ) - self.defaultValue = default - self.mayReturnEmpty = True - - def parseImpl( self, instring, loc, doActions=True ): - try: - loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) - except (ParseException,IndexError): - if self.defaultValue is not _optionalNotMatched: - if self.expr.resultsName: - tokens = ParseResults([ self.defaultValue ]) - tokens[self.expr.resultsName] = self.defaultValue - else: - tokens = [ self.defaultValue ] - else: - tokens = [] - return loc, tokens - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - if self.strRepr is None: - self.strRepr = "[" + _ustr(self.expr) + "]" - - return self.strRepr - - -class SkipTo(ParseElementEnhance): - """Token for skipping over all undefined text until the matched expression is found. - If include is set to true, the matched expression is also consumed. The ignore - argument is used to define grammars (typically quoted strings and comments) that - might contain false matches. - """ - def __init__( self, other, include=False, ignore=None ): - super( SkipTo, self ).__init__( other ) - if ignore is not None: - self.expr = self.expr.copy() - self.expr.ignore(ignore) - self.mayReturnEmpty = True - self.mayIndexError = False - self.includeMatch = include - self.asList = False - self.errmsg = "No match found for "+_ustr(self.expr) - #self.myException = ParseException("",0,self.errmsg,self) - - def parseImpl( self, instring, loc, doActions=True ): - startLoc = loc - instrlen = len(instring) - expr = self.expr - while loc <= instrlen: - try: - loc = expr._skipIgnorables( instring, loc ) - expr._parse( instring, loc, doActions=False, callPreParse=False ) - if self.includeMatch: - skipText = instring[startLoc:loc] - loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) - if mat: - skipRes = ParseResults( skipText ) - skipRes += mat - return loc, [ skipRes ] - else: - return loc, [ skipText ] - else: - return loc, [ instring[startLoc:loc] ] - except (ParseException,IndexError): - loc += 1 - exc = self.myException - exc.loc = loc - exc.pstr = instring - raise exc - -class Forward(ParseElementEnhance): - """Forward declaration of an expression to be defined later - - used for recursive grammars, such as algebraic infix notation. - When the expression is known, it is assigned to the Forward variable using the '<<' operator. - - Note: take care when assigning to Forward not to overlook precedence of operators. - Specifically, '|' has a lower precedence than '<<', so that:: - fwdExpr << a | b | c - will actually be evaluated as:: - (fwdExpr << a) | b | c - thereby leaving b and c out as parseable alternatives. It is recommended that you - explicitly group the values inserted into the Forward:: - fwdExpr << (a | b | c) - """ - def __init__( self, other=None ): - super(Forward,self).__init__( other, savelist=False ) - - def __lshift__( self, other ): - if isinstance( other, basestring ): - other = Literal(other) - self.expr = other - self.mayReturnEmpty = other.mayReturnEmpty - self.strRepr = None - self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty - self.setWhitespaceChars( self.expr.whiteChars ) - self.skipWhitespace = self.expr.skipWhitespace - self.saveAsList = self.expr.saveAsList - self.ignoreExprs.extend(self.expr.ignoreExprs) - return None - - def leaveWhitespace( self ): - self.skipWhitespace = False - return self - - def streamline( self ): - if not self.streamlined: - self.streamlined = True - if self.expr is not None: - self.expr.streamline() - return self - - def validate( self, validateTrace=[] ): - if self not in validateTrace: - tmp = validateTrace[:]+[self] - if self.expr is not None: - self.expr.validate(tmp) - self.checkRecursion([]) - - def __str__( self ): - if hasattr(self,"name"): - return self.name - - self.__class__ = _ForwardNoRecurse - try: - if self.expr is not None: - retString = _ustr(self.expr) - else: - retString = "None" - finally: - self.__class__ = Forward - return "Forward: "+retString - - def copy(self): - if self.expr is not None: - return super(Forward,self).copy() - else: - ret = Forward() - ret << self - return ret - -class _ForwardNoRecurse(Forward): - def __str__( self ): - return "..." - -class TokenConverter(ParseElementEnhance): - """Abstract subclass of ParseExpression, for converting parsed results.""" - def __init__( self, expr, savelist=False ): - super(TokenConverter,self).__init__( expr )#, savelist ) - self.saveAsList = False - -class Upcase(TokenConverter): - """Converter to upper case all matching tokens.""" - def __init__(self, *args): - super(Upcase,self).__init__(*args) - warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", - DeprecationWarning,stacklevel=2) - - def postParse( self, instring, loc, tokenlist ): - return list(map( string.upper, tokenlist )) - - -class Combine(TokenConverter): - """Converter to concatenate all matching tokens to a single string. - By default, the matching patterns must also be contiguous in the input string; - this can be disabled by specifying 'adjacent=False' in the constructor. - """ - def __init__( self, expr, joinString="", adjacent=True ): - super(Combine,self).__init__( expr ) - # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself - if adjacent: - self.leaveWhitespace() - self.adjacent = adjacent - self.skipWhitespace = True - self.joinString = joinString - - def ignore( self, other ): - if self.adjacent: - ParserElement.ignore(self, other) - else: - super( Combine, self).ignore( other ) - return self - - def postParse( self, instring, loc, tokenlist ): - retToks = tokenlist.copy() - del retToks[:] - retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) - - if self.resultsName and len(retToks.keys())>0: - return [ retToks ] - else: - return retToks - -class Group(TokenConverter): - """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" - def __init__( self, expr ): - super(Group,self).__init__( expr ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - return [ tokenlist ] - -class Dict(TokenConverter): - """Converter to return a repetitive expression as a list, but also as a dictionary. - Each element can also be referenced using the first token in the expression as its key. - Useful for tabular report scraping when the first column can be used as a item key. - """ - def __init__( self, exprs ): - super(Dict,self).__init__( exprs ) - self.saveAsList = True - - def postParse( self, instring, loc, tokenlist ): - for i,tok in enumerate(tokenlist): - if len(tok) == 0: - continue - ikey = tok[0] - if isinstance(ikey,int): - ikey = _ustr(tok[0]).strip() - if len(tok)==1: - tokenlist[ikey] = _ParseResultsWithOffset("",i) - elif len(tok)==2 and not isinstance(tok[1],ParseResults): - tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) - else: - dictvalue = tok.copy() #ParseResults(i) - del dictvalue[0] - if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) - else: - tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) - - if self.resultsName: - return [ tokenlist ] - else: - return tokenlist - - -class Suppress(TokenConverter): - """Converter for ignoring the results of a parsed expression.""" - def postParse( self, instring, loc, tokenlist ): - return [] - - def suppress( self ): - return self - - -class OnlyOnce(object): - """Wrapper for parse actions, to ensure they are only called once.""" - def __init__(self, methodCall): - self.callable = ParserElement._normalizeParseActionArgs(methodCall) - self.called = False - def __call__(self,s,l,t): - if not self.called: - results = self.callable(s,l,t) - self.called = True - return results - raise ParseException(s,l,"") - def reset(self): - self.called = False - -def traceParseAction(f): - """Decorator for debugging parse actions.""" - f = ParserElement._normalizeParseActionArgs(f) - def z(*paArgs): - thisFunc = f.func_name - s,l,t = paArgs[-3:] - if len(paArgs)>3: - thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc - sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) - try: - ret = f(*paArgs) - except Exception, exc: - sys.stderr.write( "<", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) - try: - if len(symbols)==len("".join(symbols)): - return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) - else: - return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) - except: - warnings.warn("Exception creating Regex for oneOf, building MatchFirst", - SyntaxWarning, stacklevel=2) - - - # last resort, just use MatchFirst - return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) - -def dictOf( key, value ): - """Helper to easily and clearly define a dictionary by specifying the respective patterns - for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens - in the proper order. The key pattern can include delimiting markers or punctuation, - as long as they are suppressed, thereby leaving the significant key text. The value - pattern can include named results, so that the Dict results can include named token - fields. - """ - return Dict( ZeroOrMore( Group ( key + value ) ) ) - -# convenience constants for positional expressions -empty = Empty().setName("empty") -lineStart = LineStart().setName("lineStart") -lineEnd = LineEnd().setName("lineEnd") -stringStart = StringStart().setName("stringStart") -stringEnd = StringEnd().setName("stringEnd") - -_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) -_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) -_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16))) -_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8))) -_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) -_charRange = Group(_singleChar + Suppress("-") + _singleChar) -_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" - -_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) - -def srange(s): - r"""Helper to easily define string ranges for use in Word construction. Borrows - syntax from regexp '[]' string range definitions:: - srange("[0-9]") -> "0123456789" - srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" - srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" - The input string must be enclosed in []'s, and the returned string is the expanded - character set joined into a single string. - The values enclosed in the []'s may be:: - a single character - an escaped character with a leading backslash (such as \- or \]) - an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) - an escaped octal character with a leading '\0' (\041, which is a '!' character) - a range of any of the above, separated by a dash ('a-z', etc.) - any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) - """ - try: - return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) - except: - return "" - -def matchOnlyAtCol(n): - """Helper method for defining parse actions that require matching at a specific - column in the input text. - """ - def verifyCol(strg,locn,toks): - if col(locn,strg) != n: - raise ParseException(strg,locn,"matched token not at column %d" % n) - return verifyCol - -def replaceWith(replStr): - """Helper method for common parse actions that simply return a literal value. Especially - useful when used with transformString(). - """ - def _replFunc(*args): - return [replStr] - return _replFunc - -def removeQuotes(s,l,t): - """Helper parse action for removing quotation marks from parsed quoted strings. - To use, add this parse action to quoted string using:: - quotedString.setParseAction( removeQuotes ) - """ - return t[0][1:-1] - -def upcaseTokens(s,l,t): - """Helper parse action to convert tokens to upper case.""" - return [ tt.upper() for tt in map(_ustr,t) ] - -def downcaseTokens(s,l,t): - """Helper parse action to convert tokens to lower case.""" - return [ tt.lower() for tt in map(_ustr,t) ] - -def keepOriginalText(s,startLoc,t): - """Helper parse action to preserve original parsed text, - overriding any nested parse actions.""" - try: - endloc = getTokensEndLoc() - except ParseException: - raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") - del t[:] - t += ParseResults(s[startLoc:endloc]) - return t - -def getTokensEndLoc(): - """Method to be called from within a parse action to determine the end - location of the parsed tokens.""" - import inspect - fstack = inspect.stack() - try: - # search up the stack (through intervening argument normalizers) for correct calling routine - for f in fstack[2:]: - if f[3] == "_parseNoCache": - endloc = f[0].f_locals["loc"] - return endloc - else: - raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") - finally: - del fstack - -def _makeTags(tagStr, xml): - """Internal helper to construct opening and closing tag expressions, given a tag name""" - if isinstance(tagStr,basestring): - resname = tagStr - tagStr = Keyword(tagStr, caseless=not xml) - else: - resname = tagStr.name - - tagAttrName = Word(alphas,alphanums+"_-:") - if (xml): - tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) - openTag = Suppress("<") + tagStr + \ - Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - else: - printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) - tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) - openTag = Suppress("<") + tagStr + \ - Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ - Optional( Suppress("=") + tagAttrValue ) ))) + \ - Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") - closeTag = Combine(_L("") - - openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) - closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % tagStr) - - return openTag, closeTag - -def makeHTMLTags(tagStr): - """Helper to construct opening and closing tag expressions for HTML, given a tag name""" - return _makeTags( tagStr, False ) - -def makeXMLTags(tagStr): - """Helper to construct opening and closing tag expressions for XML, given a tag name""" - return _makeTags( tagStr, True ) - -def withAttribute(*args,**attrDict): - """Helper to create a validating parse action to be used with start tags created - with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag - with a required attribute value, to avoid false matches on common tags such as - or
. - - Call withAttribute with a series of attribute names and values. Specify the list - of filter attributes names and values as: - - keyword arguments, as in (class="Customer",align="right"), or - - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) - For attribute names with a namespace prefix, you must use the second form. Attribute - names are matched insensitive to upper/lower case. - - To verify that the attribute exists, but without specifying a value, pass - withAttribute.ANY_VALUE as the value. - """ - if args: - attrs = args[:] - else: - attrs = attrDict.items() - attrs = [(k,v) for k,v in attrs] - def pa(s,l,tokens): - for attrName,attrValue in attrs: - if attrName not in tokens: - raise ParseException(s,l,"no matching attribute " + attrName) - if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: - raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % - (attrName, tokens[attrName], attrValue)) - return pa -withAttribute.ANY_VALUE = object() - -opAssoc = _Constants() -opAssoc.LEFT = object() -opAssoc.RIGHT = object() - -def operatorPrecedence( baseExpr, opList ): - """Helper method for constructing grammars of expressions made up of - operators working in a precedence hierarchy. Operators may be unary or - binary, left- or right-associative. Parse actions can also be attached - to operator expressions. - - Parameters: - - baseExpr - expression representing the most basic element for the nested - - opList - list of tuples, one for each operator precedence level in the - expression grammar; each tuple is of the form - (opExpr, numTerms, rightLeftAssoc, parseAction), where: - - opExpr is the pyparsing expression for the operator; - may also be a string, which will be converted to a Literal; - if numTerms is 3, opExpr is a tuple of two expressions, for the - two operators separating the 3 terms - - numTerms is the number of terms for this operator (must - be 1, 2, or 3) - - rightLeftAssoc is the indicator whether the operator is - right or left associative, using the pyparsing-defined - constants opAssoc.RIGHT and opAssoc.LEFT. - - parseAction is the parse action to be associated with - expressions matching this operator expression (the - parse action tuple member may be omitted) - """ - ret = Forward() - lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) - for i,operDef in enumerate(opList): - opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] - if arity == 3: - if opExpr is None or len(opExpr) != 2: - raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") - opExpr1, opExpr2 = opExpr - thisExpr = Forward()#.setName("expr%d" % i) - if rightLeftAssoc == opAssoc.LEFT: - if arity == 1: - matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) - else: - matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ - Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - elif rightLeftAssoc == opAssoc.RIGHT: - if arity == 1: - # try to avoid LR with this extra test - if not isinstance(opExpr, Optional): - opExpr = Optional(opExpr) - matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) - elif arity == 2: - if opExpr is not None: - matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) - else: - matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) - elif arity == 3: - matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ - Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) - else: - raise ValueError("operator must be unary (1), binary (2), or ternary (3)") - else: - raise ValueError("operator must indicate right or left associativity") - if pa: - matchExpr.setParseAction( pa ) - thisExpr << ( matchExpr | lastExpr ) - lastExpr = thisExpr - ret << lastExpr - return ret - -dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") -sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") -quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") -unicodeString = Combine(_L('u') + quotedString.copy()) - -def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): - """Helper method for defining nested lists enclosed in opening and closing - delimiters ("(" and ")" are the default). - - Parameters: - - opener - opening character for a nested list (default="("); can also be a pyparsing expression - - closer - closing character for a nested list (default=")"); can also be a pyparsing expression - - content - expression for items within the nested lists (default=None) - - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) - - If an expression is not provided for the content argument, the nested - expression will capture all whitespace-delimited content between delimiters - as a list of separate values. - - Use the ignoreExpr argument to define expressions that may contain - opening or closing characters that should not be treated as opening - or closing characters for nesting, such as quotedString or a comment - expression. Specify multiple expressions using an Or or MatchFirst. - The default is quotedString, but if no expressions are to be ignored, - then pass None for this argument. - """ - if opener == closer: - raise ValueError("opening and closing strings cannot be the same") - if content is None: - if isinstance(opener,basestring) and isinstance(closer,basestring): - if ignoreExpr is not None: - content = (Combine(OneOrMore(~ignoreExpr + - CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) - ).setParseAction(lambda t:t[0].strip())) - else: - content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip())) - else: - raise ValueError("opening and closing arguments must be strings if no content expression is given") - ret = Forward() - if ignoreExpr is not None: - ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) - else: - ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) - return ret - -def indentedBlock(blockStatementExpr, indentStack, indent=True): - """Helper method for defining space-delimited indentation blocks, such as - those used to define block statements in Python source code. - - Parameters: - - blockStatementExpr - expression defining syntax of statement that - is repeated within the indented block - - indentStack - list created by caller to manage indentation stack - (multiple statementWithIndentedBlock expressions within a single grammar - should share a common indentStack) - - indent - boolean indicating whether block must be indented beyond the - the current level; set to False for block of left-most statements - (default=True) - - A valid block must contain at least one blockStatement. - """ - def checkPeerIndent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if curCol != indentStack[-1]: - if curCol > indentStack[-1]: - raise ParseFatalException(s,l,"illegal nesting") - raise ParseException(s,l,"not a peer entry") - - def checkSubIndent(s,l,t): - curCol = col(l,s) - if curCol > indentStack[-1]: - indentStack.append( curCol ) - else: - raise ParseException(s,l,"not a subentry") - - def checkUnindent(s,l,t): - if l >= len(s): return - curCol = col(l,s) - if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): - raise ParseException(s,l,"not an unindent") - indentStack.pop() - - NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) - INDENT = Empty() + Empty().setParseAction(checkSubIndent) - PEER = Empty().setParseAction(checkPeerIndent) - UNDENT = Empty().setParseAction(checkUnindent) - if indent: - smExpr = Group( Optional(NL) + - FollowedBy(blockStatementExpr) + - INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) - else: - smExpr = Group( Optional(NL) + - (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) - blockStatementExpr.ignore("\\" + LineEnd()) - return smExpr - -alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") -punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") - -anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) -commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";") -_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '")) -replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None - -# it's easy to get these comment structures wrong - they're very common, so may as well make them available -cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") - -htmlComment = Regex(r"") -restOfLine = Regex(r".*").leaveWhitespace() -dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") -cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) - print ("tokens = " + str(tokens)) - print ("tokens.columns = " + str(tokens.columns)) - print ("tokens.tables = " + str(tokens.tables)) - print (tokens.asXML("SQL",True)) - except ParseBaseException,err: - print (teststring + "->") - print (err.line) - print (" "*(err.column-1) + "^") - print (err) - print() - - selectToken = CaselessLiteral( "select" ) - fromToken = CaselessLiteral( "from" ) - - ident = Word( alphas, alphanums + "_$" ) - columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) - columnNameList = Group( delimitedList( columnName ) )#.setName("columns") - tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) - tableNameList = Group( delimitedList( tableName ) )#.setName("tables") - simpleSQL = ( selectToken + \ - ( '*' | columnNameList ).setResultsName( "columns" ) + \ - fromToken + \ - tableNameList.setResultsName( "tables" ) ) - - test( "SELECT * from XYZZY, ABC" ) - test( "select * from SYS.XYZZY" ) - test( "Select A from Sys.dual" ) - test( "Select AA,BB,CC from Sys.dual" ) - test( "Select A, B, C from Sys.dual" ) - test( "Select A, B, C from Sys.dual" ) - test( "Xelect A, B, C from Sys.dual" ) - test( "Select A, B, C frox Sys.dual" ) - test( "Select" ) - test( "Select ^^^ frox Sys.dual" ) - test( "Select A, B, C from Sys.dual, Table2 " ) +# module pyparsing.py +# +# Copyright (c) 2003-2008 Paul T. McGuire +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +#from __future__ import generators + +__doc__ = \ +""" +pyparsing module - Classes and methods to define and execute parsing grammars + +The pyparsing module is an alternative approach to creating and executing simple grammars, +vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you +don't need to learn a new syntax for defining grammars or matching expressions - the parsing module +provides a library of classes that you use to construct the grammar directly in Python. + +Here is a program to parse "Hello, World!" (or any greeting of the form ", !"):: + + from pyparsing import Word, alphas + + # define grammar of a greeting + greet = Word( alphas ) + "," + Word( alphas ) + "!" + + hello = "Hello, World!" + print hello, "->", greet.parseString( hello ) + +The program outputs the following:: + + Hello, World! -> ['Hello', ',', 'World', '!'] + +The Python representation of the grammar is quite readable, owing to the self-explanatory +class names, and the use of '+', '|' and '^' operators. + +The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an +object with named attributes. + +The pyparsing module handles some of the problems that are typically vexing when writing text parsers: + - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) + - quoted strings + - embedded comments +""" + +__version__ = "1.5.1" +__versionTime__ = "2 October 2008 00:44" +__author__ = "Paul McGuire " + +import string +from weakref import ref as wkref +import copy +import sys +import warnings +import re +import sre_constants +#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) + +__all__ = [ +'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', +'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', +'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', +'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', +'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', +'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', +'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', +'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', +'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', +'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', +'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', +'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', +'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', +'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', +'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', +'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', +'indentedBlock', 'originalTextFor', +] + + +""" +Detect if we are running version 3.X and make appropriate changes +Robert A. Clark +""" +if sys.version_info[0] > 2: + _PY3K = True + _MAX_INT = sys.maxsize + basestring = str +else: + _PY3K = False + _MAX_INT = sys.maxint + +if not _PY3K: + def _ustr(obj): + """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries + str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It + then < returns the unicode object | encodes it with the default encoding | ... >. + """ + try: + # If this works, then _ustr(obj) has the same behaviour as str(obj), so + # it won't break any existing code. + return str(obj) + + except UnicodeEncodeError: + # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) + # state that "The return value must be a string object". However, does a + # unicode object (being a subclass of basestring) count as a "string + # object"? + # If so, then return a unicode object: + return unicode(obj) + # Else encode it... but how? There are many choices... :) + # Replace unprintables with escape codes? + #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') + # Replace unprintables with question marks? + #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') + # ... +else: + _ustr = str + unichr = chr + +def _str2dict(strg): + return dict( [(c,0) for c in strg] ) + #~ return set( [c for c in strg] ) + +def _xml_escape(data): + """Escape &, <, >, ", ', etc. in a string of data.""" + + # ampersand must be replaced first + from_symbols = '&><"\'' + to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] + for from_,to_ in zip(from_symbols, to_symbols): + data = data.replace(from_, to_) + return data + +class _Constants(object): + pass + +if not _PY3K: + alphas = string.lowercase + string.uppercase +else: + alphas = string.ascii_lowercase + string.ascii_uppercase +nums = string.digits +hexnums = nums + "ABCDEFabcdef" +alphanums = alphas + nums +_bslash = chr(92) +printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) + +class ParseBaseException(Exception): + """base exception class for all parsing runtime exceptions""" + __slots__ = ( "loc","msg","pstr","parserElement" ) + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( self, pstr, loc=0, msg=None, elem=None ): + self.loc = loc + if msg is None: + self.msg = pstr + self.pstr = "" + else: + self.msg = msg + self.pstr = pstr + self.parserElement = elem + + def __getattr__( self, aname ): + """supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text + """ + if( aname == "lineno" ): + return lineno( self.loc, self.pstr ) + elif( aname in ("col", "column") ): + return col( self.loc, self.pstr ) + elif( aname == "line" ): + return line( self.loc, self.pstr ) + else: + raise AttributeError(aname) + + def __str__( self ): + return "%s (at char %d), (line:%d, col:%d)" % \ + ( self.msg, self.loc, self.lineno, self.column ) + def __repr__( self ): + return _ustr(self) + def markInputline( self, markerString = ">!<" ): + """Extracts the exception line from the input string, and marks + the location of the exception with a special symbol. + """ + line_str = self.line + line_column = self.column - 1 + if markerString: + line_str = "".join( [line_str[:line_column], + markerString, line_str[line_column:]]) + return line_str.strip() + def __dir__(self): + return "loc msg pstr parserElement lineno col line " \ + "markInputLine __str__ __repr__".split() + +class ParseException(ParseBaseException): + """exception thrown when parse expressions don't match class; + supported attributes by name are: + - lineno - returns the line number of the exception text + - col - returns the column number of the exception text + - line - returns the line containing the exception text + """ + pass + +class ParseFatalException(ParseBaseException): + """user-throwable exception thrown when inconsistent parse content + is found; stops all parsing immediately""" + pass + +class ParseSyntaxException(ParseFatalException): + """just like ParseFatalException, but thrown internally when an + ErrorStop indicates that parsing is to stop immediately because + an unbacktrackable syntax error has been found""" + def __init__(self, pe): + super(ParseSyntaxException, self).__init__( + pe.pstr, pe.loc, pe.msg, pe.parserElement) + +#~ class ReparseException(ParseBaseException): + #~ """Experimental class - parse actions can raise this exception to cause + #~ pyparsing to reparse the input string: + #~ - with a modified input string, and/or + #~ - with a modified start location + #~ Set the values of the ReparseException in the constructor, and raise the + #~ exception in a parse action to cause pyparsing to use the new string/location. + #~ Setting the values as None causes no change to be made. + #~ """ + #~ def __init_( self, newstring, restartLoc ): + #~ self.newParseText = newstring + #~ self.reparseLoc = restartLoc + +class RecursiveGrammarException(Exception): + """exception thrown by validate() if the grammar could be improperly recursive""" + def __init__( self, parseElementList ): + self.parseElementTrace = parseElementList + + def __str__( self ): + return "RecursiveGrammarException: %s" % self.parseElementTrace + +class _ParseResultsWithOffset(object): + def __init__(self,p1,p2): + self.tup = (p1,p2) + def __getitem__(self,i): + return self.tup[i] + def __repr__(self): + return repr(self.tup) + def setOffset(self,i): + self.tup = (self.tup[0],i) + +class ParseResults(object): + """Structured parse results, to provide multiple means of access to the parsed data: + - as a list (len(results)) + - by list index (results[0], results[1], etc.) + - by attribute (results.) + """ + __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) + def __new__(cls, toklist, name=None, asList=True, modal=True ): + if isinstance(toklist, cls): + return toklist + retobj = object.__new__(cls) + retobj.__doinit = True + return retobj + + # Performance tuning: we construct a *lot* of these, so keep this + # constructor as small and fast as possible + def __init__( self, toklist, name=None, asList=True, modal=True ): + if self.__doinit: + self.__doinit = False + self.__name = None + self.__parent = None + self.__accumNames = {} + if isinstance(toklist, list): + self.__toklist = toklist[:] + else: + self.__toklist = [toklist] + self.__tokdict = dict() + + if name: + if not modal: + self.__accumNames[name] = 0 + if isinstance(name,int): + name = _ustr(name) # will always return a str, but use _ustr for consistency + self.__name = name + if not toklist in (None,'',[]): + if isinstance(toklist,basestring): + toklist = [ toklist ] + if asList: + if isinstance(toklist,ParseResults): + self[name] = _ParseResultsWithOffset(toklist.copy(),0) + else: + self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) + self[name].__name = name + else: + try: + self[name] = toklist[0] + except (KeyError,TypeError): + self[name] = toklist + + def __getitem__( self, i ): + if isinstance( i, (int,slice) ): + return self.__toklist[i] + else: + if i not in self.__accumNames: + return self.__tokdict[i][-1][0] + else: + return ParseResults([ v[0] for v in self.__tokdict[i] ]) + + def __setitem__( self, k, v ): + if isinstance(v,_ParseResultsWithOffset): + self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] + sub = v[0] + elif isinstance(k,int): + self.__toklist[k] = v + sub = v + else: + self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] + sub = v + if isinstance(sub,ParseResults): + sub.__parent = wkref(self) + + def __delitem__( self, i ): + if isinstance(i,(int,slice)): + mylen = len( self.__toklist ) + del self.__toklist[i] + + # convert int to slice + if isinstance(i, int): + if i < 0: + i += mylen + i = slice(i, i+1) + # get removed indices + removed = list(range(*i.indices(mylen))) + removed.reverse() + # fixup indices in token dictionary + for name in self.__tokdict: + occurrences = self.__tokdict[name] + for j in removed: + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) + else: + del self.__tokdict[i] + + def __contains__( self, k ): + return k in self.__tokdict + + def __len__( self ): return len( self.__toklist ) + def __bool__(self): return len( self.__toklist ) > 0 + __nonzero__ = __bool__ + def __iter__( self ): return iter( self.__toklist ) + def __reversed__( self ): return iter( reversed(self.__toklist) ) + def keys( self ): + """Returns all named result keys.""" + return self.__tokdict.keys() + + def pop( self, index=-1 ): + """Removes and returns item at specified index (default=last). + Will work with either numeric indices or dict-key indicies.""" + ret = self[index] + del self[index] + return ret + + def get(self, key, defaultValue=None): + """Returns named result matching the given key, or if there is no + such name, then returns the given defaultValue or None if no + defaultValue is specified.""" + if key in self: + return self[key] + else: + return defaultValue + + def insert( self, index, insStr ): + self.__toklist.insert(index, insStr) + # fixup indices in token dictionary + for name in self.__tokdict: + occurrences = self.__tokdict[name] + for k, (value, position) in enumerate(occurrences): + occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) + + def items( self ): + """Returns all named result keys and values as a list of tuples.""" + return [(k,self[k]) for k in self.__tokdict] + + def values( self ): + """Returns all named result values.""" + return [ v[-1][0] for v in self.__tokdict.values() ] + + def __getattr__( self, name ): + if name not in self.__slots__: + if name in self.__tokdict: + if name not in self.__accumNames: + return self.__tokdict[name][-1][0] + else: + return ParseResults([ v[0] for v in self.__tokdict[name] ]) + else: + return "" + return None + + def __add__( self, other ): + ret = self.copy() + ret += other + return ret + + def __iadd__( self, other ): + if other.__tokdict: + offset = len(self.__toklist) + addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) + otheritems = other.__tokdict.items() + otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) + for (k,vlist) in otheritems for v in vlist] + for k,v in otherdictitems: + self[k] = v + if isinstance(v[0],ParseResults): + v[0].__parent = wkref(self) + + self.__toklist += other.__toklist + self.__accumNames.update( other.__accumNames ) + del other + return self + + def __repr__( self ): + return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) + + def __str__( self ): + out = "[" + sep = "" + for i in self.__toklist: + if isinstance(i, ParseResults): + out += sep + _ustr(i) + else: + out += sep + repr(i) + sep = ", " + out += "]" + return out + + def _asStringList( self, sep='' ): + out = [] + for item in self.__toklist: + if out and sep: + out.append(sep) + if isinstance( item, ParseResults ): + out += item._asStringList() + else: + out.append( _ustr(item) ) + return out + + def asList( self ): + """Returns the parse results as a nested list of matching tokens, all converted to strings.""" + out = [] + for res in self.__toklist: + if isinstance(res,ParseResults): + out.append( res.asList() ) + else: + out.append( res ) + return out + + def asDict( self ): + """Returns the named parse results as dictionary.""" + return dict( self.items() ) + + def copy( self ): + """Returns a new copy of a ParseResults object.""" + ret = ParseResults( self.__toklist ) + ret.__tokdict = self.__tokdict.copy() + ret.__parent = self.__parent + ret.__accumNames.update( self.__accumNames ) + ret.__name = self.__name + return ret + + def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): + """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" + nl = "\n" + out = [] + namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() + for v in vlist ] ) + nextLevelIndent = indent + " " + + # collapse out indents if formatting is not desired + if not formatted: + indent = "" + nextLevelIndent = "" + nl = "" + + selfTag = None + if doctag is not None: + selfTag = doctag + else: + if self.__name: + selfTag = self.__name + + if not selfTag: + if namedItemsOnly: + return "" + else: + selfTag = "ITEM" + + out += [ nl, indent, "<", selfTag, ">" ] + + worklist = self.__toklist + for i,res in enumerate(worklist): + if isinstance(res,ParseResults): + if i in namedItems: + out += [ res.asXML(namedItems[i], + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] + else: + out += [ res.asXML(None, + namedItemsOnly and doctag is None, + nextLevelIndent, + formatted)] + else: + # individual token, see if there is a name for it + resTag = None + if i in namedItems: + resTag = namedItems[i] + if not resTag: + if namedItemsOnly: + continue + else: + resTag = "ITEM" + xmlBodyText = _xml_escape(_ustr(res)) + out += [ nl, nextLevelIndent, "<", resTag, ">", + xmlBodyText, + "" ] + + out += [ nl, indent, "" ] + return "".join(out) + + def __lookup(self,sub): + for k,vlist in self.__tokdict.items(): + for v,loc in vlist: + if sub is v: + return k + return None + + def getName(self): + """Returns the results name for this token expression.""" + if self.__name: + return self.__name + elif self.__parent: + par = self.__parent() + if par: + return par.__lookup(self) + else: + return None + elif (len(self) == 1 and + len(self.__tokdict) == 1 and + self.__tokdict.values()[0][0][1] in (0,-1)): + return self.__tokdict.keys()[0] + else: + return None + + def dump(self,indent='',depth=0): + """Diagnostic method for listing out the contents of a ParseResults. + Accepts an optional indent argument so that this string can be embedded + in a nested display of other data.""" + out = [] + out.append( indent+_ustr(self.asList()) ) + keys = self.items() + keys.sort() + for k,v in keys: + if out: + out.append('\n') + out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) + if isinstance(v,ParseResults): + if v.keys(): + #~ out.append('\n') + out.append( v.dump(indent,depth+1) ) + #~ out.append('\n') + else: + out.append(_ustr(v)) + else: + out.append(_ustr(v)) + #~ out.append('\n') + return "".join(out) + + # add support for pickle protocol + def __getstate__(self): + return ( self.__toklist, + ( self.__tokdict.copy(), + self.__parent is not None and self.__parent() or None, + self.__accumNames, + self.__name ) ) + + def __setstate__(self,state): + self.__toklist = state[0] + self.__tokdict, \ + par, \ + inAccumNames, \ + self.__name = state[1] + self.__accumNames = {} + self.__accumNames.update(inAccumNames) + if par is not None: + self.__parent = wkref(par) + else: + self.__parent = None + + def __dir__(self): + return dir(super(ParseResults,self)) + self.keys() + +def col (loc,strg): + """Returns current column within a string, counting newlines as line separators. + The first column is number 1. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{ParserElement.parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + return (loc} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + return strg.count("\n",0,loc) + 1 + +def line( loc, strg ): + """Returns the line of text containing loc within a string, counting newlines as line separators. + """ + lastCR = strg.rfind("\n", 0, loc) + nextCR = strg.find("\n", loc) + if nextCR > 0: + return strg[lastCR+1:nextCR] + else: + return strg[lastCR+1:] + +def _defaultStartDebugAction( instring, loc, expr ): + print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) + +def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): + print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) + +def _defaultExceptionDebugAction( instring, loc, expr, exc ): + print ("Exception raised:" + _ustr(exc)) + +def nullDebugAction(*args): + """'Do-nothing' debug action, to suppress debugging output during parsing.""" + pass + +class ParserElement(object): + """Abstract base level parser element class.""" + DEFAULT_WHITE_CHARS = " \n\t\r" + + def setDefaultWhitespaceChars( chars ): + """Overrides the default whitespace chars + """ + ParserElement.DEFAULT_WHITE_CHARS = chars + setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) + + def __init__( self, savelist=False ): + self.parseAction = list() + self.failAction = None + #~ self.name = "" # don't define self.name, let subclasses try/except upcall + self.strRepr = None + self.resultsName = None + self.saveAsList = savelist + self.skipWhitespace = True + self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS + self.copyDefaultWhiteChars = True + self.mayReturnEmpty = False # used when checking for left-recursion + self.keepTabs = False + self.ignoreExprs = list() + self.debug = False + self.streamlined = False + self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index + self.errmsg = "" + self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) + self.debugActions = ( None, None, None ) #custom debug actions + self.re = None + self.callPreparse = True # used to avoid redundant calls to preParse + self.callDuringTry = False + + def copy( self ): + """Make a copy of this ParserElement. Useful for defining different parse actions + for the same parsing pattern, using copies of the original parse element.""" + cpy = copy.copy( self ) + cpy.parseAction = self.parseAction[:] + cpy.ignoreExprs = self.ignoreExprs[:] + if self.copyDefaultWhiteChars: + cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS + return cpy + + def setName( self, name ): + """Define name for this expression, for use in debugging.""" + self.name = name + self.errmsg = "Expected " + self.name + if hasattr(self,"exception"): + self.exception.msg = self.errmsg + return self + + def setResultsName( self, name, listAllMatches=False ): + """Define name for referencing matching tokens as a nested attribute + of the returned parse results. + NOTE: this returns a *copy* of the original ParserElement object; + this is so that the client can define a basic element, such as an + integer, and reference it in multiple places with different names. + """ + newself = self.copy() + newself.resultsName = name + newself.modalResults = not listAllMatches + return newself + + def setBreak(self,breakFlag = True): + """Method to invoke the Python pdb debugger when this element is + about to be parsed. Set breakFlag to True to enable, False to + disable. + """ + if breakFlag: + _parseMethod = self._parse + def breaker(instring, loc, doActions=True, callPreParse=True): + import pdb + pdb.set_trace() + return _parseMethod( instring, loc, doActions, callPreParse ) + breaker._originalParseMethod = _parseMethod + self._parse = breaker + else: + if hasattr(self._parse,"_originalParseMethod"): + self._parse = self._parse._originalParseMethod + return self + + def _normalizeParseActionArgs( f ): + """Internal method used to decorate parse actions that take fewer than 3 arguments, + so that all parse actions can be called as f(s,l,t).""" + STAR_ARGS = 4 + + try: + restore = None + if isinstance(f,type): + restore = f + f = f.__init__ + if not _PY3K: + codeObj = f.func_code + else: + codeObj = f.code + if codeObj.co_flags & STAR_ARGS: + return f + numargs = codeObj.co_argcount + if not _PY3K: + if hasattr(f,"im_self"): + numargs -= 1 + else: + if hasattr(f,"__self__"): + numargs -= 1 + if restore: + f = restore + except AttributeError: + try: + if not _PY3K: + call_im_func_code = f.__call__.im_func.func_code + else: + call_im_func_code = f.__code__ + + # not a function, must be a callable object, get info from the + # im_func binding of its bound __call__ method + if call_im_func_code.co_flags & STAR_ARGS: + return f + numargs = call_im_func_code.co_argcount + if not _PY3K: + if hasattr(f.__call__,"im_self"): + numargs -= 1 + else: + if hasattr(f.__call__,"__self__"): + numargs -= 0 + except AttributeError: + if not _PY3K: + call_func_code = f.__call__.func_code + else: + call_func_code = f.__call__.__code__ + # not a bound method, get info directly from __call__ method + if call_func_code.co_flags & STAR_ARGS: + return f + numargs = call_func_code.co_argcount + if not _PY3K: + if hasattr(f.__call__,"im_self"): + numargs -= 1 + else: + if hasattr(f.__call__,"__self__"): + numargs -= 1 + + + #~ print ("adding function %s with %d args" % (f.func_name,numargs)) + if numargs == 3: + return f + else: + if numargs > 3: + def tmp(s,l,t): + return f(f.__call__.__self__, s,l,t) + if numargs == 2: + def tmp(s,l,t): + return f(l,t) + elif numargs == 1: + def tmp(s,l,t): + return f(t) + else: #~ numargs == 0: + def tmp(s,l,t): + return f() + try: + tmp.__name__ = f.__name__ + except (AttributeError,TypeError): + # no need for special handling if attribute doesnt exist + pass + try: + tmp.__doc__ = f.__doc__ + except (AttributeError,TypeError): + # no need for special handling if attribute doesnt exist + pass + try: + tmp.__dict__.update(f.__dict__) + except (AttributeError,TypeError): + # no need for special handling if attribute doesnt exist + pass + return tmp + _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) + + def setParseAction( self, *fns, **kwargs ): + """Define action to perform when successfully matching parse element definition. + Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), + fn(loc,toks), fn(toks), or just fn(), where: + - s = the original string being parsed (see note below) + - loc = the location of the matching substring + - toks = a list of the matched tokens, packaged as a ParseResults object + If the functions in fns modify the tokens, they can return them as the return + value from fn, and the modified list of tokens will replace the original. + Otherwise, fn does not need to return any value. + + Note: the default parsing behavior is to expand tabs in the input string + before starting the parsing process. See L{I{parseString}} for more information + on parsing strings containing s, and suggested methods to maintain a + consistent view of the parsed string, the parse location, and line and column + positions within the parsed string. + """ + self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) + self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + return self + + def addParseAction( self, *fns, **kwargs ): + """Add parse action to expression's list of parse actions. See L{I{setParseAction}}.""" + self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) + self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) + return self + + def setFailAction( self, fn ): + """Define action to perform if parsing fails at this expression. + Fail acton fn is a callable function that takes the arguments + fn(s,loc,expr,err) where: + - s = string being parsed + - loc = location where expression match was attempted and failed + - expr = the parse expression that failed + - err = the exception thrown + The function returns no value. It may throw ParseFatalException + if it is desired to stop parsing immediately.""" + self.failAction = fn + return self + + def _skipIgnorables( self, instring, loc ): + exprsFound = True + while exprsFound: + exprsFound = False + for e in self.ignoreExprs: + try: + while 1: + loc,dummy = e._parse( instring, loc ) + exprsFound = True + except ParseException: + pass + return loc + + def preParse( self, instring, loc ): + if self.ignoreExprs: + loc = self._skipIgnorables( instring, loc ) + + if self.skipWhitespace: + wt = self.whiteChars + instrlen = len(instring) + while loc < instrlen and instring[loc] in wt: + loc += 1 + + return loc + + def parseImpl( self, instring, loc, doActions=True ): + return loc, [] + + def postParse( self, instring, loc, tokenlist ): + return tokenlist + + #~ @profile + def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): + debugging = ( self.debug ) #and doActions ) + + if debugging or self.failAction: + #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) + if (self.debugActions[0] ): + self.debugActions[0]( instring, loc, self ) + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = loc + try: + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise ParseException( instring, len(instring), self.errmsg, self ) + except ParseBaseException, err: + #~ print ("Exception raised:", err) + if self.debugActions[2]: + self.debugActions[2]( instring, tokensStart, self, err ) + if self.failAction: + self.failAction( instring, tokensStart, self, err ) + raise + else: + if callPreParse and self.callPreparse: + preloc = self.preParse( instring, loc ) + else: + preloc = loc + tokensStart = loc + if self.mayIndexError or loc >= len(instring): + try: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + except IndexError: + raise ParseException( instring, len(instring), self.errmsg, self ) + else: + loc,tokens = self.parseImpl( instring, preloc, doActions ) + + tokens = self.postParse( instring, loc, tokens ) + + retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) + if self.parseAction and (doActions or self.callDuringTry): + if debugging: + try: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) + except ParseBaseException, err: + #~ print "Exception raised in user parse action:", err + if (self.debugActions[2] ): + self.debugActions[2]( instring, tokensStart, self, err ) + raise + else: + for fn in self.parseAction: + tokens = fn( instring, tokensStart, retTokens ) + if tokens is not None: + retTokens = ParseResults( tokens, + self.resultsName, + asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), + modal=self.modalResults ) + + if debugging: + #~ print ("Matched",self,"->",retTokens.asList()) + if (self.debugActions[1] ): + self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) + + return loc, retTokens + + def tryParse( self, instring, loc ): + try: + return self._parse( instring, loc, doActions=False )[0] + except ParseFatalException: + raise ParseException( instring, loc, self.errmsg, self) + + # this method gets repeatedly called during backtracking with the same arguments - + # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression + def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): + lookup = (self,instring,loc,callPreParse,doActions) + if lookup in ParserElement._exprArgCache: + value = ParserElement._exprArgCache[ lookup ] + if isinstance(value,Exception): + raise value + return value + else: + try: + value = self._parseNoCache( instring, loc, doActions, callPreParse ) + ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) + return value + except ParseBaseException, pe: + ParserElement._exprArgCache[ lookup ] = pe + raise + + _parse = _parseNoCache + + # argument cache for optimizing repeated calls when backtracking through recursive expressions + _exprArgCache = {} + def resetCache(): + ParserElement._exprArgCache.clear() + resetCache = staticmethod(resetCache) + + _packratEnabled = False + def enablePackrat(): + """Enables "packrat" parsing, which adds memoizing to the parsing logic. + Repeated parse attempts at the same string location (which happens + often in many complex grammars) can immediately return a cached value, + instead of re-executing parsing/validating code. Memoizing is done of + both valid results and parsing exceptions. + + This speedup may break existing programs that use parse actions that + have side-effects. For this reason, packrat parsing is disabled when + you first import pyparsing. To activate the packrat feature, your + program must call the class method ParserElement.enablePackrat(). If + your program uses psyco to "compile as you go", you must call + enablePackrat before calling psyco.full(). If you do not do this, + Python will crash. For best results, call enablePackrat() immediately + after importing pyparsing. + """ + if not ParserElement._packratEnabled: + ParserElement._packratEnabled = True + ParserElement._parse = ParserElement._parseCache + enablePackrat = staticmethod(enablePackrat) + + def parseString( self, instring, parseAll=False ): + """Execute the parse expression with the given string. + This is the main interface to the client code, once the complete + expression has been built. + + If you want the grammar to require that the entire input string be + successfully parsed, then set parseAll to True (equivalent to ending + the grammar with StringEnd()). + + Note: parseString implicitly calls expandtabs() on the input string, + in order to report proper column numbers in parse actions. + If the input string contains tabs and + the grammar uses parse actions that use the loc argument to index into the + string being parsed, you can ensure you have a consistent view of the input + string by: + - calling parseWithTabs on your grammar before calling parseString + (see L{I{parseWithTabs}}) + - define your parse action using the full (s,loc,toks) signature, and + reference the input string using the parse action's s argument + - explictly expand the tabs in your input string before calling + parseString + """ + ParserElement.resetCache() + if not self.streamlined: + self.streamline() + #~ self.saveAsList = True + for e in self.ignoreExprs: + e.streamline() + if not self.keepTabs: + instring = instring.expandtabs() + loc, tokens = self._parse( instring, 0 ) + if parseAll: + loc = self.preParse( instring, loc ) + StringEnd()._parse( instring, loc ) + return tokens + + def scanString( self, instring, maxMatches=_MAX_INT ): + """Scan the input string for expression matches. Each match will return the + matching tokens, start location, and end location. May be called with optional + maxMatches argument, to clip scanning after 'n' matches are found. + + Note that the start and end locations are reported relative to the string + being parsed. See L{I{parseString}} for more information on parsing + strings with embedded tabs.""" + if not self.streamlined: + self.streamline() + for e in self.ignoreExprs: + e.streamline() + + if not self.keepTabs: + instring = _ustr(instring).expandtabs() + instrlen = len(instring) + loc = 0 + preparseFn = self.preParse + parseFn = self._parse + ParserElement.resetCache() + matches = 0 + while loc <= instrlen and matches < maxMatches: + try: + preloc = preparseFn( instring, loc ) + nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) + except ParseException: + loc = preloc+1 + else: + matches += 1 + yield tokens, preloc, nextLoc + loc = nextLoc + + def transformString( self, instring ): + """Extension to scanString, to modify matching text with modified tokens that may + be returned from a parse action. To use transformString, define a grammar and + attach a parse action to it that modifies the returned token list. + Invoking transformString() on a target string will then scan for matches, + and replace the matched text patterns according to the logic in the parse + action. transformString() returns the resulting transformed string.""" + out = [] + lastE = 0 + # force preservation of s, to minimize unwanted transformation of string, and to + # keep string locs straight between transformString and scanString + self.keepTabs = True + for t,s,e in self.scanString( instring ): + out.append( instring[lastE:s] ) + if t: + if isinstance(t,ParseResults): + out += t.asList() + elif isinstance(t,list): + out += t + else: + out.append(t) + lastE = e + out.append(instring[lastE:]) + return "".join(map(_ustr,out)) + + def searchString( self, instring, maxMatches=_MAX_INT ): + """Another extension to scanString, simplifying the access to the tokens found + to match the given parse expression. May be called with optional + maxMatches argument, to clip searching after 'n' matches are found. + """ + return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) + + def __add__(self, other ): + """Implementation of + operator - returns And""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return And( [ self, other ] ) + + def __radd__(self, other ): + """Implementation of + operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other + self + + def __sub__(self, other): + """Implementation of - operator, returns And with error stop""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return And( [ self, And._ErrorStop(), other ] ) + + def __rsub__(self, other ): + """Implementation of - operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other - self + + def __mul__(self,other): + if isinstance(other,int): + minElements, optElements = other,0 + elif isinstance(other,tuple): + other = (other + (None, None))[:2] + if other[0] is None: + other = (0, other[1]) + if isinstance(other[0],int) and other[1] is None: + if other[0] == 0: + return ZeroOrMore(self) + if other[0] == 1: + return OneOrMore(self) + else: + return self*other[0] + ZeroOrMore(self) + elif isinstance(other[0],int) and isinstance(other[1],int): + minElements, optElements = other + optElements -= minElements + else: + raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) + else: + raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) + + if minElements < 0: + raise ValueError("cannot multiply ParserElement by negative value") + if optElements < 0: + raise ValueError("second tuple value must be greater or equal to first tuple value") + if minElements == optElements == 0: + raise ValueError("cannot multiply ParserElement by 0 or (0,0)") + + if (optElements): + def makeOptionalList(n): + if n>1: + return Optional(self + makeOptionalList(n-1)) + else: + return Optional(self) + if minElements: + if minElements == 1: + ret = self + makeOptionalList(optElements) + else: + ret = And([self]*minElements) + makeOptionalList(optElements) + else: + ret = makeOptionalList(optElements) + else: + if minElements == 1: + ret = self + else: + ret = And([self]*minElements) + return ret + + def __rmul__(self, other): + return self.__mul__(other) + + def __or__(self, other ): + """Implementation of | operator - returns MatchFirst""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return MatchFirst( [ self, other ] ) + + def __ror__(self, other ): + """Implementation of | operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other | self + + def __xor__(self, other ): + """Implementation of ^ operator - returns Or""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return Or( [ self, other ] ) + + def __rxor__(self, other ): + """Implementation of ^ operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other ^ self + + def __and__(self, other ): + """Implementation of & operator - returns Each""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return Each( [ self, other ] ) + + def __rand__(self, other ): + """Implementation of & operator when left operand is not a ParserElement""" + if isinstance( other, basestring ): + other = Literal( other ) + if not isinstance( other, ParserElement ): + warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), + SyntaxWarning, stacklevel=2) + return None + return other & self + + def __invert__( self ): + """Implementation of ~ operator - returns NotAny""" + return NotAny( self ) + + def __call__(self, name): + """Shortcut for setResultsName, with listAllMatches=default:: + userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") + could be written as:: + userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") + """ + return self.setResultsName(name) + + def suppress( self ): + """Suppresses the output of this ParserElement; useful to keep punctuation from + cluttering up returned output. + """ + return Suppress( self ) + + def leaveWhitespace( self ): + """Disables the skipping of whitespace before matching the characters in the + ParserElement's defined pattern. This is normally only used internally by + the pyparsing module, but may be needed in some whitespace-sensitive grammars. + """ + self.skipWhitespace = False + return self + + def setWhitespaceChars( self, chars ): + """Overrides the default whitespace chars + """ + self.skipWhitespace = True + self.whiteChars = chars + self.copyDefaultWhiteChars = False + return self + + def parseWithTabs( self ): + """Overrides default behavior to expand s to spaces before parsing the input string. + Must be called before parseString when the input grammar contains elements that + match characters.""" + self.keepTabs = True + return self + + def ignore( self, other ): + """Define expression to be ignored (e.g., comments) while doing pattern + matching; may be called repeatedly, to define multiple comment or other + ignorable patterns. + """ + if isinstance( other, Suppress ): + if other not in self.ignoreExprs: + self.ignoreExprs.append( other ) + else: + self.ignoreExprs.append( Suppress( other ) ) + return self + + def setDebugActions( self, startAction, successAction, exceptionAction ): + """Enable display of debugging messages while doing pattern matching.""" + self.debugActions = (startAction or _defaultStartDebugAction, + successAction or _defaultSuccessDebugAction, + exceptionAction or _defaultExceptionDebugAction) + self.debug = True + return self + + def setDebug( self, flag=True ): + """Enable display of debugging messages while doing pattern matching. + Set flag to True to enable, False to disable.""" + if flag: + self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) + else: + self.debug = False + return self + + def __str__( self ): + return self.name + + def __repr__( self ): + return _ustr(self) + + def streamline( self ): + self.streamlined = True + self.strRepr = None + return self + + def checkRecursion( self, parseElementList ): + pass + + def validate( self, validateTrace=[] ): + """Check defined expressions for valid structure, check for infinite recursive definitions.""" + self.checkRecursion( [] ) + + def parseFile( self, file_or_filename, parseAll=False ): + """Execute the parse expression on the given file or filename. + If a filename is specified (instead of a file object), + the entire file is opened, read, and closed before parsing. + """ + try: + file_contents = file_or_filename.read() + except AttributeError: + f = open(file_or_filename, "rb") + file_contents = f.read() + f.close() + return self.parseString(file_contents, parseAll) + + def getException(self): + return ParseException("",0,self.errmsg,self) + + def __getattr__(self,aname): + if aname == "myException": + self.myException = ret = self.getException(); + return ret; + else: + raise AttributeError("no such attribute " + aname) + + def __eq__(self,other): + if isinstance(other, basestring): + try: + (self + StringEnd()).parseString(_ustr(other)) + return True + except ParseBaseException: + return False + else: + return super(ParserElement,self)==other + + def __ne__(self,other): + return not (self == other) + + def __hash__(self): + return hash(id(self)) + + def __req__(self,other): + return self == other + + def __rne__(self,other): + return not (self == other) + + +class Token(ParserElement): + """Abstract ParserElement subclass, for defining atomic matching patterns.""" + def __init__( self ): + super(Token,self).__init__( savelist=False ) + #self.myException = ParseException("",0,"",self) + + def setName(self, name): + s = super(Token,self).setName(name) + self.errmsg = "Expected " + self.name + #s.myException.msg = self.errmsg + return s + + +class Empty(Token): + """An empty token, will always match.""" + def __init__( self ): + super(Empty,self).__init__() + self.name = "Empty" + self.mayReturnEmpty = True + self.mayIndexError = False + + +class NoMatch(Token): + """A token that will never match.""" + def __init__( self ): + super(NoMatch,self).__init__() + self.name = "NoMatch" + self.mayReturnEmpty = True + self.mayIndexError = False + self.errmsg = "Unmatchable token" + #self.myException.msg = self.errmsg + + def parseImpl( self, instring, loc, doActions=True ): + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + +class Literal(Token): + """Token to exactly match a specified string.""" + def __init__( self, matchString ): + super(Literal,self).__init__() + self.match = matchString + self.matchLen = len(matchString) + try: + self.firstMatchChar = matchString[0] + except IndexError: + warnings.warn("null string passed to Literal; use Empty() instead", + SyntaxWarning, stacklevel=2) + self.__class__ = Empty + self.name = '"%s"' % _ustr(self.match) + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + #self.myException.msg = self.errmsg + self.mayIndexError = False + + # Performance tuning: this routine gets called a *lot* + # if this is a single character match string and the first character matches, + # short-circuit as quickly as possible, and avoid calling startswith + #~ @profile + def parseImpl( self, instring, loc, doActions=True ): + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc +_L = Literal + +class Keyword(Token): + """Token to exactly match a specified string as a keyword, that is, it must be + immediately followed by a non-keyword character. Compare with Literal:: + Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. + Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' + Accepts two optional constructor arguments in addition to the keyword string: + identChars is a string of characters that would be valid identifier characters, + defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive + matching, default is False. + """ + DEFAULT_KEYWORD_CHARS = alphanums+"_$" + + def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): + super(Keyword,self).__init__() + self.match = matchString + self.matchLen = len(matchString) + try: + self.firstMatchChar = matchString[0] + except IndexError: + warnings.warn("null string passed to Keyword; use Empty() instead", + SyntaxWarning, stacklevel=2) + self.name = '"%s"' % self.match + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = False + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.caseless = caseless + if caseless: + self.caselessmatch = matchString.upper() + identChars = identChars.upper() + self.identChars = _str2dict(identChars) + + def parseImpl( self, instring, loc, doActions=True ): + if self.caseless: + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and + (loc == 0 or instring[loc-1].upper() not in self.identChars) ): + return loc+self.matchLen, self.match + else: + if (instring[loc] == self.firstMatchChar and + (self.matchLen==1 or instring.startswith(self.match,loc)) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and + (loc == 0 or instring[loc-1] not in self.identChars) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + def copy(self): + c = super(Keyword,self).copy() + c.identChars = Keyword.DEFAULT_KEYWORD_CHARS + return c + + def setDefaultKeywordChars( chars ): + """Overrides the default Keyword chars + """ + Keyword.DEFAULT_KEYWORD_CHARS = chars + setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) + +class CaselessLiteral(Literal): + """Token to match a specified string, ignoring case of letters. + Note: the matched results will always be in the case of the given + match string, NOT the case of the input text. + """ + def __init__( self, matchString ): + super(CaselessLiteral,self).__init__( matchString.upper() ) + # Preserve the defining literal. + self.returnString = matchString + self.name = "'%s'" % self.returnString + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + + def parseImpl( self, instring, loc, doActions=True ): + if instring[ loc:loc+self.matchLen ].upper() == self.match: + return loc+self.matchLen, self.returnString + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class CaselessKeyword(Keyword): + def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ): + super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True ) + + def parseImpl( self, instring, loc, doActions=True ): + if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and + (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): + return loc+self.matchLen, self.match + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class Word(Token): + """Token for matching words composed of allowed character sets. + Defined with string containing all allowed initial characters, + an optional string containing allowed body characters (if omitted, + defaults to the initial character set), and an optional minimum, + maximum, and/or exact length. The default value for min is 1 (a + minimum value < 1 is not valid); the default values for max and exact + are 0, meaning no maximum or exact length restriction. + """ + def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): + super(Word,self).__init__() + self.initCharsOrig = initChars + self.initChars = _str2dict(initChars) + if bodyChars : + self.bodyCharsOrig = bodyChars + self.bodyChars = _str2dict(bodyChars) + else: + self.bodyCharsOrig = initChars + self.bodyChars = _str2dict(initChars) + + self.maxSpecified = max > 0 + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.asKeyword = asKeyword + + if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): + if self.bodyCharsOrig == self.initCharsOrig: + self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) + elif len(self.bodyCharsOrig) == 1: + self.reString = "%s[%s]*" % \ + (re.escape(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + else: + self.reString = "[%s][%s]*" % \ + (_escapeRegexRangeChars(self.initCharsOrig), + _escapeRegexRangeChars(self.bodyCharsOrig),) + if self.asKeyword: + self.reString = r"\b"+self.reString+r"\b" + try: + self.re = re.compile( self.reString ) + except: + self.re = None + + def parseImpl( self, instring, loc, doActions=True ): + if self.re: + result = self.re.match(instring,loc) + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + return loc,result.group() + + if not(instring[ loc ] in self.initChars): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + start = loc + loc += 1 + instrlen = len(instring) + bodychars = self.bodyChars + maxloc = start + self.maxLen + maxloc = min( maxloc, instrlen ) + while loc < maxloc and instring[loc] in bodychars: + loc += 1 + + throwException = False + if loc - start < self.minLen: + throwException = True + if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: + throwException = True + if self.asKeyword: + if (start>0 and instring[start-1] in bodychars) or (loc4: + return s[:4]+"..." + else: + return s + + if ( self.initCharsOrig != self.bodyCharsOrig ): + self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) + else: + self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) + + return self.strRepr + + +class Regex(Token): + """Token for matching strings that match a given regular expression. + Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. + """ + def __init__( self, pattern, flags=0): + """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" + super(Regex,self).__init__() + + if len(pattern) == 0: + warnings.warn("null string passed to Regex; use Empty() instead", + SyntaxWarning, stacklevel=2) + + self.pattern = pattern + self.flags = flags + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % pattern, + SyntaxWarning, stacklevel=2) + raise + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + result = self.re.match(instring,loc) + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + d = result.groupdict() + ret = ParseResults(result.group()) + if d: + for k in d: + ret[k] = d[k] + return loc,ret + + def __str__( self ): + try: + return super(Regex,self).__str__() + except: + pass + + if self.strRepr is None: + self.strRepr = "Re:(%s)" % repr(self.pattern) + + return self.strRepr + + +class QuotedString(Token): + """Token for matching strings that are delimited by quoting characters. + """ + def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): + """ + Defined with the following parameters: + - quoteChar - string of one or more characters defining the quote delimiting string + - escChar - character to escape quotes, typically backslash (default=None) + - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) + - multiline - boolean indicating whether quotes can span multiple lines (default=False) + - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) + - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) + """ + super(QuotedString,self).__init__() + + # remove white space from quote chars - wont work anyway + quoteChar = quoteChar.strip() + if len(quoteChar) == 0: + warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + if endQuoteChar is None: + endQuoteChar = quoteChar + else: + endQuoteChar = endQuoteChar.strip() + if len(endQuoteChar) == 0: + warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) + raise SyntaxError() + + self.quoteChar = quoteChar + self.quoteCharLen = len(quoteChar) + self.firstQuoteChar = quoteChar[0] + self.endQuoteChar = endQuoteChar + self.endQuoteCharLen = len(endQuoteChar) + self.escChar = escChar + self.escQuote = escQuote + self.unquoteResults = unquoteResults + + if multiline: + self.flags = re.MULTILINE | re.DOTALL + self.pattern = r'%s(?:[^%s%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + else: + self.flags = 0 + self.pattern = r'%s(?:[^%s\n\r%s]' % \ + ( re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) + if len(self.endQuoteChar) > 1: + self.pattern += ( + '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), + _escapeRegexRangeChars(self.endQuoteChar[i])) + for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' + ) + if escQuote: + self.pattern += (r'|(?:%s)' % re.escape(escQuote)) + if escChar: + self.pattern += (r'|(?:%s.)' % re.escape(escChar)) + self.escCharReplacePattern = re.escape(self.escChar)+"(.)" + self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, + SyntaxWarning, stacklevel=2) + raise + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + self.mayIndexError = False + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None + if not result: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + loc = result.end() + ret = result.group() + + if self.unquoteResults: + + # strip off quotes + ret = ret[self.quoteCharLen:-self.endQuoteCharLen] + + if isinstance(ret,basestring): + # replace escaped characters + if self.escChar: + ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) + + # replace escaped quotes + if self.escQuote: + ret = ret.replace(self.escQuote, self.endQuoteChar) + + return loc, ret + + def __str__( self ): + try: + return super(QuotedString,self).__str__() + except: + pass + + if self.strRepr is None: + self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) + + return self.strRepr + + +class CharsNotIn(Token): + """Token for matching words composed of characters *not* in a given set. + Defined with string containing all disallowed characters, and an optional + minimum, maximum, and/or exact length. The default value for min is 1 (a + minimum value < 1 is not valid); the default values for max and exact + are 0, meaning no maximum or exact length restriction. + """ + def __init__( self, notChars, min=1, max=0, exact=0 ): + super(CharsNotIn,self).__init__() + self.skipWhitespace = False + self.notChars = notChars + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = ( self.minLen == 0 ) + #self.myException.msg = self.errmsg + self.mayIndexError = False + + def parseImpl( self, instring, loc, doActions=True ): + if instring[loc] in self.notChars: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + start = loc + loc += 1 + notchars = self.notChars + maxlen = min( start+self.maxLen, len(instring) ) + while loc < maxlen and \ + (instring[loc] not in notchars): + loc += 1 + + if loc - start < self.minLen: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + return loc, instring[start:loc] + + def __str__( self ): + try: + return super(CharsNotIn, self).__str__() + except: + pass + + if self.strRepr is None: + if len(self.notChars) > 4: + self.strRepr = "!W:(%s...)" % self.notChars[:4] + else: + self.strRepr = "!W:(%s)" % self.notChars + + return self.strRepr + +class White(Token): + """Special matching class for matching whitespace. Normally, whitespace is ignored + by pyparsing grammars. This class is included when some whitespace structures + are significant. Define with a string containing the whitespace characters to be + matched; default is " \\t\\n". Also takes optional min, max, and exact arguments, + as defined for the Word class.""" + whiteStrs = { + " " : "", + "\t": "", + "\n": "", + "\r": "", + "\f": "", + } + def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): + super(White,self).__init__() + self.matchWhite = ws + self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) + #~ self.leaveWhitespace() + self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) + self.mayReturnEmpty = True + self.errmsg = "Expected " + self.name + #self.myException.msg = self.errmsg + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + def parseImpl( self, instring, loc, doActions=True ): + if not(instring[ loc ] in self.matchWhite): + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + start = loc + loc += 1 + maxloc = start + self.maxLen + maxloc = min( maxloc, len(instring) ) + while loc < maxloc and instring[loc] in self.matchWhite: + loc += 1 + + if loc - start < self.minLen: + #~ raise ParseException( instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + + return loc, instring[start:loc] + + +class _PositionToken(Token): + def __init__( self ): + super(_PositionToken,self).__init__() + self.name=self.__class__.__name__ + self.mayReturnEmpty = True + self.mayIndexError = False + +class GoToColumn(_PositionToken): + """Token to advance to a specific column of input text; useful for tabular report scraping.""" + def __init__( self, colno ): + super(GoToColumn,self).__init__() + self.col = colno + + def preParse( self, instring, loc ): + if col(loc,instring) != self.col: + instrlen = len(instring) + if self.ignoreExprs: + loc = self._skipIgnorables( instring, loc ) + while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + thiscol = col( loc, instring ) + if thiscol > self.col: + raise ParseException( instring, loc, "Text not in expected column", self ) + newloc = loc + self.col - thiscol + ret = instring[ loc: newloc ] + return newloc, ret + +class LineStart(_PositionToken): + """Matches if current position is at the beginning of a line within the parse string""" + def __init__( self ): + super(LineStart,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected start of line" + #self.myException.msg = self.errmsg + + def preParse( self, instring, loc ): + preloc = super(LineStart,self).preParse(instring,loc) + if instring[preloc] == "\n": + loc += 1 + return loc + + def parseImpl( self, instring, loc, doActions=True ): + if not( loc==0 or + (loc == self.preParse( instring, 0 )) or + (instring[loc-1] == "\n") ): #col(loc, instring) != 1: + #~ raise ParseException( instring, loc, "Expected start of line" ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class LineEnd(_PositionToken): + """Matches if current position is at the end of a line within the parse string""" + def __init__( self ): + super(LineEnd,self).__init__() + self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) + self.errmsg = "Expected end of line" + #self.myException.msg = self.errmsg + + def parseImpl( self, instring, loc, doActions=True ): + if loc len(instring): + return loc, [] + else: + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class WordStart(_PositionToken): + """Matches if the current position is at the beginning of a Word, and + is not preceded by any character in a given set of wordChars + (default=printables). To emulate the \b behavior of regular expressions, + use WordStart(alphanums). WordStart will also match at the beginning of + the string being parsed, or at the beginning of a line. + """ + def __init__(self, wordChars = printables): + super(WordStart,self).__init__() + self.wordChars = _str2dict(wordChars) + self.errmsg = "Not at the start of a word" + + def parseImpl(self, instring, loc, doActions=True ): + if loc != 0: + if (instring[loc-1] in self.wordChars or + instring[loc] not in self.wordChars): + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + +class WordEnd(_PositionToken): + """Matches if the current position is at the end of a Word, and + is not followed by any character in a given set of wordChars + (default=printables). To emulate the \b behavior of regular expressions, + use WordEnd(alphanums). WordEnd will also match at the end of + the string being parsed, or at the end of a line. + """ + def __init__(self, wordChars = printables): + super(WordEnd,self).__init__() + self.wordChars = _str2dict(wordChars) + self.skipWhitespace = False + self.errmsg = "Not at the end of a word" + + def parseImpl(self, instring, loc, doActions=True ): + instrlen = len(instring) + if instrlen>0 and loc maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + else: + if loc2 > maxMatchLoc: + maxMatchLoc = loc2 + maxMatchExp = e + + if maxMatchLoc < 0: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + return maxMatchExp._parse( instring, loc, doActions ) + + def __ixor__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #Or( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class MatchFirst(ParseExpression): + """Requires that at least one ParseExpression is found. + If two expressions match, the first one listed is the one that will match. + May be constructed using the '|' operator. + """ + def __init__( self, exprs, savelist = False ): + super(MatchFirst,self).__init__(exprs, savelist) + if exprs: + self.mayReturnEmpty = False + for e in self.exprs: + if e.mayReturnEmpty: + self.mayReturnEmpty = True + break + else: + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + maxExcLoc = -1 + maxException = None + for e in self.exprs: + try: + ret = e._parse( instring, loc, doActions ) + return ret + except ParseException, err: + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring,len(instring),e.errmsg,self) + maxExcLoc = len(instring) + + # only got here if no expression matched, raise exception for match that made it the furthest + else: + if maxException is not None: + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + def __ior__(self, other ): + if isinstance( other, basestring ): + other = Literal( other ) + return self.append( other ) #MatchFirst( [ self, other ] ) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class Each(ParseExpression): + """Requires all given ParseExpressions to be found, but in any order. + Expressions may be separated by whitespace. + May be constructed using the '&' operator. + """ + def __init__( self, exprs, savelist = True ): + super(Each,self).__init__(exprs, savelist) + self.mayReturnEmpty = True + for e in self.exprs: + if not e.mayReturnEmpty: + self.mayReturnEmpty = False + break + self.skipWhitespace = True + self.initExprGroups = True + + def parseImpl( self, instring, loc, doActions=True ): + if self.initExprGroups: + self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ] + self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] + self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] + self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] + self.required += self.multirequired + self.initExprGroups = False + tmpLoc = loc + tmpReqd = self.required[:] + tmpOpt = self.optionals[:] + matchOrder = [] + + keepMatching = True + while keepMatching: + tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired + failed = [] + for e in tmpExprs: + try: + tmpLoc = e.tryParse( instring, tmpLoc ) + except ParseException: + failed.append(e) + else: + matchOrder.append(e) + if e in tmpReqd: + tmpReqd.remove(e) + elif e in tmpOpt: + tmpOpt.remove(e) + if len(failed) == len(tmpExprs): + keepMatching = False + + if tmpReqd: + missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) + raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) + + # add any unmatched Optionals, in case they have default values defined + matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt ] + + resultlist = [] + for e in matchOrder: + loc,results = e._parse(instring,loc,doActions) + resultlist.append(results) + + finalResults = ParseResults([]) + for r in resultlist: + dups = {} + for k in r.keys(): + if k in finalResults.keys(): + tmp = ParseResults(finalResults[k]) + tmp += ParseResults(r[k]) + dups[k] = tmp + finalResults += ParseResults(r) + for k,v in dups.items(): + finalResults[k] = v + return loc, finalResults + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" + + return self.strRepr + + def checkRecursion( self, parseElementList ): + subRecCheckList = parseElementList[:] + [ self ] + for e in self.exprs: + e.checkRecursion( subRecCheckList ) + + +class ParseElementEnhance(ParserElement): + """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" + def __init__( self, expr, savelist=False ): + super(ParseElementEnhance,self).__init__(savelist) + if isinstance( expr, basestring ): + expr = Literal(expr) + self.expr = expr + self.strRepr = None + if expr is not None: + self.mayIndexError = expr.mayIndexError + self.mayReturnEmpty = expr.mayReturnEmpty + self.setWhitespaceChars( expr.whiteChars ) + self.skipWhitespace = expr.skipWhitespace + self.saveAsList = expr.saveAsList + self.callPreparse = expr.callPreparse + self.ignoreExprs.extend(expr.ignoreExprs) + + def parseImpl( self, instring, loc, doActions=True ): + if self.expr is not None: + return self.expr._parse( instring, loc, doActions, callPreParse=False ) + else: + raise ParseException("",loc,self.errmsg,self) + + def leaveWhitespace( self ): + self.skipWhitespace = False + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.leaveWhitespace() + return self + + def ignore( self, other ): + if isinstance( other, Suppress ): + if other not in self.ignoreExprs: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + else: + super( ParseElementEnhance, self).ignore( other ) + if self.expr is not None: + self.expr.ignore( self.ignoreExprs[-1] ) + return self + + def streamline( self ): + super(ParseElementEnhance,self).streamline() + if self.expr is not None: + self.expr.streamline() + return self + + def checkRecursion( self, parseElementList ): + if self in parseElementList: + raise RecursiveGrammarException( parseElementList+[self] ) + subRecCheckList = parseElementList[:] + [ self ] + if self.expr is not None: + self.expr.checkRecursion( subRecCheckList ) + + def validate( self, validateTrace=[] ): + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion( [] ) + + def __str__( self ): + try: + return super(ParseElementEnhance,self).__str__() + except: + pass + + if self.strRepr is None and self.expr is not None: + self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) + return self.strRepr + + +class FollowedBy(ParseElementEnhance): + """Lookahead matching of the given parse expression. FollowedBy + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression matches at the current + position. FollowedBy always returns a null token list.""" + def __init__( self, expr ): + super(FollowedBy,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + self.expr.tryParse( instring, loc ) + return loc, [] + + +class NotAny(ParseElementEnhance): + """Lookahead to disallow matching with the given parse expression. NotAny + does *not* advance the parsing position within the input string, it only + verifies that the specified parse expression does *not* match at the current + position. Also, NotAny does *not* skip over leading whitespace. NotAny + always returns a null token list. May be constructed using the '~' operator.""" + def __init__( self, expr ): + super(NotAny,self).__init__(expr) + #~ self.leaveWhitespace() + self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs + self.mayReturnEmpty = True + self.errmsg = "Found unwanted token, "+_ustr(self.expr) + #self.myException = ParseException("",0,self.errmsg,self) + + def parseImpl( self, instring, loc, doActions=True ): + try: + self.expr.tryParse( instring, loc ) + except (ParseException,IndexError): + pass + else: + #~ raise ParseException(instring, loc, self.errmsg ) + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + return loc, [] + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "~{" + _ustr(self.expr) + "}" + + return self.strRepr + + +class ZeroOrMore(ParseElementEnhance): + """Optional repetition of zero or more of the given expression.""" + def __init__( self, expr ): + super(ZeroOrMore,self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + tokens = [] + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]..." + + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + + +class OneOrMore(ParseElementEnhance): + """Repetition of one or more of the given expression.""" + def parseImpl( self, instring, loc, doActions=True ): + # must be at least one + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + try: + hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) + while 1: + if hasIgnoreExprs: + preloc = self._skipIgnorables( instring, loc ) + else: + preloc = loc + loc, tmptokens = self.expr._parse( instring, preloc, doActions ) + if tmptokens or tmptokens.keys(): + tokens += tmptokens + except (ParseException,IndexError): + pass + + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + _ustr(self.expr) + "}..." + + return self.strRepr + + def setResultsName( self, name, listAllMatches=False ): + ret = super(OneOrMore,self).setResultsName(name,listAllMatches) + ret.saveAsList = True + return ret + +class _NullToken(object): + def __bool__(self): + return False + __nonzero__ = __bool__ + def __str__(self): + return "" + +_optionalNotMatched = _NullToken() +class Optional(ParseElementEnhance): + """Optional matching of the given expression. + A default return string can also be specified, if the optional expression + is not found. + """ + def __init__( self, exprs, default=_optionalNotMatched ): + super(Optional,self).__init__( exprs, savelist=False ) + self.defaultValue = default + self.mayReturnEmpty = True + + def parseImpl( self, instring, loc, doActions=True ): + try: + loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) + except (ParseException,IndexError): + if self.defaultValue is not _optionalNotMatched: + if self.expr.resultsName: + tokens = ParseResults([ self.defaultValue ]) + tokens[self.expr.resultsName] = self.defaultValue + else: + tokens = [ self.defaultValue ] + else: + tokens = [] + return loc, tokens + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]" + + return self.strRepr + + +class SkipTo(ParseElementEnhance): + """Token for skipping over all undefined text until the matched expression is found. + If include is set to true, the matched expression is also consumed. The ignore + argument is used to define grammars (typically quoted strings and comments) that + might contain false matches. + """ + def __init__( self, other, include=False, ignore=None, failOn=None ): + super( SkipTo, self ).__init__( other ) + if ignore is not None: + self.expr = self.expr.copy() + self.expr.ignore(ignore) + self.mayReturnEmpty = True + self.mayIndexError = False + self.includeMatch = include + self.asList = False + if failOn is not None and isinstance(failOn, basestring): + self.failOn = Literal(failOn) + else: + self.failOn = failOn + self.errmsg = "No match found for "+_ustr(self.expr) + #self.myException = ParseException("",0,self.errmsg,self) + + def parseImpl( self, instring, loc, doActions=True ): + startLoc = loc + instrlen = len(instring) + expr = self.expr + failParse = False + while loc <= instrlen: + try: + if self.failOn: + failParse = True + self.failOn.tryParse(instring, loc) + failParse = False + loc = expr._skipIgnorables( instring, loc ) + expr._parse( instring, loc, doActions=False, callPreParse=False ) + skipText = instring[startLoc:loc] + if self.includeMatch: + loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) + if mat: + skipRes = ParseResults( skipText ) + skipRes += mat + return loc, [ skipRes ] + else: + return loc, [ skipText ] + else: + return loc, [ skipText ] + except (ParseException,IndexError): + if failParse: + raise + else: + loc += 1 + exc = self.myException + exc.loc = loc + exc.pstr = instring + raise exc + +class Forward(ParseElementEnhance): + """Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the Forward variable using the '<<' operator. + + Note: take care when assigning to Forward not to overlook precedence of operators. + Specifically, '|' has a lower precedence than '<<', so that:: + fwdExpr << a | b | c + will actually be evaluated as:: + (fwdExpr << a) | b | c + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the Forward:: + fwdExpr << (a | b | c) + """ + def __init__( self, other=None ): + super(Forward,self).__init__( other, savelist=False ) + + def __lshift__( self, other ): + if isinstance( other, basestring ): + other = Literal(other) + self.expr = other + self.mayReturnEmpty = other.mayReturnEmpty + self.strRepr = None + self.mayIndexError = self.expr.mayIndexError + self.mayReturnEmpty = self.expr.mayReturnEmpty + self.setWhitespaceChars( self.expr.whiteChars ) + self.skipWhitespace = self.expr.skipWhitespace + self.saveAsList = self.expr.saveAsList + self.ignoreExprs.extend(self.expr.ignoreExprs) + return None + + def leaveWhitespace( self ): + self.skipWhitespace = False + return self + + def streamline( self ): + if not self.streamlined: + self.streamlined = True + if self.expr is not None: + self.expr.streamline() + return self + + def validate( self, validateTrace=[] ): + if self not in validateTrace: + tmp = validateTrace[:]+[self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion([]) + + def __str__( self ): + if hasattr(self,"name"): + return self.name + + self._revertClass = self.__class__ + self.__class__ = _ForwardNoRecurse + try: + if self.expr is not None: + retString = _ustr(self.expr) + else: + retString = "None" + finally: + self.__class__ = self._revertClass + return self.__class__.__name__ + ": " + retString + + def copy(self): + if self.expr is not None: + return super(Forward,self).copy() + else: + ret = Forward() + ret << self + return ret + +class _ForwardNoRecurse(Forward): + def __str__( self ): + return "..." + +class TokenConverter(ParseElementEnhance): + """Abstract subclass of ParseExpression, for converting parsed results.""" + def __init__( self, expr, savelist=False ): + super(TokenConverter,self).__init__( expr )#, savelist ) + self.saveAsList = False + +class Upcase(TokenConverter): + """Converter to upper case all matching tokens.""" + def __init__(self, *args): + super(Upcase,self).__init__(*args) + warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", + DeprecationWarning,stacklevel=2) + + def postParse( self, instring, loc, tokenlist ): + return list(map( string.upper, tokenlist )) + + +class Combine(TokenConverter): + """Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the input string; + this can be disabled by specifying 'adjacent=False' in the constructor. + """ + def __init__( self, expr, joinString="", adjacent=True ): + super(Combine,self).__init__( expr ) + # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself + if adjacent: + self.leaveWhitespace() + self.adjacent = adjacent + self.skipWhitespace = True + self.joinString = joinString + + def ignore( self, other ): + if self.adjacent: + ParserElement.ignore(self, other) + else: + super( Combine, self).ignore( other ) + return self + + def postParse( self, instring, loc, tokenlist ): + retToks = tokenlist.copy() + del retToks[:] + retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) + + if self.resultsName and len(retToks.keys())>0: + return [ retToks ] + else: + return retToks + +class Group(TokenConverter): + """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" + def __init__( self, expr ): + super(Group,self).__init__( expr ) + self.saveAsList = True + + def postParse( self, instring, loc, tokenlist ): + return [ tokenlist ] + +class Dict(TokenConverter): + """Converter to return a repetitive expression as a list, but also as a dictionary. + Each element can also be referenced using the first token in the expression as its key. + Useful for tabular report scraping when the first column can be used as a item key. + """ + def __init__( self, exprs ): + super(Dict,self).__init__( exprs ) + self.saveAsList = True + + def postParse( self, instring, loc, tokenlist ): + for i,tok in enumerate(tokenlist): + if len(tok) == 0: + continue + ikey = tok[0] + if isinstance(ikey,int): + ikey = _ustr(tok[0]).strip() + if len(tok)==1: + tokenlist[ikey] = _ParseResultsWithOffset("",i) + elif len(tok)==2 and not isinstance(tok[1],ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) + else: + dictvalue = tok.copy() #ParseResults(i) + del dictvalue[0] + if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) + else: + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) + + if self.resultsName: + return [ tokenlist ] + else: + return tokenlist + + +class Suppress(TokenConverter): + """Converter for ignoring the results of a parsed expression.""" + def postParse( self, instring, loc, tokenlist ): + return [] + + def suppress( self ): + return self + + +class OnlyOnce(object): + """Wrapper for parse actions, to ensure they are only called once.""" + def __init__(self, methodCall): + self.callable = ParserElement._normalizeParseActionArgs(methodCall) + self.called = False + def __call__(self,s,l,t): + if not self.called: + results = self.callable(s,l,t) + self.called = True + return results + raise ParseException(s,l,"") + def reset(self): + self.called = False + +def traceParseAction(f): + """Decorator for debugging parse actions.""" + f = ParserElement._normalizeParseActionArgs(f) + def z(*paArgs): + thisFunc = f.func_name + s,l,t = paArgs[-3:] + if len(paArgs)>3: + thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc + sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) + try: + ret = f(*paArgs) + except Exception, exc: + sys.stderr.write( "<", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) + try: + if len(symbols)==len("".join(symbols)): + return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) + else: + return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) + except: + warnings.warn("Exception creating Regex for oneOf, building MatchFirst", + SyntaxWarning, stacklevel=2) + + + # last resort, just use MatchFirst + return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) + +def dictOf( key, value ): + """Helper to easily and clearly define a dictionary by specifying the respective patterns + for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens + in the proper order. The key pattern can include delimiting markers or punctuation, + as long as they are suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the Dict results can include named token + fields. + """ + return Dict( ZeroOrMore( Group ( key + value ) ) ) + +def originalTextFor(expr, asString=True): + """Helper to return the original, untokenized text for a given expression. Useful to + restore the parsed fields of an HTML start tag into the raw tag text itself, or to + revert separate tokens with intervening whitespace back to the original matching + input text. Simpler to use than the parse action keepOriginalText, and does not + require the inspect module to chase up the call stack. By default, returns a + string containing the original parsed text. + + If the optional asString argument is passed as False, then the return value is a + ParseResults containing any results names that were originally matched, and a + single token containing the original matched text from the input string. So if + the expression passed to originalTextFor contains expressions with defined + results names, you must set asString to False if you want to preserve those + results name values.""" + locMarker = Empty().setParseAction(lambda s,loc,t: loc) + matchExpr = locMarker("_original_start") + expr + locMarker("_original_end") + if asString: + extractText = lambda s,l,t: s[t._original_start:t._original_end] + else: + def extractText(s,l,t): + del t[:] + t.insert(0, s[t._original_start:t._original_end]) + del t["_original_start"] + del t["_original_end"] + matchExpr.setParseAction(extractText) + return matchExpr + +# convenience constants for positional expressions +empty = Empty().setName("empty") +lineStart = LineStart().setName("lineStart") +lineEnd = LineEnd().setName("lineEnd") +stringStart = StringStart().setName("stringStart") +stringEnd = StringEnd().setName("stringEnd") + +_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) +_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) +_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16))) +_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8))) +_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) +_charRange = Group(_singleChar + Suppress("-") + _singleChar) +_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]" + +_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) + +def srange(s): + r"""Helper to easily define string ranges for use in Word construction. Borrows + syntax from regexp '[]' string range definitions:: + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + The input string must be enclosed in []'s, and the returned string is the expanded + character set joined into a single string. + The values enclosed in the []'s may be:: + a single character + an escaped character with a leading backslash (such as \- or \]) + an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) + an escaped octal character with a leading '\0' (\041, which is a '!' character) + a range of any of the above, separated by a dash ('a-z', etc.) + any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) + """ + try: + return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) + except: + return "" + +def matchOnlyAtCol(n): + """Helper method for defining parse actions that require matching at a specific + column in the input text. + """ + def verifyCol(strg,locn,toks): + if col(locn,strg) != n: + raise ParseException(strg,locn,"matched token not at column %d" % n) + return verifyCol + +def replaceWith(replStr): + """Helper method for common parse actions that simply return a literal value. Especially + useful when used with transformString(). + """ + def _replFunc(*args): + return [replStr] + return _replFunc + +def removeQuotes(s,l,t): + """Helper parse action for removing quotation marks from parsed quoted strings. + To use, add this parse action to quoted string using:: + quotedString.setParseAction( removeQuotes ) + """ + return t[0][1:-1] + +def upcaseTokens(s,l,t): + """Helper parse action to convert tokens to upper case.""" + return [ tt.upper() for tt in map(_ustr,t) ] + +def downcaseTokens(s,l,t): + """Helper parse action to convert tokens to lower case.""" + return [ tt.lower() for tt in map(_ustr,t) ] + +def keepOriginalText(s,startLoc,t): + """Helper parse action to preserve original parsed text, + overriding any nested parse actions.""" + try: + endloc = getTokensEndLoc() + except ParseException: + raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") + del t[:] + t += ParseResults(s[startLoc:endloc]) + return t + +def getTokensEndLoc(): + """Method to be called from within a parse action to determine the end + location of the parsed tokens.""" + import inspect + fstack = inspect.stack() + try: + # search up the stack (through intervening argument normalizers) for correct calling routine + for f in fstack[2:]: + if f[3] == "_parseNoCache": + endloc = f[0].f_locals["loc"] + return endloc + else: + raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") + finally: + del fstack + +def _makeTags(tagStr, xml): + """Internal helper to construct opening and closing tag expressions, given a tag name""" + if isinstance(tagStr,basestring): + resname = tagStr + tagStr = Keyword(tagStr, caseless=not xml) + else: + resname = tagStr.name + + tagAttrName = Word(alphas,alphanums+"_-:") + if (xml): + tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) + openTag = Suppress("<") + tagStr + \ + Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + else: + printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) + tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) + openTag = Suppress("<") + tagStr + \ + Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ + Optional( Suppress("=") + tagAttrValue ) ))) + \ + Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") + closeTag = Combine(_L("") + + openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) + closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % tagStr) + + return openTag, closeTag + +def makeHTMLTags(tagStr): + """Helper to construct opening and closing tag expressions for HTML, given a tag name""" + return _makeTags( tagStr, False ) + +def makeXMLTags(tagStr): + """Helper to construct opening and closing tag expressions for XML, given a tag name""" + return _makeTags( tagStr, True ) + +def withAttribute(*args,**attrDict): + """Helper to create a validating parse action to be used with start tags created + with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag + with a required attribute value, to avoid false matches on common tags such as + or
. + + Call withAttribute with a series of attribute names and values. Specify the list + of filter attributes names and values as: + - keyword arguments, as in (class="Customer",align="right"), or + - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) + For attribute names with a namespace prefix, you must use the second form. Attribute + names are matched insensitive to upper/lower case. + + To verify that the attribute exists, but without specifying a value, pass + withAttribute.ANY_VALUE as the value. + """ + if args: + attrs = args[:] + else: + attrs = attrDict.items() + attrs = [(k,v) for k,v in attrs] + def pa(s,l,tokens): + for attrName,attrValue in attrs: + if attrName not in tokens: + raise ParseException(s,l,"no matching attribute " + attrName) + if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % + (attrName, tokens[attrName], attrValue)) + return pa +withAttribute.ANY_VALUE = object() + +opAssoc = _Constants() +opAssoc.LEFT = object() +opAssoc.RIGHT = object() + +def operatorPrecedence( baseExpr, opList ): + """Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary or + binary, left- or right-associative. Parse actions can also be attached + to operator expressions. + + Parameters: + - baseExpr - expression representing the most basic element for the nested + - opList - list of tuples, one for each operator precedence level in the + expression grammar; each tuple is of the form + (opExpr, numTerms, rightLeftAssoc, parseAction), where: + - opExpr is the pyparsing expression for the operator; + may also be a string, which will be converted to a Literal; + if numTerms is 3, opExpr is a tuple of two expressions, for the + two operators separating the 3 terms + - numTerms is the number of terms for this operator (must + be 1, 2, or 3) + - rightLeftAssoc is the indicator whether the operator is + right or left associative, using the pyparsing-defined + constants opAssoc.RIGHT and opAssoc.LEFT. + - parseAction is the parse action to be associated with + expressions matching this operator expression (the + parse action tuple member may be omitted) + """ + ret = Forward() + lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) + for i,operDef in enumerate(opList): + opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] + if arity == 3: + if opExpr is None or len(opExpr) != 2: + raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") + opExpr1, opExpr2 = opExpr + thisExpr = Forward()#.setName("expr%d" % i) + if rightLeftAssoc == opAssoc.LEFT: + if arity == 1: + matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) + else: + matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ + Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + elif rightLeftAssoc == opAssoc.RIGHT: + if arity == 1: + # try to avoid LR with this extra test + if not isinstance(opExpr, Optional): + opExpr = Optional(opExpr) + matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) + elif arity == 2: + if opExpr is not None: + matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) + else: + matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) + elif arity == 3: + matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ + Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + else: + raise ValueError("operator must indicate right or left associativity") + if pa: + matchExpr.setParseAction( pa ) + thisExpr << ( matchExpr | lastExpr ) + lastExpr = thisExpr + ret << lastExpr + return ret + +dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") +sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") +quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") +unicodeString = Combine(_L('u') + quotedString.copy()) + +def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): + """Helper method for defining nested lists enclosed in opening and closing + delimiters ("(" and ")" are the default). + + Parameters: + - opener - opening character for a nested list (default="("); can also be a pyparsing expression + - closer - closing character for a nested list (default=")"); can also be a pyparsing expression + - content - expression for items within the nested lists (default=None) + - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) + + If an expression is not provided for the content argument, the nested + expression will capture all whitespace-delimited content between delimiters + as a list of separate values. + + Use the ignoreExpr argument to define expressions that may contain + opening or closing characters that should not be treated as opening + or closing characters for nesting, such as quotedString or a comment + expression. Specify multiple expressions using an Or or MatchFirst. + The default is quotedString, but if no expressions are to be ignored, + then pass None for this argument. + """ + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener,basestring) and isinstance(closer,basestring): + if len(opener) == 1 and len(closer)==1: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t:t[0].strip())) + else: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + ~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) + ).setParseAction(lambda t:t[0].strip())) + else: + raise ValueError("opening and closing arguments must be strings if no content expression is given") + ret = Forward() + if ignoreExpr is not None: + ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) + else: + ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) + return ret + +def indentedBlock(blockStatementExpr, indentStack, indent=True): + """Helper method for defining space-delimited indentation blocks, such as + those used to define block statements in Python source code. + + Parameters: + - blockStatementExpr - expression defining syntax of statement that + is repeated within the indented block + - indentStack - list created by caller to manage indentation stack + (multiple statementWithIndentedBlock expressions within a single grammar + should share a common indentStack) + - indent - boolean indicating whether block must be indented beyond the + the current level; set to False for block of left-most statements + (default=True) + + A valid block must contain at least one blockStatement. + """ + def checkPeerIndent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseFatalException(s,l,"illegal nesting") + raise ParseException(s,l,"not a peer entry") + + def checkSubIndent(s,l,t): + curCol = col(l,s) + if curCol > indentStack[-1]: + indentStack.append( curCol ) + else: + raise ParseException(s,l,"not a subentry") + + def checkUnindent(s,l,t): + if l >= len(s): return + curCol = col(l,s) + if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): + raise ParseException(s,l,"not an unindent") + indentStack.pop() + + NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) + INDENT = Empty() + Empty().setParseAction(checkSubIndent) + PEER = Empty().setParseAction(checkPeerIndent) + UNDENT = Empty().setParseAction(checkUnindent) + if indent: + smExpr = Group( Optional(NL) + + FollowedBy(blockStatementExpr) + + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) + else: + smExpr = Group( Optional(NL) + + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) +commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";") +_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) +replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") + +htmlComment = Regex(r"") +restOfLine = Regex(r".*").leaveWhitespace() +dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") +cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) + print ("tokens = " + str(tokens)) + print ("tokens.columns = " + str(tokens.columns)) + print ("tokens.tables = " + str(tokens.tables)) + print (tokens.asXML("SQL",True)) + except ParseBaseException,err: + print (teststring + "->") + print (err.line) + print (" "*(err.column-1) + "^") + print (err) + print() + + selectToken = CaselessLiteral( "select" ) + fromToken = CaselessLiteral( "from" ) + + ident = Word( alphas, alphanums + "_$" ) + columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + columnNameList = Group( delimitedList( columnName ) )#.setName("columns") + tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) + tableNameList = Group( delimitedList( tableName ) )#.setName("tables") + simpleSQL = ( selectToken + \ + ( '*' | columnNameList ).setResultsName( "columns" ) + \ + fromToken + \ + tableNameList.setResultsName( "tables" ) ) + + test( "SELECT * from XYZZY, ABC" ) + test( "select * from SYS.XYZZY" ) + test( "Select A from Sys.dual" ) + test( "Select AA,BB,CC from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Select A, B, C from Sys.dual" ) + test( "Xelect A, B, C from Sys.dual" ) + test( "Select A, B, C frox Sys.dual" ) + test( "Select" ) + test( "Select ^^^ frox Sys.dual" ) + test( "Select A, B, C from Sys.dual, Table2 " ) diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index 0c6e48131e..faeb65c193 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -21,7 +21,8 @@ recipe_modules = ['recipe_' + r for r in ( 'linux_magazine', 'telegraph_uk', 'utne', 'sciencedaily', 'forbes', 'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik', 'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet', - 'joelonsoftware', 'telepolis', 'common_dreams', 'nin', + 'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de', + )] import re, imp, inspect, time, os diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware.py b/src/calibre/web/feeds/recipes/recipe_tomshardware.py index 657a6911f7..b0b155666f 100644 --- a/src/calibre/web/feeds/recipes/recipe_tomshardware.py +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware.py @@ -6,7 +6,6 @@ __copyright__ = '2008, Darko Miletic ' tomshardware.com ''' -from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.web.feeds.recipes import BasicNewsRecipe class Tomshardware(BasicNewsRecipe): @@ -50,7 +49,7 @@ class Tomshardware(BasicNewsRecipe): rmain, rsep, article_id = main.rpartition(',') tmain, tsep, trest = rmain.rpartition('/reviews/') if tsep: - return 'http://www.tomshardware.com/review_print.php?p1=' + article_id + return 'http://www.tomshardware.com/review_print.php?p1=' + article_id return 'http://www.tomshardware.com/news_print.php?p1=' + article_id def preprocess_html(self, soup): diff --git a/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py new file mode 100644 index 0000000000..78d820e246 --- /dev/null +++ b/src/calibre/web/feeds/recipes/recipe_tomshardware_de.py @@ -0,0 +1,54 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch tomshardware. +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + + +class TomsHardwareDe(BasicNewsRecipe): + + title = 'Tom\'s Hardware German' + description = 'Computer news in german' + __author__ = 'Oliver Niesner' + use_embedded_content = False + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 50 + no_stylesheets = True + encoding = 'utf-8' + + #preprocess_regexps = \ +# [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in +# [ +# (r'<84>', lambda match: ''), +# (r'<93>', lambda match: ''), +# ] +# ] + + remove_tags = [dict(id='outside-advert'), + dict(id='advertRightWhite'), + dict(id='header-advert'), + dict(id='header-banner'), + dict(id='header-menu'), + dict(id='header-top'), + dict(id='header-tools'), + dict(id='nbComment'), + dict(id='internalSidebar'), + dict(id='header-news-infos'), + dict(id='breadcrumbs'), + dict(id=''), + dict(name='div', attrs={'class':'pyjama'}), + dict(name='href', attrs={'class':'comment'}), + dict(name='div', attrs={'class':'greyBoxR clearfix'}), + dict(name='div', attrs={'class':'greyBoxL clearfix'}), + dict(name='div', attrs={'class':'greyBox clearfix'}), + dict(id='')] + #remove_tags_before = [dict(id='header-news-title')] + remove_tags_after = [dict(name='div', attrs={'class':'news-elm'})] + #remove_tags_after = [dict(name='div', attrs={'class':'intelliTXT'})] + + feeds = [ ('tomshardware', 'http://www.tomshardware.com/de/feeds/rss2/tom-s-hardware-de,12-1.xml') ] +