Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Merge branch 'misc-fixups' of https://github.com/eli-schwartz/calibre
Commit 1372fe239b
@@ -632,6 +632,16 @@
         }
     },

+    {
+        "name": "sgmllib3k",
+        "python": 3,
+        "unix": {
+            "filename": "sgmllib3k-1.0.0.tar.gz",
+            "hash": "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9",
+            "urls": ["pypi"]
+        }
+    },
+
     {
         "name": "markdown",
         "unix": {
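The entry above packages sgmllib3k from PyPI, which installs a top-level sgmllib module on Python 3 (where the stdlib module was removed) so that feedparser can keep importing it; the vendored copy in src/sgmllib.py is deleted at the end of this commit. A minimal sketch of what the package provides, assuming sgmllib3k is installed:

    import sgmllib  # provided by sgmllib3k on Python 3

    class LinkLister(sgmllib.SGMLParser):
        # Collect href attributes from <a> start tags.
        def __init__(self):
            sgmllib.SGMLParser.__init__(self)
            self.links = []

        def start_a(self, attrs):
            self.links.extend(v for k, v in attrs if k == 'href')

    p = LinkLister()
    p.feed('<a href="http://example.com">example</a>')
    p.close()
    print(p.links)  # ['http://example.com']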
@@ -8,7 +8,7 @@ import subprocess
 import sys
 import unittest

-from setup import SRC, Command, islinux, ismacos, iswindows
+from setup import SRC, Command, isbsd, islinux, ismacos, iswindows

 TEST_MODULES = frozenset('srv db polish opf css docx cfi matcher icu smartypants build misc dbcli ebooks'.split())
@@ -50,8 +50,10 @@ class TestImports(unittest.TestCase):
         exclude_modules |= {
             'calibre.linux',
             'calibre.utils.linux_trash', 'calibre.utils.open_with.linux',
-            'calibre.gui2.linux_file_dialogs', 'calibre.devices.usbms.hal',
+            'calibre.gui2.linux_file_dialogs',
         }
+        if not isbsd:
+            exclude_modules.add('calibre.devices.usbms.hal')
         self.assertGreater(self.base_check(os.path.join(SRC, 'odf'), exclude_packages, exclude_modules), 10)
         base = os.path.join(SRC, 'calibre')
         self.assertGreater(self.base_check(base, exclude_packages, exclude_modules), 1000)
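With the new isbsd flag, calibre.devices.usbms.hal is only skipped by the import test when not running on a BSD, i.e. the module is now actually import-checked on the platforms where it is used. A hypothetical sketch of how a platform flag like isbsd is commonly derived; the real definition lives in calibre's setup package and may differ:

    import sys

    # Illustration only: simple sys.platform checks.
    islinux = sys.platform.startswith('linux')
    isbsd = ('freebsd' in sys.platform or 'netbsd' in sys.platform
             or 'openbsd' in sys.platform)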
@@ -750,7 +750,7 @@ def initialize_plugins(perf=False):
                 times[plugin.name] = time.time() - st
             _initialized_plugins.append(plugin)
         except:
-            print('Failed to initialize plugin:', repr(zfp))
+            print('Failed to initialize plugin:', repr(zfp), file=sys.stderr)
             if DEBUG:
                 traceback.print_exc()
     # Prevent a custom plugin from overriding stdout/stderr as this breaks
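Routing the failure message to stderr keeps plugin errors out of stdout, which other tooling may be consuming as data. A small standalone illustration of the difference, not calibre code:

    import subprocess
    import sys

    # The error text goes to stderr, so capturing stdout still yields only real output.
    proc = subprocess.run(
        [sys.executable, '-c',
         "import sys; print('real output'); print('Failed to initialize plugin:', file=sys.stderr)"],
        capture_output=True, text=True)
    print(repr(proc.stdout))  # 'real output\n'
    print(repr(proc.stderr))  # 'Failed to initialize plugin:\n'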
@@ -182,7 +182,8 @@ class Convert:
             indent = float(style.text_indent[:-2]) + indent
         style.text_indent = '%.3gpt' % indent
         parent.text = tabs[-1].tail or ''
-        list(map(parent.remove, tabs))
+        for i in tabs:
+            parent.remove(i)

         self.images.rid_map = orig_rid_map

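Most of the remaining hunks in this commit make the same change: Python 3's map() is lazy, so code that used it purely for side effects had to wrap it in list() or tuple() just to force evaluation, building a throwaway container in the process. An explicit loop states the intent directly and allocates nothing. A standalone illustration of the pattern:

    items = ['a', 'b', 'c']

    # Old style: the tuple() exists only to drain the lazy map object.
    removed = []
    tuple(map(removed.append, items))

    # New style: the loop says what is actually happening.
    removed2 = []
    for x in items:
        removed2.append(x)

    assert removed == removed2 == items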
@@ -65,8 +65,8 @@ class TAGX: # {{{
         '''
         TAGX block for the Primary index header of a periodical
         '''
-        list(map(self.add_tag, (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72,
-                 73, 0)))
+        for i in (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72,73, 0):
+            self.add_tag(i)
         return self.header(2) + bytes(self.byts)

     @property
@@ -74,7 +74,8 @@ class TAGX: # {{{
         '''
         TAGX block for the secondary index header of a periodical
         '''
-        list(map(self.add_tag, (11, 0)))
+        for i in (11, 0):
+            self.add_tag(i)
         return self.header(1) + bytes(self.byts)

     @property
@@ -82,7 +83,8 @@ class TAGX: # {{{
         '''
         TAGX block for the primary index header of a flat book
         '''
-        list(map(self.add_tag, (1, 2, 3, 4, 0)))
+        for i in (1, 2, 3, 4, 0):
+            self.add_tag(i)
         return self.header(1) + bytes(self.byts)


@@ -65,7 +65,8 @@ def merge_multiple_html_heads_and_bodies(root, log=None):
     for b in bodies:
         for x in b:
             body.append(x)
-    tuple(map(root.append, (head, body)))
+    for x in (head, body):
+        root.append(x)
     if log is not None:
         log.warn('Merging multiple <head> and <body> sections')
     return root
@@ -221,7 +221,8 @@ class Pool:
             if not sip.isdeleted(x):
                 sip.delete(x)

-        tuple(map(safe_delete, self.workers))
+        for i in self.workers:
+            safe_delete(i)
         self.workers = []


@@ -410,7 +410,8 @@ def check_external_links(container, progress_callback=(lambda num, total:None),
         return []
     items = Queue()
     ans = []
-    tuple(map(items.put, iteritems(external_links)))
+    for el in iteritems(external_links):
+        items.put(el)
     progress_callback(0, len(external_links))
     done = []
     downloaded_html_ids = {}
@@ -825,7 +825,8 @@ class Container(ContainerBase): # {{{
         imap = {name:item_id for item_id, name in iteritems(imap)}
         items = [item for item, name, linear in self.spine_iter]
         tail, last_tail = (items[0].tail, items[-1].tail) if items else ('\n ', '\n ')
-        tuple(map(self.remove_from_xml, items))
+        for i in items:
+            self.remove_from_xml(i)
         spine = self.opf_xpath('//opf:spine')[0]
         spine.text = tail
         for name, linear in spine_items:
@@ -384,7 +384,8 @@ def remove_links_to(container, predicate):
             removed = remove_links_in_sheet(partial(container.href_to_name, base=name), container.parsed(name), predicate)
         if removed:
             changed.add(name)
-    tuple(map(container.dirty, changed))
+    for i in changed:
+        container.dirty(i)
     return changed


@@ -455,8 +455,8 @@ def add_anchors_markup(root, uuid, anchors):
         # rescale the viewport
         a.tail = '\n'
         div.append(a)
-    a.count = 0
-    tuple(map(a, anchors))
+    for anchor in anchors:
+        a(anchor)
     a(uuid)


@@ -490,7 +490,8 @@ def get_cover(metadata):

 def get_covers(themes, dialog, num_of_workers=8):
     items = Queue()
-    tuple(map(items.put, themes))
+    for i in themes:
+        items.put(i)

     def callback(metadata, x):
         if not sip.isdeleted(dialog) and not dialog.dialog_closed:
@@ -305,7 +305,7 @@ def main(args=sys.argv, logger=None):
         return 1
     pid = os.fork() if (islinux or isbsd) else -1
     if pid <= 0:
-        override = 'calibre-lrf-viewer' if islinux else None
+        override = 'calibre-lrfviewer' if islinux else None
         app = Application(args, override_program_name=override)
         app.setWindowIcon(QIcon(I('viewer.png')))
         opts = normalize_settings(parser, opts)
@@ -217,7 +217,7 @@ class Choices(QComboBox):
         self.setEditable(False)
         opt = options[name]
         self.choices = opt.choices
-        tuple(map(self.addItem, opt.choices))
+        self.addItems(opt.choices)
         self.currentIndexChanged.connect(self.changed_signal.emit)
         init_opt(self, opt, layout)

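Here the map() workaround is not even needed, because Qt already has a bulk method: QComboBox.addItems() accepts an iterable of strings. A minimal sketch of that call; the import path assumes plain PyQt5, whereas calibre imports Qt through its own wrapper layer:

    from PyQt5.QtWidgets import QApplication, QComboBox

    app = QApplication([])
    box = QComboBox()
    box.addItems(['auto', 'always', 'never'])  # bulk insert instead of per-item addItem()
    print(box.count())  # 3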
@@ -600,8 +600,8 @@ class TagBrowserBar(QWidget): # {{{
         find_shown = self.toggle_search_button.isChecked()
         self.toggle_search_button.setVisible(not find_shown)
         l = self.layout()
-        items = [l.itemAt(i) for i in range(l.count())]
-        tuple(map(l.removeItem, items))
+        for i in (l.itemAt(i) for i in range(l.count())):
+            l.removeItem(i)
         if find_shown:
             l.addWidget(self.alter_tb)
             self.alter_tb.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonIconOnly)
@@ -391,7 +391,8 @@ class Boss(QObject):
         if ef:
             if isinstance(ef, str):
                 ef = [ef]
-            tuple(map(self.gui.file_list.request_edit, ef))
+            for i in ef:
+                self.gui.file_list.request_edit(i)
         else:
             if tprefs['restore_book_state']:
                 self.restore_book_edit_state()
@@ -1129,7 +1129,8 @@ class MergeDialog(QDialog): # {{{

         buttons = self.buttons = [QRadioButton(n) for n in names]
         buttons[0].setChecked(True)
-        tuple(map(w.l.addWidget, buttons))
+        for i in buttons:
+            w.l.addWidget(i)
         sa.setWidget(w)

         self.resize(self.sizeHint() + QSize(150, 20))
@@ -70,7 +70,7 @@ def _run(args, notify=None):
     parser = option_parser()
     opts, args = parser.parse_args(args)
     decouple('edit-book-'), set_gui_prefs(tprefs)
-    override = 'calibre-edit-book' if islinux else None
+    override = 'calibre-ebook-edit' if islinux else None
     app = Application(args, override_program_name=override, color_prefs=tprefs, windows_app_uid=EDITOR_APP_UID)
     app.file_event_hook = EventAccumulator()
     app.load_builtin_fonts()
@@ -431,6 +431,7 @@ class EbookViewer(MainWindow):
         if not hasattr(set_book_path, 'pathtoebook'):
             error_dialog(self, _('Cannot print book'), _(
                 'No book is currently open'), show=True)
+            return
         from .printing import print_book
         print_book(set_book_path.pathtoebook, book_title=self.current_book_data['metadata']['title'], parent=self)

@@ -254,7 +254,8 @@ class RequestData: # {{{

     def filesystem_file_with_custom_etag(self, output, *etag_parts):
         etag = hashlib.sha1()
-        tuple(map(lambda x:etag.update(str(x).encode('utf-8')), etag_parts))
+        for i in etag_parts:
+            etag.update(str(i).encode('utf-8'))
         return ETaggedFile(output, etag.hexdigest())

     def filesystem_file_with_constant_etag(self, output, etag_as_hexencoded_string):
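The rewritten loop feeds each etag part into the running SHA-1 digest; hashlib updates are cumulative, so this is equivalent to hashing the concatenation of the encoded parts. A standalone check of that property, with made-up illustrative values rather than calibre data:

    import hashlib

    parts = ('books.css', 1024, 1690000000.0)

    h = hashlib.sha1()
    for p in parts:
        h.update(str(p).encode('utf-8'))

    joined = ''.join(str(p) for p in parts).encode('utf-8')
    assert h.hexdigest() == hashlib.sha1(joined).hexdigest()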
@@ -194,7 +194,8 @@ def toc_anchor_map(toc):
         if name and node['id'] not in seen_map[name]:
             ans[name].append({'id':node['id'], 'frag':node['frag']})
             seen_map[name].add(node['id'])
-        tuple(map(process_node, node['children']))
+        for i in node['children']:
+            process_node(i)

     process_node(toc)
     return dict(ans)
@@ -266,14 +266,15 @@ def main(args):

     for c in chars:
         if '-' in c:
-            parts = [x.strip() for x in c.split('-')]
+            parts = tuple(x.strip() for x in c.split('-'))
             if len(parts) != 2:
                 prints('Invalid range:', c, file=sys.stderr)
                 raise SystemExit(1)
             if opts.codes:
                 parts = tuple(map(conv_code, parts))
-            tuple(map(not_single, parts))
-            ranges.add(tuple(parts))
+            for i in parts:
+                not_single(i)
+            ranges.add(parts)
         else:
             if opts.codes:
                 c = conv_code(c)
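Building parts as a tuple up front matters because ranges is filled with ranges.add(), and set members must be hashable; with parts already a tuple, the later ranges.add(tuple(parts)) conversion collapses to a plain ranges.add(parts). A hedged sketch of just that mechanic (conv_code and not_single are calibre helpers and are omitted here):

    ranges = set()
    c = ' a - z '

    parts = tuple(x.strip() for x in c.split('-'))
    assert parts == ('a', 'z')

    ranges.add(parts)          # tuples are hashable
    # ranges.add(['a', 'z'])   # would raise TypeError: unhashable type: 'list'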
src/sgmllib.py (574 lines, deleted in full):
@@ -1,574 +0,0 @@
"""A parser for SGML, using the derived class as a static DTD."""
|
|
||||||
# Needed for feedparser under python 3 where this module has been removed
|
|
||||||
|
|
||||||
# XXX This only supports those SGML features used by HTML.
|
|
||||||
|
|
||||||
# XXX There should be a way to distinguish between PCDATA (parsed
|
|
||||||
# character data -- the normal case), RCDATA (replaceable character
|
|
||||||
# data -- only char and entity references and end tags are special)
|
|
||||||
# and CDATA (character data -- only end tags are special). RCDATA is
|
|
||||||
# not supported at all.
|
|
||||||
|
|
||||||
import _markupbase
|
|
||||||
import re
|
|
||||||
|
|
||||||
__all__ = ["SGMLParser", "SGMLParseError"]
|
|
||||||
|
|
||||||
# Regular expressions used for parsing
|
|
||||||
|
|
||||||
interesting = re.compile('[&<]')
|
|
||||||
incomplete = re.compile(
|
|
||||||
'&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
|
|
||||||
'<([a-zA-Z][^<>]*|'
|
|
||||||
'/([a-zA-Z][^<>]*)?|'
|
|
||||||
'![^<>]*)?'
|
|
||||||
)
|
|
||||||
|
|
||||||
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
|
||||||
charref = re.compile('&#([0-9]+)[^0-9]')
|
|
||||||
|
|
||||||
starttagopen = re.compile('<[>a-zA-Z]')
|
|
||||||
shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
|
|
||||||
shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
|
|
||||||
piclose = re.compile('>')
|
|
||||||
endbracket = re.compile('[<>]')
|
|
||||||
tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
|
|
||||||
attrfind = re.compile(
|
|
||||||
r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
|
|
||||||
r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SGMLParseError(RuntimeError):
|
|
||||||
"""Exception raised for all parse errors."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# SGML parser base class -- find tags and call handler functions.
|
|
||||||
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
|
|
||||||
# The dtd is defined by deriving a class which defines methods
|
|
||||||
# with special names to handle tags: start_foo and end_foo to handle
|
|
||||||
# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
|
|
||||||
# (Tags are converted to lower case for this purpose.) The data
|
|
||||||
# between tags is passed to the parser by calling self.handle_data()
|
|
||||||
# with some data as argument (the data may be split up in arbitrary
|
|
||||||
# chunks). Entity references are passed by calling
|
|
||||||
# self.handle_entityref() with the entity reference as argument.
|
|
||||||
|
|
||||||
|
|
||||||
class SGMLParser(_markupbase.ParserBase):
|
|
||||||
# Definition of entities -- derived classes may override
|
|
||||||
entity_or_charref = re.compile(
|
|
||||||
'&(?:'
|
|
||||||
'([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
|
|
||||||
')(;?)'
|
|
||||||
)
|
|
||||||
|
|
||||||
def __init__(self, verbose=0):
|
|
||||||
"""Initialize and reset this instance."""
|
|
||||||
self.verbose = verbose
|
|
||||||
self.reset()
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
"""Reset this instance. Loses all unprocessed data."""
|
|
||||||
self.__starttag_text = None
|
|
||||||
self.rawdata = ''
|
|
||||||
self.stack = []
|
|
||||||
self.lasttag = '???'
|
|
||||||
self.nomoretags = 0
|
|
||||||
self.literal = 0
|
|
||||||
_markupbase.ParserBase.reset(self)
|
|
||||||
|
|
||||||
def setnomoretags(self):
|
|
||||||
"""Enter literal mode (CDATA) till EOF.
|
|
||||||
|
|
||||||
Intended for derived classes only.
|
|
||||||
"""
|
|
||||||
self.nomoretags = self.literal = 1
|
|
||||||
|
|
||||||
def setliteral(self, *args):
|
|
||||||
"""Enter literal mode (CDATA).
|
|
||||||
|
|
||||||
Intended for derived classes only.
|
|
||||||
"""
|
|
||||||
self.literal = 1
|
|
||||||
|
|
||||||
def feed(self, data):
|
|
||||||
"""Feed some data to the parser.
|
|
||||||
|
|
||||||
Call this as often as you want, with as little or as much text
|
|
||||||
as you want (may include '\n'). (This just saves the text,
|
|
||||||
all the processing is done by goahead().)
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.rawdata = self.rawdata + data
|
|
||||||
self.goahead(0)
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
"""Handle the remaining data."""
|
|
||||||
self.goahead(1)
|
|
||||||
|
|
||||||
def error(self, message):
|
|
||||||
raise SGMLParseError(message)
|
|
||||||
|
|
||||||
# Internal -- handle data as far as reasonable. May leave state
|
|
||||||
# and data to be processed by a subsequent call. If 'end' is
|
|
||||||
# true, force handling all data as if followed by EOF marker.
|
|
||||||
def goahead(self, end):
|
|
||||||
rawdata = self.rawdata
|
|
||||||
i = 0
|
|
||||||
n = len(rawdata)
|
|
||||||
while i < n:
|
|
||||||
if self.nomoretags:
|
|
||||||
self.handle_data(rawdata[i:n])
|
|
||||||
i = n
|
|
||||||
break
|
|
||||||
match = interesting.search(rawdata, i)
|
|
||||||
if match:
|
|
||||||
j = match.start()
|
|
||||||
else:
|
|
||||||
j = n
|
|
||||||
if i < j:
|
|
||||||
self.handle_data(rawdata[i:j])
|
|
||||||
i = j
|
|
||||||
if i == n:
|
|
||||||
break
|
|
||||||
if rawdata[i] == '<':
|
|
||||||
if starttagopen.match(rawdata, i):
|
|
||||||
if self.literal:
|
|
||||||
self.handle_data(rawdata[i])
|
|
||||||
i = i + 1
|
|
||||||
continue
|
|
||||||
k = self.parse_starttag(i)
|
|
||||||
if k < 0:
|
|
||||||
break
|
|
||||||
i = k
|
|
||||||
continue
|
|
||||||
if rawdata.startswith("</", i):
|
|
||||||
k = self.parse_endtag(i)
|
|
||||||
if k < 0:
|
|
||||||
break
|
|
||||||
i = k
|
|
||||||
self.literal = 0
|
|
||||||
continue
|
|
||||||
if self.literal:
|
|
||||||
if n > (i + 1):
|
|
||||||
self.handle_data("<")
|
|
||||||
i = i + 1
|
|
||||||
else:
|
|
||||||
# incomplete
|
|
||||||
break
|
|
||||||
continue
|
|
||||||
if rawdata.startswith("<!--", i):
|
|
||||||
# Strictly speaking, a comment is --.*--
|
|
||||||
# within a declaration tag <!...>.
|
|
||||||
# This should be removed,
|
|
||||||
# and comments handled only in parse_declaration.
|
|
||||||
k = self.parse_comment(i)
|
|
||||||
if k < 0:
|
|
||||||
break
|
|
||||||
i = k
|
|
||||||
continue
|
|
||||||
if rawdata.startswith("<?", i):
|
|
||||||
k = self.parse_pi(i)
|
|
||||||
if k < 0:
|
|
||||||
break
|
|
||||||
i = i + k
|
|
||||||
continue
|
|
||||||
if rawdata.startswith("<!", i):
|
|
||||||
# This is some sort of declaration; in "HTML as
|
|
||||||
# deployed," this should only be the document type
|
|
||||||
# declaration ("<!DOCTYPE html...>").
|
|
||||||
k = self.parse_declaration(i)
|
|
||||||
if k < 0:
|
|
||||||
break
|
|
||||||
i = k
|
|
||||||
continue
|
|
||||||
elif rawdata[i] == '&':
|
|
||||||
if self.literal:
|
|
||||||
self.handle_data(rawdata[i])
|
|
||||||
i = i + 1
|
|
||||||
continue
|
|
||||||
match = charref.match(rawdata, i)
|
|
||||||
if match:
|
|
||||||
name = match.group(1)
|
|
||||||
self.handle_charref(name)
|
|
||||||
i = match.end(0)
|
|
||||||
if rawdata[i - 1] != ';':
|
|
||||||
i = i - 1
|
|
||||||
continue
|
|
||||||
match = entityref.match(rawdata, i)
|
|
||||||
if match:
|
|
||||||
name = match.group(1)
|
|
||||||
self.handle_entityref(name)
|
|
||||||
i = match.end(0)
|
|
||||||
if rawdata[i - 1] != ';':
|
|
||||||
i = i - 1
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
self.error('neither < nor & ??')
|
|
||||||
# We get here only if incomplete matches but
|
|
||||||
# nothing else
|
|
||||||
match = incomplete.match(rawdata, i)
|
|
||||||
if not match:
|
|
||||||
self.handle_data(rawdata[i])
|
|
||||||
i = i + 1
|
|
||||||
continue
|
|
||||||
j = match.end(0)
|
|
||||||
if j == n:
|
|
||||||
break # Really incomplete
|
|
||||||
self.handle_data(rawdata[i:j])
|
|
||||||
i = j
|
|
||||||
# end while
|
|
||||||
if end and i < n:
|
|
||||||
self.handle_data(rawdata[i:n])
|
|
||||||
i = n
|
|
||||||
self.rawdata = rawdata[i:]
|
|
||||||
# XXX if end: check for empty stack
|
|
||||||
|
|
||||||
# Extensions for the DOCTYPE scanner:
|
|
||||||
_decl_otherchars = '='
|
|
||||||
|
|
||||||
# Internal -- parse processing instr, return length or -1 if not terminated
|
|
||||||
def parse_pi(self, i):
|
|
||||||
rawdata = self.rawdata
|
|
||||||
if rawdata[i:i + 2] != '<?':
|
|
||||||
self.error('unexpected call to parse_pi()')
|
|
||||||
match = piclose.search(rawdata, i + 2)
|
|
||||||
if not match:
|
|
||||||
return -1
|
|
||||||
j = match.start(0)
|
|
||||||
self.handle_pi(rawdata[i + 2:j])
|
|
||||||
j = match.end(0)
|
|
||||||
return j - i
|
|
||||||
|
|
||||||
def get_starttag_text(self):
|
|
||||||
return self.__starttag_text
|
|
||||||
|
|
||||||
# Internal -- handle starttag, return length or -1 if not terminated
|
|
||||||
def parse_starttag(self, i):
|
|
||||||
self.__starttag_text = None
|
|
||||||
start_pos = i
|
|
||||||
rawdata = self.rawdata
|
|
||||||
if shorttagopen.match(rawdata, i):
|
|
||||||
# SGML shorthand: <tag/data/ == <tag>data</tag>
|
|
||||||
# XXX Can data contain &... (entity or char refs)?
|
|
||||||
# XXX Can data contain < or > (tag characters)?
|
|
||||||
# XXX Can there be whitespace before the first /?
|
|
||||||
match = shorttag.match(rawdata, i)
|
|
||||||
if not match:
|
|
||||||
return -1
|
|
||||||
tag, data = match.group(1, 2)
|
|
||||||
self.__starttag_text = '<%s/' % tag
|
|
||||||
tag = tag.lower()
|
|
||||||
k = match.end(0)
|
|
||||||
self.finish_shorttag(tag, data)
|
|
||||||
self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
|
|
||||||
return k
|
|
||||||
# XXX The following should skip matching quotes (' or ")
|
|
||||||
# As a shortcut way to exit, this isn't so bad, but shouldn't
|
|
||||||
# be used to locate the actual end of the start tag since the
|
|
||||||
# < or > characters may be embedded in an attribute value.
|
|
||||||
match = endbracket.search(rawdata, i + 1)
|
|
||||||
if not match:
|
|
||||||
return -1
|
|
||||||
j = match.start(0)
|
|
||||||
# Now parse the data between i+1 and j into a tag and attrs
|
|
||||||
attrs = []
|
|
||||||
if rawdata[i:i + 2] == '<>':
|
|
||||||
# SGML shorthand: <> == <last open tag seen>
|
|
||||||
k = j
|
|
||||||
tag = self.lasttag
|
|
||||||
else:
|
|
||||||
match = tagfind.match(rawdata, i + 1)
|
|
||||||
if not match:
|
|
||||||
self.error('unexpected call to parse_starttag')
|
|
||||||
k = match.end(0)
|
|
||||||
tag = rawdata[i + 1:k].lower()
|
|
||||||
self.lasttag = tag
|
|
||||||
while k < j:
|
|
||||||
match = attrfind.match(rawdata, k)
|
|
||||||
if not match:
|
|
||||||
break
|
|
||||||
attrname, rest, attrvalue = match.group(1, 2, 3)
|
|
||||||
if not rest:
|
|
||||||
attrvalue = attrname
|
|
||||||
else:
|
|
||||||
if (
|
|
||||||
attrvalue[:1] == "'" == attrvalue[-1:] or
|
|
||||||
attrvalue[:1] == '"' == attrvalue[-1:]
|
|
||||||
):
|
|
||||||
# strip quotes
|
|
||||||
attrvalue = attrvalue[1:-1]
|
|
||||||
attrvalue = self.entity_or_charref.sub(self._convert_ref, attrvalue)
|
|
||||||
attrs.append((attrname.lower(), attrvalue))
|
|
||||||
k = match.end(0)
|
|
||||||
if rawdata[j] == '>':
|
|
||||||
j = j + 1
|
|
||||||
self.__starttag_text = rawdata[start_pos:j]
|
|
||||||
self.finish_starttag(tag, attrs)
|
|
||||||
return j
|
|
||||||
|
|
||||||
# Internal -- convert entity or character reference
|
|
||||||
def _convert_ref(self, match):
|
|
||||||
if match.group(2):
|
|
||||||
return self.convert_charref(match.group(2)) or \
|
|
||||||
'&#%s%s' % match.groups()[1:]
|
|
||||||
elif match.group(3):
|
|
||||||
return self.convert_entityref(match.group(1)) or \
|
|
||||||
'&%s;' % match.group(1)
|
|
||||||
else:
|
|
||||||
return '&%s' % match.group(1)
|
|
||||||
|
|
||||||
# Internal -- parse endtag
|
|
||||||
def parse_endtag(self, i):
|
|
||||||
rawdata = self.rawdata
|
|
||||||
match = endbracket.search(rawdata, i + 1)
|
|
||||||
if not match:
|
|
||||||
return -1
|
|
||||||
j = match.start(0)
|
|
||||||
tag = rawdata[i + 2:j].strip().lower()
|
|
||||||
if rawdata[j] == '>':
|
|
||||||
j = j + 1
|
|
||||||
self.finish_endtag(tag)
|
|
||||||
return j
|
|
||||||
|
|
||||||
# Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
|
|
||||||
def finish_shorttag(self, tag, data):
|
|
||||||
self.finish_starttag(tag, [])
|
|
||||||
self.handle_data(data)
|
|
||||||
self.finish_endtag(tag)
|
|
||||||
|
|
||||||
# Internal -- finish processing of start tag
|
|
||||||
# Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
|
|
||||||
def finish_starttag(self, tag, attrs):
|
|
||||||
try:
|
|
||||||
method = getattr(self, 'start_' + tag)
|
|
||||||
except AttributeError:
|
|
||||||
try:
|
|
||||||
method = getattr(self, 'do_' + tag)
|
|
||||||
except AttributeError:
|
|
||||||
self.unknown_starttag(tag, attrs)
|
|
||||||
return -1
|
|
||||||
else:
|
|
||||||
self.handle_starttag(tag, method, attrs)
|
|
||||||
return 0
|
|
||||||
else:
|
|
||||||
self.stack.append(tag)
|
|
||||||
self.handle_starttag(tag, method, attrs)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# Internal -- finish processing of end tag
|
|
||||||
def finish_endtag(self, tag):
|
|
||||||
if not tag:
|
|
||||||
found = len(self.stack) - 1
|
|
||||||
if found < 0:
|
|
||||||
self.unknown_endtag(tag)
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
if tag not in self.stack:
|
|
||||||
try:
|
|
||||||
method = getattr(self, 'end_' + tag)
|
|
||||||
except AttributeError:
|
|
||||||
self.unknown_endtag(tag)
|
|
||||||
else:
|
|
||||||
self.report_unbalanced(tag)
|
|
||||||
return
|
|
||||||
found = len(self.stack)
|
|
||||||
for i in range(found):
|
|
||||||
if self.stack[i] == tag:
|
|
||||||
found = i
|
|
||||||
while len(self.stack) > found:
|
|
||||||
tag = self.stack[-1]
|
|
||||||
try:
|
|
||||||
method = getattr(self, 'end_' + tag)
|
|
||||||
except AttributeError:
|
|
||||||
method = None
|
|
||||||
if method:
|
|
||||||
self.handle_endtag(tag, method)
|
|
||||||
else:
|
|
||||||
self.unknown_endtag(tag)
|
|
||||||
del self.stack[-1]
|
|
||||||
|
|
||||||
# Overridable -- handle start tag
|
|
||||||
def handle_starttag(self, tag, method, attrs):
|
|
||||||
method(attrs)
|
|
||||||
|
|
||||||
# Overridable -- handle end tag
|
|
||||||
def handle_endtag(self, tag, method):
|
|
||||||
method()
|
|
||||||
|
|
||||||
# Example -- report an unbalanced </...> tag.
|
|
||||||
def report_unbalanced(self, tag):
|
|
||||||
if self.verbose:
|
|
||||||
print('*** Unbalanced </' + tag + '>')
|
|
||||||
print('*** Stack:', self.stack)
|
|
||||||
|
|
||||||
def convert_charref(self, name):
|
|
||||||
"""Convert character reference, may be overridden."""
|
|
||||||
try:
|
|
||||||
n = int(name)
|
|
||||||
except ValueError:
|
|
||||||
return
|
|
||||||
if not 0 <= n <= 127:
|
|
||||||
return
|
|
||||||
return self.convert_codepoint(n)
|
|
||||||
|
|
||||||
def convert_codepoint(self, codepoint):
|
|
||||||
return chr(codepoint)
|
|
||||||
|
|
||||||
def handle_charref(self, name):
|
|
||||||
"""Handle character reference, no need to override."""
|
|
||||||
replacement = self.convert_charref(name)
|
|
||||||
if replacement is None:
|
|
||||||
self.unknown_charref(name)
|
|
||||||
else:
|
|
||||||
self.handle_data(replacement)
|
|
||||||
|
|
||||||
# Definition of entities -- derived classes may override
|
|
||||||
entitydefs = \
|
|
||||||
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
|
|
||||||
|
|
||||||
def convert_entityref(self, name):
|
|
||||||
"""Convert entity references.
|
|
||||||
|
|
||||||
As an alternative to overriding this method; one can tailor the
|
|
||||||
results by setting up the self.entitydefs mapping appropriately.
|
|
||||||
"""
|
|
||||||
table = self.entitydefs
|
|
||||||
if name in table:
|
|
||||||
return table[name]
|
|
||||||
else:
|
|
||||||
return
|
|
||||||
|
|
||||||
def handle_entityref(self, name):
|
|
||||||
"""Handle entity references, no need to override."""
|
|
||||||
replacement = self.convert_entityref(name)
|
|
||||||
if replacement is None:
|
|
||||||
self.unknown_entityref(name)
|
|
||||||
else:
|
|
||||||
self.handle_data(replacement)
|
|
||||||
|
|
||||||
# Example -- handle data, should be overridden
|
|
||||||
def handle_data(self, data):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Example -- handle comment, could be overridden
|
|
||||||
def handle_comment(self, data):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Example -- handle declaration, could be overridden
|
|
||||||
def handle_decl(self, decl):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Example -- handle processing instruction, could be overridden
|
|
||||||
def handle_pi(self, data):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# To be overridden -- handlers for unknown objects
|
|
||||||
def unknown_starttag(self, tag, attrs):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def unknown_endtag(self, tag):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def unknown_charref(self, ref):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def unknown_entityref(self, ref):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestSGMLParser(SGMLParser):
|
|
||||||
|
|
||||||
def __init__(self, verbose=0):
|
|
||||||
self.testdata = ""
|
|
||||||
SGMLParser.__init__(self, verbose)
|
|
||||||
|
|
||||||
def handle_data(self, data):
|
|
||||||
self.testdata = self.testdata + data
|
|
||||||
if len(repr(self.testdata)) >= 70:
|
|
||||||
self.flush()
|
|
||||||
|
|
||||||
def flush(self):
|
|
||||||
data = self.testdata
|
|
||||||
if data:
|
|
||||||
self.testdata = ""
|
|
||||||
print('data:', repr(data))
|
|
||||||
|
|
||||||
def handle_comment(self, data):
|
|
||||||
self.flush()
|
|
||||||
r = repr(data)
|
|
||||||
if len(r) > 68:
|
|
||||||
r = r[:32] + '...' + r[-32:]
|
|
||||||
print('comment:', r)
|
|
||||||
|
|
||||||
def unknown_starttag(self, tag, attrs):
|
|
||||||
self.flush()
|
|
||||||
if not attrs:
|
|
||||||
print('start tag: <' + tag + '>')
|
|
||||||
else:
|
|
||||||
print('start tag: <' + tag, end=' ')
|
|
||||||
for name, value in attrs:
|
|
||||||
print(name + '=' + '"' + value + '"', end=' ')
|
|
||||||
print('>')
|
|
||||||
|
|
||||||
def unknown_endtag(self, tag):
|
|
||||||
self.flush()
|
|
||||||
print('end tag: </' + tag + '>')
|
|
||||||
|
|
||||||
def unknown_entityref(self, ref):
|
|
||||||
self.flush()
|
|
||||||
print('*** unknown entity ref: &' + ref + ';')
|
|
||||||
|
|
||||||
def unknown_charref(self, ref):
|
|
||||||
self.flush()
|
|
||||||
print('*** unknown char ref: &#' + ref + ';')
|
|
||||||
|
|
||||||
def unknown_decl(self, data):
|
|
||||||
self.flush()
|
|
||||||
print('*** unknown decl: [' + data + ']')
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
SGMLParser.close(self)
|
|
||||||
self.flush()
|
|
||||||
|
|
||||||
|
|
||||||
def test(args=None):
|
|
||||||
import sys
|
|
||||||
|
|
||||||
if args is None:
|
|
||||||
args = sys.argv[1:]
|
|
||||||
|
|
||||||
if args and args[0] == '-s':
|
|
||||||
args = args[1:]
|
|
||||||
klass = SGMLParser
|
|
||||||
else:
|
|
||||||
klass = TestSGMLParser
|
|
||||||
|
|
||||||
if args:
|
|
||||||
file = args[0]
|
|
||||||
else:
|
|
||||||
file = 'test.html'
|
|
||||||
|
|
||||||
if file == '-':
|
|
||||||
f = sys.stdin
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
f = open(file, 'r')
|
|
||||||
except IOError as msg:
|
|
||||||
print(file, ":", msg)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
data = f.read()
|
|
||||||
if f is not sys.stdin:
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
x = klass()
|
|
||||||
for c in data:
|
|
||||||
x.feed(c)
|
|
||||||
x.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
test()
|
|