From da8092c58aab78ec9b58c601ad7cb2830cb48d31 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Sun, 23 Feb 2020 01:34:34 -0500
Subject: [PATCH 1/9] do not use a side effect of allocating list(map(...)) to
 execute functions for each member of a list

---
 src/calibre/ebooks/docx/to_html.py           |  3 ++-
 src/calibre/ebooks/mobi/writer2/indexer.py   | 10 ++++++----
 src/calibre/ebooks/oeb/parse_utils.py        |  3 ++-
 src/calibre/ebooks/oeb/polish/check/css.py   |  3 ++-
 src/calibre/ebooks/oeb/polish/check/links.py |  3 ++-
 src/calibre/ebooks/oeb/polish/container.py   |  3 ++-
 src/calibre/ebooks/oeb/polish/replace.py     |  3 ++-
 src/calibre/ebooks/pdf/html_writer.py        |  3 ++-
 src/calibre/gui2/icon_theme.py               |  3 ++-
 src/calibre/gui2/preferences/server.py       |  2 +-
 src/calibre/gui2/tag_browser/ui.py           |  4 ++--
 src/calibre/gui2/tweak_book/boss.py          |  3 ++-
 src/calibre/gui2/tweak_book/file_list.py     |  3 ++-
 src/calibre/srv/http_response.py             |  3 ++-
 src/calibre/srv/render_book.py               |  3 ++-
 src/calibre/utils/fonts/sfnt/subset.py       |  3 ++-
 16 files changed, 35 insertions(+), 20 deletions(-)
diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py
index a10a6ce181..d284bae8c9 100644
--- a/src/calibre/ebooks/docx/to_html.py
+++ b/src/calibre/ebooks/docx/to_html.py
@@ -182,7 +182,8 @@ class Convert:
                         indent = float(style.text_indent[:-2]) + indent
                     style.text_indent = '%.3gpt' % indent
                     parent.text = tabs[-1].tail or ''
-                    list(map(parent.remove, tabs))
+                    for i in tabs:
+                        parent.remove(i)
 
         self.images.rid_map = orig_rid_map
 
diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py
index 0fc15b1161..40b7502ac5 100644
--- a/src/calibre/ebooks/mobi/writer2/indexer.py
+++ b/src/calibre/ebooks/mobi/writer2/indexer.py
@@ -65,8 +65,8 @@ class TAGX:  # {{{
         '''
         TAGX block for the Primary index header of a periodical
         '''
-        list(map(self.add_tag, (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72,
-            73, 0)))
+        for i in (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72,73, 0):
+            self.add_tag(i)
         return self.header(2) + bytes(self.byts)
 
     @property
@@ -74,7 +74,8 @@ class TAGX:  # {{{
         '''
         TAGX block for the secondary index header of a periodical
         '''
-        list(map(self.add_tag, (11, 0)))
+        for i in (11, 0):
+            self.add_tag(i)
         return self.header(1) + bytes(self.byts)
 
     @property
@@ -82,7 +83,8 @@ class TAGX:  # {{{
         '''
         TAGX block for the primary index header of a flat book
         '''
-        list(map(self.add_tag, (1, 2, 3, 4, 0)))
+        for i in (1, 2, 3, 4, 0):
+            self.add_tag(i)
         return self.header(1) + bytes(self.byts)
 
 
diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py
index f4557dbccb..19707b982b 100644
--- a/src/calibre/ebooks/oeb/parse_utils.py
+++ b/src/calibre/ebooks/oeb/parse_utils.py
@@ -65,7 +65,8 @@ def merge_multiple_html_heads_and_bodies(root, log=None):
     for b in bodies:
         for x in b:
             body.append(x)
-    tuple(map(root.append, (head, body)))
+    for x in (head, body):
+        root.append(x)
     if log is not None:
         log.warn('Merging multiple <head> and <body> sections')
     return root
diff --git a/src/calibre/ebooks/oeb/polish/check/css.py b/src/calibre/ebooks/oeb/polish/check/css.py
index 1332f7e31d..5e4987ae3e 100644
--- a/src/calibre/ebooks/oeb/polish/check/css.py
+++ b/src/calibre/ebooks/oeb/polish/check/css.py
@@ -221,7 +221,8 @@ class Pool:
             if not sip.isdeleted(x):
                 sip.delete(x)
 
-        tuple(map(safe_delete, self.workers))
+        for i in self.workers:
+            safe_delete(i)
         self.workers = []
 
 
diff --git a/src/calibre/ebooks/oeb/polish/check/links.py b/src/calibre/ebooks/oeb/polish/check/links.py
index 5f118394b6..25041af698 100644
--- a/src/calibre/ebooks/oeb/polish/check/links.py
+++ b/src/calibre/ebooks/oeb/polish/check/links.py
@@ -410,7 +410,8 @@ def check_external_links(container, progress_callback=(lambda num, total:None),
         return []
     items = Queue()
     ans = []
-    tuple(map(items.put, iteritems(external_links)))
+    for el in iteritems(external_links):
+        items.put(el)
     progress_callback(0, len(external_links))
     done = []
     downloaded_html_ids = {}
diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py
index 13b00d0be6..66e95d75cd 100644
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@@ -825,7 +825,8 @@ class Container(ContainerBase):  # {{{
         imap = {name:item_id for item_id, name in iteritems(imap)}
         items = [item for item, name, linear in self.spine_iter]
         tail, last_tail = (items[0].tail, items[-1].tail) if items else ('\n    ', '\n  ')
-        tuple(map(self.remove_from_xml, items))
+        for i in items:
+            self.remove_from_xml(i)
         spine = self.opf_xpath('//opf:spine')[0]
         spine.text = tail
         for name, linear in spine_items:
diff --git a/src/calibre/ebooks/oeb/polish/replace.py b/src/calibre/ebooks/oeb/polish/replace.py
index b16876597a..6815554922 100644
--- a/src/calibre/ebooks/oeb/polish/replace.py
+++ b/src/calibre/ebooks/oeb/polish/replace.py
@@ -384,7 +384,8 @@ def remove_links_to(container, predicate):
             removed = remove_links_in_sheet(partial(container.href_to_name, base=name), container.parsed(name), predicate)
         if removed:
             changed.add(name)
-    tuple(map(container.dirty, changed))
+    for i in changed:
+        container.dirty(i)
     return changed
 
 
diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py
index 6ab9d5a0a2..89f27b4fed 100644
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@@ -456,7 +456,8 @@ def add_anchors_markup(root, uuid, anchors):
             a.tail = '\n'
         div.append(a)
     a.count = 0
-    tuple(map(a, anchors))
+    for anchor in anchors:
+        a(anchor)
     a(uuid)
 
 
diff --git a/src/calibre/gui2/icon_theme.py b/src/calibre/gui2/icon_theme.py
index 1f51a1a9ab..6db1d887ed 100644
--- a/src/calibre/gui2/icon_theme.py
+++ b/src/calibre/gui2/icon_theme.py
@@ -490,7 +490,8 @@ def get_cover(metadata):
 
 def get_covers(themes, dialog, num_of_workers=8):
     items = Queue()
-    tuple(map(items.put, themes))
+    for i in themes:
+        items.put(i)
 
     def callback(metadata, x):
         if not sip.isdeleted(dialog) and not dialog.dialog_closed:
diff --git a/src/calibre/gui2/preferences/server.py b/src/calibre/gui2/preferences/server.py
index 80d1684bc0..f8658ba1c4 100644
--- a/src/calibre/gui2/preferences/server.py
+++ b/src/calibre/gui2/preferences/server.py
@@ -217,7 +217,7 @@ class Choices(QComboBox):
         self.setEditable(False)
         opt = options[name]
         self.choices = opt.choices
-        tuple(map(self.addItem, opt.choices))
+        self.addItems(opt.choices)
         self.currentIndexChanged.connect(self.changed_signal.emit)
         init_opt(self, opt, layout)
 
diff --git a/src/calibre/gui2/tag_browser/ui.py b/src/calibre/gui2/tag_browser/ui.py
index 15dc5e408d..cf0f20d603 100644
--- a/src/calibre/gui2/tag_browser/ui.py
+++ b/src/calibre/gui2/tag_browser/ui.py
@@ -600,8 +600,8 @@ class TagBrowserBar(QWidget):  # {{{
         find_shown = self.toggle_search_button.isChecked()
         self.toggle_search_button.setVisible(not find_shown)
         l = self.layout()
-        items = [l.itemAt(i) for i in range(l.count())]
-        tuple(map(l.removeItem, items))
+        for item in [l.itemAt(i) for i in range(l.count())]:
+            l.removeItem(item)
         if find_shown:
             l.addWidget(self.alter_tb)
             self.alter_tb.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonIconOnly)
diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py
index fa007539d6..573a7a486c 100644
--- a/src/calibre/gui2/tweak_book/boss.py
+++ b/src/calibre/gui2/tweak_book/boss.py
@@ -391,7 +391,8 @@ class Boss(QObject):
             if ef:
                 if isinstance(ef, str):
                     ef = [ef]
-                tuple(map(self.gui.file_list.request_edit, ef))
+                for i in ef:
+                    self.gui.file_list.request_edit(i)
             else:
                 if tprefs['restore_book_state']:
                     self.restore_book_edit_state()
diff --git a/src/calibre/gui2/tweak_book/file_list.py b/src/calibre/gui2/tweak_book/file_list.py
index 6e5f2d0718..930d1bd829 100644
--- a/src/calibre/gui2/tweak_book/file_list.py
+++ b/src/calibre/gui2/tweak_book/file_list.py
@@ -1129,7 +1129,8 @@ class MergeDialog(QDialog):  # {{{
 
         buttons = self.buttons = [QRadioButton(n) for n in names]
         buttons[0].setChecked(True)
-        tuple(map(w.l.addWidget, buttons))
+        for i in buttons:
+            w.l.addWidget(i)
         sa.setWidget(w)
 
         self.resize(self.sizeHint() + QSize(150, 20))
diff --git a/src/calibre/srv/http_response.py b/src/calibre/srv/http_response.py
index a20b2fc6d0..9053e9ae4d 100644
--- a/src/calibre/srv/http_response.py
+++ b/src/calibre/srv/http_response.py
@@ -254,7 +254,8 @@ class RequestData:  # {{{
 
     def filesystem_file_with_custom_etag(self, output, *etag_parts):
         etag = hashlib.sha1()
-        tuple(map(lambda x:etag.update(str(x).encode('utf-8')), etag_parts))
+        for i in etag_parts:
+            etag.update(str(i).encode('utf-8'))
         return ETaggedFile(output, etag.hexdigest())
 
     def filesystem_file_with_constant_etag(self, output, etag_as_hexencoded_string):
diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
index b9fb136466..ac80092344 100644
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -194,7 +194,8 @@ def toc_anchor_map(toc):
         if name and node['id'] not in seen_map[name]:
             ans[name].append({'id':node['id'], 'frag':node['frag']})
             seen_map[name].add(node['id'])
-        tuple(map(process_node, node['children']))
+        for i in node['children']:
+            process_node(i)
 
     process_node(toc)
     return dict(ans)
diff --git a/src/calibre/utils/fonts/sfnt/subset.py b/src/calibre/utils/fonts/sfnt/subset.py
index 9a037cbc23..7bf8da962c 100644
--- a/src/calibre/utils/fonts/sfnt/subset.py
+++ b/src/calibre/utils/fonts/sfnt/subset.py
@@ -272,7 +272,8 @@ def main(args):
                 raise SystemExit(1)
             if opts.codes:
                 parts = tuple(map(conv_code, parts))
-            tuple(map(not_single, parts))
+            for i in parts:
+                not_single(i)
             ranges.add(tuple(parts))
         else:
             if opts.codes:

From d62d8b911115f239d0a02ff73631d32a22bda544 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Mon, 8 Nov 2021 16:35:24 -0500
Subject: [PATCH 2/9] remove function attribute that was never used

---
 src/calibre/ebooks/pdf/html_writer.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py
index 89f27b4fed..a1c6901466 100644
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@@ -455,7 +455,6 @@ def add_anchors_markup(root, uuid, anchors):
             # rescale the viewport
             a.tail = '\n'
         div.append(a)
-    a.count = 0
     for anchor in anchors:
         a(anchor)
     a(uuid)

From 4be01e7000793892e8bcb3de1db1eda167c514b0 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Sun, 23 Feb 2020 01:40:02 -0500
Subject: [PATCH 3/9] use tuple up front instead of list which must later be
 converted to tuple

We don't need a mutable type anyway, and the only other use of it is
rebinding the name via parts = tuple(map(...)), so why convert it to a
tuple *again* at the end?
---
 src/calibre/utils/fonts/sfnt/subset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/utils/fonts/sfnt/subset.py b/src/calibre/utils/fonts/sfnt/subset.py
index 7bf8da962c..46f13d2e45 100644
--- a/src/calibre/utils/fonts/sfnt/subset.py
+++ b/src/calibre/utils/fonts/sfnt/subset.py
@@ -266,7 +266,7 @@ def main(args):
 
     for c in chars:
         if '-' in c:
-            parts = [x.strip() for x in c.split('-')]
+            parts = tuple(x.strip() for x in c.split('-'))
             if len(parts) != 2:
                 prints('Invalid range:', c, file=sys.stderr)
                 raise SystemExit(1)
@@ -274,7 +274,7 @@ def main(args):
                 parts = tuple(map(conv_code, parts))
             for i in parts:
                 not_single(i)
-            ranges.add(tuple(parts))
+            ranges.add(parts)
         else:
             if opts.codes:
                 c = conv_code(c)

From 2302cb6a96853c734293cf834bf7dad9725ab3c3 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Sun, 30 Aug 2020 19:52:35 -0400
Subject: [PATCH 4/9] De-vendor sgmllib

The upstream feedparser 6.0 beta introduces a hard dependency on
PyPI sgmllib3k, while 5.2.1 (released in 2015) doesn't properly support
python3 except via 2to3 plus documentation telling users to manually
copy over sgmllib.py.

Let's just rely on sgmllib3k.
---
 bypy/sources.json |  10 +
 src/sgmllib.py    | 574 ----------------------------------------------
 2 files changed, 10 insertions(+), 574 deletions(-)
 delete mode 100644 src/sgmllib.py

diff --git a/bypy/sources.json b/bypy/sources.json
index 3e3285328d..f183f68556 100644
--- a/bypy/sources.json
+++ b/bypy/sources.json
@@ -632,6 +632,16 @@
         }
     },
 
+    {
+        "name": "sgmllib3k",
+        "python": 3,
+        "unix": {
+            "filename": "sgmllib3k-1.0.0.tar.gz",
+            "hash": "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9",
+            "urls": ["pypi"]
+        }
+    },
+
     {
         "name": "markdown",
         "unix": {
diff --git a/src/sgmllib.py b/src/sgmllib.py
deleted file mode 100644
index 6422b7603c..0000000000
--- a/src/sgmllib.py
+++ /dev/null
@@ -1,574 +0,0 @@
-"""A parser for SGML, using the derived class as a static DTD."""
-# Needed for feedparser under python 3 where this module has been removed
-
-# XXX This only supports those SGML features used by HTML.
-
-# XXX There should be a way to distinguish between PCDATA (parsed
-# character data -- the normal case), RCDATA (replaceable character
-# data -- only char and entity references and end tags are special)
-# and CDATA (character data -- only end tags are special).  RCDATA is
-# not supported at all.
-
-import _markupbase
-import re
-
-__all__ = ["SGMLParser", "SGMLParseError"]
-
-# Regular expressions used for parsing
-
-interesting = re.compile('[&<]')
-incomplete = re.compile(
-    '&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
-    '<([a-zA-Z][^<>]*|'
-    '/([a-zA-Z][^<>]*)?|'
-    '![^<>]*)?'
-)
-
-entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
-charref = re.compile('&#([0-9]+)[^0-9]')
-
-starttagopen = re.compile('<[>a-zA-Z]')
-shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
-shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
-piclose = re.compile('>')
-endbracket = re.compile('[<>]')
-tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
-attrfind = re.compile(
-    r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?'
-)
-
-
-class SGMLParseError(RuntimeError):
-    """Exception raised for all parse errors."""
-    pass
-
-
-# SGML parser base class -- find tags and call handler functions.
-# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
-# The dtd is defined by deriving a class which defines methods
-# with special names to handle tags: start_foo and end_foo to handle
-# <foo> and </foo>, respectively, or do_foo to handle <foo> by itself.
-# (Tags are converted to lower case for this purpose.)  The data
-# between tags is passed to the parser by calling self.handle_data()
-# with some data as argument (the data may be split up in arbitrary
-# chunks).  Entity references are passed by calling
-# self.handle_entityref() with the entity reference as argument.
-
-
-class SGMLParser(_markupbase.ParserBase):
-    # Definition of entities -- derived classes may override
-    entity_or_charref = re.compile(
-        '&(?:'
-        '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
-        ')(;?)'
-    )
-
-    def __init__(self, verbose=0):
-        """Initialize and reset this instance."""
-        self.verbose = verbose
-        self.reset()
-
-    def reset(self):
-        """Reset this instance. Loses all unprocessed data."""
-        self.__starttag_text = None
-        self.rawdata = ''
-        self.stack = []
-        self.lasttag = '???'
-        self.nomoretags = 0
-        self.literal = 0
-        _markupbase.ParserBase.reset(self)
-
-    def setnomoretags(self):
-        """Enter literal mode (CDATA) till EOF.
-
-        Intended for derived classes only.
-        """
-        self.nomoretags = self.literal = 1
-
-    def setliteral(self, *args):
-        """Enter literal mode (CDATA).
-
-        Intended for derived classes only.
-        """
-        self.literal = 1
-
-    def feed(self, data):
-        """Feed some data to the parser.
-
-        Call this as often as you want, with as little or as much text
-        as you want (may include '\n').  (This just saves the text,
-        all the processing is done by goahead().)
-        """
-
-        self.rawdata = self.rawdata + data
-        self.goahead(0)
-
-    def close(self):
-        """Handle the remaining data."""
-        self.goahead(1)
-
-    def error(self, message):
-        raise SGMLParseError(message)
-
-    # Internal -- handle data as far as reasonable.  May leave state
-    # and data to be processed by a subsequent call.  If 'end' is
-    # true, force handling all data as if followed by EOF marker.
-    def goahead(self, end):
-        rawdata = self.rawdata
-        i = 0
-        n = len(rawdata)
-        while i < n:
-            if self.nomoretags:
-                self.handle_data(rawdata[i:n])
-                i = n
-                break
-            match = interesting.search(rawdata, i)
-            if match:
-                j = match.start()
-            else:
-                j = n
-            if i < j:
-                self.handle_data(rawdata[i:j])
-            i = j
-            if i == n:
-                break
-            if rawdata[i] == '<':
-                if starttagopen.match(rawdata, i):
-                    if self.literal:
-                        self.handle_data(rawdata[i])
-                        i = i + 1
-                        continue
-                    k = self.parse_starttag(i)
-                    if k < 0:
-                        break
-                    i = k
-                    continue
-                if rawdata.startswith("</", i):
-                    k = self.parse_endtag(i)
-                    if k < 0:
-                        break
-                    i = k
-                    self.literal = 0
-                    continue
-                if self.literal:
-                    if n > (i + 1):
-                        self.handle_data("<")
-                        i = i + 1
-                    else:
-                        # incomplete
-                        break
-                    continue
-                if rawdata.startswith("<!--", i):
-                    # Strictly speaking, a comment is --.*--
-                    # within a declaration tag <!...>.
-                    # This should be removed,
-                    # and comments handled only in parse_declaration.
-                    k = self.parse_comment(i)
-                    if k < 0:
-                        break
-                    i = k
-                    continue
-                if rawdata.startswith("<?", i):
-                    k = self.parse_pi(i)
-                    if k < 0:
-                        break
-                    i = i + k
-                    continue
-                if rawdata.startswith("<!", i):
-                    # This is some sort of declaration; in "HTML as
-                    # deployed," this should only be the document type
-                    # declaration ("<!DOCTYPE html...>").
-                    k = self.parse_declaration(i)
-                    if k < 0:
-                        break
-                    i = k
-                    continue
-            elif rawdata[i] == '&':
-                if self.literal:
-                    self.handle_data(rawdata[i])
-                    i = i + 1
-                    continue
-                match = charref.match(rawdata, i)
-                if match:
-                    name = match.group(1)
-                    self.handle_charref(name)
-                    i = match.end(0)
-                    if rawdata[i - 1] != ';':
-                        i = i - 1
-                    continue
-                match = entityref.match(rawdata, i)
-                if match:
-                    name = match.group(1)
-                    self.handle_entityref(name)
-                    i = match.end(0)
-                    if rawdata[i - 1] != ';':
-                        i = i - 1
-                    continue
-            else:
-                self.error('neither < nor & ??')
-            # We get here only if incomplete matches but
-            # nothing else
-            match = incomplete.match(rawdata, i)
-            if not match:
-                self.handle_data(rawdata[i])
-                i = i + 1
-                continue
-            j = match.end(0)
-            if j == n:
-                break  # Really incomplete
-            self.handle_data(rawdata[i:j])
-            i = j
-        # end while
-        if end and i < n:
-            self.handle_data(rawdata[i:n])
-            i = n
-        self.rawdata = rawdata[i:]
-        # XXX if end: check for empty stack
-
-    # Extensions for the DOCTYPE scanner:
-    _decl_otherchars = '='
-
-    # Internal -- parse processing instr, return length or -1 if not terminated
-    def parse_pi(self, i):
-        rawdata = self.rawdata
-        if rawdata[i:i + 2] != '<?':
-            self.error('unexpected call to parse_pi()')
-        match = piclose.search(rawdata, i + 2)
-        if not match:
-            return -1
-        j = match.start(0)
-        self.handle_pi(rawdata[i + 2:j])
-        j = match.end(0)
-        return j - i
-
-    def get_starttag_text(self):
-        return self.__starttag_text
-
-    # Internal -- handle starttag, return length or -1 if not terminated
-    def parse_starttag(self, i):
-        self.__starttag_text = None
-        start_pos = i
-        rawdata = self.rawdata
-        if shorttagopen.match(rawdata, i):
-            # SGML shorthand: <tag/data/ == <tag>data</tag>
-            # XXX Can data contain &... (entity or char refs)?
-            # XXX Can data contain < or > (tag characters)?
-            # XXX Can there be whitespace before the first /?
-            match = shorttag.match(rawdata, i)
-            if not match:
-                return -1
-            tag, data = match.group(1, 2)
-            self.__starttag_text = '<%s/' % tag
-            tag = tag.lower()
-            k = match.end(0)
-            self.finish_shorttag(tag, data)
-            self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
-            return k
-        # XXX The following should skip matching quotes (' or ")
-        # As a shortcut way to exit, this isn't so bad, but shouldn't
-        # be used to locate the actual end of the start tag since the
-        # < or > characters may be embedded in an attribute value.
-        match = endbracket.search(rawdata, i + 1)
-        if not match:
-            return -1
-        j = match.start(0)
-        # Now parse the data between i+1 and j into a tag and attrs
-        attrs = []
-        if rawdata[i:i + 2] == '<>':
-            # SGML shorthand: <> == <last open tag seen>
-            k = j
-            tag = self.lasttag
-        else:
-            match = tagfind.match(rawdata, i + 1)
-            if not match:
-                self.error('unexpected call to parse_starttag')
-            k = match.end(0)
-            tag = rawdata[i + 1:k].lower()
-            self.lasttag = tag
-        while k < j:
-            match = attrfind.match(rawdata, k)
-            if not match:
-                break
-            attrname, rest, attrvalue = match.group(1, 2, 3)
-            if not rest:
-                attrvalue = attrname
-            else:
-                if (
-                    attrvalue[:1] == "'" == attrvalue[-1:] or
-                    attrvalue[:1] == '"' == attrvalue[-1:]
-                ):
-                    # strip quotes
-                    attrvalue = attrvalue[1:-1]
-                attrvalue = self.entity_or_charref.sub(self._convert_ref, attrvalue)
-            attrs.append((attrname.lower(), attrvalue))
-            k = match.end(0)
-        if rawdata[j] == '>':
-            j = j + 1
-        self.__starttag_text = rawdata[start_pos:j]
-        self.finish_starttag(tag, attrs)
-        return j
-
-    # Internal -- convert entity or character reference
-    def _convert_ref(self, match):
-        if match.group(2):
-            return self.convert_charref(match.group(2)) or \
-                '&#%s%s' % match.groups()[1:]
-        elif match.group(3):
-            return self.convert_entityref(match.group(1)) or \
-                '&%s;' % match.group(1)
-        else:
-            return '&%s' % match.group(1)
-
-    # Internal -- parse endtag
-    def parse_endtag(self, i):
-        rawdata = self.rawdata
-        match = endbracket.search(rawdata, i + 1)
-        if not match:
-            return -1
-        j = match.start(0)
-        tag = rawdata[i + 2:j].strip().lower()
-        if rawdata[j] == '>':
-            j = j + 1
-        self.finish_endtag(tag)
-        return j
-
-    # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
-    def finish_shorttag(self, tag, data):
-        self.finish_starttag(tag, [])
-        self.handle_data(data)
-        self.finish_endtag(tag)
-
-    # Internal -- finish processing of start tag
-    # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag
-    def finish_starttag(self, tag, attrs):
-        try:
-            method = getattr(self, 'start_' + tag)
-        except AttributeError:
-            try:
-                method = getattr(self, 'do_' + tag)
-            except AttributeError:
-                self.unknown_starttag(tag, attrs)
-                return -1
-            else:
-                self.handle_starttag(tag, method, attrs)
-                return 0
-        else:
-            self.stack.append(tag)
-            self.handle_starttag(tag, method, attrs)
-            return 1
-
-    # Internal -- finish processing of end tag
-    def finish_endtag(self, tag):
-        if not tag:
-            found = len(self.stack) - 1
-            if found < 0:
-                self.unknown_endtag(tag)
-                return
-        else:
-            if tag not in self.stack:
-                try:
-                    method = getattr(self, 'end_' + tag)
-                except AttributeError:
-                    self.unknown_endtag(tag)
-                else:
-                    self.report_unbalanced(tag)
-                return
-            found = len(self.stack)
-            for i in range(found):
-                if self.stack[i] == tag:
-                    found = i
-        while len(self.stack) > found:
-            tag = self.stack[-1]
-            try:
-                method = getattr(self, 'end_' + tag)
-            except AttributeError:
-                method = None
-            if method:
-                self.handle_endtag(tag, method)
-            else:
-                self.unknown_endtag(tag)
-            del self.stack[-1]
-
-    # Overridable -- handle start tag
-    def handle_starttag(self, tag, method, attrs):
-        method(attrs)
-
-    # Overridable -- handle end tag
-    def handle_endtag(self, tag, method):
-        method()
-
-    # Example -- report an unbalanced </...> tag.
-    def report_unbalanced(self, tag):
-        if self.verbose:
-            print('*** Unbalanced </' + tag + '>')
-            print('*** Stack:', self.stack)
-
-    def convert_charref(self, name):
-        """Convert character reference, may be overridden."""
-        try:
-            n = int(name)
-        except ValueError:
-            return
-        if not 0 <= n <= 127:
-            return
-        return self.convert_codepoint(n)
-
-    def convert_codepoint(self, codepoint):
-        return chr(codepoint)
-
-    def handle_charref(self, name):
-        """Handle character reference, no need to override."""
-        replacement = self.convert_charref(name)
-        if replacement is None:
-            self.unknown_charref(name)
-        else:
-            self.handle_data(replacement)
-
-    # Definition of entities -- derived classes may override
-    entitydefs = \
-            {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
-
-    def convert_entityref(self, name):
-        """Convert entity references.
-
-        As an alternative to overriding this method; one can tailor the
-        results by setting up the self.entitydefs mapping appropriately.
-        """
-        table = self.entitydefs
-        if name in table:
-            return table[name]
-        else:
-            return
-
-    def handle_entityref(self, name):
-        """Handle entity references, no need to override."""
-        replacement = self.convert_entityref(name)
-        if replacement is None:
-            self.unknown_entityref(name)
-        else:
-            self.handle_data(replacement)
-
-    # Example -- handle data, should be overridden
-    def handle_data(self, data):
-        pass
-
-    # Example -- handle comment, could be overridden
-    def handle_comment(self, data):
-        pass
-
-    # Example -- handle declaration, could be overridden
-    def handle_decl(self, decl):
-        pass
-
-    # Example -- handle processing instruction, could be overridden
-    def handle_pi(self, data):
-        pass
-
-    # To be overridden -- handlers for unknown objects
-    def unknown_starttag(self, tag, attrs):
-        pass
-
-    def unknown_endtag(self, tag):
-        pass
-
-    def unknown_charref(self, ref):
-        pass
-
-    def unknown_entityref(self, ref):
-        pass
-
-
-class TestSGMLParser(SGMLParser):
-
-    def __init__(self, verbose=0):
-        self.testdata = ""
-        SGMLParser.__init__(self, verbose)
-
-    def handle_data(self, data):
-        self.testdata = self.testdata + data
-        if len(repr(self.testdata)) >= 70:
-            self.flush()
-
-    def flush(self):
-        data = self.testdata
-        if data:
-            self.testdata = ""
-            print('data:', repr(data))
-
-    def handle_comment(self, data):
-        self.flush()
-        r = repr(data)
-        if len(r) > 68:
-            r = r[:32] + '...' + r[-32:]
-        print('comment:', r)
-
-    def unknown_starttag(self, tag, attrs):
-        self.flush()
-        if not attrs:
-            print('start tag: <' + tag + '>')
-        else:
-            print('start tag: <' + tag, end=' ')
-            for name, value in attrs:
-                print(name + '=' + '"' + value + '"', end=' ')
-            print('>')
-
-    def unknown_endtag(self, tag):
-        self.flush()
-        print('end tag: </' + tag + '>')
-
-    def unknown_entityref(self, ref):
-        self.flush()
-        print('*** unknown entity ref: &' + ref + ';')
-
-    def unknown_charref(self, ref):
-        self.flush()
-        print('*** unknown char ref: &#' + ref + ';')
-
-    def unknown_decl(self, data):
-        self.flush()
-        print('*** unknown decl: [' + data + ']')
-
-    def close(self):
-        SGMLParser.close(self)
-        self.flush()
-
-
-def test(args=None):
-    import sys
-
-    if args is None:
-        args = sys.argv[1:]
-
-    if args and args[0] == '-s':
-        args = args[1:]
-        klass = SGMLParser
-    else:
-        klass = TestSGMLParser
-
-    if args:
-        file = args[0]
-    else:
-        file = 'test.html'
-
-    if file == '-':
-        f = sys.stdin
-    else:
-        try:
-            f = open(file, 'r')
-        except IOError as msg:
-            print(file, ":", msg)
-            sys.exit(1)
-
-    data = f.read()
-    if f is not sys.stdin:
-        f.close()
-
-    x = klass()
-    for c in data:
-        x.feed(c)
-    x.close()
-
-
-if __name__ == '__main__':
-    test()

From 14aa61d2468ba6b630033f652e5e24e39991018b Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Mon, 7 Sep 2020 21:35:42 -0400
Subject: [PATCH 5/9] Do not print error messages during plugin initialization
 to stdout.

This non-fatal warning emits output during calibredb commands and breaks
scripting.
---
 src/calibre/customize/ui.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py
index cd1183af9b..7fe802dc0a 100644
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@@ -750,7 +750,7 @@ def initialize_plugins(perf=False):
                 times[plugin.name] = time.time() - st
             _initialized_plugins.append(plugin)
         except:
-            print('Failed to initialize plugin:', repr(zfp))
+            print('Failed to initialize plugin:', repr(zfp), file=sys.stderr)
             if DEBUG:
                 traceback.print_exc()
     # Prevent a custom plugin from overriding stdout/stderr as this breaks

From c2f76f29326700bfb251bb00203794096743a9ce Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Thu, 29 Oct 2020 23:18:01 -0400
Subject: [PATCH 6/9] viewer: error out if print book is used when no book is
 open

Showing an error dialog is slightly nicer than failing with a traceback
caused by trying to access a nonexistent pathtoebook attribute.
---
 src/calibre/gui2/viewer/ui.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/calibre/gui2/viewer/ui.py b/src/calibre/gui2/viewer/ui.py
index c74b55ac80..d1c8e8f701 100644
--- a/src/calibre/gui2/viewer/ui.py
+++ b/src/calibre/gui2/viewer/ui.py
@@ -428,6 +428,10 @@ class EbookViewer(MainWindow):
         error_dialog(self, title, msg, det_msg=details or None, show=True)
 
     def print_book(self):
+        if not hasattr(set_book_path, 'pathtoebook'):
+            error_dialog(self, _('No ebook selected'), _(
+                "Cannot print a book if no book is open yet."), show=True)
+            return
         from .printing import print_book
         print_book(set_book_path.pathtoebook, book_title=self.current_book_data['metadata']['title'], parent=self)
 

From 4761866fa301e6726333d08152d408df3e3f2e01 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Sun, 27 Jun 2021 17:50:03 -0400
Subject: [PATCH 7/9] tests: fix exclusion of FreeBSD code

calibre.devices.usbms.hal was added as a FreeBSD backend, but the test
exclusion only tries importing it on Linux. It does import on Linux, but
what we actually care about is that it imports on FreeBSD.
---
 setup/test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/setup/test.py b/setup/test.py
index 1022b73066..5225042e61 100644
--- a/setup/test.py
+++ b/setup/test.py
@@ -8,7 +8,7 @@ import subprocess
 import sys
 import unittest
 
-from setup import SRC, Command, islinux, ismacos, iswindows
+from setup import SRC, Command, isbsd, islinux, ismacos, iswindows
 
 TEST_MODULES = frozenset('srv db polish opf css docx cfi matcher icu smartypants build misc dbcli ebooks'.split())
 
@@ -50,8 +50,10 @@ class TestImports(unittest.TestCase):
             exclude_modules |= {
                     'calibre.linux',
                     'calibre.utils.linux_trash', 'calibre.utils.open_with.linux',
-                    'calibre.gui2.linux_file_dialogs', 'calibre.devices.usbms.hal',
+                    'calibre.gui2.linux_file_dialogs',
             }
+        if not isbsd:
+            exclude_modules.add('calibre.devices.usbms.hal')
         self.assertGreater(self.base_check(os.path.join(SRC, 'odf'), exclude_packages, exclude_modules), 10)
         base = os.path.join(SRC, 'calibre')
         self.assertGreater(self.base_check(base, exclude_packages, exclude_modules), 1000)

From 030c80a68c517aa36872fab47a5e4172df478f07 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Thu, 2 Dec 2021 10:29:32 -0500
Subject: [PATCH 8/9] fix the linux desktop filename mapping for ebook-edit

This was initially added in commit
efb83eb6fcd1b175e70281fc0776ca2e53422ad0 with a filler name that did not
match the usual style of desktop filenames. When the actual desktop file
was added in commit a87092ba4f5e61aa51149c2c34880582613037f0, the
override name still did not match that file's name.

Although it was mass renamed from "tweak" to "edit" in commit
289ef5f0b9a4a87e6918744af64ff0d80f6a6c42, the word ordering was still
wrong.

As a result, application menus failed to correlate open windows to
pinned icons, or (on application menus that support it) group the
windows together.
---
 src/calibre/gui2/tweak_book/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/tweak_book/main.py b/src/calibre/gui2/tweak_book/main.py
index ac49543ce6..fce680fb9f 100644
--- a/src/calibre/gui2/tweak_book/main.py
+++ b/src/calibre/gui2/tweak_book/main.py
@@ -70,7 +70,7 @@ def _run(args, notify=None):
     parser = option_parser()
     opts, args = parser.parse_args(args)
     decouple('edit-book-'), set_gui_prefs(tprefs)
-    override = 'calibre-edit-book' if islinux else None
+    override = 'calibre-ebook-edit' if islinux else None
     app = Application(args, override_program_name=override, color_prefs=tprefs, windows_app_uid=EDITOR_APP_UID)
     app.file_event_hook = EventAccumulator()
     app.load_builtin_fonts()

From d5fa341d13340c725992cbc2859bfea5816b2089 Mon Sep 17 00:00:00 2001
From: Eli Schwartz <eschwartz@archlinux.org>
Date: Thu, 2 Dec 2021 10:41:34 -0500
Subject: [PATCH 9/9] fix the linux desktop filename mapping for lrfviewer

In commit e7e0aea6958b1419a5e72c85da0e0f920d36fb53, this was added with
the wrong name. Oddly enough, the "lrfviewer" application does not have
a hyphenated name, even though the e-book viewer is in fact named
"ebook-viewer", not "ebookviewer".

On the other hand, it's probably not worth fixing the inconsistency,
because changing desktop filenames can be (a bit) disruptive if people
have pinned or modified them, and it's not worth it just for a stray
hyphen. Changing the actual program executable name is even less worth
it...
---
 src/calibre/gui2/lrf_renderer/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/lrf_renderer/main.py b/src/calibre/gui2/lrf_renderer/main.py
index a01c48d712..45decc2bc9 100644
--- a/src/calibre/gui2/lrf_renderer/main.py
+++ b/src/calibre/gui2/lrf_renderer/main.py
@@ -305,7 +305,7 @@ def main(args=sys.argv, logger=None):
         return 1
     pid = os.fork() if (islinux or isbsd) else -1
     if pid <= 0:
-        override = 'calibre-lrf-viewer' if islinux else None
+        override = 'calibre-lrfviewer' if islinux else None
         app = Application(args, override_program_name=override)
         app.setWindowIcon(QIcon(I('viewer.png')))
         opts = normalize_settings(parser, opts)