From da8092c58aab78ec9b58c601ad7cb2830cb48d31 Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Sun, 23 Feb 2020 01:34:34 -0500 Subject: [PATCH 1/9] do not use a side effect of allocating list(map(...)) to execute functions for each member of a list --- src/calibre/ebooks/docx/to_html.py | 3 ++- src/calibre/ebooks/mobi/writer2/indexer.py | 10 ++++++---- src/calibre/ebooks/oeb/parse_utils.py | 3 ++- src/calibre/ebooks/oeb/polish/check/css.py | 3 ++- src/calibre/ebooks/oeb/polish/check/links.py | 3 ++- src/calibre/ebooks/oeb/polish/container.py | 3 ++- src/calibre/ebooks/oeb/polish/replace.py | 3 ++- src/calibre/ebooks/pdf/html_writer.py | 3 ++- src/calibre/gui2/icon_theme.py | 3 ++- src/calibre/gui2/preferences/server.py | 2 +- src/calibre/gui2/tag_browser/ui.py | 4 ++-- src/calibre/gui2/tweak_book/boss.py | 3 ++- src/calibre/gui2/tweak_book/file_list.py | 3 ++- src/calibre/srv/http_response.py | 3 ++- src/calibre/srv/render_book.py | 3 ++- src/calibre/utils/fonts/sfnt/subset.py | 3 ++- 16 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index a10a6ce181..d284bae8c9 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -182,7 +182,8 @@ class Convert: indent = float(style.text_indent[:-2]) + indent style.text_indent = '%.3gpt' % indent parent.text = tabs[-1].tail or '' - list(map(parent.remove, tabs)) + for i in tabs: + parent.remove(i) self.images.rid_map = orig_rid_map diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 0fc15b1161..40b7502ac5 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -65,8 +65,8 @@ class TAGX: # {{{ ''' TAGX block for the Primary index header of a periodical ''' - list(map(self.add_tag, (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72, - 73, 0))) + for i in (1, 2, 3, 4, 5, 21, 22, 23, 0, 69, 70, 71, 72,73, 0): + self.add_tag(i) return self.header(2) + bytes(self.byts) @property @@ -74,7 +74,8 @@ class TAGX: # {{{ ''' TAGX block for the secondary index header of a periodical ''' - list(map(self.add_tag, (11, 0))) + for i in (11, 0): + self.add_tag(i) return self.header(1) + bytes(self.byts) @property @@ -82,7 +83,8 @@ class TAGX: # {{{ ''' TAGX block for the primary index header of a flat book ''' - list(map(self.add_tag, (1, 2, 3, 4, 0))) + for i in (1, 2, 3, 4, 0): + self.add_tag(i) return self.header(1) + bytes(self.byts) diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index f4557dbccb..19707b982b 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -65,7 +65,8 @@ def merge_multiple_html_heads_and_bodies(root, log=None): for b in bodies: for x in b: body.append(x) - tuple(map(root.append, (head, body))) + for x in (head, body): + root.append(x) if log is not None: log.warn('Merging multiple and sections') return root diff --git a/src/calibre/ebooks/oeb/polish/check/css.py b/src/calibre/ebooks/oeb/polish/check/css.py index 1332f7e31d..5e4987ae3e 100644 --- a/src/calibre/ebooks/oeb/polish/check/css.py +++ b/src/calibre/ebooks/oeb/polish/check/css.py @@ -221,7 +221,8 @@ class Pool: if not sip.isdeleted(x): sip.delete(x) - tuple(map(safe_delete, self.workers)) + for i in self.workers: + safe_delete(i) self.workers = [] diff --git a/src/calibre/ebooks/oeb/polish/check/links.py b/src/calibre/ebooks/oeb/polish/check/links.py index 5f118394b6..25041af698 100644 --- a/src/calibre/ebooks/oeb/polish/check/links.py +++ b/src/calibre/ebooks/oeb/polish/check/links.py @@ -410,7 +410,8 @@ def check_external_links(container, progress_callback=(lambda num, total:None), return [] items = Queue() ans = [] - tuple(map(items.put, iteritems(external_links))) + for el in iteritems(external_links): + items.put(el) progress_callback(0, len(external_links)) done = [] downloaded_html_ids = {} diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 13b00d0be6..66e95d75cd 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -825,7 +825,8 @@ class Container(ContainerBase): # {{{ imap = {name:item_id for item_id, name in iteritems(imap)} items = [item for item, name, linear in self.spine_iter] tail, last_tail = (items[0].tail, items[-1].tail) if items else ('\n ', '\n ') - tuple(map(self.remove_from_xml, items)) + for i in items: + self.remove_from_xml(i) spine = self.opf_xpath('//opf:spine')[0] spine.text = tail for name, linear in spine_items: diff --git a/src/calibre/ebooks/oeb/polish/replace.py b/src/calibre/ebooks/oeb/polish/replace.py index b16876597a..6815554922 100644 --- a/src/calibre/ebooks/oeb/polish/replace.py +++ b/src/calibre/ebooks/oeb/polish/replace.py @@ -384,7 +384,8 @@ def remove_links_to(container, predicate): removed = remove_links_in_sheet(partial(container.href_to_name, base=name), container.parsed(name), predicate) if removed: changed.add(name) - tuple(map(container.dirty, changed)) + for i in changed: + container.dirty(i) return changed diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py index 6ab9d5a0a2..89f27b4fed 100644 --- a/src/calibre/ebooks/pdf/html_writer.py +++ b/src/calibre/ebooks/pdf/html_writer.py @@ -456,7 +456,8 @@ def add_anchors_markup(root, uuid, anchors): a.tail = '\n' div.append(a) a.count = 0 - tuple(map(a, anchors)) + for anchor in anchors: + a(anchor) a(uuid) diff --git a/src/calibre/gui2/icon_theme.py b/src/calibre/gui2/icon_theme.py index 1f51a1a9ab..6db1d887ed 100644 --- a/src/calibre/gui2/icon_theme.py +++ b/src/calibre/gui2/icon_theme.py @@ -490,7 +490,8 @@ def get_cover(metadata): def get_covers(themes, dialog, num_of_workers=8): items = Queue() - tuple(map(items.put, themes)) + for i in themes: + items.put(i) def callback(metadata, x): if not sip.isdeleted(dialog) and not dialog.dialog_closed: diff --git a/src/calibre/gui2/preferences/server.py b/src/calibre/gui2/preferences/server.py index 80d1684bc0..f8658ba1c4 100644 --- a/src/calibre/gui2/preferences/server.py +++ b/src/calibre/gui2/preferences/server.py @@ -217,7 +217,7 @@ class Choices(QComboBox): self.setEditable(False) opt = options[name] self.choices = opt.choices - tuple(map(self.addItem, opt.choices)) + self.addItems(opt.choices) self.currentIndexChanged.connect(self.changed_signal.emit) init_opt(self, opt, layout) diff --git a/src/calibre/gui2/tag_browser/ui.py b/src/calibre/gui2/tag_browser/ui.py index 15dc5e408d..cf0f20d603 100644 --- a/src/calibre/gui2/tag_browser/ui.py +++ b/src/calibre/gui2/tag_browser/ui.py @@ -600,8 +600,8 @@ class TagBrowserBar(QWidget): # {{{ find_shown = self.toggle_search_button.isChecked() self.toggle_search_button.setVisible(not find_shown) l = self.layout() - items = [l.itemAt(i) for i in range(l.count())] - tuple(map(l.removeItem, items)) + for i in (l.itemAt(i) for i in range(l.count())): + l.removeItem(i) if find_shown: l.addWidget(self.alter_tb) self.alter_tb.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonIconOnly) diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py index fa007539d6..573a7a486c 100644 --- a/src/calibre/gui2/tweak_book/boss.py +++ b/src/calibre/gui2/tweak_book/boss.py @@ -391,7 +391,8 @@ class Boss(QObject): if ef: if isinstance(ef, str): ef = [ef] - tuple(map(self.gui.file_list.request_edit, ef)) + for i in ef: + self.gui.file_list.request_edit(i) else: if tprefs['restore_book_state']: self.restore_book_edit_state() diff --git a/src/calibre/gui2/tweak_book/file_list.py b/src/calibre/gui2/tweak_book/file_list.py index 6e5f2d0718..930d1bd829 100644 --- a/src/calibre/gui2/tweak_book/file_list.py +++ b/src/calibre/gui2/tweak_book/file_list.py @@ -1129,7 +1129,8 @@ class MergeDialog(QDialog): # {{{ buttons = self.buttons = [QRadioButton(n) for n in names] buttons[0].setChecked(True) - tuple(map(w.l.addWidget, buttons)) + for i in buttons: + w.l.addWidget(i) sa.setWidget(w) self.resize(self.sizeHint() + QSize(150, 20)) diff --git a/src/calibre/srv/http_response.py b/src/calibre/srv/http_response.py index a20b2fc6d0..9053e9ae4d 100644 --- a/src/calibre/srv/http_response.py +++ b/src/calibre/srv/http_response.py @@ -254,7 +254,8 @@ class RequestData: # {{{ def filesystem_file_with_custom_etag(self, output, *etag_parts): etag = hashlib.sha1() - tuple(map(lambda x:etag.update(str(x).encode('utf-8')), etag_parts)) + for i in etag_parts: + etag.update(str(i).encode('utf-8')) return ETaggedFile(output, etag.hexdigest()) def filesystem_file_with_constant_etag(self, output, etag_as_hexencoded_string): diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index b9fb136466..ac80092344 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -194,7 +194,8 @@ def toc_anchor_map(toc): if name and node['id'] not in seen_map[name]: ans[name].append({'id':node['id'], 'frag':node['frag']}) seen_map[name].add(node['id']) - tuple(map(process_node, node['children'])) + for i in node['children']: + process_node(i) process_node(toc) return dict(ans) diff --git a/src/calibre/utils/fonts/sfnt/subset.py b/src/calibre/utils/fonts/sfnt/subset.py index 9a037cbc23..7bf8da962c 100644 --- a/src/calibre/utils/fonts/sfnt/subset.py +++ b/src/calibre/utils/fonts/sfnt/subset.py @@ -272,7 +272,8 @@ def main(args): raise SystemExit(1) if opts.codes: parts = tuple(map(conv_code, parts)) - tuple(map(not_single, parts)) + for i in parts: + not_single(i) ranges.add(tuple(parts)) else: if opts.codes: From d62d8b911115f239d0a02ff73631d32a22bda544 Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Mon, 8 Nov 2021 16:35:24 -0500 Subject: [PATCH 2/9] remove function attribute that was never used --- src/calibre/ebooks/pdf/html_writer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py index 89f27b4fed..a1c6901466 100644 --- a/src/calibre/ebooks/pdf/html_writer.py +++ b/src/calibre/ebooks/pdf/html_writer.py @@ -455,7 +455,6 @@ def add_anchors_markup(root, uuid, anchors): # rescale the viewport a.tail = '\n' div.append(a) - a.count = 0 for anchor in anchors: a(anchor) a(uuid) From 4be01e7000793892e8bcb3de1db1eda167c514b0 Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Sun, 23 Feb 2020 01:40:02 -0500 Subject: [PATCH 3/9] use tuple up front instead of list which must later be converted to tuple We don't need a mutable type, anyway, and the only other use of it is recreating the list as = tuple(map(...)) so why convert *again* to a tuple? --- src/calibre/utils/fonts/sfnt/subset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/calibre/utils/fonts/sfnt/subset.py b/src/calibre/utils/fonts/sfnt/subset.py index 7bf8da962c..46f13d2e45 100644 --- a/src/calibre/utils/fonts/sfnt/subset.py +++ b/src/calibre/utils/fonts/sfnt/subset.py @@ -266,7 +266,7 @@ def main(args): for c in chars: if '-' in c: - parts = [x.strip() for x in c.split('-')] + parts = tuple(x.strip() for x in c.split('-')) if len(parts) != 2: prints('Invalid range:', c, file=sys.stderr) raise SystemExit(1) @@ -274,7 +274,7 @@ def main(args): parts = tuple(map(conv_code, parts)) for i in parts: not_single(i) - ranges.add(tuple(parts)) + ranges.add(parts) else: if opts.codes: c = conv_code(c) From 2302cb6a96853c734293cf834bf7dad9725ab3c3 Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Sun, 30 Aug 2020 19:52:35 -0400 Subject: [PATCH 4/9] De-vendor sgmllib The upstream feedparser 6.0 beta introduces a hard dependency on PyPI sgmllib3k, while 5.2.1 (released in 2015) doesn't properly support python3 except via 2to3 and documentation about manually copying over sgmllib.py Let's just rely on sgmllib3k. --- bypy/sources.json | 10 + src/sgmllib.py | 574 ---------------------------------------------- 2 files changed, 10 insertions(+), 574 deletions(-) delete mode 100644 src/sgmllib.py diff --git a/bypy/sources.json b/bypy/sources.json index 3e3285328d..f183f68556 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -632,6 +632,16 @@ } }, + { + "name": "sgmllib3k", + "python": 3, + "unix": { + "filename": "sgmllib3k-1.0.0.tar.gz", + "hash": "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9", + "urls": ["pypi"] + } + }, + { "name": "markdown", "unix": { diff --git a/src/sgmllib.py b/src/sgmllib.py deleted file mode 100644 index 6422b7603c..0000000000 --- a/src/sgmllib.py +++ /dev/null @@ -1,574 +0,0 @@ -"""A parser for SGML, using the derived class as a static DTD.""" -# Needed for feedparser under python 3 where this module has been removed - -# XXX This only supports those SGML features used by HTML. - -# XXX There should be a way to distinguish between PCDATA (parsed -# character data -- the normal case), RCDATA (replaceable character -# data -- only char and entity references and end tags are special) -# and CDATA (character data -- only end tags are special). RCDATA is -# not supported at all. - -import _markupbase -import re - -__all__ = ["SGMLParser", "SGMLParseError"] - -# Regular expressions used for parsing - -interesting = re.compile('[&<]') -incomplete = re.compile( - '&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' - '<([a-zA-Z][^<>]*|' - '/([a-zA-Z][^<>]*)?|' - '![^<>]*)?' -) - -entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') -charref = re.compile('&#([0-9]+)[^0-9]') - -starttagopen = re.compile('<[>a-zA-Z]') -shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') -shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') -piclose = re.compile('>') -endbracket = re.compile('[<>]') -tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') -attrfind = re.compile( - r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' - r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?' -) - - -class SGMLParseError(RuntimeError): - """Exception raised for all parse errors.""" - pass - - -# SGML parser base class -- find tags and call handler functions. -# Usage: p = SGMLParser(); p.feed(data); ...; p.close(). -# The dtd is defined by deriving a class which defines methods -# with special names to handle tags: start_foo and end_foo to handle -# and , respectively, or do_foo to handle by itself. -# (Tags are converted to lower case for this purpose.) The data -# between tags is passed to the parser by calling self.handle_data() -# with some data as argument (the data may be split up in arbitrary -# chunks). Entity references are passed by calling -# self.handle_entityref() with the entity reference as argument. - - -class SGMLParser(_markupbase.ParserBase): - # Definition of entities -- derived classes may override - entity_or_charref = re.compile( - '&(?:' - '([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)' - ')(;?)' - ) - - def __init__(self, verbose=0): - """Initialize and reset this instance.""" - self.verbose = verbose - self.reset() - - def reset(self): - """Reset this instance. Loses all unprocessed data.""" - self.__starttag_text = None - self.rawdata = '' - self.stack = [] - self.lasttag = '???' - self.nomoretags = 0 - self.literal = 0 - _markupbase.ParserBase.reset(self) - - def setnomoretags(self): - """Enter literal mode (CDATA) till EOF. - - Intended for derived classes only. - """ - self.nomoretags = self.literal = 1 - - def setliteral(self, *args): - """Enter literal mode (CDATA). - - Intended for derived classes only. - """ - self.literal = 1 - - def feed(self, data): - """Feed some data to the parser. - - Call this as often as you want, with as little or as much text - as you want (may include '\n'). (This just saves the text, - all the processing is done by goahead().) - """ - - self.rawdata = self.rawdata + data - self.goahead(0) - - def close(self): - """Handle the remaining data.""" - self.goahead(1) - - def error(self, message): - raise SGMLParseError(message) - - # Internal -- handle data as far as reasonable. May leave state - # and data to be processed by a subsequent call. If 'end' is - # true, force handling all data as if followed by EOF marker. - def goahead(self, end): - rawdata = self.rawdata - i = 0 - n = len(rawdata) - while i < n: - if self.nomoretags: - self.handle_data(rawdata[i:n]) - i = n - break - match = interesting.search(rawdata, i) - if match: - j = match.start() - else: - j = n - if i < j: - self.handle_data(rawdata[i:j]) - i = j - if i == n: - break - if rawdata[i] == '<': - if starttagopen.match(rawdata, i): - if self.literal: - self.handle_data(rawdata[i]) - i = i + 1 - continue - k = self.parse_starttag(i) - if k < 0: - break - i = k - continue - if rawdata.startswith(" (i + 1): - self.handle_data("<") - i = i + 1 - else: - # incomplete - break - continue - if rawdata.startswith("