From ac6bd5e0fd894ac3a4b455d38a025c022405b217 Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Mon, 27 May 2019 14:09:06 -0400 Subject: [PATCH] etree: use encoding='unicode' instead of passing unicode_type object --- src/calibre/ebooks/conversion/plugins/epub_input.py | 4 ++-- src/calibre/ebooks/conversion/plugins/fb2_input.py | 4 ++-- src/calibre/ebooks/conversion/plugins/htmlz_output.py | 2 +- src/calibre/ebooks/docx/container.py | 3 +-- src/calibre/ebooks/docx/index.py | 4 ++-- src/calibre/ebooks/docx/toc.py | 4 ++-- src/calibre/ebooks/docx/writer/styles.py | 4 ++-- src/calibre/ebooks/fb2/fb2ml.py | 2 +- src/calibre/ebooks/metadata/fb2.py | 4 ++-- src/calibre/ebooks/metadata/opf2.py | 8 ++++---- src/calibre/ebooks/metadata/toc.py | 2 +- src/calibre/ebooks/mobi/writer8/main.py | 2 +- src/calibre/ebooks/oeb/base.py | 2 +- src/calibre/ebooks/oeb/parse_utils.py | 4 ++-- src/calibre/ebooks/oeb/polish/stats.py | 2 +- src/calibre/ebooks/oeb/polish/toc.py | 8 ++++---- src/calibre/ebooks/oeb/reader.py | 2 +- src/calibre/ebooks/oeb/transforms/metadata.py | 4 ++-- src/calibre/ebooks/oeb/transforms/split.py | 4 ++-- src/calibre/ebooks/pdf/reflow.py | 6 +++--- src/calibre/ebooks/pml/pmlml.py | 2 +- src/calibre/ebooks/rtf/rtfml.py | 2 +- src/calibre/ebooks/snb/snbml.py | 2 +- src/calibre/ebooks/txt/txtml.py | 4 ++-- src/calibre/gui2/comments_editor.py | 2 +- src/calibre/gui2/dialogs/plugin_updater.py | 2 +- src/calibre/gui2/tweak_book/diff/view.py | 2 +- src/calibre/gui2/tweak_book/text_search.py | 4 ++-- src/calibre/utils/open_with/osx.py | 6 +++--- src/calibre/web/feeds/__init__.py | 2 +- src/calibre/web/feeds/news.py | 4 ++-- 31 files changed, 53 insertions(+), 54 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/epub_input.py b/src/calibre/ebooks/conversion/plugins/epub_input.py index 5aa689ce97..1df6813eb3 100644 --- a/src/calibre/ebooks/conversion/plugins/epub_input.py +++ b/src/calibre/ebooks/conversion/plugins/epub_input.py @@ -7,7 +7,7 @@ import os, re, posixpath from itertools import cycle from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation -from polyglot.builtins import unicode_type, as_bytes, getcwd +from polyglot.builtins import as_bytes, getcwd ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC' IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding' @@ -369,7 +369,7 @@ class EPUBInput(InputFormatPlugin): href = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): text = etree.tostring( - x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join( + x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join( x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: diff --git a/src/calibre/ebooks/conversion/plugins/fb2_input.py b/src/calibre/ebooks/conversion/plugins/fb2_input.py index d802fe2b2a..9ab3147e52 100644 --- a/src/calibre/ebooks/conversion/plugins/fb2_input.py +++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py @@ -8,7 +8,7 @@ import os, re from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre import guess_type -from polyglot.builtins import iteritems, unicode_type, getcwd +from polyglot.builtins import iteritems, getcwd FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0' FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1' @@ -71,7 +71,7 @@ class FB2Input(InputFormatPlugin): stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]') css = '' for s in stylesheets: - css += etree.tostring(s, encoding=unicode_type, method='text', + css += etree.tostring(s, encoding='unicode', method='text', with_tail=False) + '\n\n' if css: import css_parser, logging diff --git a/src/calibre/ebooks/conversion/plugins/htmlz_output.py b/src/calibre/ebooks/conversion/plugins/htmlz_output.py index 3a77c4332c..c72fd09adf 100644 --- a/src/calibre/ebooks/conversion/plugins/htmlz_output.py +++ b/src/calibre/ebooks/conversion/plugins/htmlz_output.py @@ -100,7 +100,7 @@ class HTMLZOutput(OutputFormatPlugin): for item in oeb_book.manifest: if item.media_type in OEB_IMAGES and item.href in images: if item.media_type == SVG_MIME: - data = unicode_type(etree.tostring(item.data, encoding=unicode_type)) + data = etree.tostring(item.data, encoding='unicode') else: data = item.data fname = os.path.join(tdir, u'images', images[item.href]) diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index b3c12cb43f..8738ba5375 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -19,7 +19,6 @@ from calibre.utils.localization import canonicalize_lang from calibre.utils.logging import default_log from calibre.utils.zipfile import ZipFile from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER -from polyglot.builtins import unicode_type def fromstring(raw, parser=RECOVER_PARSER): @@ -56,7 +55,7 @@ def read_doc_props(raw, mi, XPath): desc = XPath('//dc:description')(root) if desc: - raw = etree.tostring(desc[0], method='text', encoding=unicode_type) + raw = etree.tostring(desc[0], method='text', encoding='unicode') raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary mi.comments = raw.strip() diff --git a/src/calibre/ebooks/docx/index.py b/src/calibre/ebooks/docx/index.py index f2c8b5243c..974d25d9a1 100644 --- a/src/calibre/ebooks/docx/index.py +++ b/src/calibre/ebooks/docx/index.py @@ -10,7 +10,7 @@ from operator import itemgetter from lxml import etree from calibre.utils.icu import partition_by_first_letter, sort_key -from polyglot.builtins import iteritems, unicode_type, filter +from polyglot.builtins import iteritems, filter def get_applicable_xe_fields(index, xe_fields, XPath, expand): @@ -246,7 +246,7 @@ def polish_index_markup(index, blocks): a = block.xpath('descendant::a[1]') text = '' if a: - text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip() + text = etree.tostring(a[0], method='text', with_tail=False, encoding='unicode').strip() if ':' in text: path_map[block] = parts = list(filter(None, (x.strip() for x in text.split(':')))) if len(parts) > 1: diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py index 50893e739c..a287514e2c 100644 --- a/src/calibre/ebooks/docx/toc.py +++ b/src/calibre/ebooks/docx/toc.py @@ -12,7 +12,7 @@ from lxml.etree import tostring from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.oeb.polish.toc import elem_to_toc_text -from polyglot.builtins import iteritems, unicode_type, range +from polyglot.builtins import iteritems, range def from_headings(body, log, namespace): @@ -93,7 +93,7 @@ def link_to_txt(a, styles, object_map): if rs.css.get('display', None) == 'none': a.remove(child) - return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip() + return tostring(a, method='text', with_tail=False, encoding='unicode').strip() def from_toc(docx, link_map, styles, object_map, log, namespace): diff --git a/src/calibre/ebooks/docx/writer/styles.py b/src/calibre/ebooks/docx/writer/styles.py index 0931491958..4cea15d599 100644 --- a/src/calibre/ebooks/docx/writer/styles.py +++ b/src/calibre/ebooks/docx/writer/styles.py @@ -14,7 +14,7 @@ from lxml import etree from calibre.ebooks import parse_css_length from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero from calibre.utils.localization import lang_as_iso639_1 -from polyglot.builtins import iteritems, unicode_type, filter +from polyglot.builtins import iteritems, filter from tinycss.css21 import CSS21Parser css_parser = CSS21Parser() @@ -46,7 +46,7 @@ def bmap(x): def is_dropcaps(html_tag, tag_style): - return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left' + return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding='unicode', with_tail=False)) < 5 and tag_style['float'] == 'left' class CombinedStyle(object): diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 5dede1505e..9936ffad7e 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -65,7 +65,7 @@ class FB2MLizer(object): output = self.clean_text(u''.join(output)) if self.opts.pretty_print: - return u'\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True) + return u'\n%s' % etree.tostring(etree.fromstring(output), encoding='unicode', pretty_print=True) else: return u'' + output diff --git a/src/calibre/ebooks/metadata/fb2.py b/src/calibre/ebooks/metadata/fb2.py index 04f67c97dd..bbd1aec7cc 100644 --- a/src/calibre/ebooks/metadata/fb2.py +++ b/src/calibre/ebooks/metadata/fb2.py @@ -27,7 +27,7 @@ NAMESPACES = { 'xlink' : 'http://www.w3.org/1999/xlink' } -tostring = partial(etree.tostring, method='text', encoding=unicode_type) +tostring = partial(etree.tostring, method='text', encoding='unicode') def XLINK(tag): @@ -448,7 +448,7 @@ def ensure_namespace(doc): break if bare_tags: import re - raw = etree.tostring(doc, encoding=unicode_type) + raw = etree.tostring(doc, encoding='unicode') raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw) doc = etree.fromstring(raw) return doc diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 273a700eb9..c57fe7c758 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -893,7 +893,7 @@ class OPF(object): # {{{ ans = None for match in self.pubdate_path(self.metadata): try: - val = parse_date(etree.tostring(match, encoding=unicode_type, + val = parse_date(etree.tostring(match, encoding='unicode', method='text', with_tail=False).strip()) except: continue @@ -906,7 +906,7 @@ class OPF(object): # {{{ least_val = least_elem = None for match in self.pubdate_path(self.metadata): try: - cval = parse_date(etree.tostring(match, encoding=unicode_type, + cval = parse_date(etree.tostring(match, encoding='unicode', method='text', with_tail=False).strip()) except: match.getparent().remove(match) @@ -964,7 +964,7 @@ class OPF(object): # {{{ for attr, val in iteritems(x.attrib): if attr.endswith('scheme'): typ = icu_lower(val) - val = etree.tostring(x, with_tail=False, encoding=unicode_type, + val = etree.tostring(x, with_tail=False, encoding='unicode', method='text').strip() if val and typ not in ('calibre', 'uuid'): if typ == 'isbn' and val.lower().startswith('urn:isbn:'): @@ -973,7 +973,7 @@ class OPF(object): # {{{ found_scheme = True break if not found_scheme: - val = etree.tostring(x, with_tail=False, encoding=unicode_type, + val = etree.tostring(x, with_tail=False, encoding='unicode', method='text').strip() if val.lower().startswith('urn:isbn:'): val = check_isbn(val.split(':')[-1]) diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index 2287d12aab..52bfea8377 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -210,7 +210,7 @@ class TOC(list): text = u'' for txt in txt_path(nl): text += etree.tostring(txt, method='text', - encoding=unicode_type, with_tail=False) + encoding='unicode', with_tail=False) content = content_path(np) if content and text: content = content[0] diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index 5acfd36b53..4253c060f8 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -235,7 +235,7 @@ class KF8Writer(object): root = self.data(item) for svg in XPath('//svg:svg')(root): - raw = etree.tostring(svg, encoding=unicode_type, with_tail=False) + raw = etree.tostring(svg, encoding='unicode', with_tail=False) idx = len(self.flows) self.flows.append(raw) p = svg.getparent() diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 25bb2ad62b..e6c609d7dc 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -400,7 +400,7 @@ def xml2str(root, pretty_print=False, strip_comments=False, with_tail=True): def xml2text(elem, pretty_print=False): - return etree.tostring(elem, method='text', encoding=unicode_type, with_tail=False, pretty_print=pretty_print) + return etree.tostring(elem, method='text', encoding='unicode', with_tail=False, pretty_print=pretty_print) def escape_cdata(root): diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index ef1d485cbf..1f9e7c841b 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -112,7 +112,7 @@ def _html4_parse(data): for elem in data.iter(tag=etree.Comment): if elem.text: elem.text = elem.text.strip('-') - data = etree.tostring(data, encoding=unicode_type) + data = etree.tostring(data, encoding='unicode') # Setting huge_tree=True causes crashes in windows with large files parser = etree.XMLParser(no_network=True) @@ -273,7 +273,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, if not namespace(data.tag): log.warn('Forcing', filename, 'into XHTML namespace') data.attrib['xmlns'] = XHTML_NS - data = etree.tostring(data, encoding=unicode_type) + data = etree.tostring(data, encoding='unicode') try: data = etree.fromstring(data, parser=parser) diff --git a/src/calibre/ebooks/oeb/polish/stats.py b/src/calibre/ebooks/oeb/polish/stats.py index d9f46bd4af..c7a5d5c779 100644 --- a/src/calibre/ebooks/oeb/polish/stats.py +++ b/src/calibre/ebooks/oeb/polish/stats.py @@ -120,7 +120,7 @@ def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize if before: ans.append(before) if for_pseudo is not None: - ans.append(tostring(elem, method='text', encoding=unicode_type, with_tail=False)) + ans.append(tostring(elem, method='text', encoding='unicode', with_tail=False)) else: if elem.text: ans.append(elem.text) diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 0d693a478a..c752a0735e 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -142,7 +142,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name): text = '' for txt in child_xpath(nl, 'text'): text += etree.tostring(txt, method='text', - encoding=unicode_type, with_tail=False) + encoding='unicode', with_tail=False) content = child_xpath(navpoint, 'content') if content: content = content[0] @@ -190,7 +190,7 @@ def parse_ncx(container, ncx_name): def add_from_li(container, li, parent, nav_name): dest = frag = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): - text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip() + text = etree.tostring(x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name) @@ -225,7 +225,7 @@ def parse_nav(container, nav_name): if ol is not None: process_nav_node(container, ol, toc_root, nav_name) for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())): - text = etree.tostring(h, method='text', encoding=unicode_type, with_tail=False) or h.get('title') + text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title') if text: toc_root.toc_title = text break @@ -323,7 +323,7 @@ def get_nav_landmarks(container): for a in li.iterdescendants(XHTML('a')): href, rtype = a.get('href'), a.get(et) if href: - title = etree.tostring(a, method='text', encoding=unicode_type, with_tail=False).strip() + title = etree.tostring(a, method='text', encoding='unicode', with_tail=False).strip() href, frag = href.partition('#')[::2] name = container.href_to_name(href, nav) if container.has_name(name): diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index e32fa4a349..467b0113c1 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -428,7 +428,7 @@ class OEBReader(object): 'descendant::calibre:meta[@name = "description"]') if descriptionElement: description = etree.tostring(descriptionElement[0], - method='text', encoding=unicode_type).strip() + method='text', encoding='unicode').strip() if not description: description = None else: diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py index 90e1ca2e84..7f5342d6f4 100644 --- a/src/calibre/ebooks/oeb/transforms/metadata.py +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' import os, re from calibre.utils.date import isoformat, now from calibre import guess_type -from polyglot.builtins import iteritems, unicode_type, filter +from polyglot.builtins import iteritems, filter filter @@ -206,7 +206,7 @@ class MergeMetadata(object): for item in affected_items: body = XPath('//h:body')(item.data) if body: - text = etree.tostring(body[0], method='text', encoding=unicode_type) + text = etree.tostring(body[0], method='text', encoding='unicode') else: text = '' text = re.sub(r'\s+', '', text) diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 4de590465d..d0e1a334ec 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -20,7 +20,7 @@ from calibre.ebooks.epub import rules from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, urldefrag, rewrite_links, urlunquote, XHTML, urlnormalize) from calibre.ebooks.oeb.polish.split import do_split -from polyglot.builtins import iteritems, unicode_type, range, map +from polyglot.builtins import iteritems, range, map from css_selectors import Select, SelectorError XPath = functools.partial(_XPath, namespaces=NAMESPACES) @@ -295,7 +295,7 @@ class FlowSplitter(object): if body is None: return False txt = re.sub(u'\\s+|\\xa0', '', - etree.tostring(body, method='text', encoding=unicode_type)) + etree.tostring(body, method='text', encoding='unicode')) if len(txt) > 1: return False for img in root.xpath('//h:img', namespaces=NAMESPACES): diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 0367a2fd4e..54c44ebe3c 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -11,7 +11,7 @@ from itertools import count from lxml import etree -from polyglot.builtins import unicode_type, range, map +from polyglot.builtins import range, map class Font(object): @@ -76,10 +76,10 @@ class Text(Element): text.tail = '' self.text_as_string = etree.tostring(text, method='text', - encoding=unicode_type) + encoding='unicode') self.raw = text.text if text.text else u'' for x in text.iterchildren(): - self.raw += etree.tostring(x, method='xml', encoding=unicode_type) + self.raw += etree.tostring(x, method='xml', encoding='unicode') self.average_character_width = self.width/len(self.text_as_string) def coalesce(self, other, page_number): diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 4a0df0d7b4..772117321c 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -135,7 +135,7 @@ class PMLMLizer(object): text = [u''] for item in self.oeb_book.spine: self.log.debug('Converting %s to PML markup...' % item.href) - content = unicode_type(etree.tostring(item.data, encoding=unicode_type)) + content = etree.tostring(item.data, encoding='unicode') content = self.prepare_text(content) content = etree.fromstring(content) stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py index 2e84cfbafc..d4b339c53c 100644 --- a/src/calibre/ebooks/rtf/rtfml.py +++ b/src/calibre/ebooks/rtf/rtfml.py @@ -120,7 +120,7 @@ class RTFMLizer(object): self.log.debug('Converting %s to RTF markup...' % item.href) # Removing comments is needed as comments with -- inside them can # cause fromstring() to fail - content = re.sub(u'', u'', etree.tostring(item.data, encoding=unicode_type), flags=re.DOTALL) + content = re.sub(u'', u'', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL) content = self.remove_newlines(content) content = self.remove_tabs(content) content = etree.fromstring(content) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index d04c590ab9..659959e6f0 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -85,7 +85,7 @@ class SNBMLizer(object): from calibre.ebooks.oeb.stylizer import Stylizer output = [u''] stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile) - content = unicode_type(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode_type)) + content = etree.tostring(self.item.data.find(XHTML('body')), encoding='unicode') # content = self.remove_newlines(content) trees = {} for subitem, subtitle in self.subitems: diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py index 220cb56e6d..d4c000f164 100644 --- a/src/calibre/ebooks/txt/txtml.py +++ b/src/calibre/ebooks/txt/txtml.py @@ -12,7 +12,7 @@ Transform OEB content into plain text import re from lxml import etree -from polyglot.builtins import unicode_type, string_or_bytes +from polyglot.builtins import string_or_bytes BLOCK_TAGS = [ @@ -74,7 +74,7 @@ class TXTMLizer(object): for x in item.data.iterdescendants(etree.Comment): if x.text and '--' in x.text: x.text = x.text.replace('--', '__') - content = unicode_type(etree.tostring(item.data, encoding=unicode_type)) + content = etree.tostring(item.data, encoding='unicode') content = self.remove_newlines(content) content = etree.fromstring(content) stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) diff --git a/src/calibre/gui2/comments_editor.py b/src/calibre/gui2/comments_editor.py index da9d790a43..0fffa212f0 100644 --- a/src/calibre/gui2/comments_editor.py +++ b/src/calibre/gui2/comments_editor.py @@ -360,7 +360,7 @@ class EditorWidget(QWebView, LineEditECM): # {{{ for body in root.xpath('//body'): if body.text: elems.append(body.text) - elems += [html.tostring(x, encoding=unicode_type) for x in body if + elems += [html.tostring(x, encoding='unicode') for x in body if x.tag not in ('script', 'style')] if len(elems) > 1: diff --git a/src/calibre/gui2/dialogs/plugin_updater.py b/src/calibre/gui2/dialogs/plugin_updater.py index e3fa0b6d1e..275e77dadf 100644 --- a/src/calibre/gui2/dialogs/plugin_updater.py +++ b/src/calibre/gui2/dialogs/plugin_updater.py @@ -840,7 +840,7 @@ class PluginUpdaterDialog(SizePersistedDialog): continue if heading_node.text_content().lower().find('version history') != -1: div_node = spoiler_node.xpath('div')[0] - text = html.tostring(div_node, method='html', encoding=unicode_type) + text = html.tostring(div_node, method='html', encoding='unicode') return re.sub(r'', '
', text) except: if DEBUG: diff --git a/src/calibre/gui2/tweak_book/diff/view.py b/src/calibre/gui2/tweak_book/diff/view.py index 6286091bd7..8c9e9c5f5b 100644 --- a/src/calibre/gui2/tweak_book/diff/view.py +++ b/src/calibre/gui2/tweak_book/diff/view.py @@ -65,7 +65,7 @@ def beautify_text(raw, syntax): else: root = parse(raw, line_numbers=False) pretty_html_tree(None, root) - return etree.tostring(root, encoding=unicode_type) + return etree.tostring(root, encoding='unicode') class LineNumberMap(dict): # {{{ diff --git a/src/calibre/gui2/tweak_book/text_search.py b/src/calibre/gui2/tweak_book/text_search.py index 03ee4fc215..0f00763580 100644 --- a/src/calibre/gui2/tweak_book/text_search.py +++ b/src/calibre/gui2/tweak_book/text_search.py @@ -16,7 +16,7 @@ from calibre.gui2.tweak_book import tprefs, editors, current_container from calibre.gui2.tweak_book.search import get_search_regex, InvalidRegex, initialize_search_request from calibre.gui2.tweak_book.widgets import BusyCursor from calibre.gui2.widgets2 import HistoryComboBox -from polyglot.builtins import iteritems, unicode_type, error_message +from polyglot.builtins import iteritems, error_message # UI {{{ @@ -179,7 +179,7 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name else: root = current_container().parsed(fname) if hasattr(root, 'xpath'): - raw = tostring(root, method='text', encoding=unicode_type, with_tail=True) + raw = tostring(root, method='text', encoding='unicode', with_tail=True) else: raw = current_container().raw_data(fname) if pat.search(raw) is not None: diff --git a/src/calibre/utils/open_with/osx.py b/src/calibre/utils/open_with/osx.py index 83dd0de249..8cce4e8116 100644 --- a/src/calibre/utils/open_with/osx.py +++ b/src/calibre/utils/open_with/osx.py @@ -10,7 +10,7 @@ from collections import defaultdict from calibre.ptempfile import TemporaryDirectory from calibre.utils.icu import numeric_sort_key -from polyglot.builtins import iteritems, unicode_type, string_or_bytes +from polyglot.builtins import iteritems, string_or_bytes from polyglot.plistlib import loads application_locations = ('/Applications', '~/Applications', '~/Desktop') @@ -30,8 +30,8 @@ def generate_public_uti_map(): for table in tables: for tr in table.xpath('descendant::tr')[1:]: td = tr.xpath('descendant::td') - identifier = etree.tostring(td[0], method='text', encoding=unicode_type).strip() - tags = etree.tostring(td[2], method='text', encoding=unicode_type).strip() + identifier = etree.tostring(td[0], method='text', encoding='unicode').strip() + tags = etree.tostring(td[2], method='text', encoding='unicode').strip() identifier = identifier.split()[0].replace('\u200b', '') exts = [x.strip()[1:].lower() for x in tags.split(',') if x.strip().startswith('.')] for ext in exts: diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 825cd5466f..46a1f964d0 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -43,7 +43,7 @@ class Article(object): if summary and '<' in summary: try: s = html.fragment_fromstring(summary, create_parent=True) - summary = html.tostring(s, method='text', encoding=unicode_type) + summary = html.tostring(s, method='text', encoding='unicode') except: print('Failed to process article summary, deleting:') print(summary.encode('utf-8')) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 2a6f61568a..c2220e1af0 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -743,7 +743,7 @@ class BasicNewsRecipe(Recipe): heading.text = extracted_title body.insert(0, heading) - raw_html = tostring(root, encoding=unicode_type) + raw_html = tostring(root, encoding='unicode') return raw_html @@ -1667,7 +1667,7 @@ class BasicNewsRecipe(Recipe): return tag if callable(getattr(tag, 'xpath', None)) and not hasattr(tag, 'contents'): # a lxml tag from lxml.etree import tostring - ans = tostring(tag, method='text', encoding=unicode_type, with_tail=False) + ans = tostring(tag, method='text', encoding='unicode', with_tail=False) else: strings = [] for item in tag.contents: