etree: use encoding='unicode' instead of passing unicode_type object

Eli Schwartz 2019-05-27 14:09:06 -04:00
parent b5eda37c75
commit ac6bd5e0fd
31 changed files with 53 additions and 54 deletions
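
For context on the change: lxml's etree.tostring() returns bytes unless told otherwise, and passing the literal string 'unicode' as the encoding makes it return a text string on both Python 2 and Python 3. That makes calibre's polyglot unicode_type (str on Python 3, unicode on Python 2) unnecessary as an encoding argument, and it also makes the unicode_type(...) wrappers around some tostring() calls redundant, since the call already yields text. A minimal illustrative sketch (not part of the commit; the sample markup is made up):

    from lxml import etree

    root = etree.fromstring('<p>Hello <b>world</b></p>')

    etree.tostring(root)                      # default: bytes, b'<p>Hello <b>world</b></p>'
    etree.tostring(root, encoding='unicode')  # text: '<p>Hello <b>world</b></p>'

    # method='text' plus encoding='unicode' extracts plain text as a str,
    # the pattern used throughout this commit:
    etree.tostring(root, method='text', encoding='unicode', with_tail=False)  # 'Hello world'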

View File

@@ -7,7 +7,7 @@ import os, re, posixpath
 from itertools import cycle
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
-from polyglot.builtins import unicode_type, as_bytes, getcwd
+from polyglot.builtins import as_bytes, getcwd
 ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
 IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
@@ -369,7 +369,7 @@ class EPUBInput(InputFormatPlugin):
 href = text = None
 for x in li.iterchildren(XHTML('a'), XHTML('span')):
 text = etree.tostring(
-x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(
+x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(
 x.xpath('descendant-or-self::*/@title')).strip()
 href = x.get('href')
 if href:

View File

@@ -8,7 +8,7 @@ import os, re
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre import guess_type
-from polyglot.builtins import iteritems, unicode_type, getcwd
+from polyglot.builtins import iteritems, getcwd
 FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
 FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'
@@ -71,7 +71,7 @@ class FB2Input(InputFormatPlugin):
 stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
 css = ''
 for s in stylesheets:
-css += etree.tostring(s, encoding=unicode_type, method='text',
+css += etree.tostring(s, encoding='unicode', method='text',
 with_tail=False) + '\n\n'
 if css:
 import css_parser, logging

View File

@@ -100,7 +100,7 @@ class HTMLZOutput(OutputFormatPlugin):
 for item in oeb_book.manifest:
 if item.media_type in OEB_IMAGES and item.href in images:
 if item.media_type == SVG_MIME:
-data = unicode_type(etree.tostring(item.data, encoding=unicode_type))
+data = etree.tostring(item.data, encoding='unicode')
 else:
 data = item.data
 fname = os.path.join(tdir, u'images', images[item.href])

View File

@@ -19,7 +19,6 @@ from calibre.utils.localization import canonicalize_lang
 from calibre.utils.logging import default_log
 from calibre.utils.zipfile import ZipFile
 from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
-from polyglot.builtins import unicode_type
 def fromstring(raw, parser=RECOVER_PARSER):
@@ -56,7 +55,7 @@ def read_doc_props(raw, mi, XPath):
 desc = XPath('//dc:description')(root)
 if desc:
-raw = etree.tostring(desc[0], method='text', encoding=unicode_type)
+raw = etree.tostring(desc[0], method='text', encoding='unicode')
 raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary
 mi.comments = raw.strip()

View File

@@ -10,7 +10,7 @@ from operator import itemgetter
 from lxml import etree
 from calibre.utils.icu import partition_by_first_letter, sort_key
-from polyglot.builtins import iteritems, unicode_type, filter
+from polyglot.builtins import iteritems, filter
 def get_applicable_xe_fields(index, xe_fields, XPath, expand):
@@ -246,7 +246,7 @@ def polish_index_markup(index, blocks):
 a = block.xpath('descendant::a[1]')
 text = ''
 if a:
-text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip()
+text = etree.tostring(a[0], method='text', with_tail=False, encoding='unicode').strip()
 if ':' in text:
 path_map[block] = parts = list(filter(None, (x.strip() for x in text.split(':'))))
 if len(parts) > 1:

View File

@@ -12,7 +12,7 @@ from lxml.etree import tostring
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
-from polyglot.builtins import iteritems, unicode_type, range
+from polyglot.builtins import iteritems, range
 def from_headings(body, log, namespace):
@@ -93,7 +93,7 @@ def link_to_txt(a, styles, object_map):
 if rs.css.get('display', None) == 'none':
 a.remove(child)
-return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip()
+return tostring(a, method='text', with_tail=False, encoding='unicode').strip()
 def from_toc(docx, link_map, styles, object_map, log, namespace):

View File

@@ -14,7 +14,7 @@ from lxml import etree
 from calibre.ebooks import parse_css_length
 from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
 from calibre.utils.localization import lang_as_iso639_1
-from polyglot.builtins import iteritems, unicode_type, filter
+from polyglot.builtins import iteritems, filter
 from tinycss.css21 import CSS21Parser
 css_parser = CSS21Parser()
@@ -46,7 +46,7 @@ def bmap(x):
 def is_dropcaps(html_tag, tag_style):
-return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left'
+return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding='unicode', with_tail=False)) < 5 and tag_style['float'] == 'left'
 class CombinedStyle(object):

View File

@@ -65,7 +65,7 @@ class FB2MLizer(object):
 output = self.clean_text(u''.join(output))
 if self.opts.pretty_print:
-return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True)
+return u'<?xml version="1.0" encoding="UTF-8"?>\n%s' % etree.tostring(etree.fromstring(output), encoding='unicode', pretty_print=True)
 else:
 return u'<?xml version="1.0" encoding="UTF-8"?>' + output

View File

@@ -27,7 +27,7 @@ NAMESPACES = {
 'xlink' : 'http://www.w3.org/1999/xlink'
 }
-tostring = partial(etree.tostring, method='text', encoding=unicode_type)
+tostring = partial(etree.tostring, method='text', encoding='unicode')
 def XLINK(tag):
@@ -448,7 +448,7 @@ def ensure_namespace(doc):
 break
 if bare_tags:
 import re
-raw = etree.tostring(doc, encoding=unicode_type)
+raw = etree.tostring(doc, encoding='unicode')
 raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
 doc = etree.fromstring(raw)
 return doc

View File

@@ -893,7 +893,7 @@ class OPF(object): # {{{
 ans = None
 for match in self.pubdate_path(self.metadata):
 try:
-val = parse_date(etree.tostring(match, encoding=unicode_type,
+val = parse_date(etree.tostring(match, encoding='unicode',
 method='text', with_tail=False).strip())
 except:
 continue
@@ -906,7 +906,7 @@ class OPF(object): # {{{
 least_val = least_elem = None
 for match in self.pubdate_path(self.metadata):
 try:
-cval = parse_date(etree.tostring(match, encoding=unicode_type,
+cval = parse_date(etree.tostring(match, encoding='unicode',
 method='text', with_tail=False).strip())
 except:
 match.getparent().remove(match)
@@ -964,7 +964,7 @@ class OPF(object): # {{{
 for attr, val in iteritems(x.attrib):
 if attr.endswith('scheme'):
 typ = icu_lower(val)
-val = etree.tostring(x, with_tail=False, encoding=unicode_type,
+val = etree.tostring(x, with_tail=False, encoding='unicode',
 method='text').strip()
 if val and typ not in ('calibre', 'uuid'):
 if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
@@ -973,7 +973,7 @@ class OPF(object): # {{{
 found_scheme = True
 break
 if not found_scheme:
-val = etree.tostring(x, with_tail=False, encoding=unicode_type,
+val = etree.tostring(x, with_tail=False, encoding='unicode',
 method='text').strip()
 if val.lower().startswith('urn:isbn:'):
 val = check_isbn(val.split(':')[-1])

View File

@@ -210,7 +210,7 @@ class TOC(list):
 text = u''
 for txt in txt_path(nl):
 text += etree.tostring(txt, method='text',
-encoding=unicode_type, with_tail=False)
+encoding='unicode', with_tail=False)
 content = content_path(np)
 if content and text:
 content = content[0]

View File

@@ -235,7 +235,7 @@ class KF8Writer(object):
 root = self.data(item)
 for svg in XPath('//svg:svg')(root):
-raw = etree.tostring(svg, encoding=unicode_type, with_tail=False)
+raw = etree.tostring(svg, encoding='unicode', with_tail=False)
 idx = len(self.flows)
 self.flows.append(raw)
 p = svg.getparent()

View File

@@ -400,7 +400,7 @@ def xml2str(root, pretty_print=False, strip_comments=False, with_tail=True):
 def xml2text(elem, pretty_print=False):
-return etree.tostring(elem, method='text', encoding=unicode_type, with_tail=False, pretty_print=pretty_print)
+return etree.tostring(elem, method='text', encoding='unicode', with_tail=False, pretty_print=pretty_print)
 def escape_cdata(root):

View File

@@ -112,7 +112,7 @@ def _html4_parse(data):
 for elem in data.iter(tag=etree.Comment):
 if elem.text:
 elem.text = elem.text.strip('-')
-data = etree.tostring(data, encoding=unicode_type)
+data = etree.tostring(data, encoding='unicode')
 # Setting huge_tree=True causes crashes in windows with large files
 parser = etree.XMLParser(no_network=True)
@@ -273,7 +273,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
 if not namespace(data.tag):
 log.warn('Forcing', filename, 'into XHTML namespace')
 data.attrib['xmlns'] = XHTML_NS
-data = etree.tostring(data, encoding=unicode_type)
+data = etree.tostring(data, encoding='unicode')
 try:
 data = etree.fromstring(data, parser=parser)

View File

@@ -120,7 +120,7 @@ def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize
 if before:
 ans.append(before)
 if for_pseudo is not None:
-ans.append(tostring(elem, method='text', encoding=unicode_type, with_tail=False))
+ans.append(tostring(elem, method='text', encoding='unicode', with_tail=False))
 else:
 if elem.text:
 ans.append(elem.text)

View File

@@ -142,7 +142,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name):
 text = ''
 for txt in child_xpath(nl, 'text'):
 text += etree.tostring(txt, method='text',
-encoding=unicode_type, with_tail=False)
+encoding='unicode', with_tail=False)
 content = child_xpath(navpoint, 'content')
 if content:
 content = content[0]
@@ -190,7 +190,7 @@ def parse_ncx(container, ncx_name):
 def add_from_li(container, li, parent, nav_name):
 dest = frag = text = None
 for x in li.iterchildren(XHTML('a'), XHTML('span')):
-text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
+text = etree.tostring(x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
 href = x.get('href')
 if href:
 dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
@@ -225,7 +225,7 @@ def parse_nav(container, nav_name):
 if ol is not None:
 process_nav_node(container, ol, toc_root, nav_name)
 for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
-text = etree.tostring(h, method='text', encoding=unicode_type, with_tail=False) or h.get('title')
+text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title')
 if text:
 toc_root.toc_title = text
 break
@@ -323,7 +323,7 @@ def get_nav_landmarks(container):
 for a in li.iterdescendants(XHTML('a')):
 href, rtype = a.get('href'), a.get(et)
 if href:
-title = etree.tostring(a, method='text', encoding=unicode_type, with_tail=False).strip()
+title = etree.tostring(a, method='text', encoding='unicode', with_tail=False).strip()
 href, frag = href.partition('#')[::2]
 name = container.href_to_name(href, nav)
 if container.has_name(name):

View File

@@ -428,7 +428,7 @@ class OEBReader(object):
 'descendant::calibre:meta[@name = "description"]')
 if descriptionElement:
 description = etree.tostring(descriptionElement[0],
-method='text', encoding=unicode_type).strip()
+method='text', encoding='unicode').strip()
 if not description:
 description = None
 else:

View File

@@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 import os, re
 from calibre.utils.date import isoformat, now
 from calibre import guess_type
-from polyglot.builtins import iteritems, unicode_type, filter
+from polyglot.builtins import iteritems, filter
 filter
@@ -206,7 +206,7 @@ class MergeMetadata(object):
 for item in affected_items:
 body = XPath('//h:body')(item.data)
 if body:
-text = etree.tostring(body[0], method='text', encoding=unicode_type)
+text = etree.tostring(body[0], method='text', encoding='unicode')
 else:
 text = ''
 text = re.sub(r'\s+', '', text)

View File

@@ -20,7 +20,7 @@ from calibre.ebooks.epub import rules
 from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
 urldefrag, rewrite_links, urlunquote, XHTML, urlnormalize)
 from calibre.ebooks.oeb.polish.split import do_split
-from polyglot.builtins import iteritems, unicode_type, range, map
+from polyglot.builtins import iteritems, range, map
 from css_selectors import Select, SelectorError
 XPath = functools.partial(_XPath, namespaces=NAMESPACES)
@@ -295,7 +295,7 @@ class FlowSplitter(object):
 if body is None:
 return False
 txt = re.sub(u'\\s+|\\xa0', '',
-etree.tostring(body, method='text', encoding=unicode_type))
+etree.tostring(body, method='text', encoding='unicode'))
 if len(txt) > 1:
 return False
 for img in root.xpath('//h:img', namespaces=NAMESPACES):

View File

@@ -11,7 +11,7 @@ from itertools import count
 from lxml import etree
-from polyglot.builtins import unicode_type, range, map
+from polyglot.builtins import range, map
 class Font(object):
@@ -76,10 +76,10 @@ class Text(Element):
 text.tail = ''
 self.text_as_string = etree.tostring(text, method='text',
-encoding=unicode_type)
+encoding='unicode')
 self.raw = text.text if text.text else u''
 for x in text.iterchildren():
-self.raw += etree.tostring(x, method='xml', encoding=unicode_type)
+self.raw += etree.tostring(x, method='xml', encoding='unicode')
 self.average_character_width = self.width/len(self.text_as_string)
 def coalesce(self, other, page_number):

View File

@@ -135,7 +135,7 @@ class PMLMLizer(object):
 text = [u'']
 for item in self.oeb_book.spine:
 self.log.debug('Converting %s to PML markup...' % item.href)
-content = unicode_type(etree.tostring(item.data, encoding=unicode_type))
+content = etree.tostring(item.data, encoding='unicode')
 content = self.prepare_text(content)
 content = etree.fromstring(content)
 stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)

View File

@@ -120,7 +120,7 @@ class RTFMLizer(object):
 self.log.debug('Converting %s to RTF markup...' % item.href)
 # Removing comments is needed as comments with -- inside them can
 # cause fromstring() to fail
-content = re.sub(u'<!--.*?-->', u'', etree.tostring(item.data, encoding=unicode_type), flags=re.DOTALL)
+content = re.sub(u'<!--.*?-->', u'', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL)
 content = self.remove_newlines(content)
 content = self.remove_tabs(content)
 content = etree.fromstring(content)

View File

@@ -85,7 +85,7 @@ class SNBMLizer(object):
 from calibre.ebooks.oeb.stylizer import Stylizer
 output = [u'']
 stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
-content = unicode_type(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode_type))
+content = etree.tostring(self.item.data.find(XHTML('body')), encoding='unicode')
 # content = self.remove_newlines(content)
 trees = {}
 for subitem, subtitle in self.subitems:

View File

@@ -12,7 +12,7 @@ Transform OEB content into plain text
 import re
 from lxml import etree
-from polyglot.builtins import unicode_type, string_or_bytes
+from polyglot.builtins import string_or_bytes
 BLOCK_TAGS = [
@@ -74,7 +74,7 @@ class TXTMLizer(object):
 for x in item.data.iterdescendants(etree.Comment):
 if x.text and '--' in x.text:
 x.text = x.text.replace('--', '__')
-content = unicode_type(etree.tostring(item.data, encoding=unicode_type))
+content = etree.tostring(item.data, encoding='unicode')
 content = self.remove_newlines(content)
 content = etree.fromstring(content)
 stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)

View File

@@ -360,7 +360,7 @@ class EditorWidget(QWebView, LineEditECM): # {{{
 for body in root.xpath('//body'):
 if body.text:
 elems.append(body.text)
-elems += [html.tostring(x, encoding=unicode_type) for x in body if
+elems += [html.tostring(x, encoding='unicode') for x in body if
 x.tag not in ('script', 'style')]
 if len(elems) > 1:

View File

@@ -840,7 +840,7 @@ class PluginUpdaterDialog(SizePersistedDialog):
 continue
 if heading_node.text_content().lower().find('version history') != -1:
 div_node = spoiler_node.xpath('div')[0]
-text = html.tostring(div_node, method='html', encoding=unicode_type)
+text = html.tostring(div_node, method='html', encoding='unicode')
 return re.sub(r'<div\s.*?>', '<div>', text)
 except:
 if DEBUG:

View File

@@ -65,7 +65,7 @@ def beautify_text(raw, syntax):
 else:
 root = parse(raw, line_numbers=False)
 pretty_html_tree(None, root)
-return etree.tostring(root, encoding=unicode_type)
+return etree.tostring(root, encoding='unicode')
 class LineNumberMap(dict): # {{{

View File

@@ -16,7 +16,7 @@ from calibre.gui2.tweak_book import tprefs, editors, current_container
 from calibre.gui2.tweak_book.search import get_search_regex, InvalidRegex, initialize_search_request
 from calibre.gui2.tweak_book.widgets import BusyCursor
 from calibre.gui2.widgets2 import HistoryComboBox
-from polyglot.builtins import iteritems, unicode_type, error_message
+from polyglot.builtins import iteritems, error_message
 # UI {{{
@@ -179,7 +179,7 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name
 else:
 root = current_container().parsed(fname)
 if hasattr(root, 'xpath'):
-raw = tostring(root, method='text', encoding=unicode_type, with_tail=True)
+raw = tostring(root, method='text', encoding='unicode', with_tail=True)
 else:
 raw = current_container().raw_data(fname)
 if pat.search(raw) is not None:

View File

@@ -10,7 +10,7 @@ from collections import defaultdict
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.icu import numeric_sort_key
-from polyglot.builtins import iteritems, unicode_type, string_or_bytes
+from polyglot.builtins import iteritems, string_or_bytes
 from polyglot.plistlib import loads
 application_locations = ('/Applications', '~/Applications', '~/Desktop')
@@ -30,8 +30,8 @@ def generate_public_uti_map():
 for table in tables:
 for tr in table.xpath('descendant::tr')[1:]:
 td = tr.xpath('descendant::td')
-identifier = etree.tostring(td[0], method='text', encoding=unicode_type).strip()
-tags = etree.tostring(td[2], method='text', encoding=unicode_type).strip()
+identifier = etree.tostring(td[0], method='text', encoding='unicode').strip()
+tags = etree.tostring(td[2], method='text', encoding='unicode').strip()
 identifier = identifier.split()[0].replace('\u200b', '')
 exts = [x.strip()[1:].lower() for x in tags.split(',') if x.strip().startswith('.')]
 for ext in exts:

View File

@@ -43,7 +43,7 @@ class Article(object):
 if summary and '<' in summary:
 try:
 s = html.fragment_fromstring(summary, create_parent=True)
-summary = html.tostring(s, method='text', encoding=unicode_type)
+summary = html.tostring(s, method='text', encoding='unicode')
 except:
 print('Failed to process article summary, deleting:')
 print(summary.encode('utf-8'))

View File

@@ -743,7 +743,7 @@ class BasicNewsRecipe(Recipe):
 heading.text = extracted_title
 body.insert(0, heading)
-raw_html = tostring(root, encoding=unicode_type)
+raw_html = tostring(root, encoding='unicode')
 return raw_html
@@ -1667,7 +1667,7 @@ class BasicNewsRecipe(Recipe):
 return tag
 if callable(getattr(tag, 'xpath', None)) and not hasattr(tag, 'contents'): # a lxml tag
 from lxml.etree import tostring
-ans = tostring(tag, method='text', encoding=unicode_type, with_tail=False)
+ans = tostring(tag, method='text', encoding='unicode', with_tail=False)
 else:
 strings = []
 for item in tag.contents: