diff --git a/src/calibre/ebooks/conversion/__init__.py b/src/calibre/ebooks/conversion/__init__.py index 6f7f017f6b..be49b37591 100644 --- a/src/calibre/ebooks/conversion/__init__.py +++ b/src/calibre/ebooks/conversion/__init__.py @@ -5,6 +5,8 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +from polyglot.builtins import native_string_type + class ConversionUserFeedBack(Exception): @@ -25,4 +27,4 @@ class ConversionUserFeedBack(Exception): # Ensure exception uses fully qualified name as this is used to detect it in # the GUI. -ConversionUserFeedBack.__name__ = str('calibre.ebooks.conversion.ConversionUserFeedBack') +ConversionUserFeedBack.__name__ = native_string_type('calibre.ebooks.conversion.ConversionUserFeedBack') diff --git a/src/calibre/ebooks/conversion/plugins/azw4_input.py b/src/calibre/ebooks/conversion/plugins/azw4_input.py index 7bceded58f..d3a7e6a5f9 100644 --- a/src/calibre/ebooks/conversion/plugins/azw4_input.py +++ b/src/calibre/ebooks/conversion/plugins/azw4_input.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2011, John Schember ' diff --git a/src/calibre/ebooks/conversion/plugins/chm_input.py b/src/calibre/ebooks/conversion/plugins/chm_input.py index 01b81bbbec..b87c347ca0 100644 --- a/src/calibre/ebooks/conversion/plugins/chm_input.py +++ b/src/calibre/ebooks/conversion/plugins/chm_input.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + ''' CHM File decoding support ''' __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ,' \ @@ -64,7 +66,7 @@ class CHMInput(InputFormatPlugin): metadata = Metadata(os.path.basename(chm_name)) encoding = self._chm_reader.get_encoding() or options.input_encoding or 'cp1252' self._chm_reader.CloseCHM() - # print tdir, mainpath + # print((tdir, mainpath)) # from calibre import ipython # ipython() @@ -117,10 +119,10 @@ class CHMInput(InputFormatPlugin): strip_encoding_pats=True, resolve_entities=True)[0] hhcroot = html.fromstring(hhcdata) toc = self._process_nodes(hhcroot) - # print "=============================" - # print "Printing hhcroot" - # print etree.tostring(hhcroot, pretty_print=True) - # print "=============================" + # print("=============================") + # print("Printing hhcroot") + # print(etree.tostring(hhcroot, pretty_print=True)) + # print("=============================") log.debug('Found %d section nodes' % toc.count()) htmlpath = os.path.splitext(hhcpath)[0] + ".html" base = os.path.dirname(os.path.abspath(htmlpath)) @@ -183,7 +185,7 @@ class CHMInput(InputFormatPlugin): p = node.xpath('ancestor::ul[1]/ancestor::li[1]/object[1]') parent = p[0] if p else None toc = ancestor_map.get(parent, toc) - title = href = u'' + title = href = '' for param in node.xpath('./param'): if match_string(param.attrib['name'], 'name'): title = param.attrib['value'] diff --git a/src/calibre/ebooks/conversion/plugins/epub_input.py b/src/calibre/ebooks/conversion/plugins/epub_input.py index 5aa689ce97..983d7b0d10 100644 --- a/src/calibre/ebooks/conversion/plugins/epub_input.py +++ b/src/calibre/ebooks/conversion/plugins/epub_input.py @@ -1,4 +1,5 @@ -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals + __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' @@ -7,7 +8,7 @@ import os, re, 
posixpath from itertools import cycle from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation -from polyglot.builtins import unicode_type, as_bytes, getcwd +from polyglot.builtins import as_bytes, getcwd ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC' IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding' @@ -44,7 +45,7 @@ class EPUBInput(InputFormatPlugin): import uuid, hashlib idpf_key = opf.raw_unique_identifier if idpf_key: - idpf_key = re.sub(u'[\u0020\u0009\u000d\u000a]', u'', idpf_key) + idpf_key = re.sub('[\u0020\u0009\u000d\u000a]', '', idpf_key) idpf_key = hashlib.sha1(idpf_key.encode('utf-8')).digest() key = None for item in opf.identifier_iter(): @@ -269,7 +270,7 @@ class EPUBInput(InputFormatPlugin): encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml')) opf = self.find_opf() if opf is None: - for f in walk(u'.'): + for f in walk('.'): if f.lower().endswith('.opf') and '__MACOSX' not in f and \ not os.path.basename(f).startswith('.'): opf = os.path.abspath(f) @@ -369,7 +370,7 @@ class EPUBInput(InputFormatPlugin): href = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): text = etree.tostring( - x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join( + x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join( x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: diff --git a/src/calibre/ebooks/conversion/plugins/fb2_input.py b/src/calibre/ebooks/conversion/plugins/fb2_input.py index d802fe2b2a..9ab3147e52 100644 --- a/src/calibre/ebooks/conversion/plugins/fb2_input.py +++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py @@ -8,7 +8,7 @@ import os, re from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre import guess_type -from polyglot.builtins import iteritems, unicode_type, getcwd +from polyglot.builtins import iteritems, getcwd FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0' FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1' @@ -71,7 +71,7 @@ class FB2Input(InputFormatPlugin): stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]') css = '' for s in stylesheets: - css += etree.tostring(s, encoding=unicode_type, method='text', + css += etree.tostring(s, encoding='unicode', method='text', with_tail=False) + '\n\n' if css: import css_parser, logging diff --git a/src/calibre/ebooks/conversion/plugins/fb2_output.py b/src/calibre/ebooks/conversion/plugins/fb2_output.py index 8ffe4ec78d..0313086bb4 100644 --- a/src/calibre/ebooks/conversion/plugins/fb2_output.py +++ b/src/calibre/ebooks/conversion/plugins/fb2_output.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' @@ -67,80 +68,80 @@ class FB2Output(OutputFormatPlugin): # Children's 'child_tale', # Fairy Tales 'child_verse', # Verses - 'child_prose', # Prose - 'child_sf', # Science Fiction - 'child_det', # Detectives & Thrillers - 'child_adv', # Adventures - 'child_education', # Educational - 'children', # Other - # Poetry & Dramaturgy - 'poetry', # Poetry - 'dramaturgy', # Dramaturgy - # Antique literature - 'antique_ant', # Antique - 'antique_european', # European - 'antique_russian', # Old russian - 'antique_east', # Old east - 'antique_myths', # Myths. Legends. 
Epos - 'antique', # Other - # Scientific#educational - 'sci_history', # History - 'sci_psychology', # Psychology - 'sci_culture', # Cultural science - 'sci_religion', # Religious studies - 'sci_philosophy', # Philosophy - 'sci_politics', # Politics - 'sci_business', # Business literature - 'sci_juris', # Jurisprudence - 'sci_linguistic', # Linguistics - 'sci_medicine', # Medicine - 'sci_phys', # Physics - 'sci_math', # Mathematics - 'sci_chem', # Chemistry - 'sci_biology', # Biology - 'sci_tech', # Technical - 'science', # Other - # Computers & Internet - 'comp_www', # Internet - 'comp_programming', # Programming - 'comp_hard', # Hardware - 'comp_soft', # Software - 'comp_db', # Databases - 'comp_osnet', # OS & Networking - 'computers', # Other - # Reference - 'ref_encyc', # Encyclopedias - 'ref_dict', # Dictionaries - 'ref_ref', # Reference - 'ref_guide', # Guidebooks - 'reference', # Other - # Nonfiction - 'nonf_biography', # Biography & Memoirs - 'nonf_publicism', # Publicism - 'nonf_criticism', # Criticism - 'design', # Art & design - 'nonfiction', # Other - # Religion & Inspiration - 'religion_rel', # Religion - 'religion_esoterics', # Esoterics - 'religion_self', # Self#improvement - 'religion', # Other - # Humor - 'humor_anecdote', # Anecdote (funny stories) - 'humor_prose', # Prose - 'humor_verse', # Verses - 'humor', # Other - # Home & Family - 'home_cooking', # Cooking - 'home_pets', # Pets - 'home_crafts', # Hobbies & Crafts - 'home_entertain', # Entertaining - 'home_health', # Health - 'home_garden', # Garden - 'home_diy', # Do it yourself - 'home_sport', # Sports - 'home_sex', # Erotica & sex - 'home', # Other + 'child_prose', # Prose + 'child_sf', # Science Fiction + 'child_det', # Detectives & Thrillers + 'child_adv', # Adventures + 'child_education', # Educational + 'children', # Other + # Poetry & Dramaturgy + 'poetry', # Poetry + 'dramaturgy', # Dramaturgy + # Antique literature + 'antique_ant', # Antique + 'antique_european', # European + 'antique_russian', # Old russian + 'antique_east', # Old east + 'antique_myths', # Myths. Legends. 
Epos + 'antique', # Other + # Scientific#educational + 'sci_history', # History + 'sci_psychology', # Psychology + 'sci_culture', # Cultural science + 'sci_religion', # Religious studies + 'sci_philosophy', # Philosophy + 'sci_politics', # Politics + 'sci_business', # Business literature + 'sci_juris', # Jurisprudence + 'sci_linguistic', # Linguistics + 'sci_medicine', # Medicine + 'sci_phys', # Physics + 'sci_math', # Mathematics + 'sci_chem', # Chemistry + 'sci_biology', # Biology + 'sci_tech', # Technical + 'science', # Other + # Computers & Internet + 'comp_www', # Internet + 'comp_programming', # Programming + 'comp_hard', # Hardware + 'comp_soft', # Software + 'comp_db', # Databases + 'comp_osnet', # OS & Networking + 'computers', # Other + # Reference + 'ref_encyc', # Encyclopedias + 'ref_dict', # Dictionaries + 'ref_ref', # Reference + 'ref_guide', # Guidebooks + 'reference', # Other + # Nonfiction + 'nonf_biography', # Biography & Memoirs + 'nonf_publicism', # Publicism + 'nonf_criticism', # Criticism + 'design', # Art & design + 'nonfiction', # Other + # Religion & Inspiration + 'religion_rel', # Religion + 'religion_esoterics', # Esoterics + 'religion_self', # Self#improvement + 'religion', # Other + # Humor + 'humor_anecdote', # Anecdote (funny stories) + 'humor_prose', # Prose + 'humor_verse', # Verses + 'humor', # Other + # Home & Family + 'home_cooking', # Cooking + 'home_pets', # Pets + 'home_crafts', # Hobbies & Crafts + 'home_entertain', # Entertaining + 'home_health', # Health + 'home_garden', # Garden + 'home_diy', # Do it yourself + 'home_sport', # Sports + 'home_sex', # Erotica & sex + 'home', # Other ] ui_data = { 'sectionize': { diff --git a/src/calibre/ebooks/conversion/plugins/htmlz_output.py b/src/calibre/ebooks/conversion/plugins/htmlz_output.py index 3a77c4332c..c72fd09adf 100644 --- a/src/calibre/ebooks/conversion/plugins/htmlz_output.py +++ b/src/calibre/ebooks/conversion/plugins/htmlz_output.py @@ -100,7 +100,7 @@ class HTMLZOutput(OutputFormatPlugin): for item in oeb_book.manifest: if item.media_type in OEB_IMAGES and item.href in images: if item.media_type == SVG_MIME: - data = unicode_type(etree.tostring(item.data, encoding=unicode_type)) + data = etree.tostring(item.data, encoding='unicode') else: data = item.data fname = os.path.join(tdir, u'images', images[item.href]) diff --git a/src/calibre/ebooks/conversion/plugins/lit_input.py b/src/calibre/ebooks/conversion/plugins/lit_input.py index 78d858e49c..aa8f0c2925 100644 --- a/src/calibre/ebooks/conversion/plugins/lit_input.py +++ b/src/calibre/ebooks/conversion/plugins/lit_input.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' diff --git a/src/calibre/ebooks/conversion/plugins/lit_output.py b/src/calibre/ebooks/conversion/plugins/lit_output.py index f6f7a8cba3..a0d9a7f1a9 100644 --- a/src/calibre/ebooks/conversion/plugins/lit_output.py +++ b/src/calibre/ebooks/conversion/plugins/lit_output.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' diff --git a/src/calibre/ebooks/conversion/plugins/lrf_input.py b/src/calibre/ebooks/conversion/plugins/lrf_input.py index 
926b6a2c29..e69654540d 100644 --- a/src/calibre/ebooks/conversion/plugins/lrf_input.py +++ b/src/calibre/ebooks/conversion/plugins/lrf_input.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' @@ -85,4 +85,4 @@ class LRFInput(InputFormatPlugin): with open('content.opf', 'wb') as f: f.write(result) styles.write() - return os.path.abspath(u'content.opf') + return os.path.abspath('content.opf') diff --git a/src/calibre/ebooks/conversion/plugins/lrf_output.py b/src/calibre/ebooks/conversion/plugins/lrf_output.py index edae1f5688..88b6a6885d 100644 --- a/src/calibre/ebooks/conversion/plugins/lrf_output.py +++ b/src/calibre/ebooks/conversion/plugins/lrf_output.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' @@ -188,7 +188,7 @@ class LRFOutput(OutputFormatPlugin): self.flatten_toc() from calibre.ptempfile import TemporaryDirectory - with TemporaryDirectory(u'_lrf_output') as tdir: + with TemporaryDirectory('_lrf_output') as tdir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') oeb_output.convert(oeb, tdir, input_plugin, opts, log) diff --git a/src/calibre/ebooks/conversion/plugins/mobi_input.py b/src/calibre/ebooks/conversion/plugins/mobi_input.py index 8c42e8345d..90a2dedd89 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_input.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_input.py @@ -1,4 +1,5 @@ -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals + __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' @@ -29,13 +30,13 @@ class MOBIInput(InputFormatPlugin): mr = MobiReader(stream, log, options.input_encoding, options.debug_pipeline) if mr.kf8_type is None: - mr.extract_content(u'.', parse_cache) + mr.extract_content('.', parse_cache) except: mr = MobiReader(stream, log, options.input_encoding, options.debug_pipeline, try_extra_data_fix=True) if mr.kf8_type is None: - mr.extract_content(u'.', parse_cache) + mr.extract_content('.', parse_cache) if mr.kf8_type is not None: log('Found KF8 MOBI of type %r'%mr.kf8_type) @@ -52,7 +53,8 @@ class MOBIInput(InputFormatPlugin): if raw: if isinstance(raw, unicode_type): raw = raw.encode('utf-8') - open(u'debug-raw.html', 'wb').write(raw) + with open('debug-raw.html', 'wb') as f: + f.write(raw) from calibre.ebooks.oeb.base import close_self_closing_tags for f, root in parse_cache.items(): raw = html.tostring(root, encoding='utf-8', method='xml', diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py index ee939f26be..28cff521bc 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_output.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' diff --git 
a/src/calibre/ebooks/conversion/plugins/odt_input.py b/src/calibre/ebooks/conversion/plugins/odt_input.py index fa71161213..f2f045b977 100644 --- a/src/calibre/ebooks/conversion/plugins/odt_input.py +++ b/src/calibre/ebooks/conversion/plugins/odt_input.py @@ -1,4 +1,5 @@ -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals + __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' __docformat__ = 'restructuredtext en' diff --git a/src/calibre/ebooks/conversion/plugins/oeb_output.py b/src/calibre/ebooks/conversion/plugins/oeb_output.py index c5146e0001..417701433d 100644 --- a/src/calibre/ebooks/conversion/plugins/oeb_output.py +++ b/src/calibre/ebooks/conversion/plugins/oeb_output.py @@ -1,4 +1,5 @@ -from __future__ import with_statement +from __future__ import absolute_import, division, print_function, unicode_literals + __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' diff --git a/src/calibre/ebooks/conversion/plugins/pdb_input.py b/src/calibre/ebooks/conversion/plugins/pdb_input.py index 970b605a14..a55a4ffec3 100644 --- a/src/calibre/ebooks/conversion/plugins/pdb_input.py +++ b/src/calibre/ebooks/conversion/plugins/pdb_input.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' diff --git a/src/calibre/ebooks/conversion/plugins/pdb_output.py b/src/calibre/ebooks/conversion/plugins/pdb_output.py index 0fc5662ccd..de450c175d 100644 --- a/src/calibre/ebooks/conversion/plugins/pdb_output.py +++ b/src/calibre/ebooks/conversion/plugins/pdb_output.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' diff --git a/src/calibre/ebooks/conversion/plugins/pdf_input.py b/src/calibre/ebooks/conversion/plugins/pdf_input.py index 4bbebb01bf..242d64b980 100644 --- a/src/calibre/ebooks/conversion/plugins/pdf_input.py +++ b/src/calibre/ebooks/conversion/plugins/pdf_input.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' @@ -38,7 +39,7 @@ class PDFInput(InputFormatPlugin): with open(u'index.xml', 'rb') as f: xml = clean_ascii_chars(f.read()) PDFDocument(xml, self.opts, self.log) - return os.path.join(getcwd(), u'metadata.opf') + return os.path.join(getcwd(), 'metadata.opf') def convert(self, stream, options, file_ext, log, accelerators): @@ -57,7 +58,7 @@ class PDFInput(InputFormatPlugin): mi = get_metadata(stream, 'pdf') opf = OPFCreator(getcwd(), mi) - manifest = [(u'index.html', None)] + manifest = [('index.html', None)] images = os.listdir(getcwd()) images.remove('index.html') @@ -66,16 +67,16 @@ class PDFInput(InputFormatPlugin): log.debug('Generating manifest...') opf.create_manifest(manifest) - opf.create_spine([u'index.html']) + opf.create_spine(['index.html']) log.debug('Rendering manifest...') - with open(u'metadata.opf', 'wb') as opffile: + with open('metadata.opf', 'wb') as opffile: opf.render(opffile) - if os.path.exists(u'toc.ncx'): + if os.path.exists('toc.ncx'): ncxid = opf.manifest.id_for_path('toc.ncx') if ncxid: - with open(u'metadata.opf', 'r+b') as f: + with open('metadata.opf', 'r+b') as f: raw = f.read().replace(b'', stop) html = 
preprocessor.fix_nbsp_indents(html) @@ -152,20 +152,20 @@ class DocAnalysis(object): maxLineLength=1900 # Discard larger than this to stay in range buckets=20 # Each line is divided into a bucket based on length - # print "there are "+str(len(lines))+" lines" + # print("there are "+unicode_type(len(lines))+" lines") # max = 0 # for line in self.lines: # l = len(line) # if l > max: # max = l - # print "max line found is "+str(max) + # print("max line found is "+unicode_type(max)) # Build the line length histogram hRaw = [0 for i in range(0,buckets)] for line in self.lines: l = len(line) if l > minLineLength and l < maxLineLength: - l = int(l/100) - # print "adding "+str(l) + l = int(l // 100) + # print("adding "+unicode_type(l)) hRaw[l]+=1 # Normalize the histogram into percents @@ -174,8 +174,8 @@ class DocAnalysis(object): h = [float(count)/totalLines for count in hRaw] else: h = [] - # print "\nhRaw histogram lengths are: "+str(hRaw) - # print " percents are: "+str(h)+"\n" + # print("\nhRaw histogram lengths are: "+unicode_type(hRaw)) + # print(" percents are: "+unicode_type(h)+"\n") # Find the biggest bucket maxValue = 0 @@ -184,10 +184,10 @@ class DocAnalysis(object): maxValue = h[i] if maxValue < percent: - # print "Line lengths are too variable. Not unwrapping." + # print("Line lengths are too variable. Not unwrapping.") return False else: - # print str(maxValue)+" of the lines were in one bucket" + # print(unicode_type(maxValue)+" of the lines were in one bucket") return True @@ -232,7 +232,7 @@ class Dehyphenator(object): if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None: lookupword = self.removeprefix.sub('', lookupword) if self.verbose > 2: - self.log("lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)) + self.log("lookup word is: "+lookupword+", orig is: " + hyphenated) try: searchresult = self.html.find(lookupword.lower()) except: @@ -240,33 +240,33 @@ class Dehyphenator(object): if self.format == 'html_cleanup' or self.format == 'txt_cleanup': if self.html.find(lookupword) != -1 or searchresult != -1: if self.verbose > 2: - self.log(" Cleanup:returned dehyphenated word: " + str(dehyphenated)) + self.log(" Cleanup:returned dehyphenated word: " + dehyphenated) return dehyphenated elif self.html.find(hyphenated) != -1: if self.verbose > 2: - self.log(" Cleanup:returned hyphenated word: " + str(hyphenated)) + self.log(" Cleanup:returned hyphenated word: " + hyphenated) return hyphenated else: if self.verbose > 2: - self.log(" Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)) + self.log(" Cleanup:returning original text "+firsthalf+" + linefeed "+secondhalf) return firsthalf+'\u2014'+wraptags+secondhalf else: if self.format == 'individual_words' and len(firsthalf) + len(secondhalf) <= 6: if self.verbose > 2: - self.log("too short, returned hyphenated word: " + str(hyphenated)) + self.log("too short, returned hyphenated word: " + hyphenated) return hyphenated if len(firsthalf) <= 2 and len(secondhalf) <= 2: if self.verbose > 2: - self.log("too short, returned hyphenated word: " + str(hyphenated)) + self.log("too short, returned hyphenated word: " + hyphenated) return hyphenated if self.html.find(lookupword) != -1 or searchresult != -1: if self.verbose > 2: - self.log(" returned dehyphenated word: " + str(dehyphenated)) + self.log(" returned dehyphenated word: " + dehyphenated) return dehyphenated else: if self.verbose > 2: - self.log(" returned hyphenated word: " + str(hyphenated)) + self.log(" returned hyphenated word: " 
+ hyphenated) return hyphenated def __call__(self, html, format, length=1): @@ -595,7 +595,7 @@ class HTMLPreProcessor(object): docanalysis = DocAnalysis('pdf', html) length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor')) if length: - # print "The pdf line length returned is " + str(length) + # print("The pdf line length returned is " + unicode_type(length)) # unwrap em/en dashes end_rules.append((re.compile( r'(?<=.{%i}[–—])\s*<p>
\s*(?=[\[a-z\d])' % length), lambda match: '')) diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index b3c12cb43f..8738ba5375 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -19,7 +19,6 @@ from calibre.utils.localization import canonicalize_lang from calibre.utils.logging import default_log from calibre.utils.zipfile import ZipFile from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER -from polyglot.builtins import unicode_type def fromstring(raw, parser=RECOVER_PARSER): @@ -56,7 +55,7 @@ def read_doc_props(raw, mi, XPath): desc = XPath('//dc:description')(root) if desc: - raw = etree.tostring(desc[0], method='text', encoding=unicode_type) + raw = etree.tostring(desc[0], method='text', encoding='unicode') raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary mi.comments = raw.strip() diff --git a/src/calibre/ebooks/docx/index.py b/src/calibre/ebooks/docx/index.py index f2c8b5243c..974d25d9a1 100644 --- a/src/calibre/ebooks/docx/index.py +++ b/src/calibre/ebooks/docx/index.py @@ -10,7 +10,7 @@ from operator import itemgetter from lxml import etree from calibre.utils.icu import partition_by_first_letter, sort_key -from polyglot.builtins import iteritems, unicode_type, filter +from polyglot.builtins import iteritems, filter def get_applicable_xe_fields(index, xe_fields, XPath, expand): @@ -246,7 +246,7 @@ def polish_index_markup(index, blocks): a = block.xpath('descendant::a[1]') text = '' if a: - text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode_type).strip() + text = etree.tostring(a[0], method='text', with_tail=False, encoding='unicode').strip() if ':' in text: path_map[block] = parts = list(filter(None, (x.strip() for x in text.split(':')))) if len(parts) > 1: diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py index 50893e739c..a287514e2c 100644 --- a/src/calibre/ebooks/docx/toc.py +++ b/src/calibre/ebooks/docx/toc.py @@ -12,7 +12,7 @@ from lxml.etree import tostring from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.oeb.polish.toc import elem_to_toc_text -from polyglot.builtins import iteritems, unicode_type, range +from polyglot.builtins import iteritems, range def from_headings(body, log, namespace): @@ -93,7 +93,7 @@ def link_to_txt(a, styles, object_map): if rs.css.get('display', None) == 'none': a.remove(child) - return tostring(a, method='text', with_tail=False, encoding=unicode_type).strip() + return tostring(a, method='text', with_tail=False, encoding='unicode').strip() def from_toc(docx, link_map, styles, object_map, log, namespace): diff --git a/src/calibre/ebooks/docx/writer/styles.py b/src/calibre/ebooks/docx/writer/styles.py index 0931491958..4cea15d599 100644 --- a/src/calibre/ebooks/docx/writer/styles.py +++ b/src/calibre/ebooks/docx/writer/styles.py @@ -14,7 +14,7 @@ from lxml import etree from calibre.ebooks import parse_css_length from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero from calibre.utils.localization import lang_as_iso639_1 -from polyglot.builtins import iteritems, unicode_type, filter +from polyglot.builtins import iteritems, filter from tinycss.css21 import CSS21Parser css_parser = CSS21Parser() @@ -46,7 +46,7 @@ def bmap(x): def is_dropcaps(html_tag, tag_style): - return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode_type, with_tail=False)) < 5 and tag_style['float'] == 'left' + return len(html_tag) < 2 and 
len(etree.tostring(html_tag, method='text', encoding='unicode', with_tail=False)) < 5 and tag_style['float'] == 'left' class CombinedStyle(object): diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 5dede1505e..9936ffad7e 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -65,7 +65,7 @@ class FB2MLizer(object): output = self.clean_text(u''.join(output)) if self.opts.pretty_print: - return u'\n%s' % etree.tostring(etree.fromstring(output), encoding=unicode_type, pretty_print=True) + return u'\n%s' % etree.tostring(etree.fromstring(output), encoding='unicode', pretty_print=True) else: return u'' + output diff --git a/src/calibre/ebooks/metadata/fb2.py b/src/calibre/ebooks/metadata/fb2.py index 04f67c97dd..bbd1aec7cc 100644 --- a/src/calibre/ebooks/metadata/fb2.py +++ b/src/calibre/ebooks/metadata/fb2.py @@ -27,7 +27,7 @@ NAMESPACES = { 'xlink' : 'http://www.w3.org/1999/xlink' } -tostring = partial(etree.tostring, method='text', encoding=unicode_type) +tostring = partial(etree.tostring, method='text', encoding='unicode') def XLINK(tag): @@ -448,7 +448,7 @@ def ensure_namespace(doc): break if bare_tags: import re - raw = etree.tostring(doc, encoding=unicode_type) + raw = etree.tostring(doc, encoding='unicode') raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw) doc = etree.fromstring(raw) return doc diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index 273a700eb9..c57fe7c758 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -893,7 +893,7 @@ class OPF(object): # {{{ ans = None for match in self.pubdate_path(self.metadata): try: - val = parse_date(etree.tostring(match, encoding=unicode_type, + val = parse_date(etree.tostring(match, encoding='unicode', method='text', with_tail=False).strip()) except: continue @@ -906,7 +906,7 @@ class OPF(object): # {{{ least_val = least_elem = None for match in self.pubdate_path(self.metadata): try: - cval = parse_date(etree.tostring(match, encoding=unicode_type, + cval = parse_date(etree.tostring(match, encoding='unicode', method='text', with_tail=False).strip()) except: match.getparent().remove(match) @@ -964,7 +964,7 @@ class OPF(object): # {{{ for attr, val in iteritems(x.attrib): if attr.endswith('scheme'): typ = icu_lower(val) - val = etree.tostring(x, with_tail=False, encoding=unicode_type, + val = etree.tostring(x, with_tail=False, encoding='unicode', method='text').strip() if val and typ not in ('calibre', 'uuid'): if typ == 'isbn' and val.lower().startswith('urn:isbn:'): @@ -973,7 +973,7 @@ class OPF(object): # {{{ found_scheme = True break if not found_scheme: - val = etree.tostring(x, with_tail=False, encoding=unicode_type, + val = etree.tostring(x, with_tail=False, encoding='unicode', method='text').strip() if val.lower().startswith('urn:isbn:'): val = check_isbn(val.split(':')[-1]) diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index 2287d12aab..52bfea8377 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -210,7 +210,7 @@ class TOC(list): text = u'' for txt in txt_path(nl): text += etree.tostring(txt, method='text', - encoding=unicode_type, with_tail=False) + encoding='unicode', with_tail=False) content = content_path(np) if content and text: content = content[0] diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index 5acfd36b53..4253c060f8 100644 --- 
a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -235,7 +235,7 @@ class KF8Writer(object): root = self.data(item) for svg in XPath('//svg:svg')(root): - raw = etree.tostring(svg, encoding=unicode_type, with_tail=False) + raw = etree.tostring(svg, encoding='unicode', with_tail=False) idx = len(self.flows) self.flows.append(raw) p = svg.getparent() diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 25bb2ad62b..e6c609d7dc 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -400,7 +400,7 @@ def xml2str(root, pretty_print=False, strip_comments=False, with_tail=True): def xml2text(elem, pretty_print=False): - return etree.tostring(elem, method='text', encoding=unicode_type, with_tail=False, pretty_print=pretty_print) + return etree.tostring(elem, method='text', encoding='unicode', with_tail=False, pretty_print=pretty_print) def escape_cdata(root): diff --git a/src/calibre/ebooks/oeb/parse_utils.py b/src/calibre/ebooks/oeb/parse_utils.py index ef1d485cbf..1f9e7c841b 100644 --- a/src/calibre/ebooks/oeb/parse_utils.py +++ b/src/calibre/ebooks/oeb/parse_utils.py @@ -112,7 +112,7 @@ def _html4_parse(data): for elem in data.iter(tag=etree.Comment): if elem.text: elem.text = elem.text.strip('-') - data = etree.tostring(data, encoding=unicode_type) + data = etree.tostring(data, encoding='unicode') # Setting huge_tree=True causes crashes in windows with large files parser = etree.XMLParser(no_network=True) @@ -273,7 +273,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, if not namespace(data.tag): log.warn('Forcing', filename, 'into XHTML namespace') data.attrib['xmlns'] = XHTML_NS - data = etree.tostring(data, encoding=unicode_type) + data = etree.tostring(data, encoding='unicode') try: data = etree.fromstring(data, parser=parser) diff --git a/src/calibre/ebooks/oeb/polish/stats.py b/src/calibre/ebooks/oeb/polish/stats.py index d9f46bd4af..c7a5d5c779 100644 --- a/src/calibre/ebooks/oeb/polish/stats.py +++ b/src/calibre/ebooks/oeb/polish/stats.py @@ -120,7 +120,7 @@ def get_element_text(elem, resolve_property, resolve_pseudo_property, capitalize if before: ans.append(before) if for_pseudo is not None: - ans.append(tostring(elem, method='text', encoding=unicode_type, with_tail=False)) + ans.append(tostring(elem, method='text', encoding='unicode', with_tail=False)) else: if elem.text: ans.append(elem.text) diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 0d693a478a..c752a0735e 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -142,7 +142,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name): text = '' for txt in child_xpath(nl, 'text'): text += etree.tostring(txt, method='text', - encoding=unicode_type, with_tail=False) + encoding='unicode', with_tail=False) content = child_xpath(navpoint, 'content') if content: content = content[0] @@ -190,7 +190,7 @@ def parse_ncx(container, ncx_name): def add_from_li(container, li, parent, nav_name): dest = frag = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): - text = etree.tostring(x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip() + text = etree.tostring(x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: dest = nav_name if href.startswith('#') else 
container.href_to_name(href, base=nav_name) @@ -225,7 +225,7 @@ def parse_nav(container, nav_name): if ol is not None: process_nav_node(container, ol, toc_root, nav_name) for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())): - text = etree.tostring(h, method='text', encoding=unicode_type, with_tail=False) or h.get('title') + text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title') if text: toc_root.toc_title = text break @@ -323,7 +323,7 @@ def get_nav_landmarks(container): for a in li.iterdescendants(XHTML('a')): href, rtype = a.get('href'), a.get(et) if href: - title = etree.tostring(a, method='text', encoding=unicode_type, with_tail=False).strip() + title = etree.tostring(a, method='text', encoding='unicode', with_tail=False).strip() href, frag = href.partition('#')[::2] name = container.href_to_name(href, nav) if container.has_name(name): diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index e32fa4a349..467b0113c1 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -428,7 +428,7 @@ class OEBReader(object): 'descendant::calibre:meta[@name = "description"]') if descriptionElement: description = etree.tostring(descriptionElement[0], - method='text', encoding=unicode_type).strip() + method='text', encoding='unicode').strip() if not description: description = None else: diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py index 90e1ca2e84..7f5342d6f4 100644 --- a/src/calibre/ebooks/oeb/transforms/metadata.py +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en' import os, re from calibre.utils.date import isoformat, now from calibre import guess_type -from polyglot.builtins import iteritems, unicode_type, filter +from polyglot.builtins import iteritems, filter filter @@ -206,7 +206,7 @@ class MergeMetadata(object): for item in affected_items: body = XPath('//h:body')(item.data) if body: - text = etree.tostring(body[0], method='text', encoding=unicode_type) + text = etree.tostring(body[0], method='text', encoding='unicode') else: text = '' text = re.sub(r'\s+', '', text) diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index 4de590465d..d0e1a334ec 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -20,7 +20,7 @@ from calibre.ebooks.epub import rules from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, urldefrag, rewrite_links, urlunquote, XHTML, urlnormalize) from calibre.ebooks.oeb.polish.split import do_split -from polyglot.builtins import iteritems, unicode_type, range, map +from polyglot.builtins import iteritems, range, map from css_selectors import Select, SelectorError XPath = functools.partial(_XPath, namespaces=NAMESPACES) @@ -295,7 +295,7 @@ class FlowSplitter(object): if body is None: return False txt = re.sub(u'\\s+|\\xa0', '', - etree.tostring(body, method='text', encoding=unicode_type)) + etree.tostring(body, method='text', encoding='unicode')) if len(txt) > 1: return False for img in root.xpath('//h:img', namespaces=NAMESPACES): diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py index 0367a2fd4e..54c44ebe3c 100644 --- a/src/calibre/ebooks/pdf/reflow.py +++ b/src/calibre/ebooks/pdf/reflow.py @@ -11,7 +11,7 @@ from itertools import count from lxml import etree -from polyglot.builtins import 
unicode_type, range, map +from polyglot.builtins import range, map class Font(object): @@ -76,10 +76,10 @@ class Text(Element): text.tail = '' self.text_as_string = etree.tostring(text, method='text', - encoding=unicode_type) + encoding='unicode') self.raw = text.text if text.text else u'' for x in text.iterchildren(): - self.raw += etree.tostring(x, method='xml', encoding=unicode_type) + self.raw += etree.tostring(x, method='xml', encoding='unicode') self.average_character_width = self.width/len(self.text_as_string) def coalesce(self, other, page_number): diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 4a0df0d7b4..772117321c 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -135,7 +135,7 @@ class PMLMLizer(object): text = [u''] for item in self.oeb_book.spine: self.log.debug('Converting %s to PML markup...' % item.href) - content = unicode_type(etree.tostring(item.data, encoding=unicode_type)) + content = etree.tostring(item.data, encoding='unicode') content = self.prepare_text(content) content = etree.fromstring(content) stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py index 2e84cfbafc..d4b339c53c 100644 --- a/src/calibre/ebooks/rtf/rtfml.py +++ b/src/calibre/ebooks/rtf/rtfml.py @@ -120,7 +120,7 @@ class RTFMLizer(object): self.log.debug('Converting %s to RTF markup...' % item.href) # Removing comments is needed as comments with -- inside them can # cause fromstring() to fail - content = re.sub(u'', u'', etree.tostring(item.data, encoding=unicode_type), flags=re.DOTALL) + content = re.sub(u'', u'', etree.tostring(item.data, encoding='unicode'), flags=re.DOTALL) content = self.remove_newlines(content) content = self.remove_tabs(content) content = etree.fromstring(content) diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py index d04c590ab9..659959e6f0 100644 --- a/src/calibre/ebooks/snb/snbml.py +++ b/src/calibre/ebooks/snb/snbml.py @@ -85,7 +85,7 @@ class SNBMLizer(object): from calibre.ebooks.oeb.stylizer import Stylizer output = [u''] stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile) - content = unicode_type(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode_type)) + content = etree.tostring(self.item.data.find(XHTML('body')), encoding='unicode') # content = self.remove_newlines(content) trees = {} for subitem, subtitle in self.subitems: diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py index 220cb56e6d..d4c000f164 100644 --- a/src/calibre/ebooks/txt/txtml.py +++ b/src/calibre/ebooks/txt/txtml.py @@ -12,7 +12,7 @@ Transform OEB content into plain text import re from lxml import etree -from polyglot.builtins import unicode_type, string_or_bytes +from polyglot.builtins import string_or_bytes BLOCK_TAGS = [ @@ -74,7 +74,7 @@ class TXTMLizer(object): for x in item.data.iterdescendants(etree.Comment): if x.text and '--' in x.text: x.text = x.text.replace('--', '__') - content = unicode_type(etree.tostring(item.data, encoding=unicode_type)) + content = etree.tostring(item.data, encoding='unicode') content = self.remove_newlines(content) content = etree.fromstring(content) stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile) diff --git a/src/calibre/gui2/comments_editor.py b/src/calibre/gui2/comments_editor.py index da9d790a43..0fffa212f0 
100644 --- a/src/calibre/gui2/comments_editor.py +++ b/src/calibre/gui2/comments_editor.py @@ -360,7 +360,7 @@ class EditorWidget(QWebView, LineEditECM): # {{{ for body in root.xpath('//body'): if body.text: elems.append(body.text) - elems += [html.tostring(x, encoding=unicode_type) for x in body if + elems += [html.tostring(x, encoding='unicode') for x in body if x.tag not in ('script', 'style')] if len(elems) > 1: diff --git a/src/calibre/gui2/dialogs/plugin_updater.py b/src/calibre/gui2/dialogs/plugin_updater.py index e3fa0b6d1e..275e77dadf 100644 --- a/src/calibre/gui2/dialogs/plugin_updater.py +++ b/src/calibre/gui2/dialogs/plugin_updater.py @@ -840,7 +840,7 @@ class PluginUpdaterDialog(SizePersistedDialog): continue if heading_node.text_content().lower().find('version history') != -1: div_node = spoiler_node.xpath('div')[0] - text = html.tostring(div_node, method='html', encoding=unicode_type) + text = html.tostring(div_node, method='html', encoding='unicode') return re.sub(r'<div\s.*?>', '<div>
', text) except: if DEBUG: diff --git a/src/calibre/gui2/tweak_book/diff/view.py b/src/calibre/gui2/tweak_book/diff/view.py index 6286091bd7..8c9e9c5f5b 100644 --- a/src/calibre/gui2/tweak_book/diff/view.py +++ b/src/calibre/gui2/tweak_book/diff/view.py @@ -65,7 +65,7 @@ def beautify_text(raw, syntax): else: root = parse(raw, line_numbers=False) pretty_html_tree(None, root) - return etree.tostring(root, encoding=unicode_type) + return etree.tostring(root, encoding='unicode') class LineNumberMap(dict): # {{{ diff --git a/src/calibre/gui2/tweak_book/text_search.py b/src/calibre/gui2/tweak_book/text_search.py index 03ee4fc215..0f00763580 100644 --- a/src/calibre/gui2/tweak_book/text_search.py +++ b/src/calibre/gui2/tweak_book/text_search.py @@ -16,7 +16,7 @@ from calibre.gui2.tweak_book import tprefs, editors, current_container from calibre.gui2.tweak_book.search import get_search_regex, InvalidRegex, initialize_search_request from calibre.gui2.tweak_book.widgets import BusyCursor from calibre.gui2.widgets2 import HistoryComboBox -from polyglot.builtins import iteritems, unicode_type, error_message +from polyglot.builtins import iteritems, error_message # UI {{{ @@ -179,7 +179,7 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name else: root = current_container().parsed(fname) if hasattr(root, 'xpath'): - raw = tostring(root, method='text', encoding=unicode_type, with_tail=True) + raw = tostring(root, method='text', encoding='unicode', with_tail=True) else: raw = current_container().raw_data(fname) if pat.search(raw) is not None: diff --git a/src/calibre/utils/open_with/osx.py b/src/calibre/utils/open_with/osx.py index 83dd0de249..8cce4e8116 100644 --- a/src/calibre/utils/open_with/osx.py +++ b/src/calibre/utils/open_with/osx.py @@ -10,7 +10,7 @@ from collections import defaultdict from calibre.ptempfile import TemporaryDirectory from calibre.utils.icu import numeric_sort_key -from polyglot.builtins import iteritems, unicode_type, string_or_bytes +from polyglot.builtins import iteritems, string_or_bytes from polyglot.plistlib import loads application_locations = ('/Applications', '~/Applications', '~/Desktop') @@ -30,8 +30,8 @@ def generate_public_uti_map(): for table in tables: for tr in table.xpath('descendant::tr')[1:]: td = tr.xpath('descendant::td') - identifier = etree.tostring(td[0], method='text', encoding=unicode_type).strip() - tags = etree.tostring(td[2], method='text', encoding=unicode_type).strip() + identifier = etree.tostring(td[0], method='text', encoding='unicode').strip() + tags = etree.tostring(td[2], method='text', encoding='unicode').strip() identifier = identifier.split()[0].replace('\u200b', '') exts = [x.strip()[1:].lower() for x in tags.split(',') if x.strip().startswith('.')] for ext in exts: diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index 825cd5466f..46a1f964d0 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -43,7 +43,7 @@ class Article(object): if summary and '<' in summary: try: s = html.fragment_fromstring(summary, create_parent=True) - summary = html.tostring(s, method='text', encoding=unicode_type) + summary = html.tostring(s, method='text', encoding='unicode') except: print('Failed to process article summary, deleting:') print(summary.encode('utf-8')) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 2a6f61568a..c2220e1af0 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -743,7 +743,7 
@@ class BasicNewsRecipe(Recipe): heading.text = extracted_title body.insert(0, heading) - raw_html = tostring(root, encoding=unicode_type) + raw_html = tostring(root, encoding='unicode') return raw_html @@ -1667,7 +1667,7 @@ class BasicNewsRecipe(Recipe): return tag if callable(getattr(tag, 'xpath', None)) and not hasattr(tag, 'contents'): # a lxml tag from lxml.etree import tostring - ans = tostring(tag, method='text', encoding=unicode_type, with_tail=False) + ans = tostring(tag, method='text', encoding='unicode', with_tail=False) else: strings = [] for item in tag.contents: