py3: More unicode porting

2025-08-11 09:13:57 -04:00 · 2019-06-14 18:39:58 +05:30 · 2019-06-14 18:39:58 +05:30 · d782fa0ae4
commit d782fa0ae4
parent 59ddfa67e5
9 changed files with 41 additions and 32 deletions
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@@ -70,19 +71,19 @@ class RBMLizer(object):
    def mlize_spine(self):
        self.link_hrefs = {}
-        output = [u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
+        output = ['<HTML><HEAD><TITLE></TITLE></HEAD><BODY>']
        output.append(self.get_cover_page())
-        output.append(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
+        output.append('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk')
        output.append(self.get_text())
-        output.append(u'</BODY></HTML>')
+        output.append('</BODY></HTML>')
-        output = ''.join(output).replace(u'ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
+        output = ''.join(output).replace('ghji87yhjko0Caliblre-toc-placeholder-for-insertion-later8ujko0987yjk', self.get_toc())
        output = self.clean_text(output)
        return output
    def get_cover_page(self):
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML
-        output = u''
+        output = ''
        if 'cover' in self.oeb_book.guide:
            if self.name_map.get(self.oeb_book.guide['cover'].href, None):
                output += '<IMG SRC="%s">' % self.name_map[self.oeb_book.guide['cover'].href]
@@ -97,10 +98,10 @@ class RBMLizer(object):
        return output
    def get_toc(self):
-        toc = [u'']
+        toc = ['']
        if self.opts.inline_toc:
            self.log.debug('Generating table of contents...')
-            toc.append(u'<H1>%s</H1><UL>\n' % _('Table of Contents:'))
+            toc.append('<H1>%s</H1><UL>\n' % _('Table of Contents:'))
            for item in self.oeb_book.toc:
                if item.href in self.link_hrefs.keys():
                    toc.append('<LI><A HREF="#%s">%s</A></LI>\n' % (self.link_hrefs[item.href], item.title))
@@ -113,7 +114,7 @@ class RBMLizer(object):
        from calibre.ebooks.oeb.stylizer import Stylizer
        from calibre.ebooks.oeb.base import XHTML
-        output = [u'']
+        output = ['']
        for item in self.oeb_book.spine:
            self.log.debug('Converting %s to RocketBook HTML...' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
@@ -129,7 +130,7 @@ class RBMLizer(object):
        if aid not in self.link_hrefs.keys():
            self.link_hrefs[aid] = 'calibre_link-%s' % len(self.link_hrefs.keys())
        aid = self.link_hrefs[aid]
-        return u'<A NAME="%s"></A>' % aid
+        return '<A NAME="%s"></A>' % aid
    def clean_text(self, text):
        # Remove anchors that do not have links
@@ -148,16 +149,16 @@ class RBMLizer(object):
            if p is not None and isinstance(p.tag, string_or_bytes) and namespace(p.tag) == XHTML_NS \
                    and elem.tail:
                return [elem.tail]
-            return [u'']
+            return ['']
-        text = [u'']
+        text = ['']
        style = stylizer.style(elem)
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            if hasattr(elem, 'tail') and elem.tail:
                return [elem.tail]
-            return [u'']
+            return ['']
        tag = barename(elem.tag)
        tag_count = 0
--- a/src/calibre/ebooks/rb/reader.py
+++ b/src/calibre/ebooks/rb/reader.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@@ -13,7 +14,7 @@ from calibre.ebooks.rb import HEADER
 from calibre.ebooks.rb import RocketBookError
 from calibre.ebooks.metadata.rb import get_metadata
 from calibre.ebooks.metadata.opf2 import OPFCreator
-from polyglot.builtins import range
+from polyglot.builtins import range, as_unicode
 from polyglot.urllib import unquote
@@ -74,7 +75,7 @@ class Reader(object):
        if toc_item.flags in (1, 2):
            return
-        output = u''
+        output = ''
        self.stream.seek(toc_item.offset)
        if toc_item.flags == 8:
@@ -109,7 +110,7 @@ class Reader(object):
        images = []
        for item in self.toc:
-            iname = item.name.decode('utf-8')
+            iname = as_unicode(item.name)
            if iname.lower().endswith('html'):
                self.log.debug('HTML item %s found...' % iname)
                html.append(iname)
--- a/src/calibre/ebooks/tcr/__init__.py
+++ b/src/calibre/ebooks/tcr/__init__.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
--- a/src/calibre/ebooks/txt/__init__.py
+++ b/src/calibre/ebooks/txt/__init__.py
@@ -1,4 +1,6 @@
 #!/usr/bin/env  python2
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2008, John Schember john@nachtimwald.com'
 __docformat__ = 'restructuredtext en'
@@ -6,4 +8,3 @@ __docformat__ = 'restructuredtext en'
 '''
 Used for txt output
 '''
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__ = 'GPL 3'
 __copyright__ = '''2011, John Schember <john@nachtimwald.com>
@@ -42,7 +43,7 @@ class MarkdownMLizer(OEB2HTML):
        return txt
    def mlize_spine(self, oeb_book):
-        output = [u'']
+        output = ['']
        for item in oeb_book.spine:
            self.log.debug('Converting %s to Markdown formatted TXT...' % item.href)
            self.rewrite_ids(item.data, item)
--- a/src/calibre/ebooks/txt/newlines.py
+++ b/src/calibre/ebooks/txt/newlines.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
@@ -17,7 +19,7 @@ from calibre.ebooks.conversion.preprocess import DocAnalysis
 from calibre.utils.cleantext import clean_ascii_chars
 from polyglot.builtins import iteritems, unicode_type, map, range, long_type
-HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
+HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
 def clean_txt(txt):
@@ -201,7 +203,7 @@ def separate_hard_scene_breaks(txt):
            return '\n%s\n' % line
        else:
            return line
-    txt = re.sub(unicode_type(r'(?miu)^[ \t-=~\/_]+$'), lambda mo: sep_break(mo.group()), txt)
+    txt = re.sub(r'(?miu)^[ \t-=~\/_]+$', lambda mo: sep_break(mo.group()), txt)
    return txt
@@ -242,7 +244,7 @@ def split_string_separator(txt, size):
        size -= 2
        txt = []
        for part in (txt[i * size: (i + 1) * size] for i in range(0, len(txt), size)):
-            idx = part.rfind('.')
+            idx = part.rfind(b'.')
            if idx == -1:
                part += b'\n\n'
            else:
--- a/src/calibre/ebooks/txt/textileml.py
+++ b/src/calibre/ebooks/txt/textileml.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, division, print_function, unicode_literals
 __license__ = 'GPL 3'
 __copyright__ = '2011, Leigh Parry <leighparry@blueyonder.co.uk>'
@@ -34,7 +35,7 @@ class TextileMLizer(OEB2HTML):
        self.in_a_link = False
        self.our_ids = []
        self.images = {}
-        self.id_no_text = u''
+        self.id_no_text = ''
        self.style_embed = []
        self.remove_space_after_newline = False
        self.base_hrefs = [item.href for item in oeb_book.spine]
@@ -56,7 +57,7 @@ class TextileMLizer(OEB2HTML):
        return txt
    def mlize_spine(self, oeb_book):
-        output = [u'']
+        output = ['']
        for item in oeb_book.spine:
            self.log.debug('Converting %s to Textile formatted TXT...' % item.href)
            self.rewrite_ids(item.data, item)
--- a/src/calibre/ebooks/txt/txtml.py
+++ b/src/calibre/ebooks/txt/txtml.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function, unicode_literals
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@@ -80,8 +80,8 @@ class TXTMLizer(object):
            stylizer = Stylizer(content, item.href, self.oeb_book, self.opts, self.opts.output_profile)
            output += self.dump_text(content.find(XHTML('body')), stylizer, item)
            output += '\n\n\n\n\n\n'
-        output = u''.join(output)
+        output = ''.join(output)
-        output = u'\n'.join(l.rstrip() for l in output.splitlines())
+        output = '\n'.join(l.rstrip() for l in output.splitlines())
        output = self.cleanup_text(output)
        return output
@@ -97,12 +97,12 @@ class TXTMLizer(object):
        return text
    def get_toc(self):
-        toc = [u'']
+        toc = ['']
        if getattr(self.opts, 'inline_toc', None):
            self.log.debug('Generating table of contents...')
-            toc.append(u'%s\n\n' % _(u'Table of Contents:'))
+            toc.append('%s\n\n' % _('Table of Contents:'))
            for item in self.toc_titles:
-                toc.append(u'* %s\n\n' % item)
+                toc.append('* %s\n\n' % item)
        return ''.join(toc)
    def create_flat_toc(self, nodes):
@@ -224,11 +224,11 @@ class TXTMLizer(object):
        # Are we in a paragraph block?
        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
            if self.opts.remove_paragraph_spacing and not in_heading:
-                text.append(u'\t')
+                text.append('\t')
            in_block = True
        if tag in SPACE_TAGS:
-            text.append(u' ')
+            text.append(' ')
        # Hard scene breaks.
        if tag == 'hr':
@@ -250,9 +250,9 @@ class TXTMLizer(object):
            text += self.dump_text(item, stylizer, page)
        if in_block:
-            text.append(u'\n\n')
+            text.append('\n\n')
        if in_heading:
-            text.append(u'\n')
+            text.append('\n')
            self.last_was_heading = True
        else:
            self.last_was_heading = False