py3: Use unicode_literals and migrate str() in a few more files

2025-07-09 03:04:10 -04:00 · 2019-05-19 13:20:26 +05:30 · 2019-05-19 13:20:26 +05:30 · 052cb43ae1
commit 052cb43ae1
parent 7f7c83a709
3 changed files with 29 additions and 30 deletions
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env  python2

-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
@@ -48,7 +48,7 @@ class Article(object):
                print('Failed to process article summary, deleting:')
                print(summary.encode('utf-8'))
                traceback.print_exc()
-                summary = u''
+                summary = ''
        self.text_summary = clean_ascii_chars(summary)
        self.author = author
        self.content = content
@@ -83,7 +83,7 @@ class Article(object):

    def __repr__(self):
        return \
-(u'''\
+('''\
 Title       : %s
 URL         : %s
 Author      : %s
@@ -93,7 +93,7 @@ TOC thumb   : %s
 Has content : %s
 '''%(self.title, self.url, self.author, self.summary[:20]+'...',
     self.localtime.strftime('%a, %d %b, %Y %H:%M'), self.toc_thumbnail,
-     bool(self.content))).encode('utf-8')
+     bool(self.content)))

    def __str__(self):
        return repr(self)
@@ -208,7 +208,7 @@ class Feed(object):
        content = [i.value for i in item.get('content', []) if i.value]
        content = [i if isinstance(i, unicode_type) else i.decode('utf-8', 'replace')
                for i in content]
-        content = u'\n'.join(content)
+        content = '\n'.join(content)
        if not content.strip():
            content = None
        if not link and not content:
@@ -286,8 +286,8 @@ class FeedCollection(list):

    def __init__(self, feeds):
        list.__init__(self, [f for f in feeds if len(f.articles) > 0])
-        found_articles = set([])
-        duplicates = set([])
+        found_articles = set()
+        duplicates = set()

        def in_set(s, a):
            for x in s:
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -1,4 +1,4 @@
-from __future__ import with_statement
+from __future__ import with_statement, unicode_literals
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
@@ -54,7 +54,7 @@ class BasicNewsRecipe(Recipe):

    #: A couple of lines that describe the content this recipe downloads.
    #: This will be used primarily in a GUI that presents a list of recipes.
-    description = u''
+    description = ''

    #: The author of this recipe
    __author__             = __appname__
@@ -288,7 +288,7 @@ class BasicNewsRecipe(Recipe):
    #: The CSS that is used to style the templates, i.e., the navigation bars and
    #: the Tables of Contents. Rather than overriding this variable, you should
    #: use `extra_css` in your recipe to customize look and feel.
-    template_css = u'''
+    template_css = '''
            .article_date {
                color: gray; font-family: monospace;
            }
@@ -446,7 +446,7 @@ class BasicNewsRecipe(Recipe):
        so, override in your subclass.
        '''
        if not self.feeds:
-            raise NotImplementedError
+            raise NotImplementedError()
        if self.test:
            return self.feeds[:self.test[0]]
        return self.feeds
@@ -462,7 +462,7 @@ class BasicNewsRecipe(Recipe):
                return url + '?&pagewanted=print'

        '''
-        raise NotImplementedError
+        raise NotImplementedError()

    @classmethod
    def image_url_processor(cls, baseurl, url):
@@ -665,7 +665,7 @@ class BasicNewsRecipe(Recipe):

        `url_or_raw`: Either a URL or the downloaded index page as a string
        '''
-        if re.match(r'\w+://', url_or_raw):
+        if re.match((br'\w+://' if isinstance(url_or_raw, bytes) else r'\w+://'), url_or_raw):
            # We may be called in a thread (in the skip_ad_pages method), so
            # clone the browser to be safe. We cannot use self.cloned_browser
            # as it may or may not actually clone the browser, depending on if
@@ -698,9 +698,7 @@ class BasicNewsRecipe(Recipe):
        if as_tree:
            from html5_parser import parse
            return parse(_raw)
-        else:
-            return BeautifulSoup(_raw)
-            return parse(_raw, return_root=False)
+        return BeautifulSoup(_raw)

    def extract_readable_article(self, html, url):
        '''
@@ -725,12 +723,12 @@ class BasicNewsRecipe(Recipe):
            root = frag
        elif frag.tag == 'body':
            root = document_fromstring(
-                u'<html><head><title>%s</title></head></html>' %
+                '<html><head><title>%s</title></head></html>' %
                extracted_title)
            root.append(frag)
        else:
            root = document_fromstring(
-                u'<html><head><title>%s</title></head><body/></html>' %
+                '<html><head><title>%s</title></head><body/></html>' %
                extracted_title)
            root.xpath('//body')[0].append(frag)

@@ -794,7 +792,7 @@ class BasicNewsRecipe(Recipe):
        calibre show the user a simple message instead of an error, call
        :meth:`abort_recipe_processing`.
        '''
-        raise NotImplementedError
+        raise NotImplementedError()

    def abort_recipe_processing(self, msg):
        '''
@@ -815,7 +813,7 @@ class BasicNewsRecipe(Recipe):
        This method is typically useful for sites that try to make it difficult to
        access article content automatically.
        '''
-        raise NotImplementedError
+        raise NotImplementedError()

    def add_toc_thumbnail(self, article, src):
        '''
@@ -902,9 +900,9 @@ class BasicNewsRecipe(Recipe):
        self.css_map = {}

        web2disk_cmdline = ['web2disk',
-            '--timeout', str(self.timeout),
-            '--max-recursions', str(self.recursions),
-            '--delay', str(self.delay),
+            '--timeout', unicode_type(self.timeout),
+            '--max-recursions', unicode_type(self.recursions),
+            '--delay', unicode_type(self.delay),
            ]

        if self.verbose:
@@ -1068,10 +1066,10 @@ class BasicNewsRecipe(Recipe):
        src = force_unicode(src, 'utf-8')
        pos = cls.summary_length
        fuzz = 50
-        si = src.find(u';', pos)
+        si = src.find(';', pos)
        if si > 0 and si-pos > fuzz:
            si = -1
-        gi = src.find(u'>', pos)
+        gi = src.find('>', pos)
        if gi > 0 and gi-pos > fuzz:
            gi = -1
        npos = max(si, gi)
@@ -1081,7 +1079,7 @@ class BasicNewsRecipe(Recipe):
        if len(ans) < len(src):
            from calibre.utils.cleantext import clean_xml_chars
            # Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
-            ans = clean_xml_chars(ans) + u'\u2026'
+            ans = clean_xml_chars(ans) + '\u2026'
        return ans

    def feed2index(self, f, feeds):
@@ -1590,7 +1588,7 @@ class BasicNewsRecipe(Recipe):
        article.sub_pages  = result[1][1:]
        self.jobs_done += 1
        self.report_progress(float(self.jobs_done)/len(self.jobs),
-            _(u'Article downloaded: %s')%force_unicode(article.title))
+            _('Article downloaded: %s')%force_unicode(article.title))
        if result[2]:
            self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))

@@ -1684,7 +1682,7 @@ class BasicNewsRecipe(Recipe):
                            strings.append(item['alt'])
                        except KeyError:
                            pass
-            ans = u''.join(strings)
+            ans = ''.join(strings)
        if normalize_whitespace:
            ans = re.sub(r'\s+', ' ', ans)
        return ans
--- a/src/calibre/web/feeds/templates.py
+++ b/src/calibre/web/feeds/templates.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env  python2
+from __future__ import unicode_literals

 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@@ -36,13 +37,13 @@ class Template(object):
            if isbytestring(kwargs[key]):
                kwargs[key] = kwargs[key].decode('utf-8', 'replace')
            if kwargs[key] is None:
-                kwargs[key] = u''
+                kwargs[key] = ''
        args = list(args)
        for i in range(len(args)):
            if isbytestring(args[i]):
                args[i] = args[i].decode('utf-8', 'replace')
            if args[i] is None:
-                args[i] = u''
+                args[i] = ''

        self._generate(*args, **kwargs)