diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index db9e8bc572..3a58c9470d 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python2 -from __future__ import print_function +from __future__ import print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -48,7 +48,7 @@ class Article(object): print('Failed to process article summary, deleting:') print(summary.encode('utf-8')) traceback.print_exc() - summary = u'' + summary = '' self.text_summary = clean_ascii_chars(summary) self.author = author self.content = content @@ -83,7 +83,7 @@ class Article(object): def __repr__(self): return \ -(u'''\ +('''\ Title : %s URL : %s Author : %s @@ -93,7 +93,7 @@ TOC thumb : %s Has content : %s '''%(self.title, self.url, self.author, self.summary[:20]+'...', self.localtime.strftime('%a, %d %b, %Y %H:%M'), self.toc_thumbnail, - bool(self.content))).encode('utf-8') + bool(self.content))) def __str__(self): return repr(self) @@ -208,7 +208,7 @@ class Feed(object): content = [i.value for i in item.get('content', []) if i.value] content = [i if isinstance(i, unicode_type) else i.decode('utf-8', 'replace') for i in content] - content = u'\n'.join(content) + content = '\n'.join(content) if not content.strip(): content = None if not link and not content: @@ -286,8 +286,8 @@ class FeedCollection(list): def __init__(self, feeds): list.__init__(self, [f for f in feeds if len(f.articles) > 0]) - found_articles = set([]) - duplicates = set([]) + found_articles = set() + duplicates = set() def in_set(s, a): for x in s: diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 4b7e9db435..2a6f61568a 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1,4 +1,4 @@ -from __future__ import with_statement +from __future__ import with_statement, unicode_literals __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' ''' @@ -54,7 +54,7 @@ class BasicNewsRecipe(Recipe): #: A couple of lines that describe the content this recipe downloads. #: This will be used primarily in a GUI that presents a list of recipes. - description = u'' + description = '' #: The author of this recipe __author__ = __appname__ @@ -288,7 +288,7 @@ class BasicNewsRecipe(Recipe): #: The CSS that is used to style the templates, i.e., the navigation bars and #: the Tables of Contents. Rather than overriding this variable, you should #: use `extra_css` in your recipe to customize look and feel. - template_css = u''' + template_css = ''' .article_date { color: gray; font-family: monospace; } @@ -446,7 +446,7 @@ class BasicNewsRecipe(Recipe): so, override in your subclass. ''' if not self.feeds: - raise NotImplementedError + raise NotImplementedError() if self.test: return self.feeds[:self.test[0]] return self.feeds @@ -462,7 +462,7 @@ class BasicNewsRecipe(Recipe): return url + '?&pagewanted=print' ''' - raise NotImplementedError + raise NotImplementedError() @classmethod def image_url_processor(cls, baseurl, url): @@ -665,7 +665,7 @@ class BasicNewsRecipe(Recipe): `url_or_raw`: Either a URL or the downloaded index page as a string ''' - if re.match(r'\w+://', url_or_raw): + if re.match((br'\w+://' if isinstance(url_or_raw, bytes) else r'\w+://'), url_or_raw): # We may be called in a thread (in the skip_ad_pages method), so # clone the browser to be safe. We cannot use self.cloned_browser # as it may or may not actually clone the browser, depending on if @@ -698,9 +698,7 @@ class BasicNewsRecipe(Recipe): if as_tree: from html5_parser import parse return parse(_raw) - else: - return BeautifulSoup(_raw) - return parse(_raw, return_root=False) + return BeautifulSoup(_raw) def extract_readable_article(self, html, url): ''' @@ -725,12 +723,12 @@ class BasicNewsRecipe(Recipe): root = frag elif frag.tag == 'body': root = document_fromstring( - u'%s' % + '%s' % extracted_title) root.append(frag) else: root = document_fromstring( - u'%s' % + '%s' % extracted_title) root.xpath('//body')[0].append(frag) @@ -794,7 +792,7 @@ class BasicNewsRecipe(Recipe): calibre show the user a simple message instead of an error, call :meth:`abort_recipe_processing`. ''' - raise NotImplementedError + raise NotImplementedError() def abort_recipe_processing(self, msg): ''' @@ -815,7 +813,7 @@ class BasicNewsRecipe(Recipe): This method is typically useful for sites that try to make it difficult to access article content automatically. ''' - raise NotImplementedError + raise NotImplementedError() def add_toc_thumbnail(self, article, src): ''' @@ -902,9 +900,9 @@ class BasicNewsRecipe(Recipe): self.css_map = {} web2disk_cmdline = ['web2disk', - '--timeout', str(self.timeout), - '--max-recursions', str(self.recursions), - '--delay', str(self.delay), + '--timeout', unicode_type(self.timeout), + '--max-recursions', unicode_type(self.recursions), + '--delay', unicode_type(self.delay), ] if self.verbose: @@ -1068,10 +1066,10 @@ class BasicNewsRecipe(Recipe): src = force_unicode(src, 'utf-8') pos = cls.summary_length fuzz = 50 - si = src.find(u';', pos) + si = src.find(';', pos) if si > 0 and si-pos > fuzz: si = -1 - gi = src.find(u'>', pos) + gi = src.find('>', pos) if gi > 0 and gi-pos > fuzz: gi = -1 npos = max(si, gi) @@ -1081,7 +1079,7 @@ class BasicNewsRecipe(Recipe): if len(ans) < len(src): from calibre.utils.cleantext import clean_xml_chars # Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it - ans = clean_xml_chars(ans) + u'\u2026' + ans = clean_xml_chars(ans) + '\u2026' return ans def feed2index(self, f, feeds): @@ -1590,7 +1588,7 @@ class BasicNewsRecipe(Recipe): article.sub_pages = result[1][1:] self.jobs_done += 1 self.report_progress(float(self.jobs_done)/len(self.jobs), - _(u'Article downloaded: %s')%force_unicode(article.title)) + _('Article downloaded: %s')%force_unicode(article.title)) if result[2]: self.partial_failures.append((request.feed.title, article.title, article.url, result[2])) @@ -1684,7 +1682,7 @@ class BasicNewsRecipe(Recipe): strings.append(item['alt']) except KeyError: pass - ans = u''.join(strings) + ans = ''.join(strings) if normalize_whitespace: ans = re.sub(r'\s+', ' ', ans) return ans diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py index 5b180fd1cd..e3c05239d4 100644 --- a/src/calibre/web/feeds/templates.py +++ b/src/calibre/web/feeds/templates.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2 +from __future__ import unicode_literals __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' @@ -36,13 +37,13 @@ class Template(object): if isbytestring(kwargs[key]): kwargs[key] = kwargs[key].decode('utf-8', 'replace') if kwargs[key] is None: - kwargs[key] = u'' + kwargs[key] = '' args = list(args) for i in range(len(args)): if isbytestring(args[i]): args[i] = args[i].decode('utf-8', 'replace') if args[i] is None: - args[i] = u'' + args[i] = '' self._generate(*args, **kwargs)