diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py
index db9e8bc572..3a58c9470d 100644
--- a/src/calibre/web/feeds/__init__.py
+++ b/src/calibre/web/feeds/__init__.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python2
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal '
'''
@@ -48,7 +48,7 @@ class Article(object):
print('Failed to process article summary, deleting:')
print(summary.encode('utf-8'))
traceback.print_exc()
- summary = u''
+ summary = ''
self.text_summary = clean_ascii_chars(summary)
self.author = author
self.content = content
@@ -83,7 +83,7 @@ class Article(object):
def __repr__(self):
return \
-(u'''\
+('''\
Title : %s
URL : %s
Author : %s
@@ -93,7 +93,7 @@ TOC thumb : %s
Has content : %s
'''%(self.title, self.url, self.author, self.summary[:20]+'...',
self.localtime.strftime('%a, %d %b, %Y %H:%M'), self.toc_thumbnail,
- bool(self.content))).encode('utf-8')
+ bool(self.content)))
def __str__(self):
return repr(self)
@@ -208,7 +208,7 @@ class Feed(object):
content = [i.value for i in item.get('content', []) if i.value]
content = [i if isinstance(i, unicode_type) else i.decode('utf-8', 'replace')
for i in content]
- content = u'\n'.join(content)
+ content = '\n'.join(content)
if not content.strip():
content = None
if not link and not content:
@@ -286,8 +286,8 @@ class FeedCollection(list):
def __init__(self, feeds):
list.__init__(self, [f for f in feeds if len(f.articles) > 0])
- found_articles = set([])
- duplicates = set([])
+ found_articles = set()
+ duplicates = set()
def in_set(s, a):
for x in s:
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 4b7e9db435..2a6f61568a 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -1,4 +1,4 @@
-from __future__ import with_statement
+from __future__ import with_statement, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal '
'''
@@ -54,7 +54,7 @@ class BasicNewsRecipe(Recipe):
#: A couple of lines that describe the content this recipe downloads.
#: This will be used primarily in a GUI that presents a list of recipes.
- description = u''
+ description = ''
#: The author of this recipe
__author__ = __appname__
@@ -288,7 +288,7 @@ class BasicNewsRecipe(Recipe):
#: The CSS that is used to style the templates, i.e., the navigation bars and
#: the Tables of Contents. Rather than overriding this variable, you should
#: use `extra_css` in your recipe to customize look and feel.
- template_css = u'''
+ template_css = '''
.article_date {
color: gray; font-family: monospace;
}
@@ -446,7 +446,7 @@ class BasicNewsRecipe(Recipe):
so, override in your subclass.
'''
if not self.feeds:
- raise NotImplementedError
+ raise NotImplementedError()
if self.test:
return self.feeds[:self.test[0]]
return self.feeds
@@ -462,7 +462,7 @@ class BasicNewsRecipe(Recipe):
return url + '?&pagewanted=print'
'''
- raise NotImplementedError
+ raise NotImplementedError()
@classmethod
def image_url_processor(cls, baseurl, url):
@@ -665,7 +665,7 @@ class BasicNewsRecipe(Recipe):
`url_or_raw`: Either a URL or the downloaded index page as a string
'''
- if re.match(r'\w+://', url_or_raw):
+ if re.match((br'\w+://' if isinstance(url_or_raw, bytes) else r'\w+://'), url_or_raw):
# We may be called in a thread (in the skip_ad_pages method), so
# clone the browser to be safe. We cannot use self.cloned_browser
# as it may or may not actually clone the browser, depending on if
@@ -698,9 +698,7 @@ class BasicNewsRecipe(Recipe):
if as_tree:
from html5_parser import parse
return parse(_raw)
- else:
- return BeautifulSoup(_raw)
- return parse(_raw, return_root=False)
+ return BeautifulSoup(_raw)
def extract_readable_article(self, html, url):
'''
@@ -725,12 +723,12 @@ class BasicNewsRecipe(Recipe):
root = frag
elif frag.tag == 'body':
root = document_fromstring(
- u'%s' %
+ '%s' %
extracted_title)
root.append(frag)
else:
root = document_fromstring(
- u'%s' %
+ '%s' %
extracted_title)
root.xpath('//body')[0].append(frag)
@@ -794,7 +792,7 @@ class BasicNewsRecipe(Recipe):
calibre show the user a simple message instead of an error, call
:meth:`abort_recipe_processing`.
'''
- raise NotImplementedError
+ raise NotImplementedError()
def abort_recipe_processing(self, msg):
'''
@@ -815,7 +813,7 @@ class BasicNewsRecipe(Recipe):
This method is typically useful for sites that try to make it difficult to
access article content automatically.
'''
- raise NotImplementedError
+ raise NotImplementedError()
def add_toc_thumbnail(self, article, src):
'''
@@ -902,9 +900,9 @@ class BasicNewsRecipe(Recipe):
self.css_map = {}
web2disk_cmdline = ['web2disk',
- '--timeout', str(self.timeout),
- '--max-recursions', str(self.recursions),
- '--delay', str(self.delay),
+ '--timeout', unicode_type(self.timeout),
+ '--max-recursions', unicode_type(self.recursions),
+ '--delay', unicode_type(self.delay),
]
if self.verbose:
@@ -1068,10 +1066,10 @@ class BasicNewsRecipe(Recipe):
src = force_unicode(src, 'utf-8')
pos = cls.summary_length
fuzz = 50
- si = src.find(u';', pos)
+ si = src.find(';', pos)
if si > 0 and si-pos > fuzz:
si = -1
- gi = src.find(u'>', pos)
+ gi = src.find('>', pos)
if gi > 0 and gi-pos > fuzz:
gi = -1
npos = max(si, gi)
@@ -1081,7 +1079,7 @@ class BasicNewsRecipe(Recipe):
if len(ans) < len(src):
from calibre.utils.cleantext import clean_xml_chars
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
- ans = clean_xml_chars(ans) + u'\u2026'
+ ans = clean_xml_chars(ans) + '\u2026'
return ans
def feed2index(self, f, feeds):
@@ -1590,7 +1588,7 @@ class BasicNewsRecipe(Recipe):
article.sub_pages = result[1][1:]
self.jobs_done += 1
self.report_progress(float(self.jobs_done)/len(self.jobs),
- _(u'Article downloaded: %s')%force_unicode(article.title))
+ _('Article downloaded: %s')%force_unicode(article.title))
if result[2]:
self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))
@@ -1684,7 +1682,7 @@ class BasicNewsRecipe(Recipe):
strings.append(item['alt'])
except KeyError:
pass
- ans = u''.join(strings)
+ ans = ''.join(strings)
if normalize_whitespace:
ans = re.sub(r'\s+', ' ', ans)
return ans
diff --git a/src/calibre/web/feeds/templates.py b/src/calibre/web/feeds/templates.py
index 5b180fd1cd..e3c05239d4 100644
--- a/src/calibre/web/feeds/templates.py
+++ b/src/calibre/web/feeds/templates.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python2
+from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal '
@@ -36,13 +37,13 @@ class Template(object):
if isbytestring(kwargs[key]):
kwargs[key] = kwargs[key].decode('utf-8', 'replace')
if kwargs[key] is None:
- kwargs[key] = u''
+ kwargs[key] = ''
args = list(args)
for i in range(len(args)):
if isbytestring(args[i]):
args[i] = args[i].decode('utf-8', 'replace')
if args[i] is None:
- args[i] = u''
+ args[i] = ''
self._generate(*args, **kwargs)