mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: Use unicode_literals and migrate str() in a few more files
This commit is contained in:
parent
7f7c83a709
commit
052cb43ae1
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python2
|
||||
|
||||
from __future__ import print_function
|
||||
from __future__ import print_function, unicode_literals
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
@ -48,7 +48,7 @@ class Article(object):
|
||||
print('Failed to process article summary, deleting:')
|
||||
print(summary.encode('utf-8'))
|
||||
traceback.print_exc()
|
||||
summary = u''
|
||||
summary = ''
|
||||
self.text_summary = clean_ascii_chars(summary)
|
||||
self.author = author
|
||||
self.content = content
|
||||
@ -83,7 +83,7 @@ class Article(object):
|
||||
|
||||
def __repr__(self):
|
||||
return \
|
||||
(u'''\
|
||||
('''\
|
||||
Title : %s
|
||||
URL : %s
|
||||
Author : %s
|
||||
@ -93,7 +93,7 @@ TOC thumb : %s
|
||||
Has content : %s
|
||||
'''%(self.title, self.url, self.author, self.summary[:20]+'...',
|
||||
self.localtime.strftime('%a, %d %b, %Y %H:%M'), self.toc_thumbnail,
|
||||
bool(self.content))).encode('utf-8')
|
||||
bool(self.content)))
|
||||
|
||||
def __str__(self):
|
||||
return repr(self)
|
||||
@ -208,7 +208,7 @@ class Feed(object):
|
||||
content = [i.value for i in item.get('content', []) if i.value]
|
||||
content = [i if isinstance(i, unicode_type) else i.decode('utf-8', 'replace')
|
||||
for i in content]
|
||||
content = u'\n'.join(content)
|
||||
content = '\n'.join(content)
|
||||
if not content.strip():
|
||||
content = None
|
||||
if not link and not content:
|
||||
@ -286,8 +286,8 @@ class FeedCollection(list):
|
||||
|
||||
def __init__(self, feeds):
|
||||
list.__init__(self, [f for f in feeds if len(f.articles) > 0])
|
||||
found_articles = set([])
|
||||
duplicates = set([])
|
||||
found_articles = set()
|
||||
duplicates = set()
|
||||
|
||||
def in_set(s, a):
|
||||
for x in s:
|
||||
|
@ -1,4 +1,4 @@
|
||||
from __future__ import with_statement
|
||||
from __future__ import with_statement, unicode_literals
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
'''
|
||||
@ -54,7 +54,7 @@ class BasicNewsRecipe(Recipe):
|
||||
|
||||
#: A couple of lines that describe the content this recipe downloads.
|
||||
#: This will be used primarily in a GUI that presents a list of recipes.
|
||||
description = u''
|
||||
description = ''
|
||||
|
||||
#: The author of this recipe
|
||||
__author__ = __appname__
|
||||
@ -288,7 +288,7 @@ class BasicNewsRecipe(Recipe):
|
||||
#: The CSS that is used to style the templates, i.e., the navigation bars and
|
||||
#: the Tables of Contents. Rather than overriding this variable, you should
|
||||
#: use `extra_css` in your recipe to customize look and feel.
|
||||
template_css = u'''
|
||||
template_css = '''
|
||||
.article_date {
|
||||
color: gray; font-family: monospace;
|
||||
}
|
||||
@ -446,7 +446,7 @@ class BasicNewsRecipe(Recipe):
|
||||
so, override in your subclass.
|
||||
'''
|
||||
if not self.feeds:
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError()
|
||||
if self.test:
|
||||
return self.feeds[:self.test[0]]
|
||||
return self.feeds
|
||||
@ -462,7 +462,7 @@ class BasicNewsRecipe(Recipe):
|
||||
return url + '?&pagewanted=print'
|
||||
|
||||
'''
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError()
|
||||
|
||||
@classmethod
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
@ -665,7 +665,7 @@ class BasicNewsRecipe(Recipe):
|
||||
|
||||
`url_or_raw`: Either a URL or the downloaded index page as a string
|
||||
'''
|
||||
if re.match(r'\w+://', url_or_raw):
|
||||
if re.match((br'\w+://' if isinstance(url_or_raw, bytes) else r'\w+://'), url_or_raw):
|
||||
# We may be called in a thread (in the skip_ad_pages method), so
|
||||
# clone the browser to be safe. We cannot use self.cloned_browser
|
||||
# as it may or may not actually clone the browser, depending on if
|
||||
@ -698,9 +698,7 @@ class BasicNewsRecipe(Recipe):
|
||||
if as_tree:
|
||||
from html5_parser import parse
|
||||
return parse(_raw)
|
||||
else:
|
||||
return BeautifulSoup(_raw)
|
||||
return parse(_raw, return_root=False)
|
||||
return BeautifulSoup(_raw)
|
||||
|
||||
def extract_readable_article(self, html, url):
|
||||
'''
|
||||
@ -725,12 +723,12 @@ class BasicNewsRecipe(Recipe):
|
||||
root = frag
|
||||
elif frag.tag == 'body':
|
||||
root = document_fromstring(
|
||||
u'<html><head><title>%s</title></head></html>' %
|
||||
'<html><head><title>%s</title></head></html>' %
|
||||
extracted_title)
|
||||
root.append(frag)
|
||||
else:
|
||||
root = document_fromstring(
|
||||
u'<html><head><title>%s</title></head><body/></html>' %
|
||||
'<html><head><title>%s</title></head><body/></html>' %
|
||||
extracted_title)
|
||||
root.xpath('//body')[0].append(frag)
|
||||
|
||||
@ -794,7 +792,7 @@ class BasicNewsRecipe(Recipe):
|
||||
calibre show the user a simple message instead of an error, call
|
||||
:meth:`abort_recipe_processing`.
|
||||
'''
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError()
|
||||
|
||||
def abort_recipe_processing(self, msg):
|
||||
'''
|
||||
@ -815,7 +813,7 @@ class BasicNewsRecipe(Recipe):
|
||||
This method is typically useful for sites that try to make it difficult to
|
||||
access article content automatically.
|
||||
'''
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError()
|
||||
|
||||
def add_toc_thumbnail(self, article, src):
|
||||
'''
|
||||
@ -902,9 +900,9 @@ class BasicNewsRecipe(Recipe):
|
||||
self.css_map = {}
|
||||
|
||||
web2disk_cmdline = ['web2disk',
|
||||
'--timeout', str(self.timeout),
|
||||
'--max-recursions', str(self.recursions),
|
||||
'--delay', str(self.delay),
|
||||
'--timeout', unicode_type(self.timeout),
|
||||
'--max-recursions', unicode_type(self.recursions),
|
||||
'--delay', unicode_type(self.delay),
|
||||
]
|
||||
|
||||
if self.verbose:
|
||||
@ -1068,10 +1066,10 @@ class BasicNewsRecipe(Recipe):
|
||||
src = force_unicode(src, 'utf-8')
|
||||
pos = cls.summary_length
|
||||
fuzz = 50
|
||||
si = src.find(u';', pos)
|
||||
si = src.find(';', pos)
|
||||
if si > 0 and si-pos > fuzz:
|
||||
si = -1
|
||||
gi = src.find(u'>', pos)
|
||||
gi = src.find('>', pos)
|
||||
if gi > 0 and gi-pos > fuzz:
|
||||
gi = -1
|
||||
npos = max(si, gi)
|
||||
@ -1081,7 +1079,7 @@ class BasicNewsRecipe(Recipe):
|
||||
if len(ans) < len(src):
|
||||
from calibre.utils.cleantext import clean_xml_chars
|
||||
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
||||
ans = clean_xml_chars(ans) + u'\u2026'
|
||||
ans = clean_xml_chars(ans) + '\u2026'
|
||||
return ans
|
||||
|
||||
def feed2index(self, f, feeds):
|
||||
@ -1590,7 +1588,7 @@ class BasicNewsRecipe(Recipe):
|
||||
article.sub_pages = result[1][1:]
|
||||
self.jobs_done += 1
|
||||
self.report_progress(float(self.jobs_done)/len(self.jobs),
|
||||
_(u'Article downloaded: %s')%force_unicode(article.title))
|
||||
_('Article downloaded: %s')%force_unicode(article.title))
|
||||
if result[2]:
|
||||
self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))
|
||||
|
||||
@ -1684,7 +1682,7 @@ class BasicNewsRecipe(Recipe):
|
||||
strings.append(item['alt'])
|
||||
except KeyError:
|
||||
pass
|
||||
ans = u''.join(strings)
|
||||
ans = ''.join(strings)
|
||||
if normalize_whitespace:
|
||||
ans = re.sub(r'\s+', ' ', ans)
|
||||
return ans
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python2
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
@ -36,13 +37,13 @@ class Template(object):
|
||||
if isbytestring(kwargs[key]):
|
||||
kwargs[key] = kwargs[key].decode('utf-8', 'replace')
|
||||
if kwargs[key] is None:
|
||||
kwargs[key] = u''
|
||||
kwargs[key] = ''
|
||||
args = list(args)
|
||||
for i in range(len(args)):
|
||||
if isbytestring(args[i]):
|
||||
args[i] = args[i].decode('utf-8', 'replace')
|
||||
if args[i] is None:
|
||||
args[i] = u''
|
||||
args[i] = ''
|
||||
|
||||
self._generate(*args, **kwargs)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user