mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: Use unicode_literals and migrate str() in a few more files
This commit is contained in:
parent
7f7c83a709
commit
052cb43ae1
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function, unicode_literals
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
'''
|
'''
|
||||||
@ -48,7 +48,7 @@ class Article(object):
|
|||||||
print('Failed to process article summary, deleting:')
|
print('Failed to process article summary, deleting:')
|
||||||
print(summary.encode('utf-8'))
|
print(summary.encode('utf-8'))
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
summary = u''
|
summary = ''
|
||||||
self.text_summary = clean_ascii_chars(summary)
|
self.text_summary = clean_ascii_chars(summary)
|
||||||
self.author = author
|
self.author = author
|
||||||
self.content = content
|
self.content = content
|
||||||
@ -83,7 +83,7 @@ class Article(object):
|
|||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return \
|
return \
|
||||||
(u'''\
|
('''\
|
||||||
Title : %s
|
Title : %s
|
||||||
URL : %s
|
URL : %s
|
||||||
Author : %s
|
Author : %s
|
||||||
@ -93,7 +93,7 @@ TOC thumb : %s
|
|||||||
Has content : %s
|
Has content : %s
|
||||||
'''%(self.title, self.url, self.author, self.summary[:20]+'...',
|
'''%(self.title, self.url, self.author, self.summary[:20]+'...',
|
||||||
self.localtime.strftime('%a, %d %b, %Y %H:%M'), self.toc_thumbnail,
|
self.localtime.strftime('%a, %d %b, %Y %H:%M'), self.toc_thumbnail,
|
||||||
bool(self.content))).encode('utf-8')
|
bool(self.content)))
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr(self)
|
return repr(self)
|
||||||
@ -208,7 +208,7 @@ class Feed(object):
|
|||||||
content = [i.value for i in item.get('content', []) if i.value]
|
content = [i.value for i in item.get('content', []) if i.value]
|
||||||
content = [i if isinstance(i, unicode_type) else i.decode('utf-8', 'replace')
|
content = [i if isinstance(i, unicode_type) else i.decode('utf-8', 'replace')
|
||||||
for i in content]
|
for i in content]
|
||||||
content = u'\n'.join(content)
|
content = '\n'.join(content)
|
||||||
if not content.strip():
|
if not content.strip():
|
||||||
content = None
|
content = None
|
||||||
if not link and not content:
|
if not link and not content:
|
||||||
@ -286,8 +286,8 @@ class FeedCollection(list):
|
|||||||
|
|
||||||
def __init__(self, feeds):
|
def __init__(self, feeds):
|
||||||
list.__init__(self, [f for f in feeds if len(f.articles) > 0])
|
list.__init__(self, [f for f in feeds if len(f.articles) > 0])
|
||||||
found_articles = set([])
|
found_articles = set()
|
||||||
duplicates = set([])
|
duplicates = set()
|
||||||
|
|
||||||
def in_set(s, a):
|
def in_set(s, a):
|
||||||
for x in s:
|
for x in s:
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from __future__ import with_statement
|
from __future__ import with_statement, unicode_literals
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
'''
|
'''
|
||||||
@ -54,7 +54,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
#: A couple of lines that describe the content this recipe downloads.
|
#: A couple of lines that describe the content this recipe downloads.
|
||||||
#: This will be used primarily in a GUI that presents a list of recipes.
|
#: This will be used primarily in a GUI that presents a list of recipes.
|
||||||
description = u''
|
description = ''
|
||||||
|
|
||||||
#: The author of this recipe
|
#: The author of this recipe
|
||||||
__author__ = __appname__
|
__author__ = __appname__
|
||||||
@ -288,7 +288,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
#: The CSS that is used to style the templates, i.e., the navigation bars and
|
#: The CSS that is used to style the templates, i.e., the navigation bars and
|
||||||
#: the Tables of Contents. Rather than overriding this variable, you should
|
#: the Tables of Contents. Rather than overriding this variable, you should
|
||||||
#: use `extra_css` in your recipe to customize look and feel.
|
#: use `extra_css` in your recipe to customize look and feel.
|
||||||
template_css = u'''
|
template_css = '''
|
||||||
.article_date {
|
.article_date {
|
||||||
color: gray; font-family: monospace;
|
color: gray; font-family: monospace;
|
||||||
}
|
}
|
||||||
@ -446,7 +446,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
so, override in your subclass.
|
so, override in your subclass.
|
||||||
'''
|
'''
|
||||||
if not self.feeds:
|
if not self.feeds:
|
||||||
raise NotImplementedError
|
raise NotImplementedError()
|
||||||
if self.test:
|
if self.test:
|
||||||
return self.feeds[:self.test[0]]
|
return self.feeds[:self.test[0]]
|
||||||
return self.feeds
|
return self.feeds
|
||||||
@ -462,7 +462,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
return url + '?&pagewanted=print'
|
return url + '?&pagewanted=print'
|
||||||
|
|
||||||
'''
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def image_url_processor(cls, baseurl, url):
|
def image_url_processor(cls, baseurl, url):
|
||||||
@ -665,7 +665,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
`url_or_raw`: Either a URL or the downloaded index page as a string
|
`url_or_raw`: Either a URL or the downloaded index page as a string
|
||||||
'''
|
'''
|
||||||
if re.match(r'\w+://', url_or_raw):
|
if re.match((br'\w+://' if isinstance(url_or_raw, bytes) else r'\w+://'), url_or_raw):
|
||||||
# We may be called in a thread (in the skip_ad_pages method), so
|
# We may be called in a thread (in the skip_ad_pages method), so
|
||||||
# clone the browser to be safe. We cannot use self.cloned_browser
|
# clone the browser to be safe. We cannot use self.cloned_browser
|
||||||
# as it may or may not actually clone the browser, depending on if
|
# as it may or may not actually clone the browser, depending on if
|
||||||
@ -698,9 +698,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if as_tree:
|
if as_tree:
|
||||||
from html5_parser import parse
|
from html5_parser import parse
|
||||||
return parse(_raw)
|
return parse(_raw)
|
||||||
else:
|
|
||||||
return BeautifulSoup(_raw)
|
return BeautifulSoup(_raw)
|
||||||
return parse(_raw, return_root=False)
|
|
||||||
|
|
||||||
def extract_readable_article(self, html, url):
|
def extract_readable_article(self, html, url):
|
||||||
'''
|
'''
|
||||||
@ -725,12 +723,12 @@ class BasicNewsRecipe(Recipe):
|
|||||||
root = frag
|
root = frag
|
||||||
elif frag.tag == 'body':
|
elif frag.tag == 'body':
|
||||||
root = document_fromstring(
|
root = document_fromstring(
|
||||||
u'<html><head><title>%s</title></head></html>' %
|
'<html><head><title>%s</title></head></html>' %
|
||||||
extracted_title)
|
extracted_title)
|
||||||
root.append(frag)
|
root.append(frag)
|
||||||
else:
|
else:
|
||||||
root = document_fromstring(
|
root = document_fromstring(
|
||||||
u'<html><head><title>%s</title></head><body/></html>' %
|
'<html><head><title>%s</title></head><body/></html>' %
|
||||||
extracted_title)
|
extracted_title)
|
||||||
root.xpath('//body')[0].append(frag)
|
root.xpath('//body')[0].append(frag)
|
||||||
|
|
||||||
@ -794,7 +792,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
calibre show the user a simple message instead of an error, call
|
calibre show the user a simple message instead of an error, call
|
||||||
:meth:`abort_recipe_processing`.
|
:meth:`abort_recipe_processing`.
|
||||||
'''
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError()
|
||||||
|
|
||||||
def abort_recipe_processing(self, msg):
|
def abort_recipe_processing(self, msg):
|
||||||
'''
|
'''
|
||||||
@ -815,7 +813,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
This method is typically useful for sites that try to make it difficult to
|
This method is typically useful for sites that try to make it difficult to
|
||||||
access article content automatically.
|
access article content automatically.
|
||||||
'''
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError()
|
||||||
|
|
||||||
def add_toc_thumbnail(self, article, src):
|
def add_toc_thumbnail(self, article, src):
|
||||||
'''
|
'''
|
||||||
@ -902,9 +900,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
self.css_map = {}
|
self.css_map = {}
|
||||||
|
|
||||||
web2disk_cmdline = ['web2disk',
|
web2disk_cmdline = ['web2disk',
|
||||||
'--timeout', str(self.timeout),
|
'--timeout', unicode_type(self.timeout),
|
||||||
'--max-recursions', str(self.recursions),
|
'--max-recursions', unicode_type(self.recursions),
|
||||||
'--delay', str(self.delay),
|
'--delay', unicode_type(self.delay),
|
||||||
]
|
]
|
||||||
|
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
@ -1068,10 +1066,10 @@ class BasicNewsRecipe(Recipe):
|
|||||||
src = force_unicode(src, 'utf-8')
|
src = force_unicode(src, 'utf-8')
|
||||||
pos = cls.summary_length
|
pos = cls.summary_length
|
||||||
fuzz = 50
|
fuzz = 50
|
||||||
si = src.find(u';', pos)
|
si = src.find(';', pos)
|
||||||
if si > 0 and si-pos > fuzz:
|
if si > 0 and si-pos > fuzz:
|
||||||
si = -1
|
si = -1
|
||||||
gi = src.find(u'>', pos)
|
gi = src.find('>', pos)
|
||||||
if gi > 0 and gi-pos > fuzz:
|
if gi > 0 and gi-pos > fuzz:
|
||||||
gi = -1
|
gi = -1
|
||||||
npos = max(si, gi)
|
npos = max(si, gi)
|
||||||
@ -1081,7 +1079,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if len(ans) < len(src):
|
if len(ans) < len(src):
|
||||||
from calibre.utils.cleantext import clean_xml_chars
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
|
||||||
ans = clean_xml_chars(ans) + u'\u2026'
|
ans = clean_xml_chars(ans) + '\u2026'
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def feed2index(self, f, feeds):
|
def feed2index(self, f, feeds):
|
||||||
@ -1590,7 +1588,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
article.sub_pages = result[1][1:]
|
article.sub_pages = result[1][1:]
|
||||||
self.jobs_done += 1
|
self.jobs_done += 1
|
||||||
self.report_progress(float(self.jobs_done)/len(self.jobs),
|
self.report_progress(float(self.jobs_done)/len(self.jobs),
|
||||||
_(u'Article downloaded: %s')%force_unicode(article.title))
|
_('Article downloaded: %s')%force_unicode(article.title))
|
||||||
if result[2]:
|
if result[2]:
|
||||||
self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))
|
self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))
|
||||||
|
|
||||||
@ -1684,7 +1682,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
strings.append(item['alt'])
|
strings.append(item['alt'])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
ans = u''.join(strings)
|
ans = ''.join(strings)
|
||||||
if normalize_whitespace:
|
if normalize_whitespace:
|
||||||
ans = re.sub(r'\s+', ' ', ans)
|
ans = re.sub(r'\s+', ' ', ans)
|
||||||
return ans
|
return ans
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
@ -36,13 +37,13 @@ class Template(object):
|
|||||||
if isbytestring(kwargs[key]):
|
if isbytestring(kwargs[key]):
|
||||||
kwargs[key] = kwargs[key].decode('utf-8', 'replace')
|
kwargs[key] = kwargs[key].decode('utf-8', 'replace')
|
||||||
if kwargs[key] is None:
|
if kwargs[key] is None:
|
||||||
kwargs[key] = u''
|
kwargs[key] = ''
|
||||||
args = list(args)
|
args = list(args)
|
||||||
for i in range(len(args)):
|
for i in range(len(args)):
|
||||||
if isbytestring(args[i]):
|
if isbytestring(args[i]):
|
||||||
args[i] = args[i].decode('utf-8', 'replace')
|
args[i] = args[i].decode('utf-8', 'replace')
|
||||||
if args[i] is None:
|
if args[i] is None:
|
||||||
args[i] = u''
|
args[i] = ''
|
||||||
|
|
||||||
self._generate(*args, **kwargs)
|
self._generate(*args, **kwargs)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user