Also check python files using python 3 flake8

Kovid Goyal 2019-04-13 07:49:43 +05:30
parent 623f6623b5
commit 13961abd16
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
154 changed files with 409 additions and 308 deletions
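
Most of the changes below swap Python 2-only constructs in the recipe files for forms that both flake8-python2 and the Python 3 flake8 accept. As a rough orientation (a minimal sketch, not taken from the diff), the recurring substitutions are:

from __future__ import print_function, unicode_literals
import re
import sys

text_type = type(u'')                # stands in for the py2-only builtin `unicode`
ok = isinstance(u'abc', text_type)   # True on both Python 2 and 3
pat = re.compile(r'Posted (\d\d?)')  # raw string, so py3 flake8 sees no invalid escapes
with open(__file__, 'rb') as f:      # open() replaces the py2-only builtin file()
    data = f.read()
print('checked', len(data), 'bytes', file=sys.stderr)  # print as a function, no space before (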

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, spswerling'
'''
@ -162,7 +162,7 @@ class AlMonitor(BasicNewsRecipe):
def scrape_article_date(self, soup):
for span in soup.findAll('span'):
txt = self.text(span)
rgx = re.compile(unicode(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*'))
rgx = re.compile(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*')
hit = rgx.match(txt)
if hit:
return self.date_from_string(txt)

View File

@ -126,7 +126,7 @@ class Ambito_Financiero(BasicNewsRecipe):
cfind = smallsoup.find('div', id="contenido_data")
if cfind:
p.append(cfind)
return unicode(soup)
return type(u'')(soup)
return raw_html
def cleanup(self):

View File

@ -34,7 +34,7 @@ class AmericanThinker(BasicNewsRecipe):
namespaceHTMLElements=False)
for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa
x.getparent().remove(x)
return etree.tostring(root, encoding=unicode)
return etree.tostring(root, encoding='unicode')
feeds = [(u'http://feeds.feedburner.com/americanthinker'),
(u'http://feeds.feedburner.com/AmericanThinkerBlog')
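
The encoding=unicode to encoding='unicode' change above recurs in several recipes further down (Economist, Guardian, TimesOnline, OurDailyBread). A short sketch of why the string literal is the portable spelling, assuming only that lxml is installed:

from lxml import etree

root = etree.fromstring('<div class="article_body">text</div>')
# Asking lxml for encoding='unicode' returns a text string on both Python 2
# and 3; passing the `unicode` type object only existed on Python 2.
markup = etree.tostring(root, encoding='unicode')
print(markup)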

View File

@ -161,7 +161,7 @@ class AppleDaily(BasicNewsRecipe):
article_titles.append(force_unicode(a.title, 'utf-8'))
mi.comments = self.description
if not isinstance(mi.comments, unicode):
if not isinstance(mi.comments, type(u'')):
mi.comments = mi.comments.decode('utf-8', 'replace')
mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
'\n\n'.join(article_titles))
@ -272,7 +272,7 @@ class AppleDaily(BasicNewsRecipe):
elem = BeautifulSoup(translatedTempl).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
fi.write(type(u'')(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')

View File

@ -104,7 +104,7 @@ class AppledailyTW(BasicNewsRecipe):
]
def preprocess_raw_html(self, raw_html, url):
raw_html = re.sub(unicode(r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
raw_html = re.sub((r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
raw_html = re.sub(
unicode(r'<title>(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html)
(r'<title>(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html)
return raw_html

View File

@ -85,7 +85,7 @@ class BerlinPolicyJournal(BasicNewsRecipe):
div.find('h3', {'class': 'entry-title'}).a)
article_url = div.find(
'h3', {'class': 'entry-title'}).a['href']
article_date = unicode(time.strftime(
article_date = type(u'')(time.strftime(
' [%a, %d %b %H:%M]', timestamp))
article_desc = self.tag_to_string(
div.find('div', {'class': 'i-summary'}).p)

View File

@ -47,7 +47,7 @@ class BigOven(BasicNewsRecipe):
preprocess_regexps = [
(re.compile(r'Want detailed nutrition information?', re.DOTALL), lambda match: ''),
(re.compile('\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
(re.compile(r'\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
]
def preprocess_html(self, soup):

View File

@ -60,7 +60,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
'http://images.icnetwork.co.uk/upl/birm')})
cov = str(cov)
cov2 = re.findall(
'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
cov = str(cov2)
cov = cov[2:len(cov) - 2]

View File

@ -13,6 +13,7 @@ class AdvancedUserRecipe1331729727(BasicNewsRecipe):
feeds = [(u'Camera di Commercio di Bari',
u'http://feed43.com/4715147488845101.xml')]
__license__ = 'GPL v3'
__copyright__ = '2012, faber1971'
__version__ = 'v1.00'

View File

@ -22,7 +22,7 @@ class cdrinfo(BasicNewsRecipe):
remove_empty_feeds = True
remove_javascript = True
remove_attributes = ['style', 'onmouseover']
preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: ''),
preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\\.gravatar\\.com</a>\\.</p>', re.DOTALL), lambda match: ''),
(re.compile(u'<p[^>]*?>.{,2}</p>', re.DOTALL), lambda match: '')]
ignore_duplicate_articles = {'title', 'url'}

View File

@ -16,11 +16,11 @@ class CNetJapan(BasicNewsRecipe):
remove_javascript = True
preprocess_regexps = [
(re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
(re.compile(type(u'')(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
lambda match: '</body>'),
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
lambda match: '</body>'),
(re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
(re.compile(type(u'')(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
lambda match: '<!-- removed -->'),
]

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -14,11 +15,11 @@ class CNetJapanDigital(BasicNewsRecipe):
remove_javascript = True
preprocess_regexps = [
(re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
(re.compile((r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
lambda match: '</body>'),
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
lambda match: '</body>'),
(re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
(re.compile((r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
lambda match: '<!-- removed -->'),
]

View File

@ -1,3 +1,4 @@
from __future__ import unicode_literals
import re
from calibre.web.feeds.news import BasicNewsRecipe
@ -14,11 +15,11 @@ class CNetJapanRelease(BasicNewsRecipe):
remove_javascript = True
preprocess_regexps = [
(re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
(re.compile((r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
lambda match: '</body>'),
(re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
lambda match: '</body>'),
(re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
(re.compile((r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
lambda match: '<!-- removed -->'),
]

View File

@ -82,7 +82,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
cov = soup.find(attrs={'id': 'large'})
cov = str(cov)
cov2 = re.findall(
'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
cov2 = str(cov2)
cov2 = cov2[2:len(cov2) - 2]
# cov2 now is pic url, now go back to original function

View File

@ -16,7 +16,7 @@ class AdvancedUserRecipe1361743898(BasicNewsRecipe):
articles = []
feeds = []
soup = self.index_to_soup("http://www.democracyjournal.org")
for x in soup.findAll(href=re.compile("http://www\.democracyjournal\.org/\d*/.*php$")):
for x in soup.findAll(href=re.compile(r"http://www\.democracyjournal\.org/\d*/.*php$")):
url = x.get('href')
title = self.tag_to_string(x)
articles.append({'title': title, 'url': url,

View File

@ -69,6 +69,6 @@ class AdvancedUserRecipe1297291961(BasicNewsRecipe):
]
def print_version(self, url):
p = re.compile('(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')
p = re.compile(r'(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')
m = p.search(url)
return url.replace(m.group(), '&template=printart')

View File

@ -19,7 +19,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
max_articles_per_feed = 100
remove_attrs = ['style', 'width', 'height']
preprocess_regexps = [(re.compile(
unicode(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')]
type(u'')(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')]
keep_only_tags = [dict(name='h1'), dict(
attrs={'class': ['entry single']}), dict(id='phContent_divArticle')]
remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')] # noqa

View File

@ -92,7 +92,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
if raw:
return _raw
if not isinstance(_raw, unicode) and self.encoding:
if not isinstance(_raw, type(u'')) and self.encoding:
if callable(self.encoding):
_raw = self.encoding(_raw)
else:
@ -101,7 +101,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
from calibre.utils.cleantext import clean_xml_chars
if isinstance(_raw, unicode):
if isinstance(_raw, type(u'')):
_raw = strip_encoding_declarations(_raw)
else:
_raw = xml_to_unicode(

View File

@ -26,7 +26,7 @@ class EcoGeek(BasicNewsRecipe):
for i, article in enumerate(soup.findAll('div', attrs={'class': 'article'})):
fname = os.path.join(tdir, '%d.html' % i)
with open(fname, 'wb') as f:
f.write(unicode(article).encode('utf-8'))
f.write(type(u'')(article).encode('utf-8'))
articles.append({
'title': self.tag_to_string(article.find('h2')),
'url': 'file://' + fname.replace(os.sep, '/'),

View File

@ -154,7 +154,7 @@ class Economist(BasicNewsRecipe):
p.remove(noscript[0])
for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'):
x.getparent().remove(x)
raw = etree.tostring(root, encoding=unicode)
raw = etree.tostring(root, encoding='unicode')
return raw
def populate_article_metadata(self, article, soup, first):
@ -165,7 +165,7 @@ class Economist(BasicNewsRecipe):
if el is not None and el.contents:
for descendant in el.contents:
if isinstance(descendant, NavigableString):
result.append(unicode(descendant))
result.append(type(u'')(descendant))
article.summary = u'. '.join(result) + u'.'
article.text_summary = clean_ascii_chars(article.summary)

View File

@ -154,7 +154,7 @@ class Economist(BasicNewsRecipe):
p.remove(noscript[0])
for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'):
x.getparent().remove(x)
raw = etree.tostring(root, encoding=unicode)
raw = etree.tostring(root, encoding='unicode')
return raw
def populate_article_metadata(self, article, soup, first):
@ -165,7 +165,7 @@ class Economist(BasicNewsRecipe):
if el is not None and el.contents:
for descendant in el.contents:
if isinstance(descendant, NavigableString):
result.append(unicode(descendant))
result.append(type(u'')(descendant))
article.summary = u'. '.join(result) + u'.'
article.text_summary = clean_ascii_chars(article.summary)

View File

@ -23,7 +23,7 @@ class FilmWebPl(BasicNewsRecipe):
'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags...
(re.compile(u'(?:<sup>)?\\(kliknij\\,\\ aby powiększyć\\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
(re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
(re.compile(type(u'')(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
]
remove_tags = [dict(attrs={'class':['infoParent', 'likeBar',
'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]

View File

@ -33,7 +33,7 @@ class FirstThings(BasicNewsRecipe):
'''
def preprocess_raw_html(self, raw, url):
return html.tostring(html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', encoding=unicode)
return html.tostring(html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', encoding='unicode')
def parse_index(self):
soup = self.index_to_soup(self.INDEX)

View File

@ -32,7 +32,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
# rules for wyborcza.biz
preprocess_regexps.append((re.compile(
u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\.?<br>', re.DOTALL), lambda m: ''))
u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\\.?<br>', re.DOTALL), lambda m: ''))
feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
(u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),

View File

@ -11,7 +11,7 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
__author__ = 'Anonymous'
language = 'en_GB'
remove_tags = [
dict(name='div', attrs={'class': 'articles_footer', 'class': 'printoptions'})]
dict(name='div', attrs={'class': ['articles_footer', 'printoptions']})]
def print_version(self, url):
return url + '/print/1'

View File

@ -49,9 +49,9 @@ def solve_captcha(captcha):
# Parse into parts
pattern = re.compile(
u'(?P<first_component>[0-9]+)?'
u'\s*(?P<operator>[+×])\s*'
u'\\s*(?P<operator>[+×])\\s*'
u'(?P<second_component>[0-9]+)'
u'\s*(=)\s*'
u'\\s*(=)\\s*'
u'(?P<result>[0-9]+)?', re.UNICODE)
calculationParts = re.search(pattern, numeric_problem)
@ -230,7 +230,7 @@ class Granta(BasicNewsRecipe):
if image is not None and image.attrs is not None:
style = dict(image.attrs)['style']
if style is not None:
m = re.search('url\(([^\)]*)\)', style)
m = re.search(r'url\(([^\)]*)\)', style)
if m.group(1) is not None:
stripstyle(image)
image.name = 'img'

View File

@ -67,7 +67,7 @@ class Guardian(BasicNewsRecipe):
def preprocess_raw_html(self, raw, url):
import html5lib
from lxml import html
return html.tostring(html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml'), encoding=unicode)
return html.tostring(html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml'), encoding='unicode')
def preprocess_html(self, soup):
for img in soup.findAll('img', srcset=True):

View File

@ -18,5 +18,6 @@ class AdvancedUserRecipe1336289226(BasicNewsRecipe):
__author__ = 'faber1971'
language = 'it'
__version__ = 'v1.0'
__date__ = '6, May 2012'

View File

@ -160,7 +160,7 @@ class HoustonChronicle(BasicNewsRecipe):
result = []
for descendant in el.contents:
if isinstance(descendant, NavigableString):
result.append(unicode(descendant).strip())
result.append(type(u'')(descendant).strip())
all_text = u' '.join(result).encode('utf-8')
if len(all_text) > 1:
sentences = re.findall(sentence_regex, all_text)

View File

@ -33,7 +33,7 @@ class jazzpress(BasicNewsRecipe):
# find the link
epublink = browser.find_link(
url_regex=re.compile('e_jazzpress\d\d\d\d\_epub'))
url_regex=re.compile(r'e_jazzpress\d\d\d\d\_epub'))
# download ebook
self.report_progress(0, _('Downloading ePUB'))

View File

@ -15,5 +15,6 @@ class AdvancedUserRecipe1336504510(BasicNewsRecipe):
description = 'News about Juventus from La Stampa'
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '8, May 2012'

View File

@ -11,5 +11,7 @@ class AdvancedUserRecipe1326135232(BasicNewsRecipe):
feeds = [(u'Lega Nerd', u'http://feeds.feedburner.com/LegaNerd')]
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '9, January 2011'

View File

@ -31,7 +31,7 @@ class AListApart (BasicNewsRecipe):
]
def image_url_processor(self, baseurl, url):
if re.findall('alistapart\.com', url):
if re.findall(r'alistapart\.com', url):
return 'http:' + url
else:
return url

View File

@ -1147,7 +1147,7 @@ class MPRecipe(BasicNewsRecipe):
doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
fi.write(type(u'')(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')

View File

@ -985,7 +985,7 @@ class MPRecipe(BasicNewsRecipe):
doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
fi.write(type(u'')(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')

View File

@ -985,7 +985,7 @@ class MPRecipe(BasicNewsRecipe):
doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
fi.write(type(u'')(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')

View File

@ -70,7 +70,7 @@ class ModorosBlogHu(BasicNewsRecipe):
past_items = set()
if os.path.exists(feed_fn):
with file(feed_fn) as f:
with open(feed_fn) as f:
for h in f:
past_items.add(h.strip())
@ -87,7 +87,7 @@ class ModorosBlogHu(BasicNewsRecipe):
cur_items.add(item_hash)
if item_hash in past_items:
feed.articles.remove(article)
with file(feed_fn, 'w') as f:
with open(feed_fn, 'w') as f:
for h in cur_items:
f.write(h + '\n')

View File

@ -24,7 +24,7 @@ class FocusRecipe(BasicNewsRecipe):
simultaneous_downloads = 2
r = re.compile(
'.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
r'.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
keep_only_tags = []
keep_only_tags.append(dict(name='div', attrs={'class': 'artykul'}))
remove_tags = [dict(name='ul', attrs={'class': 'socialStuff'})]

View File

@ -7,6 +7,7 @@ class AdvancedUserRecipe1360354988(BasicNewsRecipe):
max_articles_per_feed = 100
auto_cleanup = True
from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -93,7 +93,7 @@ class Newsweek(BasicNewsRecipe):
strs.append("".join(str(content)))
# return contents as a string
return unicode("".join(strs))
return u"".join(strs)
#
# Articles can be divided into several pages, this method parses them recursevely
@ -113,7 +113,7 @@ class Newsweek(BasicNewsRecipe):
if page == 0:
title = main_section.find('h1')
html = html + unicode(title)
html = html + type(u'')(title)
authors = ''
authorBox = main_section.find('div', attrs={'class': 'AuthorBox'})
@ -121,10 +121,10 @@ class Newsweek(BasicNewsRecipe):
authorH4 = authorBox.find('h4')
if authorH4 is not None:
authors = self.tag_to_string(authorH4)
html = html + unicode(authors)
html = html + type(u'')(authors)
info = main_section.find('p', attrs={'class': 'lead'})
html = html + unicode(info)
html = html + type(u'')(info)
html = html + self.get_article_divs(
'3917dc34e07c9c7180df2ea9ef103361845c8af42b71f51b960059226090a1ac articleStart', main_section)

View File

@ -14,5 +14,7 @@ class AdvancedUserRecipe1335362999(BasicNewsRecipe):
description = 'An Italian satirical blog'
language = 'it'
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '24, April 2012'

View File

@ -94,7 +94,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe):
past_items = set()
if os.path.exists(feed_fn):
with file(feed_fn) as f:
with open(feed_fn) as f:
for h in f:
past_items.add(h.strip())
@ -111,7 +111,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe):
cur_items.add(item_hash)
if item_hash in past_items:
feed.articles.remove(article)
with file(feed_fn, 'w') as f:
with open(feed_fn, 'w') as f:
for h in cur_items:
f.write(h + '\n')

View File

@ -188,7 +188,7 @@ class OReillyPremium(BasicNewsRecipe):
# feeds = self.parse_feeds()
# Now add regular feeds.
feedsRSS = self.parse_feeds()
print ("feedsRSS is type " + feedsRSS.__class__.__name__)
print("feedsRSS is type " + feedsRSS.__class__.__name__)
for articles in feedsRSS:
print("articles is type " + articles.__class__.__name__)

View File

@ -65,7 +65,7 @@ class OurDailyBread(BasicNewsRecipe):
hr = div.makeelement('hr')
div.insert(0, hr)
# print html.tostring(div)
raw = html.tostring(root, encoding=unicode)
raw = html.tostring(root, encoding='unicode')
return raw
def preprocess_html(self, soup):

View File

@ -58,7 +58,7 @@ class Pagina12(BasicNewsRecipe):
seen_titles = set([])
for section in soup.findAll('div', 'seccionx'):
numero += 1
print (numero)
print(numero)
section_title = self.tag_to_string(section.find(
'div', 'desplegable_titulo on_principal right'))
self.log('Found section:', section_title)

View File

@ -11,5 +11,7 @@ class AdvancedUserRecipe1326135591(BasicNewsRecipe):
feeds = [(u'Pambianco', u'http://feeds.feedburner.com/pambianconews/YGXu')]
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '9, January 2011'

View File

@ -28,7 +28,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
('Eastern Arsenal', 'https://www.popsci.com/rss-eastern-arsenal.xml'),
]
pane_node_body = re.compile('pane-node-(?:\w+-){0,9}body')
pane_node_body = re.compile('pane-node-(?:\\w+-){0,9}body')
keep_only_tags = [
dict(attrs={'class': lambda x: x and frozenset('pane-node-header'.split()).issubset(frozenset(x.split()))}),

View File

@ -205,7 +205,7 @@ class Pocket(BasicNewsRecipe):
"""
try:
from calibre.ebooks.covers import calibre_cover2
title = self.title if isinstance(self.title, unicode) else \
title = self.title if isinstance(self.title, type(u'')) else \
self.title.decode('utf-8', 'replace')
date = strftime(self.timefmt)
time = strftime('[%I:%M %p]')

View File

@ -32,5 +32,5 @@ class RebelionRecipe (BasicNewsRecipe):
# See http://www.mobileread.com/forums/showthread.php?t=174501
def print_version(self, url):
id = re.compile('\d*$').search(url).group()
id = re.compile(r'\d*$').search(url).group()
return u'http://www.rebelion.org/noticia.php?id=%s' % id

View File

@ -107,7 +107,7 @@ class respektRecipe(BasicNewsRecipe):
self.browser.open('https://www.respekt.cz/?do=logout')
def preprocess_html(self,soup):
raw = u''.join(unicode(a) for a in soup.contents)
raw = u''.join(type(u'')(a) for a in soup.contents)
root = lxml.html.fromstring(raw)
# Fix Letem světem
if "Letem sv" in root.xpath("//title")[0].text:
@ -169,4 +169,4 @@ class respektRecipe(BasicNewsRecipe):
o.getparent().replace(o,e)
except:
pass
return(BeautifulSoup(lxml.etree.tostring(root,encoding=unicode)))
return(BeautifulSoup(lxml.etree.tostring(root,encoding='unicode')))

View File

@ -31,8 +31,8 @@ class RevistaMuyInteresante(BasicNewsRecipe):
for img_tag in soup.findAll('img'):
imagen = img_tag
new_tag = new_tag(soup, 'p')
img_tag.replaceWith(new_tag)
nt = new_tag(soup, 'p')
img_tag.replaceWith(nt)
div = soup.find(attrs={'class': 'article_category'})
div.insert(0, imagen)
break

View File

@ -497,7 +497,7 @@ class STHKRecipe(BasicNewsRecipe):
doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
fi.write(type(u'')(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')

View File

@ -59,9 +59,9 @@ class SolHaberRecipe(BasicNewsRecipe):
cover_margins = (20, 20, '#ffffff')
storybody_reg_exp = '^\s*(haber|kose)\s*$'
storybody_reg_exp = r'^\s*(haber|kose)\s*$'
comments_reg_exp = '^\s*makale-elestiri\s*$'
comments_reg_exp = r'^\s*makale-elestiri\s*$'
remove_tags = [
dict(name='div', attrs={'class': re.compile(comments_reg_exp, re.IGNORECASE)})]

View File

@ -14,7 +14,7 @@ class tanuki(BasicNewsRecipe):
autocleanup = True
extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
type(u'')(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
remove_empty_feeds = True
no_stylesheets = True
keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})] # noqa

View File

@ -99,7 +99,7 @@ class TheAge(BasicNewsRecipe):
# Collapse the paragraph by joining the non-tag contents
contents = [i for i in p.contents if isinstance(i, unicode)]
contents = [i for i in p.contents if isinstance(i, type(u''))]
if len(contents):
contents = ''.join(contents)

View File

@ -9,6 +9,7 @@ def classes(classes):
q = frozenset(classes.split(' '))
return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
from calibre.web.feeds.news import BasicNewsRecipe

View File

@ -108,7 +108,7 @@ class TimesOnline(BasicNewsRecipe):
return html.tostring(
html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False),
method='html',
encoding=unicode)
encoding='unicode')
def preprocess_html(self, soup):
for item in soup.findAll(style=True):

View File

@ -17,5 +17,6 @@ class AdvancedUserRecipe1334935485(BasicNewsRecipe):
language = 'it'
__author__ = 'faber1971'
__version__ = 'v1.0'
__date__ = '24, April 2012'

View File

@ -137,7 +137,7 @@ class ZAOBAO(BasicNewsRecipe):
# workaorund a strange problem: Somethimes the xml encoding is not
# apllied correctly by parse()
weired_encoding_detected = False
if not isinstance(feed.description, unicode) and self.encoding and feed.description:
if not isinstance(feed.description, type(u'')) and self.encoding and feed.description:
self.log(
_('Feed %s is not encoded correctly, manually replace it') % (feed.title))
feed.description = feed.description.decode(
@ -150,14 +150,14 @@ class ZAOBAO(BasicNewsRecipe):
weired_encoding_detected = True
for a, article in enumerate(feed):
if not isinstance(article.title, unicode) and self.encoding:
if not isinstance(article.title, type(u'')) and self.encoding:
article.title = article.title.decode(
self.encoding, 'replace')
if not isinstance(article.summary, unicode) and self.encoding and article.summary:
if not isinstance(article.summary, type(u'')) and self.encoding and article.summary:
article.summary = article.summary.decode(
self.encoding, 'replace')
article.text_summary = article.summary
if not isinstance(article.text_summary, unicode) and self.encoding and article.text_summary:
if not isinstance(article.text_summary, type(u'')) and self.encoding and article.text_summary:
article.text_summary = article.text_summary.decode(
self.encoding, 'replace')
article.summary = article.text_summary

View File

@ -77,12 +77,14 @@ class Check(Command):
def file_has_errors(self, f):
ext = os.path.splitext(f)[1]
if ext in {'.py', '.recipe'}:
p = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', f])
return p.wait() != 0
elif ext == '.pyj':
p1 = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', f])
p2 = subprocess.Popen(['flake8', '--filename', '*.py,*.recipe', f])
codes = p1.wait(), p2.wait()
return codes != (0, 0)
if ext == '.pyj':
p = subprocess.Popen(['rapydscript', 'lint', f])
return p.wait() != 0
elif ext == '.yaml':
if ext == '.yaml':
sys.path.insert(0, self.wn_path)
import whats_new
whats_new.render_changelog(self.j(self.d(self.SRC), 'Changelog.yaml'))
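
This hunk is the change the commit message refers to: .py and .recipe files are now linted with both flake8-python2 and the Python 3 flake8, and the check fails unless both exit cleanly. A standalone sketch of the same logic, assuming both executables are on PATH:

import subprocess

def file_is_clean(path):
    # Mirror the check above: run the Python 2 and Python 3 linters and
    # require a zero exit status from each.
    p1 = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', path])
    p2 = subprocess.Popen(['flake8', '--filename', '*.py,*.recipe', path])
    return (p1.wait(), p2.wait()) == (0, 0)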

View File

@ -16,7 +16,7 @@ from calibre.constants import ispy3
from calibre.customize import (Plugin, numeric_version, platform,
InvalidPlugin, PluginNotFound)
from polyglot.builtins import (itervalues, map, string_or_bytes,
unicode_type)
unicode_type, reload)
# PEP 302 based plugin loading mechanism, works around the bug in zipimport in
# python 2.x that prevents importing from zip files in locations whose paths

View File

@ -17,7 +17,7 @@ from polyglot.builtins import (iteritems, itervalues,
from calibre import isbytestring, force_unicode, prints, as_unicode
from calibre.constants import (iswindows, filesystem_encoding,
preferred_encoding)
preferred_encoding, ispy3)
from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
from calibre.db import SPOOL_SIZE
from calibre.db.schema_upgrades import SchemaUpgrade
@ -209,9 +209,14 @@ def Concatenate(sep=','):
ctxt.append(value)
def finalize(ctxt):
try:
if not ctxt:
return None
return sep.join(ctxt)
except Exception:
import traceback
traceback.print_exc()
raise
return ([], step, finalize)
@ -224,9 +229,14 @@ def SortedConcatenate(sep=','):
ctxt[ndx] = value
def finalize(ctxt):
try:
if len(ctxt) == 0:
return None
return sep.join(map(ctxt.get, sorted(ctxt)))
except Exception:
import traceback
traceback.print_exc()
raise
return ({}, step, finalize)
@ -238,7 +248,12 @@ def IdentifiersConcat():
ctxt.append(u'%s:%s'%(key, val))
def finalize(ctxt):
try:
return ','.join(ctxt)
except Exception:
import traceback
traceback.print_exc()
raise
return ([], step, finalize)
@ -251,6 +266,7 @@ def AumSortedConcatenate():
ctxt[ndx] = ':::'.join((author, sort, link))
def finalize(ctxt):
try:
keys = list(ctxt)
l = len(keys)
if l == 0:
@ -258,6 +274,10 @@ def AumSortedConcatenate():
if l == 1:
return ctxt[keys[0]]
return ':#:'.join([ctxt[v] for v in sorted(keys)])
except Exception:
import traceback
traceback.print_exc()
raise
return ({}, step, finalize)
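
The try/except blocks added to these SQLite aggregator finalize functions print a traceback before re-raising, because an exception raised inside an aggregate callback otherwise surfaces only as an opaque database error. A generic illustration of the same idea using the standard sqlite3 module (calibre registers its aggregators through its own database layer, so the registration call here is only illustrative):

import sqlite3
import traceback

class Concatenate(object):
    # Join all values in a column with a separator; log any failure in
    # finalize() instead of letting it vanish inside the sqlite machinery.
    def __init__(self):
        self.values = []

    def step(self, value):
        if value is not None:
            self.values.append(value)

    def finalize(self):
        try:
            if not self.values:
                return None
            return ','.join(self.values)
        except Exception:
            traceback.print_exc()
            raise

conn = sqlite3.connect(':memory:')
conn.create_aggregate('concat_vals', 1, Concatenate)
conn.execute('CREATE TABLE t(x TEXT)')
conn.executemany('INSERT INTO t VALUES (?)', [('a',), ('b',)])
print(conn.execute('SELECT concat_vals(x) FROM t').fetchone()[0])  # -> a,b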
@ -1724,8 +1744,13 @@ class DB(object):
[(book_id, fmt.upper()) for book_id in book_ids])
def set_conversion_options(self, options, fmt):
options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode_type) else data)))
for book_id, data in iteritems(options)]
def map_data(x):
x = x.encode('utf-8') if isinstance(x, unicode_type) else x
x = pickle_binary_string(x)
if not ispy3:
x = buffer(x) # noqa
return x
options = [(book_id, fmt.upper(), map_data(data)) for book_id, data in iteritems(options)]
self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)
def get_top_level_move_items(self, all_paths):
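
The new map_data helper wraps the pickled conversion options in buffer() only on Python 2, since buffer no longer exists on Python 3 and bytes can be bound to a BLOB column directly there. A rough standalone equivalent (treating pickle_binary_string as a thin wrapper over pickle, which is an assumption):

import pickle
import sys

is_py3 = sys.version_info[0] >= 3

def map_data(x):
    # Encode text to UTF-8, pickle it, and on Python 2 wrap the result in
    # buffer() so sqlite stores it as a BLOB; on Python 3 bytes are enough.
    if isinstance(x, type(u'')):
        x = x.encode('utf-8')
    x = pickle.dumps(x, 2)
    if not is_py3:
        x = buffer(x)  # noqa: F821 - py2-only builtin, never reached on py3
    return x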

View File

@ -237,20 +237,20 @@ class DebugRWLockWrapper(RWLockWrapper):
RWLockWrapper.__init__(self, *args, **kwargs)
def acquire(self):
print ('#' * 120, file=sys.stderr)
print ('acquire called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
print('#' * 120, file=sys.stderr)
print('acquire called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
traceback.print_stack()
RWLockWrapper.acquire(self)
print ('acquire done: thread id:', current_thread(), file=sys.stderr)
print ('_' * 120, file=sys.stderr)
print('acquire done: thread id:', current_thread(), file=sys.stderr)
print('_' * 120, file=sys.stderr)
def release(self, *args):
print ('*' * 120, file=sys.stderr)
print ('release called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
print('*' * 120, file=sys.stderr)
print('release called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
traceback.print_stack()
RWLockWrapper.release(self)
print ('release done: thread id:', current_thread(), 'is_shared:', self._shlock.is_shared, 'is_exclusive:', self._shlock.is_exclusive, file=sys.stderr)
print ('_' * 120, file=sys.stderr)
print('release done: thread id:', current_thread(), 'is_shared:', self._shlock.is_shared, 'is_exclusive:', self._shlock.is_exclusive, file=sys.stderr)
print('_' * 120, file=sys.stderr)
__enter__ = acquire
__exit__ = release

View File

@ -15,6 +15,7 @@ def find_tests():
base = os.path.dirname(os.path.abspath(__file__))
return find_tests_in_dir(base)
if __name__ == '__main__':
try:
import init_calibre # noqa

View File

@ -712,3 +712,14 @@ class ReadingTest(BaseTest):
cache.set_last_read_position(1, 'EPUB', 'user', 'device')
self.assertFalse(cache.get_last_read_positions(1, 'ePuB', 'user'))
# }}}
def test_storing_conversion_options(self): # {{{
cache = self.init_cache(self.library_path)
opts = {1: b'binary', 2: 'unicode'}
cache.set_conversion_options(opts, 'PIPE')
for book_id, val in iteritems(opts):
got = cache.conversion_options(book_id, 'PIPE')
if not isinstance(val, bytes):
val = val.encode('utf-8')
self.assertEqual(got, val)
# }}}

View File

@ -45,11 +45,11 @@ class HANLINV3(USBMS):
card = names.get('carda', None)
try:
main_num = int(re.findall('\d+', main)[0]) if main else None
main_num = int(re.findall(r'\d+', main)[0]) if main else None
except:
main_num = None
try:
card_num = int(re.findall('\d+', card)[0]) if card else None
card_num = int(re.findall(r'\d+', card)[0]) if card else None
except:
card_num = None

View File

@ -3081,7 +3081,6 @@ class KOBOTOUCH(KOBO):
update_values.append(newmi.isbn)
set_clause += ', ISBN = ? '
library_language = normalize_languages(kobo_metadata.languages, newmi.languages)
library_language = library_language[0] if library_language is not None and len(library_language) > 0 else None
if not (library_language == kobo_metadata.language):

View File

@ -196,8 +196,8 @@ class MTP_DEVICE(MTPDeviceBase):
p = plugins['libmtp']
self.libmtp = p[0]
if self.libmtp is None:
print ('Failed to load libmtp, MTP device detection disabled')
print (p[1])
print('Failed to load libmtp, MTP device detection disabled')
print(p[1])
else:
self.known_devices = frozenset(self.libmtp.known_devices())

View File

@ -143,7 +143,7 @@ class PRST1(USBMS):
main, carda, cardb = self.find_device_nodes(detected_device=dev)
if main is None and carda is None and cardb is None:
if debug:
print ('\tPRS-T1: Appears to be in non data mode'
print('\tPRS-T1: Appears to be in non data mode'
' or was ejected, ignoring')
return False
return True

View File

@ -701,7 +701,7 @@ class Device(DeviceConfig, DevicePlugin):
except dbus.exceptions.DBusException as e:
print(e)
continue
except dbus.exceptions.DBusException as e:
except dbus.exceptions.DBusException:
continue
vols.sort(key=lambda x: x['node'])

View File

@ -773,7 +773,7 @@ def get_drive_letters_for_device_single(usbdev, storage_number_map, debug=False)
if debug:
try:
devid = get_device_id(devinfo.DevInst)[0]
except Exception as err:
except Exception:
devid = 'Unknown'
try:
storage_number = get_storage_number(devpath)

View File

@ -13,5 +13,6 @@ import sys
def main(args=sys.argv):
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -23,6 +23,7 @@ class ConversionUserFeedBack(Exception):
self.title, self.msg, self.det_msg = title, msg, det_msg
self.level = level
# Ensure exception uses fully qualified name as this is used to detect it in
# the GUI.
ConversionUserFeedBack.__name__ = str('calibre.ebooks.conversion.ConversionUserFeedBack')

View File

@ -46,10 +46,10 @@ class DjvuChunk(object):
# self.headersize += 4
self.datastart = pos
if verbose > 0:
print ('found', self.type, self.subtype, pos, self.size)
print('found', self.type, self.subtype, pos, self.size)
if self.type in b'FORM'.split():
if verbose > 0:
print ('processing substuff %d %d (%x)' % (pos, self.dataend,
print('processing substuff %d %d (%x)' % (pos, self.dataend,
self.dataend))
numchunks = 0
while pos < self.dataend:
@ -58,11 +58,11 @@ class DjvuChunk(object):
self._subchunks.append(x)
newpos = pos + x.size + x.headersize + (1 if (x.size % 2) else 0)
if verbose > 0:
print ('newpos %d %d (%x, %x) %d' % (newpos, self.dataend,
print('newpos %d %d (%x, %x) %d' % (newpos, self.dataend,
newpos, self.dataend, x.headersize))
pos = newpos
if verbose > 0:
print (' end of chunk %d (%x)' % (pos, pos))
print(' end of chunk %d (%x)' % (pos, pos))
def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100):
if out:
@ -89,7 +89,7 @@ class DjvuChunk(object):
l <<= 8
l += ord(x)
if verbose > 0 and out:
print (l, file=out)
print(l, file=out)
txtout.write(res[3:3+l])
txtout.write(b'\037')
if txtout and self.type == b'TXTa':
@ -99,7 +99,7 @@ class DjvuChunk(object):
l <<= 8
l += ord(x)
if verbose > 0 and out:
print (l, file=out)
print(l, file=out)
txtout.write(res[3:3+l])
txtout.write(b'\037')
if indent >= maxlevel:
@ -126,7 +126,8 @@ class DJVUFile(object):
def main():
f = DJVUFile(open(sys.argv[-1], 'rb'))
print (f.get_text(sys.stdout))
print(f.get_text(sys.stdout))
if __name__ == '__main__':
main()

View File

@ -735,9 +735,9 @@ class BZZDecoder():
def main():
import sys
from calibre.constants import plugins
raw = file(sys.argv[1], "rb").read()
raw = open(sys.argv[1], "rb").read()
d = plugins['bzzdec'][0]
print (d.decompress(raw))
print(d.decompress(raw))
if __name__ == "__main__":

View File

@ -197,7 +197,7 @@ def cfi_sort_key(cfi, only_path=True):
return ()
if not pcfi:
import sys
print ('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
print('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
return ()
steps = get_steps(pcfi)
step_nums = tuple(s.get('num', 0) for s in steps)
@ -217,7 +217,7 @@ def decode_cfi(root, cfi):
return
if not pcfi:
import sys
print ('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
print('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
return
steps = get_steps(pcfi)
ans = root

View File

@ -380,7 +380,7 @@ ATTRS35 = {
0x804a: "align",
0x8bbd: "palette",
0x8bbe: "pluginspage",
0x8bbf: "codebase",
# 0x8bbf: "codebase",
0x8bbf: "src",
0x8bc1: "units",
0x8bc2: "type",
@ -640,7 +640,7 @@ ATTRS66 = {
0x03f5: "n",
}
ATTRS71 = {
0x8000: "border",
# 0x8000: "border",
0x8000: "usemap",
0x8001: "name",
0x8006: "width",
@ -682,8 +682,8 @@ ATTRS74 = {
0x9399: "clear",
}
ATTRS75 = {
0x8000: "name",
0x8000: "value",
# 0x8000: "name",
# 0x8000: "value",
0x8000: "type",
}
ATTRS76 = {

View File

@ -96,8 +96,8 @@ NAME_MAP = {
u'yellowgreen': u'#9ACD32'
}
hex_pat = re.compile('#(\d{2})(\d{2})(\d{2})')
rgb_pat = re.compile('rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
hex_pat = re.compile(r'#(\d{2})(\d{2})(\d{2})')
rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
def lrs_color(html_color):
@ -111,5 +111,3 @@ def lrs_color(html_color):
if hcol in NAME_MAP:
return NAME_MAP[hcol].replace('#', '0x00')
return '0x00000000'

View File

@ -490,7 +490,7 @@ class LrfFileStream(LrfStreamBase):
def __init__(self, streamFlags, filename):
LrfStreamBase.__init__(self, streamFlags)
f = file(filename, "rb")
f = open(filename, "rb")
self.streamData = f.read()
f.close()
@ -686,7 +686,7 @@ class LrfWriter(object):
self.tocObjId = obj.objId
def setThumbnailFile(self, filename, encoding=None):
f = file(filename, "rb")
f = open(filename, "rb")
self.thumbnailData = f.read()
f.close()

View File

@ -2268,7 +2268,7 @@ class ImageStream(LrsObject, LrsContainer):
self.encoding = encoding
def toLrf(self, lrfWriter):
imageFile = file(self.filename, "rb")
imageFile = open(self.filename, "rb")
imageData = imageFile.read()
imageFile.close()

View File

@ -77,7 +77,8 @@ def set_metadata(stream, mi):
stream.seek(0)
safe_replace(stream, dp_name, BytesIO(xml2str(cp)), extra_replacements=replacements)
if __name__ == '__main__':
import sys
with open(sys.argv[-1], 'rb') as stream:
print (get_metadata(stream))
print(get_metadata(stream))

View File

@ -376,7 +376,7 @@ class TestOPF3(unittest.TestCase):
&quot;value&quot;, &quot;#value#&quot;:
&quot;&lt;div&gt;&lt;b&gt;&lt;i&gt;Testing&lt;/i&gt;&lt;/b&gt; extra
&lt;font
color=\&quot;#aa0000\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;,
color=\\&quot;#aa0000\\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;,
&quot;is_custom&quot;: true, &quot;label&quot;: &quot;commetns&quot;,
&quot;table&quot;: &quot;custom_column_13&quot;,
&quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>

View File

@ -353,7 +353,7 @@ class Worker(Thread): # Get details {{{
with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4())) + '_',
suffix='.html', delete=False) as f:
f.write(raw)
print ('Downloaded html for', asin, 'saved in', f.name)
print('Downloaded html for', asin, 'saved in', f.name)
try:
title = self.parse_title(root)
@ -1256,7 +1256,7 @@ class Amazon(Source):
with tempfile.NamedTemporaryFile(prefix='amazon_results_',
suffix='.html', delete=False) as f:
f.write(raw.encode('utf-8'))
print ('Downloaded html for results page saved in', f.name)
print('Downloaded html for results page saved in', f.name)
matches = []
found = '<title>404 - ' not in raw

View File

@ -82,7 +82,7 @@ def main(args=sys.argv):
allowed_plugins=allowed_plugins or None)
if not results:
print (log, file=sys.stderr)
print(log, file=sys.stderr)
prints('No results found', file=sys.stderr)
raise SystemExit(1)
result = results[0]
@ -103,9 +103,9 @@ def main(args=sys.argv):
unicode_type(result).encode('utf-8'))
if opts.verbose:
print (log, file=sys.stderr)
print(log, file=sys.stderr)
print (result)
print(result)
if not opts.opf and opts.cover:
prints('Cover :', cf)

View File

@ -8,8 +8,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
class MobiError(Exception):
pass
# That might be a bit small on the PW, but Amazon/KG 2.5 still uses these values, even when delivered to a PW
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)

View File

@ -276,7 +276,7 @@ class Tag(object): # {{{
if tag_type in self.TAG_MAP:
self.attr, self.desc = self.TAG_MAP[tag_type]
else:
print ('Unknown tag value: %%s'%tag_type)
print('Unknown tag value: %%s'%tag_type)
self.desc = '??Unknown (tag value: %d)'%tag_type
self.attr = 'unknown'
@ -461,7 +461,7 @@ class CNCX(object): # {{{
except:
byts = raw[pos:]
r = format_bytes(byts)
print ('CNCX entry at offset %d has unknown format %s'%(
print('CNCX entry at offset %d has unknown format %s'%(
pos+record_offset, r))
self.records[pos+record_offset] = r
pos = len(raw)
@ -629,7 +629,7 @@ class TBSIndexing(object): # {{{
import traceback
traceback.print_exc()
a = []
print ('Failed to decode TBS bytes for record: %d'%r.idx)
print('Failed to decode TBS bytes for record: %d'%r.idx)
ans += a
if byts:
sbyts = tuple(hex(b)[2:] for b in byts)
@ -789,14 +789,14 @@ class MOBIFile(object): # {{{
self.index_record.indices, self.mobi_header.type_raw)
def print_header(self, f=sys.stdout):
print (str(self.palmdb).encode('utf-8'), file=f)
print (file=f)
print ('Record headers:', file=f)
print(str(self.palmdb).encode('utf-8'), file=f)
print(file=f)
print('Record headers:', file=f)
for i, r in enumerate(self.records):
print ('%6d. %s'%(i, r.header), file=f)
print('%6d. %s'%(i, r.header), file=f)
print (file=f)
print (str(self.mobi_header).encode('utf-8'), file=f)
print(file=f)
print(str(self.mobi_header).encode('utf-8'), file=f)
# }}}

View File

@ -23,7 +23,7 @@ main_language = {
2 : "BULGARIAN",
3 : "CATALAN",
4 : "CHINESE",
26 : "CROATIAN",
# 26 : "CROATIAN",
5 : "CZECH",
6 : "DANISH",
19 : "DUTCH",
@ -91,55 +91,55 @@ main_language = {
sub_language = {
0 : "NEUTRAL",
1 : "ARABIC_SAUDI_ARABIA",
2 : "ARABIC_IRAQ",
3 : "ARABIC_EGYPT",
4 : "ARABIC_LIBYA",
5 : "ARABIC_ALGERIA",
6 : "ARABIC_MOROCCO",
7 : "ARABIC_TUNISIA",
8 : "ARABIC_OMAN",
9 : "ARABIC_YEMEN",
10 : "ARABIC_SYRIA",
11 : "ARABIC_JORDAN",
12 : "ARABIC_LEBANON",
13 : "ARABIC_KUWAIT",
14 : "ARABIC_UAE",
15 : "ARABIC_BAHRAIN",
16 : "ARABIC_QATAR",
1 : "AZERI_LATIN",
2 : "AZERI_CYRILLIC",
1 : "CHINESE_TRADITIONAL",
2 : "CHINESE_SIMPLIFIED",
3 : "CHINESE_HONGKONG",
4 : "CHINESE_SINGAPORE",
1 : "DUTCH",
2 : "DUTCH_BELGIAN",
1 : "FRENCH",
2 : "FRENCH_BELGIAN",
3 : "FRENCH_CANADIAN",
4 : "FRENCH_SWISS",
5 : "FRENCH_LUXEMBOURG",
6 : "FRENCH_MONACO",
1 : "GERMAN",
2 : "GERMAN_SWISS",
3 : "GERMAN_AUSTRIAN",
4 : "GERMAN_LUXEMBOURG",
5 : "GERMAN_LIECHTENSTEIN",
1 : "ITALIAN",
2 : "ITALIAN_SWISS",
1 : "KOREAN",
1 : "LITHUANIAN",
1 : "MALAY_MALAYSIA",
2 : "MALAY_BRUNEI_DARUSSALAM",
1 : "NORWEGIAN_BOKMAL",
2 : "NORWEGIAN_NYNORSK",
2 : "PORTUGUESE",
1 : "PORTUGUESE_BRAZILIAN",
2 : "SERBIAN_LATIN",
# 1 : "ARABIC_SAUDI_ARABIA",
# 2 : "ARABIC_IRAQ",
# 3 : "ARABIC_EGYPT",
# 4 : "ARABIC_LIBYA",
# 5 : "ARABIC_ALGERIA",
# 6 : "ARABIC_MOROCCO",
# 7 : "ARABIC_TUNISIA",
# 8 : "ARABIC_OMAN",
# 9 : "ARABIC_YEMEN",
# 10 : "ARABIC_SYRIA",
# 11 : "ARABIC_JORDAN",
# 12 : "ARABIC_LEBANON",
# 13 : "ARABIC_KUWAIT",
# 14 : "ARABIC_UAE",
# 15 : "ARABIC_BAHRAIN",
# 16 : "ARABIC_QATAR",
# 1 : "AZERI_LATIN",
# 2 : "AZERI_CYRILLIC",
# 1 : "CHINESE_TRADITIONAL",
# 2 : "CHINESE_SIMPLIFIED",
# 3 : "CHINESE_HONGKONG",
# 4 : "CHINESE_SINGAPORE",
# 1 : "DUTCH",
# 2 : "DUTCH_BELGIAN",
# 1 : "FRENCH",
# 2 : "FRENCH_BELGIAN",
# 3 : "FRENCH_CANADIAN",
# 4 : "FRENCH_SWISS",
# 5 : "FRENCH_LUXEMBOURG",
# 6 : "FRENCH_MONACO",
# 1 : "GERMAN",
# 2 : "GERMAN_SWISS",
# 3 : "GERMAN_AUSTRIAN",
# 4 : "GERMAN_LUXEMBOURG",
# 5 : "GERMAN_LIECHTENSTEIN",
# 1 : "ITALIAN",
# 2 : "ITALIAN_SWISS",
# 1 : "KOREAN",
# 1 : "LITHUANIAN",
# 1 : "MALAY_MALAYSIA",
# 2 : "MALAY_BRUNEI_DARUSSALAM",
# 1 : "NORWEGIAN_BOKMAL",
# 2 : "NORWEGIAN_NYNORSK",
# 2 : "PORTUGUESE",
# 1 : "PORTUGUESE_BRAZILIAN",
# 2 : "SERBIAN_LATIN",
3 : "SERBIAN_CYRILLIC",
1 : "SPANISH",
2 : "SPANISH_MEXICAN",
# 1 : "SPANISH",
# 2 : "SPANISH_MEXICAN",
4 : "SPANISH_GUATEMALA",
5 : "SPANISH_COSTA_RICA",
6 : "SPANISH_PANAMA",
@ -157,8 +157,8 @@ sub_language = {
18 : "SPANISH_HONDURAS",
19 : "SPANISH_NICARAGUA",
20 : "SPANISH_PUERTO_RICO",
1 : "SWEDISH",
2 : "SWEDISH_FINLAND",
# 1 : "SWEDISH",
# 2 : "SWEDISH_FINLAND",
1 : "UZBEK_LATIN",
2 : "UZBEK_CYRILLIC",
}
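
The commented-out entries in the ATTRS tables above and in this sub_language table remove duplicate dictionary keys, which pyflakes (and therefore the new Python 3 flake8 pass) reports. Commenting the earlier duplicates does not change behaviour, because in a dict literal the last value for a repeated key is the one that survives; a tiny illustration:

d = {0x8000: "border", 0x8000: "usemap"}  # repeated key: only the last value is kept
print(d[0x8000])   # -> usemap
print(len(d))      # -> 1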

View File

@ -15,9 +15,8 @@ from xml.sax.saxutils import escape
from lxml import etree
from calibre.ebooks.oeb.base import XHTML_NS, extract
from calibre.constants import ispy3
from calibre.ebooks.mobi.utils import to_base
from polyglot.builtins import iteritems, unicode_type
from polyglot.builtins import iteritems, unicode_type, codepoint_to_chr as mychr
CHUNK_SIZE = 8192
@ -61,9 +60,6 @@ def node_from_path(root, path):
return parent
mychr = chr if ispy3 else unichr
def tostring(raw, **kwargs):
''' lxml *sometimes* represents non-ascii characters as hex entities in
attribute values. I can't figure out exactly what circumstances cause it.

View File

@ -22,6 +22,6 @@ def run_devel_server():
os.chdir(os.path.dirname(os.path.abspath(__file__)))
serve(resources={'cfi.coffee':'../cfi.coffee', '/':'index.html'})
if __name__ == '__main__':
run_devel_server()

View File

@ -95,5 +95,5 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
if __name__ == '__main__':
from lxml import etree
root = parse_html5('\n<html><head><title>a\n</title><p b=1 c=2 a=0>&nbsp;\n<b>b<svg ass="wipe" viewbox="0">', discard_namespaces=False)
print (etree.tostring(root, encoding='utf-8'))
print(etree.tostring(root, encoding='utf-8'))
print()

View File

@ -14,6 +14,7 @@ def find_tests():
base = os.path.dirname(os.path.abspath(__file__))
return find_tests_in_dir(base)
if __name__ == '__main__':
try:
import init_calibre # noqa

View File

@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
class PDBError(Exception):
pass
FORMAT_READERS = None
@ -31,6 +32,7 @@ def _import_readers():
'BOOKMTIU': haodoo_reader,
}
ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
FORMAT_WRITERS = None
@ -47,6 +49,7 @@ def _import_writers():
'ereader': ereader_writer,
}
IDENTITY_TO_NAME = {
'PNPdPPrs': 'eReader',
'PNRdPPrs': 'eReader',
@ -100,4 +103,3 @@ def get_writer(extension):
if FORMAT_WRITERS is None:
_import_writers()
return FORMAT_WRITERS.get(extension, None)

View File

@ -19,6 +19,7 @@ UNITS = {
def unit(unit):
return UNITS.get(unit, QPrinter.Inch)
PAPER_SIZES = {
'a0' : QPrinter.A0, # 841 x 1189 mm
'a1' : QPrinter.A1, # 594 x 841 mm
@ -57,6 +58,7 @@ PAPER_SIZES = {
def paper_size(size):
return PAPER_SIZES.get(size, QPrinter.Letter)
ORIENTATIONS = {
'portrait' : QPrinter.Portrait,
'landscape' : QPrinter.Landscape,

View File

@ -28,6 +28,7 @@ def normalize_spaces(s):
characters with a single space"""
return ' '.join(s.split())
html_cleaner = Cleaner(scripts=True, javascript=True, comments=True,
style=True, links=True, meta=False, add_nofollow=False,
page_structure=False, processing_instructions=True, embedded=False,

View File

@ -4,6 +4,7 @@ def save_to_file(text, filename):
f.write(text.encode('utf-8'))
f.close()
uids = {}

View File

@ -504,7 +504,7 @@ def main():
enc = sys.__stdout__.encoding or 'utf-8'
if options.verbose:
default_log.filter_level = default_log.DEBUG
print (Document(raw, default_log,
print(Document(raw, default_log,
debug=options.verbose,
keep_elements=options.keep_elements).summary().encode(enc,
'replace'))

View File

@ -367,7 +367,7 @@ class RtfTokenizer():
if __name__ == "__main__":
import sys
if len(sys.argv) < 2:
print ("Usage %prog rtfFileToConvert")
print("Usage %prog rtfFileToConvert")
sys.exit()
f = open(sys.argv[1], 'rb')
data = f.read()
@ -381,5 +381,3 @@ if __name__ == "__main__":
f = open(sys.argv[1], 'w')
f.write(data)
f.close()

View File

@ -59,4 +59,3 @@ class GetCharMap:
msg = 'no map found\nmap is "%s"\n'%(map,)
raise self.__bug_handler(msg)
return map_dict

View File

@ -31,11 +31,11 @@ class Paragraphs:
In order to make paragraphs out of this limited info, the parser starts in the
body of the documents and assumes it is not in a paragraph. It looks for clues
to begin a paragraph. Text starts a paragraph; so does an inline field or
list-text. If an end of paragraph marker (\par) is found, then this indicates
list-text. If an end of paragraph marker (\\par) is found, then this indicates
a blank paragraph.
Once a paragraph is found, the state changes to 'paragraph.' In this state,
clues are looked to for the end of a paragraph. The end of a paragraph marker
(\par) marks the end of a paragraph. So does the end of a footnote or heading;
(\\par) marks the end of a paragraph. So does the end of a footnote or heading;
a paragraph definition; the end of a field-block; and the beginning of a
section. (How about the end of a section or the end of a field-block?)
"""
@ -224,7 +224,7 @@ class Paragraphs:
Returns:
nothing
Logic:
if a \pard occurs in a paragraph, I want to ignore it. (I believe)
if a \\pard occurs in a paragraph, I want to ignore it. (I believe)
"""
self.__write_obj.write('mi<mk<bogus-pard\n')

View File

@ -272,17 +272,17 @@ class MyApplication(Gtk.Application):
seen = seen or set()
seen.add(group)
print = self.print
print ('\nMenu description (Group %d)' % group)
print('\nMenu description (Group %d)' % group)
for item in bus.call_blocking(self.bus_name, self.object_path, 'org.gtk.Menus', 'Start', 'au', ([group],)):
print ('Subscription group:', item[0])
print ('Menu number:', item[1])
print('Subscription group:', item[0])
print('Menu number:', item[1])
for menu_item in item[2]:
menu_item = {unicode_type(k):convert(v) for k, v in iteritems(menu_item)}
if ':submenu' in menu_item:
groups.add(menu_item[':submenu'][0])
if ':section' in menu_item:
groups.add(menu_item[':section'][0])
print (pformat(menu_item))
print(pformat(menu_item))
for other_group in sorted(groups - seen):
self.print_menu_start(bus, other_group, seen)
@ -303,8 +303,8 @@ class MyApplication(Gtk.Application):
for name in sorted(adata):
data = adata[name]
d[name] = {'enabled':convert(data[0]), 'param type': convert(data[1]), 'state':convert(data[2])}
print ('Name:', name)
print (pformat(d[name]))
print('Name:', name)
print(pformat(d[name]))
def do_startup(self):
Gtk.Application.do_startup(self)

Some files were not shown because too many files have changed in this diff.