Also check python files using python 3 flake8

Kovid Goyal 2019-04-13 07:49:43 +05:30
parent 623f6623b5
commit 13961abd16
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
154 changed files with 409 additions and 308 deletions
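Note: most of the changes below are mechanical py2/py3 compatibility fixes so the tree passes both flake8-python2 and flake8: raw-string regex literals, print() as a function, open() instead of the py2-only file(), and type(u'') in place of the removed unicode builtin. A minimal sketch of that last idiom (the helper name is illustrative, not from this commit):

# type(u'') evaluates to unicode on Python 2 and str on Python 3, so
# isinstance() checks and coercions behave identically under both linters.
text_type = type(u'')

def coerce_text(x, encoding='utf-8'):
    # Hypothetical helper, not part of the commit: decode bytes,
    # otherwise convert via the portable text type.
    if isinstance(x, bytes):
        return x.decode(encoding, 'replace')
    return text_type(x)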

View File

@@ -1,6 +1,6 @@
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
 __license__ = 'GPL v3'
 __copyright__ = '2014, spswerling'
 '''
@@ -162,7 +162,7 @@ class AlMonitor(BasicNewsRecipe):
     def scrape_article_date(self, soup):
         for span in soup.findAll('span'):
             txt = self.text(span)
-            rgx = re.compile(unicode(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*'))
+            rgx = re.compile(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*')
             hit = rgx.match(txt)
             if hit:
                 return self.date_from_string(txt)

View File

@@ -126,7 +126,7 @@ class Ambito_Financiero(BasicNewsRecipe):
             cfind = smallsoup.find('div', id="contenido_data")
             if cfind:
                 p.append(cfind)
-            return unicode(soup)
+            return type(u'')(soup)
         return raw_html

     def cleanup(self):
def cleanup(self): def cleanup(self):

View File

@@ -34,7 +34,7 @@ class AmericanThinker(BasicNewsRecipe):
             namespaceHTMLElements=False)
         for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''):  # noqa
             x.getparent().remove(x)
-        return etree.tostring(root, encoding=unicode)
+        return etree.tostring(root, encoding='unicode')

    feeds = [(u'http://feeds.feedburner.com/americanthinker'),
             (u'http://feeds.feedburner.com/AmericanThinkerBlog')

View File

@@ -161,7 +161,7 @@ class AppleDaily(BasicNewsRecipe):
             article_titles.append(force_unicode(a.title, 'utf-8'))
         mi.comments = self.description
-        if not isinstance(mi.comments, unicode):
+        if not isinstance(mi.comments, type(u'')):
             mi.comments = mi.comments.decode('utf-8', 'replace')
         mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
                         '\n\n'.join(article_titles))
@@ -272,7 +272,7 @@ class AppleDaily(BasicNewsRecipe):
             elem = BeautifulSoup(translatedTempl).find('div')
             body.insert(len(body.contents), elem)
         with open(last, 'wb') as fi:
-            fi.write(unicode(soup).encode('utf-8'))
+            fi.write(type(u'')(soup).encode('utf-8'))
         if len(feeds) == 0:
             raise Exception('All feeds are empty, aborting.')

View File

@@ -104,7 +104,7 @@ class AppledailyTW(BasicNewsRecipe):
     ]

     def preprocess_raw_html(self, raw_html, url):
-        raw_html = re.sub(unicode(r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
+        raw_html = re.sub((r'<a href=".*?<br><br>.*?<\/a>'), '', raw_html)
         raw_html = re.sub(
-            unicode(r'<title>(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html)
+            (r'<title>(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html)
         return raw_html

View File

@@ -85,7 +85,7 @@ class BerlinPolicyJournal(BasicNewsRecipe):
                     div.find('h3', {'class': 'entry-title'}).a)
                 article_url = div.find(
                     'h3', {'class': 'entry-title'}).a['href']
-                article_date = unicode(time.strftime(
+                article_date = type(u'')(time.strftime(
                     ' [%a, %d %b %H:%M]', timestamp))
                 article_desc = self.tag_to_string(
                     div.find('div', {'class': 'i-summary'}).p)

View File

@@ -47,7 +47,7 @@ class BigOven(BasicNewsRecipe):
     preprocess_regexps = [
         (re.compile(r'Want detailed nutrition information?', re.DOTALL), lambda match: ''),
-        (re.compile('\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
+        (re.compile(r'\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
     ]

     def preprocess_html(self, soup):

View File

@@ -60,7 +60,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
             'http://images.icnetwork.co.uk/upl/birm')})
         cov = str(cov)
         cov2 = re.findall(
-            'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
+            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
         cov = str(cov2)
         cov = cov[2:len(cov) - 2]

View File

@@ -13,6 +13,7 @@ class AdvancedUserRecipe1331729727(BasicNewsRecipe):
     feeds = [(u'Camera di Commercio di Bari',
               u'http://feed43.com/4715147488845101.xml')]
+
 __license__ = 'GPL v3'
 __copyright__ = '2012, faber1971'
 __version__ = 'v1.00'

View File

@@ -22,7 +22,7 @@ class cdrinfo(BasicNewsRecipe):
     remove_empty_feeds = True
     remove_javascript = True
     remove_attributes = ['style', 'onmouseover']
-    preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: ''),
+    preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\\.gravatar\\.com</a>\\.</p>', re.DOTALL), lambda match: ''),
                           (re.compile(u'<p[^>]*?>.{,2}</p>', re.DOTALL), lambda match: '')]
     ignore_duplicate_articles = {'title', 'url'}

View File

@@ -16,11 +16,11 @@ class CNetJapan(BasicNewsRecipe):
     remove_javascript = True

     preprocess_regexps = [
-        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
+        (re.compile(type(u'')(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
         lambda match: '</body>'),
         (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
         lambda match: '</body>'),
-        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
+        (re.compile(type(u'')(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
         lambda match: '<!-- removed -->'),
     ]

View File

@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import re

 from calibre.web.feeds.news import BasicNewsRecipe
@@ -14,11 +15,11 @@ class CNetJapanDigital(BasicNewsRecipe):
     remove_javascript = True

     preprocess_regexps = [
-        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
+        (re.compile((r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
         lambda match: '</body>'),
         (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
         lambda match: '</body>'),
-        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
+        (re.compile((r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
         lambda match: '<!-- removed -->'),
     ]

View File

@@ -1,3 +1,4 @@
+from __future__ import unicode_literals
 import re

 from calibre.web.feeds.news import BasicNewsRecipe
@@ -14,11 +15,11 @@ class CNetJapanRelease(BasicNewsRecipe):
     remove_javascript = True

     preprocess_regexps = [
-        (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
+        (re.compile((r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE),
         lambda match: '</body>'),
         (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
         lambda match: '</body>'),
-        (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
+        (re.compile((r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE),
         lambda match: '<!-- removed -->'),
     ]

View File

@@ -82,7 +82,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
         cov = soup.find(attrs={'id': 'large'})
         cov = str(cov)
         cov2 = re.findall(
-            'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
+            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov)
         cov2 = str(cov2)
         cov2 = cov2[2:len(cov2) - 2]
         # cov2 now is pic url, now go back to original function

View File

@@ -16,7 +16,7 @@ class AdvancedUserRecipe1361743898(BasicNewsRecipe):
         articles = []
         feeds = []
         soup = self.index_to_soup("http://www.democracyjournal.org")
-        for x in soup.findAll(href=re.compile("http://www\.democracyjournal\.org/\d*/.*php$")):
+        for x in soup.findAll(href=re.compile(r"http://www\.democracyjournal\.org/\d*/.*php$")):
             url = x.get('href')
             title = self.tag_to_string(x)
             articles.append({'title': title, 'url': url,

View File

@@ -69,6 +69,6 @@ class AdvancedUserRecipe1297291961(BasicNewsRecipe):
     ]

     def print_version(self, url):
-        p = re.compile('(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')
+        p = re.compile(r'(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*')
         m = p.search(url)
         return url.replace(m.group(), '&template=printart')

View File

@@ -19,7 +19,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
     max_articles_per_feed = 100
     remove_attrs = ['style', 'width', 'height']
     preprocess_regexps = [(re.compile(
-        unicode(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')]
+        type(u'')(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')]
     keep_only_tags = [dict(name='h1'), dict(
         attrs={'class': ['entry single']}), dict(id='phContent_divArticle')]
     remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')]  # noqa

View File

@@ -92,7 +92,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
         if raw:
             return _raw
-        if not isinstance(_raw, unicode) and self.encoding:
+        if not isinstance(_raw, type(u'')) and self.encoding:
             if callable(self.encoding):
                 _raw = self.encoding(_raw)
             else:
@@ -101,7 +101,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe):
         from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode
         from calibre.utils.cleantext import clean_xml_chars
-        if isinstance(_raw, unicode):
+        if isinstance(_raw, type(u'')):
             _raw = strip_encoding_declarations(_raw)
         else:
             _raw = xml_to_unicode(

View File

@@ -26,7 +26,7 @@ class EcoGeek(BasicNewsRecipe):
         for i, article in enumerate(soup.findAll('div', attrs={'class': 'article'})):
             fname = os.path.join(tdir, '%d.html' % i)
             with open(fname, 'wb') as f:
-                f.write(unicode(article).encode('utf-8'))
+                f.write(type(u'')(article).encode('utf-8'))
             articles.append({
                 'title': self.tag_to_string(article.find('h2')),
                 'url': 'file://' + fname.replace(os.sep, '/'),

View File

@@ -154,7 +154,7 @@ class Economist(BasicNewsRecipe):
             p.remove(noscript[0])
         for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'):
             x.getparent().remove(x)
-        raw = etree.tostring(root, encoding=unicode)
+        raw = etree.tostring(root, encoding='unicode')
         return raw

     def populate_article_metadata(self, article, soup, first):
@@ -165,7 +165,7 @@ class Economist(BasicNewsRecipe):
         if el is not None and el.contents:
             for descendant in el.contents:
                 if isinstance(descendant, NavigableString):
-                    result.append(unicode(descendant))
+                    result.append(type(u'')(descendant))
             article.summary = u'. '.join(result) + u'.'
             article.text_summary = clean_ascii_chars(article.summary)

View File

@@ -154,7 +154,7 @@ class Economist(BasicNewsRecipe):
             p.remove(noscript[0])
         for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'):
             x.getparent().remove(x)
-        raw = etree.tostring(root, encoding=unicode)
+        raw = etree.tostring(root, encoding='unicode')
         return raw

     def populate_article_metadata(self, article, soup, first):
@@ -165,7 +165,7 @@ class Economist(BasicNewsRecipe):
         if el is not None and el.contents:
             for descendant in el.contents:
                 if isinstance(descendant, NavigableString):
-                    result.append(unicode(descendant))
+                    result.append(type(u'')(descendant))
             article.summary = u'. '.join(result) + u'.'
             article.text_summary = clean_ascii_chars(article.summary)

View File

@@ -23,7 +23,7 @@ class FilmWebPl(BasicNewsRecipe):
         'ul.inline {padding:0px;} .vertical-align {display: inline-block;}')
     preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''),  # fix malformed HTML with 2 body tags...
                           (re.compile(u'(?:<sup>)?\\(kliknij\\,\\ aby powiększyć\\)(?:</sup>)?', re.IGNORECASE), lambda m: ''),
-                          (re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
+                          (re.compile(type(u'')(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />')
                           ]
     remove_tags = [dict(attrs={'class': ['infoParent', 'likeBar',
                                          'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})]

View File

@@ -33,7 +33,7 @@ class FirstThings(BasicNewsRecipe):
     '''

     def preprocess_raw_html(self, raw, url):
-        return html.tostring(html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', encoding=unicode)
+        return html.tostring(html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', encoding='unicode')

     def parse_index(self):
         soup = self.index_to_soup(self.INDEX)

View File

@@ -32,7 +32,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
     # rules for wyborcza.biz
     preprocess_regexps.append((re.compile(
-        u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\.?<br>', re.DOTALL), lambda m: ''))
+        u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\\.?<br>', re.DOTALL), lambda m: ''))

     feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'),
              (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'),

View File

@@ -11,7 +11,7 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
     __author__ = 'Anonymous'
     language = 'en_GB'
     remove_tags = [
-        dict(name='div', attrs={'class': 'articles_footer', 'class': 'printoptions'})]
+        dict(name='div', attrs={'class': ['articles_footer', 'printoptions']})]

     def print_version(self, url):
         return url + '/print/1'

View File

@@ -49,9 +49,9 @@ def solve_captcha(captcha):
     # Parse into parts
     pattern = re.compile(
         u'(?P<first_component>[0-9]+)?'
-        u'\s*(?P<operator>[+×])\s*'
+        u'\\s*(?P<operator>[+×])\\s*'
         u'(?P<second_component>[0-9]+)'
-        u'\s*(=)\s*'
+        u'\\s*(=)\\s*'
         u'(?P<result>[0-9]+)?', re.UNICODE)
     calculationParts = re.search(pattern, numeric_problem)
@@ -230,7 +230,7 @@ class Granta(BasicNewsRecipe):
         if image is not None and image.attrs is not None:
             style = dict(image.attrs)['style']
             if style is not None:
-                m = re.search('url\(([^\)]*)\)', style)
+                m = re.search(r'url\(([^\)]*)\)', style)
                 if m.group(1) is not None:
                     stripstyle(image)
                     image.name = 'img'

View File

@@ -67,7 +67,7 @@ class Guardian(BasicNewsRecipe):
     def preprocess_raw_html(self, raw, url):
         import html5lib
         from lxml import html
-        return html.tostring(html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml'), encoding=unicode)
+        return html.tostring(html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml'), encoding='unicode')

     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):

View File

@@ -18,5 +18,6 @@ class AdvancedUserRecipe1336289226(BasicNewsRecipe):
     __author__ = 'faber1971'
     language = 'it'
+
 __version__ = 'v1.0'
 __date__ = '6, May 2012'

View File

@@ -160,7 +160,7 @@ class HoustonChronicle(BasicNewsRecipe):
             result = []
             for descendant in el.contents:
                 if isinstance(descendant, NavigableString):
-                    result.append(unicode(descendant).strip())
+                    result.append(type(u'')(descendant).strip())
             all_text = u' '.join(result).encode('utf-8')
             if len(all_text) > 1:
                 sentences = re.findall(sentence_regex, all_text)

View File

@@ -33,7 +33,7 @@ class jazzpress(BasicNewsRecipe):
         # find the link
         epublink = browser.find_link(
-            url_regex=re.compile('e_jazzpress\d\d\d\d\_epub'))
+            url_regex=re.compile(r'e_jazzpress\d\d\d\d\_epub'))

         # download ebook
         self.report_progress(0, _('Downloading ePUB'))

View File

@@ -15,5 +15,6 @@ class AdvancedUserRecipe1336504510(BasicNewsRecipe):
     description = 'News about Juventus from La Stampa'
     __author__ = 'faber1971'
+
 __version__ = 'v1.0'
 __date__ = '8, May 2012'

View File

@@ -11,5 +11,7 @@ class AdvancedUserRecipe1326135232(BasicNewsRecipe):
     feeds = [(u'Lega Nerd', u'http://feeds.feedburner.com/LegaNerd')]
+
+
 __author__ = 'faber1971'
 __version__ = 'v1.0'
 __date__ = '9, January 2011'

View File

@@ -31,7 +31,7 @@ class AListApart (BasicNewsRecipe):
     ]

     def image_url_processor(self, baseurl, url):
-        if re.findall('alistapart\.com', url):
+        if re.findall(r'alistapart\.com', url):
             return 'http:' + url
         else:
             return url

View File

@@ -1147,7 +1147,7 @@ class MPRecipe(BasicNewsRecipe):
                     doctype='xhtml').decode('utf-8')).find('div')
                 body.insert(len(body.contents), elem)
             with open(last, 'wb') as fi:
-                fi.write(unicode(soup).encode('utf-8'))
+                fi.write(type(u'')(soup).encode('utf-8'))
         if len(feeds) == 0:
             raise Exception('All feeds are empty, aborting.')

View File

@@ -985,7 +985,7 @@ class MPRecipe(BasicNewsRecipe):
                     doctype='xhtml').decode('utf-8')).find('div')
                 body.insert(len(body.contents), elem)
             with open(last, 'wb') as fi:
-                fi.write(unicode(soup).encode('utf-8'))
+                fi.write(type(u'')(soup).encode('utf-8'))
         if len(feeds) == 0:
             raise Exception('All feeds are empty, aborting.')

View File

@@ -985,7 +985,7 @@ class MPRecipe(BasicNewsRecipe):
                     doctype='xhtml').decode('utf-8')).find('div')
                 body.insert(len(body.contents), elem)
             with open(last, 'wb') as fi:
-                fi.write(unicode(soup).encode('utf-8'))
+                fi.write(type(u'')(soup).encode('utf-8'))
         if len(feeds) == 0:
             raise Exception('All feeds are empty, aborting.')

View File

@@ -70,7 +70,7 @@ class ModorosBlogHu(BasicNewsRecipe):
         past_items = set()
         if os.path.exists(feed_fn):
-            with file(feed_fn) as f:
+            with open(feed_fn) as f:
                 for h in f:
                     past_items.add(h.strip())
@@ -87,7 +87,7 @@ class ModorosBlogHu(BasicNewsRecipe):
                 cur_items.add(item_hash)
                 if item_hash in past_items:
                     feed.articles.remove(article)
-        with file(feed_fn, 'w') as f:
+        with open(feed_fn, 'w') as f:
             for h in cur_items:
                 f.write(h + '\n')

View File

@@ -24,7 +24,7 @@ class FocusRecipe(BasicNewsRecipe):
     simultaneous_downloads = 2

     r = re.compile(
-        '.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
+        r'.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
     keep_only_tags = []
     keep_only_tags.append(dict(name='div', attrs={'class': 'artykul'}))
     remove_tags = [dict(name='ul', attrs={'class': 'socialStuff'})]

View File

@@ -7,6 +7,7 @@ class AdvancedUserRecipe1360354988(BasicNewsRecipe):
     max_articles_per_feed = 100
     auto_cleanup = True
+
 from calibre.web.feeds.news import BasicNewsRecipe

View File

@@ -93,7 +93,7 @@ class Newsweek(BasicNewsRecipe):
             strs.append("".join(str(content)))

         # return contents as a string
-        return unicode("".join(strs))
+        return u"".join(strs)

     #
     # Articles can be divided into several pages, this method parses them recursevely
@@ -113,7 +113,7 @@ class Newsweek(BasicNewsRecipe):
         if page == 0:
             title = main_section.find('h1')
-            html = html + unicode(title)
+            html = html + type(u'')(title)

             authors = ''
             authorBox = main_section.find('div', attrs={'class': 'AuthorBox'})
@@ -121,10 +121,10 @@ class Newsweek(BasicNewsRecipe):
                 authorH4 = authorBox.find('h4')
                 if authorH4 is not None:
                     authors = self.tag_to_string(authorH4)
-            html = html + unicode(authors)
+            html = html + type(u'')(authors)

             info = main_section.find('p', attrs={'class': 'lead'})
-            html = html + unicode(info)
+            html = html + type(u'')(info)
             html = html + self.get_article_divs(
                 '3917dc34e07c9c7180df2ea9ef103361845c8af42b71f51b960059226090a1ac articleStart', main_section)

View File

@@ -14,5 +14,7 @@ class AdvancedUserRecipe1335362999(BasicNewsRecipe):
     description = 'An Italian satirical blog'
     language = 'it'
+
+
 __author__ = 'faber1971'
 __version__ = 'v1.0'
 __date__ = '24, April 2012'

View File

@@ -94,7 +94,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe):
         past_items = set()
         if os.path.exists(feed_fn):
-            with file(feed_fn) as f:
+            with open(feed_fn) as f:
                 for h in f:
                     past_items.add(h.strip())
@@ -111,7 +111,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe):
                 cur_items.add(item_hash)
                 if item_hash in past_items:
                     feed.articles.remove(article)
-        with file(feed_fn, 'w') as f:
+        with open(feed_fn, 'w') as f:
             for h in cur_items:
                 f.write(h + '\n')

View File

@@ -188,7 +188,7 @@ class OReillyPremium(BasicNewsRecipe):
         # feeds = self.parse_feeds()
         # Now add regular feeds.
         feedsRSS = self.parse_feeds()
-        print ("feedsRSS is type " + feedsRSS.__class__.__name__)
+        print("feedsRSS is type " + feedsRSS.__class__.__name__)
         for articles in feedsRSS:
             print("articles is type " + articles.__class__.__name__)

View File

@@ -65,7 +65,7 @@ class OurDailyBread(BasicNewsRecipe):
             hr = div.makeelement('hr')
             div.insert(0, hr)
         # print html.tostring(div)
-        raw = html.tostring(root, encoding=unicode)
+        raw = html.tostring(root, encoding='unicode')
         return raw

     def preprocess_html(self, soup):
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -58,7 +58,7 @@ class Pagina12(BasicNewsRecipe):
         seen_titles = set([])
         for section in soup.findAll('div', 'seccionx'):
             numero += 1
-            print (numero)
+            print(numero)
             section_title = self.tag_to_string(section.find(
                 'div', 'desplegable_titulo on_principal right'))
             self.log('Found section:', section_title)

View File

@@ -11,5 +11,7 @@ class AdvancedUserRecipe1326135591(BasicNewsRecipe):
     feeds = [(u'Pambianco', u'http://feeds.feedburner.com/pambianconews/YGXu')]
+
+
 __author__ = 'faber1971'
 __version__ = 'v1.0'
 __date__ = '9, January 2011'

View File

@@ -28,7 +28,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
         ('Eastern Arsenal', 'https://www.popsci.com/rss-eastern-arsenal.xml'),
     ]

-    pane_node_body = re.compile('pane-node-(?:\w+-){0,9}body')
+    pane_node_body = re.compile('pane-node-(?:\\w+-){0,9}body')
     keep_only_tags = [
         dict(attrs={'class': lambda x: x and frozenset('pane-node-header'.split()).issubset(frozenset(x.split()))}),

View File

@@ -205,7 +205,7 @@ class Pocket(BasicNewsRecipe):
         """
         try:
             from calibre.ebooks.covers import calibre_cover2
-            title = self.title if isinstance(self.title, unicode) else \
+            title = self.title if isinstance(self.title, type(u'')) else \
                 self.title.decode('utf-8', 'replace')
             date = strftime(self.timefmt)
             time = strftime('[%I:%M %p]')

View File

@@ -32,5 +32,5 @@ class RebelionRecipe (BasicNewsRecipe):
     # See http://www.mobileread.com/forums/showthread.php?t=174501
     def print_version(self, url):
-        id = re.compile('\d*$').search(url).group()
+        id = re.compile(r'\d*$').search(url).group()
         return u'http://www.rebelion.org/noticia.php?id=%s' % id

View File

@@ -107,7 +107,7 @@ class respektRecipe(BasicNewsRecipe):
         self.browser.open('https://www.respekt.cz/?do=logout')

     def preprocess_html(self,soup):
-        raw = u''.join(unicode(a) for a in soup.contents)
+        raw = u''.join(type(u'')(a) for a in soup.contents)
         root = lxml.html.fromstring(raw)
         # Fix Letem světem
         if "Letem sv" in root.xpath("//title")[0].text:
@@ -169,4 +169,4 @@ class respektRecipe(BasicNewsRecipe):
                     o.getparent().replace(o,e)
                 except:
                     pass
-        return(BeautifulSoup(lxml.etree.tostring(root,encoding=unicode)))
+        return(BeautifulSoup(lxml.etree.tostring(root,encoding='unicode')))

View File

@@ -31,8 +31,8 @@ class RevistaMuyInteresante(BasicNewsRecipe):
         for img_tag in soup.findAll('img'):
             imagen = img_tag
-            new_tag = new_tag(soup, 'p')
-            img_tag.replaceWith(new_tag)
+            nt = new_tag(soup, 'p')
+            img_tag.replaceWith(nt)
             div = soup.find(attrs={'class': 'article_category'})
             div.insert(0, imagen)
             break

View File

@@ -497,7 +497,7 @@ class STHKRecipe(BasicNewsRecipe):
                     doctype='xhtml').decode('utf-8')).find('div')
                 body.insert(len(body.contents), elem)
             with open(last, 'wb') as fi:
-                fi.write(unicode(soup).encode('utf-8'))
+                fi.write(type(u'')(soup).encode('utf-8'))
         if len(feeds) == 0:
             raise Exception('All feeds are empty, aborting.')

View File

@@ -59,9 +59,9 @@ class SolHaberRecipe(BasicNewsRecipe):
     cover_margins = (20, 20, '#ffffff')

-    storybody_reg_exp = '^\s*(haber|kose)\s*$'
-    comments_reg_exp = '^\s*makale-elestiri\s*$'
+    storybody_reg_exp = r'^\s*(haber|kose)\s*$'
+    comments_reg_exp = r'^\s*makale-elestiri\s*$'

     remove_tags = [
         dict(name='div', attrs={'class': re.compile(comments_reg_exp, re.IGNORECASE)})]

View File

@@ -14,7 +14,7 @@ class tanuki(BasicNewsRecipe):
     autocleanup = True
     extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}'
     preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile(
-        unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
+        type(u'')(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')]
     remove_empty_feeds = True
     no_stylesheets = True
     keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})]  # noqa

View File

@@ -99,7 +99,7 @@ class TheAge(BasicNewsRecipe):
                 # Collapse the paragraph by joining the non-tag contents
-                contents = [i for i in p.contents if isinstance(i, unicode)]
+                contents = [i for i in p.contents if isinstance(i, type(u''))]
                 if len(contents):
                     contents = ''.join(contents)

View File

@@ -9,6 +9,7 @@ def classes(classes):
     q = frozenset(classes.split(' '))
     return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
+
 from calibre.web.feeds.news import BasicNewsRecipe

View File

@@ -108,7 +108,7 @@ class TimesOnline(BasicNewsRecipe):
         return html.tostring(
             html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False),
             method='html',
-            encoding=unicode)
+            encoding='unicode')

     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):

View File

@@ -17,5 +17,6 @@ class AdvancedUserRecipe1334935485(BasicNewsRecipe):
     language = 'it'
     __author__ = 'faber1971'
+
 __version__ = 'v1.0'
 __date__ = '24, April 2012'

View File

@@ -137,7 +137,7 @@ class ZAOBAO(BasicNewsRecipe):
         # workaorund a strange problem: Somethimes the xml encoding is not
         # apllied correctly by parse()
         weired_encoding_detected = False
-        if not isinstance(feed.description, unicode) and self.encoding and feed.description:
+        if not isinstance(feed.description, type(u'')) and self.encoding and feed.description:
             self.log(
                 _('Feed %s is not encoded correctly, manually replace it') % (feed.title))
             feed.description = feed.description.decode(
@@ -150,14 +150,14 @@ class ZAOBAO(BasicNewsRecipe):
                 weired_encoding_detected = True

         for a, article in enumerate(feed):
-            if not isinstance(article.title, unicode) and self.encoding:
+            if not isinstance(article.title, type(u'')) and self.encoding:
                 article.title = article.title.decode(
                     self.encoding, 'replace')
-            if not isinstance(article.summary, unicode) and self.encoding and article.summary:
+            if not isinstance(article.summary, type(u'')) and self.encoding and article.summary:
                 article.summary = article.summary.decode(
                     self.encoding, 'replace')
                 article.text_summary = article.summary
-            if not isinstance(article.text_summary, unicode) and self.encoding and article.text_summary:
+            if not isinstance(article.text_summary, type(u'')) and self.encoding and article.text_summary:
                 article.text_summary = article.text_summary.decode(
                     self.encoding, 'replace')
                 article.summary = article.text_summary

View File

@@ -77,12 +77,14 @@ class Check(Command):
     def file_has_errors(self, f):
         ext = os.path.splitext(f)[1]
         if ext in {'.py', '.recipe'}:
-            p = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', f])
-            return p.wait() != 0
-        elif ext == '.pyj':
+            p1 = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', f])
+            p2 = subprocess.Popen(['flake8', '--filename', '*.py,*.recipe', f])
+            codes = p1.wait(), p2.wait()
+            return codes != (0, 0)
+        if ext == '.pyj':
             p = subprocess.Popen(['rapydscript', 'lint', f])
             return p.wait() != 0
-        elif ext == '.yaml':
+        if ext == '.yaml':
             sys.path.insert(0, self.wn_path)
             import whats_new
             whats_new.render_changelog(self.j(self.d(self.SRC), 'Changelog.yaml'))
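The hunk above is the change the commit title refers to: .py and .recipe files are now linted by both the Python 2 and the Python 3 flake8 binaries, and a file fails if either exits non-zero. A standalone sketch of the same logic (assuming both binaries are on PATH, as in calibre's build environment):

import subprocess

def has_lint_errors(path):
    # Both linters run concurrently; wait() collects their exit codes.
    p1 = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', path])
    p2 = subprocess.Popen(['flake8', '--filename', '*.py,*.recipe', path])
    return (p1.wait(), p2.wait()) != (0, 0)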

View File

@@ -16,7 +16,7 @@ from calibre.constants import ispy3
 from calibre.customize import (Plugin, numeric_version, platform,
                                InvalidPlugin, PluginNotFound)
 from polyglot.builtins import (itervalues, map, string_or_bytes,
-                               unicode_type)
+                               unicode_type, reload)

 # PEP 302 based plugin loading mechanism, works around the bug in zipimport in
 # python 2.x that prevents importing from zip files in locations whose paths

View File

@@ -17,7 +17,7 @@ from polyglot.builtins import (iteritems, itervalues,
 from calibre import isbytestring, force_unicode, prints, as_unicode
 from calibre.constants import (iswindows, filesystem_encoding,
-                               preferred_encoding)
+                               preferred_encoding, ispy3)
 from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
 from calibre.db import SPOOL_SIZE
 from calibre.db.schema_upgrades import SchemaUpgrade
@@ -209,9 +209,14 @@ def Concatenate(sep=','):
         ctxt.append(value)

     def finalize(ctxt):
-        if not ctxt:
-            return None
-        return sep.join(ctxt)
+        try:
+            if not ctxt:
+                return None
+            return sep.join(ctxt)
+        except Exception:
+            import traceback
+            traceback.print_exc()
+            raise

     return ([], step, finalize)
@@ -224,9 +229,14 @@ def SortedConcatenate(sep=','):
         ctxt[ndx] = value

     def finalize(ctxt):
-        if len(ctxt) == 0:
-            return None
-        return sep.join(map(ctxt.get, sorted(ctxt)))
+        try:
+            if len(ctxt) == 0:
+                return None
+            return sep.join(map(ctxt.get, sorted(ctxt)))
+        except Exception:
+            import traceback
+            traceback.print_exc()
+            raise

     return ({}, step, finalize)
@@ -238,7 +248,12 @@ def IdentifiersConcat():
         ctxt.append(u'%s:%s'%(key, val))

     def finalize(ctxt):
-        return ','.join(ctxt)
+        try:
+            return ','.join(ctxt)
+        except Exception:
+            import traceback
+            traceback.print_exc()
+            raise

     return ([], step, finalize)
@@ -251,13 +266,18 @@ def AumSortedConcatenate():
         ctxt[ndx] = ':::'.join((author, sort, link))

     def finalize(ctxt):
-        keys = list(ctxt)
-        l = len(keys)
-        if l == 0:
-            return None
-        if l == 1:
-            return ctxt[keys[0]]
-        return ':#:'.join([ctxt[v] for v in sorted(keys)])
+        try:
+            keys = list(ctxt)
+            l = len(keys)
+            if l == 0:
+                return None
+            if l == 1:
+                return ctxt[keys[0]]
+            return ':#:'.join([ctxt[v] for v in sorted(keys)])
+        except Exception:
+            import traceback
+            traceback.print_exc()
+            raise

     return ({}, step, finalize)
@@ -1724,8 +1744,13 @@ class DB(object):
             [(book_id, fmt.upper()) for book_id in book_ids])

     def set_conversion_options(self, options, fmt):
-        options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode_type) else data)))
-                for book_id, data in iteritems(options)]
+        def map_data(x):
+            x = x.encode('utf-8') if isinstance(x, unicode_type) else x
+            x = pickle_binary_string(x)
+            if not ispy3:
+                x = buffer(x)  # noqa
+            return x
+        options = [(book_id, fmt.upper(), map_data(data)) for book_id, data in iteritems(options)]
         self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)

     def get_top_level_move_items(self, all_paths):
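The map_data() helper introduced above exists because sqlite binds the pickled value differently per interpreter: on Python 2 the bytes must be wrapped in buffer() to be stored as a BLOB, while Python 3 bytes are bound as a BLOB directly. A reduced illustration of just that branch (pickling omitted; ispy3 comes from calibre.constants):

def map_data(x, ispy3=True):
    # Encode text to UTF-8 bytes, then wrap in buffer() only on
    # Python 2 so sqlite stores the value as a BLOB rather than TEXT.
    x = x.encode('utf-8') if isinstance(x, type(u'')) else x
    if not ispy3:
        x = buffer(x)  # noqa -- buffer() is a py2-only builtin
    return x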

View File

@@ -237,20 +237,20 @@ class DebugRWLockWrapper(RWLockWrapper):
         RWLockWrapper.__init__(self, *args, **kwargs)

     def acquire(self):
-        print ('#' * 120, file=sys.stderr)
-        print ('acquire called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
+        print('#' * 120, file=sys.stderr)
+        print('acquire called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
         traceback.print_stack()
         RWLockWrapper.acquire(self)
-        print ('acquire done: thread id:', current_thread(), file=sys.stderr)
-        print ('_' * 120, file=sys.stderr)
+        print('acquire done: thread id:', current_thread(), file=sys.stderr)
+        print('_' * 120, file=sys.stderr)

     def release(self, *args):
-        print ('*' * 120, file=sys.stderr)
-        print ('release called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
+        print('*' * 120, file=sys.stderr)
+        print('release called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr)
         traceback.print_stack()
         RWLockWrapper.release(self)
-        print ('release done: thread id:', current_thread(), 'is_shared:', self._shlock.is_shared, 'is_exclusive:', self._shlock.is_exclusive, file=sys.stderr)
-        print ('_' * 120, file=sys.stderr)
+        print('release done: thread id:', current_thread(), 'is_shared:', self._shlock.is_shared, 'is_exclusive:', self._shlock.is_exclusive, file=sys.stderr)
+        print('_' * 120, file=sys.stderr)

     __enter__ = acquire
     __exit__ = release

View File

@@ -15,6 +15,7 @@ def find_tests():
     base = os.path.dirname(os.path.abspath(__file__))
     return find_tests_in_dir(base)

+
 if __name__ == '__main__':
     try:
         import init_calibre  # noqa

View File

@@ -712,3 +712,14 @@ class ReadingTest(BaseTest):
         cache.set_last_read_position(1, 'EPUB', 'user', 'device')
         self.assertFalse(cache.get_last_read_positions(1, 'ePuB', 'user'))
     # }}}
+
+    def test_storing_conversion_options(self):  # {{{
+        cache = self.init_cache(self.library_path)
+        opts = {1: b'binary', 2: 'unicode'}
+        cache.set_conversion_options(opts, 'PIPE')
+        for book_id, val in iteritems(opts):
+            got = cache.conversion_options(book_id, 'PIPE')
+            if not isinstance(val, bytes):
+                val = val.encode('utf-8')
+            self.assertEqual(got, val)
+    # }}}

View File

@@ -45,11 +45,11 @@ class HANLINV3(USBMS):
         card = names.get('carda', None)

         try:
-            main_num = int(re.findall('\d+', main)[0]) if main else None
+            main_num = int(re.findall(r'\d+', main)[0]) if main else None
         except:
             main_num = None
         try:
-            card_num = int(re.findall('\d+', card)[0]) if card else None
+            card_num = int(re.findall(r'\d+', card)[0]) if card else None
         except:
             card_num = None

View File

@@ -3081,7 +3081,6 @@ class KOBOTOUCH(KOBO):
                     update_values.append(newmi.isbn)
                     set_clause += ', ISBN = ? '

                 library_language = normalize_languages(kobo_metadata.languages, newmi.languages)
-                library_language = library_language[0] if library_language is not None and len(library_language) > 0 else None
                 library_language = library_language[0] if library_language is not None and len(library_language) > 0 else None

                 if not (library_language == kobo_metadata.language):

View File

@@ -196,8 +196,8 @@ class MTP_DEVICE(MTPDeviceBase):
             p = plugins['libmtp']
             self.libmtp = p[0]
             if self.libmtp is None:
-                print ('Failed to load libmtp, MTP device detection disabled')
-                print (p[1])
+                print('Failed to load libmtp, MTP device detection disabled')
+                print(p[1])
             else:
                 self.known_devices = frozenset(self.libmtp.known_devices())

View File

@@ -143,7 +143,7 @@ class PRST1(USBMS):
         main, carda, cardb = self.find_device_nodes(detected_device=dev)
         if main is None and carda is None and cardb is None:
             if debug:
-                print ('\tPRS-T1: Appears to be in non data mode'
+                print('\tPRS-T1: Appears to be in non data mode'
                       ' or was ejected, ignoring')
             return False
         return True

View File

@@ -701,7 +701,7 @@ class Device(DeviceConfig, DevicePlugin):
                 except dbus.exceptions.DBusException as e:
                     print(e)
                     continue
-            except dbus.exceptions.DBusException as e:
+            except dbus.exceptions.DBusException:
                 continue

         vols.sort(key=lambda x: x['node'])

View File

@@ -773,7 +773,7 @@ def get_drive_letters_for_device_single(usbdev, storage_number_map, debug=False):
     if debug:
         try:
             devid = get_device_id(devinfo.DevInst)[0]
-        except Exception as err:
+        except Exception:
             devid = 'Unknown'
     try:
         storage_number = get_storage_number(devpath)

View File

@@ -13,5 +13,6 @@ import sys
 def main(args=sys.argv):
     return 0

+
 if __name__ == '__main__':
     sys.exit(main())

View File

@@ -23,6 +23,7 @@ class ConversionUserFeedBack(Exception):
         self.title, self.msg, self.det_msg = title, msg, det_msg
         self.level = level

+
 # Ensure exception uses fully qualified name as this is used to detect it in
 # the GUI.
 ConversionUserFeedBack.__name__ = str('calibre.ebooks.conversion.ConversionUserFeedBack')

View File

@@ -46,10 +46,10 @@ class DjvuChunk(object):
         # self.headersize += 4
         self.datastart = pos
         if verbose > 0:
-            print ('found', self.type, self.subtype, pos, self.size)
+            print('found', self.type, self.subtype, pos, self.size)
         if self.type in b'FORM'.split():
             if verbose > 0:
-                print ('processing substuff %d %d (%x)' % (pos, self.dataend,
+                print('processing substuff %d %d (%x)' % (pos, self.dataend,
                     self.dataend))
             numchunks = 0
             while pos < self.dataend:
@@ -58,11 +58,11 @@ class DjvuChunk(object):
                 self._subchunks.append(x)
                 newpos = pos + x.size + x.headersize + (1 if (x.size % 2) else 0)
                 if verbose > 0:
-                    print ('newpos %d %d (%x, %x) %d' % (newpos, self.dataend,
+                    print('newpos %d %d (%x, %x) %d' % (newpos, self.dataend,
                         newpos, self.dataend, x.headersize))
                 pos = newpos
             if verbose > 0:
-                print (' end of chunk %d (%x)' % (pos, pos))
+                print(' end of chunk %d (%x)' % (pos, pos))

     def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100):
         if out:
@@ -89,7 +89,7 @@ class DjvuChunk(object):
                 l <<= 8
                 l += ord(x)
             if verbose > 0 and out:
-                print (l, file=out)
+                print(l, file=out)
             txtout.write(res[3:3+l])
             txtout.write(b'\037')
         if txtout and self.type == b'TXTa':
@@ -99,7 +99,7 @@ class DjvuChunk(object):
                 l <<= 8
                 l += ord(x)
             if verbose > 0 and out:
-                print (l, file=out)
+                print(l, file=out)
             txtout.write(res[3:3+l])
             txtout.write(b'\037')
         if indent >= maxlevel:
@@ -126,7 +126,8 @@ class DJVUFile(object):
 def main():
     f = DJVUFile(open(sys.argv[-1], 'rb'))
-    print (f.get_text(sys.stdout))
+    print(f.get_text(sys.stdout))

+
 if __name__ == '__main__':
     main()

View File

@@ -735,9 +735,9 @@ class BZZDecoder():
 def main():
     import sys
     from calibre.constants import plugins
-    raw = file(sys.argv[1], "rb").read()
+    raw = open(sys.argv[1], "rb").read()
     d = plugins['bzzdec'][0]
-    print (d.decompress(raw))
+    print(d.decompress(raw))

 if __name__ == "__main__":

View File

@@ -197,7 +197,7 @@ def cfi_sort_key(cfi, only_path=True):
         return ()
     if not pcfi:
         import sys
-        print ('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
+        print('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
         return ()
     steps = get_steps(pcfi)
     step_nums = tuple(s.get('num', 0) for s in steps)
@@ -217,7 +217,7 @@ def decode_cfi(root, cfi):
         return
     if not pcfi:
         import sys
-        print ('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
+        print('Failed to parse CFI: %r' % pcfi, file=sys.stderr)
         return
     steps = get_steps(pcfi)
     ans = root

View File

@@ -380,7 +380,7 @@ ATTRS35 = {
     0x804a: "align",
     0x8bbd: "palette",
     0x8bbe: "pluginspage",
-    0x8bbf: "codebase",
+    # 0x8bbf: "codebase",
     0x8bbf: "src",
     0x8bc1: "units",
     0x8bc2: "type",
@@ -640,7 +640,7 @@ ATTRS66 = {
     0x03f5: "n",
 }
 ATTRS71 = {
-    0x8000: "border",
+    # 0x8000: "border",
     0x8000: "usemap",
     0x8001: "name",
     0x8006: "width",
@@ -682,8 +682,8 @@ ATTRS74 = {
     0x9399: "clear",
 }
 ATTRS75 = {
-    0x8000: "name",
-    0x8000: "value",
+    # 0x8000: "name",
+    # 0x8000: "value",
     0x8000: "type",
 }
 ATTRS76 = {

View File

@@ -96,8 +96,8 @@ NAME_MAP = {
     u'yellowgreen': u'#9ACD32'
 }

-hex_pat = re.compile('#(\d{2})(\d{2})(\d{2})')
-rgb_pat = re.compile('rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
+hex_pat = re.compile(r'#(\d{2})(\d{2})(\d{2})')
+rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)

 def lrs_color(html_color):
@@ -111,5 +111,3 @@ def lrs_color(html_color):
     if hcol in NAME_MAP:
         return NAME_MAP[hcol].replace('#', '0x00')
     return '0x00000000'
-
-

View File

@@ -490,7 +490,7 @@ class LrfFileStream(LrfStreamBase):
     def __init__(self, streamFlags, filename):
         LrfStreamBase.__init__(self, streamFlags)
-        f = file(filename, "rb")
+        f = open(filename, "rb")
         self.streamData = f.read()
         f.close()
@@ -686,7 +686,7 @@ class LrfWriter(object):
         self.tocObjId = obj.objId

     def setThumbnailFile(self, filename, encoding=None):
-        f = file(filename, "rb")
+        f = open(filename, "rb")
         self.thumbnailData = f.read()
         f.close()

View File

@@ -2268,7 +2268,7 @@ class ImageStream(LrsObject, LrsContainer):
         self.encoding = encoding

     def toLrf(self, lrfWriter):
-        imageFile = file(self.filename, "rb")
+        imageFile = open(self.filename, "rb")
         imageData = imageFile.read()
         imageFile.close()


@@ -77,7 +77,8 @@ def set_metadata(stream, mi):
     stream.seek(0)
     safe_replace(stream, dp_name, BytesIO(xml2str(cp)), extra_replacements=replacements)
+
 if __name__ == '__main__':
     import sys
     with open(sys.argv[-1], 'rb') as stream:
-        print (get_metadata(stream))
+        print(get_metadata(stream))


@@ -376,7 +376,7 @@ class TestOPF3(unittest.TestCase):
         &quot;value&quot;, &quot;#value#&quot;:
         &quot;&lt;div&gt;&lt;b&gt;&lt;i&gt;Testing&lt;/i&gt;&lt;/b&gt; extra
         &lt;font
-        color=\&quot;#aa0000\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;,
+        color=\\&quot;#aa0000\\&quot;&gt;comments&lt;/font&gt;&lt;/div&gt;&quot;,
         &quot;is_custom&quot;: true, &quot;label&quot;: &quot;commetns&quot;,
         &quot;table&quot;: &quot;custom_column_13&quot;,
         &quot;is_multiple&quot;: null, &quot;is_category&quot;: false}"/>


@@ -353,7 +353,7 @@ class Worker(Thread):  # Get details {{{
             with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4())) + '_',
                     suffix='.html', delete=False) as f:
                 f.write(raw)
-            print ('Downloaded html for', asin, 'saved in', f.name)
+            print('Downloaded html for', asin, 'saved in', f.name)
         try:
             title = self.parse_title(root)
@@ -1256,7 +1256,7 @@ class Amazon(Source):
             with tempfile.NamedTemporaryFile(prefix='amazon_results_',
                     suffix='.html', delete=False) as f:
                 f.write(raw.encode('utf-8'))
-            print ('Downloaded html for results page saved in', f.name)
+            print('Downloaded html for results page saved in', f.name)
         matches = []
         found = '<title>404 - ' not in raw


@@ -82,7 +82,7 @@ def main(args=sys.argv):
             allowed_plugins=allowed_plugins or None)
     if not results:
-        print (log, file=sys.stderr)
+        print(log, file=sys.stderr)
         prints('No results found', file=sys.stderr)
         raise SystemExit(1)
     result = results[0]
@@ -103,9 +103,9 @@ def main(args=sys.argv):
             unicode_type(result).encode('utf-8'))
     if opts.verbose:
-        print (log, file=sys.stderr)
-        print (result)
+        print(log, file=sys.stderr)
+        print(result)
     if not opts.opf and opts.cover:
         prints('Cover :', cf)


@@ -8,8 +8,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 class MobiError(Exception):
     pass
-
 
 # That might be a bit small on the PW, but Amazon/KG 2.5 still uses these values, even when delivered to a PW
 MAX_THUMB_SIZE = 16 * 1024
 MAX_THUMB_DIMEN = (180, 240)


@@ -276,7 +276,7 @@ class Tag(object):  # {{{
         if tag_type in self.TAG_MAP:
             self.attr, self.desc = self.TAG_MAP[tag_type]
         else:
-            print ('Unknown tag value: %%s'%tag_type)
+            print('Unknown tag value: %%s'%tag_type)
             self.desc = '??Unknown (tag value: %d)'%tag_type
             self.attr = 'unknown'
@@ -461,7 +461,7 @@ class CNCX(object):  # {{{
             except:
                 byts = raw[pos:]
                 r = format_bytes(byts)
-                print ('CNCX entry at offset %d has unknown format %s'%(
+                print('CNCX entry at offset %d has unknown format %s'%(
                     pos+record_offset, r))
                 self.records[pos+record_offset] = r
                 pos = len(raw)
@@ -629,7 +629,7 @@ class TBSIndexing(object):  # {{{
                 import traceback
                 traceback.print_exc()
                 a = []
-                print ('Failed to decode TBS bytes for record: %d'%r.idx)
+                print('Failed to decode TBS bytes for record: %d'%r.idx)
             ans += a
             if byts:
                 sbyts = tuple(hex(b)[2:] for b in byts)
@@ -789,14 +789,14 @@ class MOBIFile(object):  # {{{
             self.index_record.indices, self.mobi_header.type_raw)
     def print_header(self, f=sys.stdout):
-        print (str(self.palmdb).encode('utf-8'), file=f)
-        print (file=f)
-        print ('Record headers:', file=f)
+        print(str(self.palmdb).encode('utf-8'), file=f)
+        print(file=f)
+        print('Record headers:', file=f)
         for i, r in enumerate(self.records):
-            print ('%6d. %s'%(i, r.header), file=f)
-        print (file=f)
-        print (str(self.mobi_header).encode('utf-8'), file=f)
+            print('%6d. %s'%(i, r.header), file=f)
+        print(file=f)
+        print(str(self.mobi_header).encode('utf-8'), file=f)
 # }}}


@@ -23,7 +23,7 @@ main_language = {
     2 : "BULGARIAN",
     3 : "CATALAN",
     4 : "CHINESE",
-    26 : "CROATIAN",
+    # 26 : "CROATIAN",
     5 : "CZECH",
     6 : "DANISH",
     19 : "DUTCH",
@@ -91,55 +91,55 @@ main_language = {
 sub_language = {
     0 : "NEUTRAL",
-    1 : "ARABIC_SAUDI_ARABIA",
-    2 : "ARABIC_IRAQ",
-    3 : "ARABIC_EGYPT",
-    4 : "ARABIC_LIBYA",
-    5 : "ARABIC_ALGERIA",
-    6 : "ARABIC_MOROCCO",
-    7 : "ARABIC_TUNISIA",
-    8 : "ARABIC_OMAN",
-    9 : "ARABIC_YEMEN",
-    10 : "ARABIC_SYRIA",
-    11 : "ARABIC_JORDAN",
-    12 : "ARABIC_LEBANON",
-    13 : "ARABIC_KUWAIT",
-    14 : "ARABIC_UAE",
-    15 : "ARABIC_BAHRAIN",
-    16 : "ARABIC_QATAR",
-    1 : "AZERI_LATIN",
-    2 : "AZERI_CYRILLIC",
-    1 : "CHINESE_TRADITIONAL",
-    2 : "CHINESE_SIMPLIFIED",
-    3 : "CHINESE_HONGKONG",
-    4 : "CHINESE_SINGAPORE",
-    1 : "DUTCH",
-    2 : "DUTCH_BELGIAN",
-    1 : "FRENCH",
-    2 : "FRENCH_BELGIAN",
-    3 : "FRENCH_CANADIAN",
-    4 : "FRENCH_SWISS",
-    5 : "FRENCH_LUXEMBOURG",
-    6 : "FRENCH_MONACO",
-    1 : "GERMAN",
-    2 : "GERMAN_SWISS",
-    3 : "GERMAN_AUSTRIAN",
-    4 : "GERMAN_LUXEMBOURG",
-    5 : "GERMAN_LIECHTENSTEIN",
-    1 : "ITALIAN",
-    2 : "ITALIAN_SWISS",
-    1 : "KOREAN",
-    1 : "LITHUANIAN",
-    1 : "MALAY_MALAYSIA",
-    2 : "MALAY_BRUNEI_DARUSSALAM",
-    1 : "NORWEGIAN_BOKMAL",
-    2 : "NORWEGIAN_NYNORSK",
-    2 : "PORTUGUESE",
-    1 : "PORTUGUESE_BRAZILIAN",
-    2 : "SERBIAN_LATIN",
+    # 1 : "ARABIC_SAUDI_ARABIA",
+    # 2 : "ARABIC_IRAQ",
+    # 3 : "ARABIC_EGYPT",
+    # 4 : "ARABIC_LIBYA",
+    # 5 : "ARABIC_ALGERIA",
+    # 6 : "ARABIC_MOROCCO",
+    # 7 : "ARABIC_TUNISIA",
+    # 8 : "ARABIC_OMAN",
+    # 9 : "ARABIC_YEMEN",
+    # 10 : "ARABIC_SYRIA",
+    # 11 : "ARABIC_JORDAN",
+    # 12 : "ARABIC_LEBANON",
+    # 13 : "ARABIC_KUWAIT",
+    # 14 : "ARABIC_UAE",
+    # 15 : "ARABIC_BAHRAIN",
+    # 16 : "ARABIC_QATAR",
+    # 1 : "AZERI_LATIN",
+    # 2 : "AZERI_CYRILLIC",
+    # 1 : "CHINESE_TRADITIONAL",
+    # 2 : "CHINESE_SIMPLIFIED",
+    # 3 : "CHINESE_HONGKONG",
+    # 4 : "CHINESE_SINGAPORE",
+    # 1 : "DUTCH",
+    # 2 : "DUTCH_BELGIAN",
+    # 1 : "FRENCH",
+    # 2 : "FRENCH_BELGIAN",
+    # 3 : "FRENCH_CANADIAN",
+    # 4 : "FRENCH_SWISS",
+    # 5 : "FRENCH_LUXEMBOURG",
+    # 6 : "FRENCH_MONACO",
+    # 1 : "GERMAN",
+    # 2 : "GERMAN_SWISS",
+    # 3 : "GERMAN_AUSTRIAN",
+    # 4 : "GERMAN_LUXEMBOURG",
+    # 5 : "GERMAN_LIECHTENSTEIN",
+    # 1 : "ITALIAN",
+    # 2 : "ITALIAN_SWISS",
+    # 1 : "KOREAN",
+    # 1 : "LITHUANIAN",
+    # 1 : "MALAY_MALAYSIA",
+    # 2 : "MALAY_BRUNEI_DARUSSALAM",
+    # 1 : "NORWEGIAN_BOKMAL",
+    # 2 : "NORWEGIAN_NYNORSK",
+    # 2 : "PORTUGUESE",
+    # 1 : "PORTUGUESE_BRAZILIAN",
+    # 2 : "SERBIAN_LATIN",
     3 : "SERBIAN_CYRILLIC",
-    1 : "SPANISH",
-    2 : "SPANISH_MEXICAN",
+    # 1 : "SPANISH",
+    # 2 : "SPANISH_MEXICAN",
     4 : "SPANISH_GUATEMALA",
     5 : "SPANISH_COSTA_RICA",
     6 : "SPANISH_PANAMA",
@@ -157,8 +157,8 @@ sub_language = {
     18 : "SPANISH_HONDURAS",
     19 : "SPANISH_NICARAGUA",
     20 : "SPANISH_PUERTO_RICO",
-    1 : "SWEDISH",
-    2 : "SWEDISH_FINLAND",
+    # 1 : "SWEDISH",
+    # 2 : "SWEDISH_FINLAND",
     1 : "UZBEK_LATIN",
     2 : "UZBEK_CYRILLIC",
 }
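
These sub-language duplicates exist because Windows sub-language IDs are only meaningful relative to a primary language, so a flat dict keyed on the sub-ID alone is bound to collide. A hypothetical collision-free layout, keyed on (primary, sub) pairs; the numeric codes shown follow the Windows LCID convention (0x09 English, 0x0C French) and are illustrative only:

    # Hypothetical composite keying that avoids duplicate keys entirely.
    SUB_LANGUAGE = {
        (0x0c, 1): "FRENCH",
        (0x0c, 3): "FRENCH_CANADIAN",
        (0x09, 1): "ENGLISH_US",
    }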


@@ -15,9 +15,8 @@ from xml.sax.saxutils import escape
 from lxml import etree
 
 from calibre.ebooks.oeb.base import XHTML_NS, extract
-from calibre.constants import ispy3
 from calibre.ebooks.mobi.utils import to_base
-from polyglot.builtins import iteritems, unicode_type
+from polyglot.builtins import iteritems, unicode_type, codepoint_to_chr as mychr
 
 CHUNK_SIZE = 8192
@@ -61,9 +60,6 @@ def node_from_path(root, path):
     return parent
 
-
-mychr = chr if ispy3 else unichr
-
 
 def tostring(raw, **kwargs):
     ''' lxml *sometimes* represents non-ascii characters as hex entities in
     attribute values. I can't figure out exactly what circumstances cause it.
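
The deleted mychr assignment is the pattern this commit centralises into polyglot.builtins: choose the Python 3 builtin when it exists, fall back to the Python 2 spelling otherwise. A sketch of what codepoint_to_chr amounts to (the actual calibre helper may differ in detail):

    import sys

    if sys.version_info.major >= 3:
        codepoint_to_chr = chr
    else:
        codepoint_to_chr = unichr  # noqa: F821 (py2-only builtin)

    print(codepoint_to_chr(0x2603))  # U+2603 SNOWMAN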


@@ -22,6 +22,6 @@ def run_devel_server():
     os.chdir(os.path.dirname(os.path.abspath(__file__)))
     serve(resources={'cfi.coffee':'../cfi.coffee', '/':'index.html'})
 if __name__ == '__main__':
     run_devel_server()


@@ -95,5 +95,5 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
 if __name__ == '__main__':
     from lxml import etree
     root = parse_html5('\n<html><head><title>a\n</title><p b=1 c=2 a=0>&nbsp;\n<b>b<svg ass="wipe" viewbox="0">', discard_namespaces=False)
-    print (etree.tostring(root, encoding='utf-8'))
+    print(etree.tostring(root, encoding='utf-8'))
     print()


@@ -14,6 +14,7 @@ def find_tests():
     base = os.path.dirname(os.path.abspath(__file__))
     return find_tests_in_dir(base)
+
 if __name__ == '__main__':
     try:
         import init_calibre  # noqa


@@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
 class PDBError(Exception):
     pass
+
 FORMAT_READERS = None
@@ -31,6 +32,7 @@ def _import_readers():
         'BOOKMTIU': haodoo_reader,
     }
+
 ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
 FORMAT_WRITERS = None
@@ -47,6 +49,7 @@ def _import_writers():
         'ereader': ereader_writer,
     }
+
 IDENTITY_TO_NAME = {
     'PNPdPPrs': 'eReader',
     'PNRdPPrs': 'eReader',
@@ -100,4 +103,3 @@ def get_writer(extension):
     if FORMAT_WRITERS is None:
         _import_writers()
     return FORMAT_WRITERS.get(extension, None)
-
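
The hunks above appear to contain only context because the real change is whitespace: flake8 on Python 3 enforces two blank lines around top-level definitions (E302/E305), so the commit inserts or removes blank lines that this stripped-down diff view cannot display. For instance, code of this shape now needs the two blank lines shown:

    class PDBError(Exception):
        pass


    FORMAT_READERS = None  # two blank lines above satisfy E305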


@@ -19,6 +19,7 @@ UNITS = {
 def unit(unit):
     return UNITS.get(unit, QPrinter.Inch)
+
 PAPER_SIZES = {
     'a0' : QPrinter.A0,  # 841 x 1189 mm
     'a1' : QPrinter.A1,  # 594 x 841 mm
@@ -57,6 +58,7 @@ PAPER_SIZES = {
 def paper_size(size):
     return PAPER_SIZES.get(size, QPrinter.Letter)
+
 ORIENTATIONS = {
     'portrait' : QPrinter.Portrait,
     'landscape' : QPrinter.Landscape,


@@ -28,6 +28,7 @@ def normalize_spaces(s):
     characters with a single space"""
     return ' '.join(s.split())
+
 html_cleaner = Cleaner(scripts=True, javascript=True, comments=True,
                        style=True, links=True, meta=False, add_nofollow=False,
                        page_structure=False, processing_instructions=True, embedded=False,


@@ -4,6 +4,7 @@ def save_to_file(text, filename):
         f.write(text.encode('utf-8'))
         f.close()
+
 uids = {}


@@ -504,7 +504,7 @@ def main():
     enc = sys.__stdout__.encoding or 'utf-8'
     if options.verbose:
         default_log.filter_level = default_log.DEBUG
-    print (Document(raw, default_log,
+    print(Document(raw, default_log,
             debug=options.verbose,
             keep_elements=options.keep_elements).summary().encode(enc,
                 'replace'))


@@ -367,7 +367,7 @@ class RtfTokenizer():
 if __name__ == "__main__":
     import sys
     if len(sys.argv) < 2:
-        print ("Usage %prog rtfFileToConvert")
+        print("Usage %prog rtfFileToConvert")
         sys.exit()
     f = open(sys.argv[1], 'rb')
     data = f.read()
@@ -381,5 +381,3 @@ if __name__ == "__main__":
     f = open(sys.argv[1], 'w')
     f.write(data)
     f.close()
-
-


@@ -36,7 +36,7 @@ class GetCharMap:
     def get_char_map(self, map):
         # if map == 'ansicpg10000':
         #     map = 'mac_roman'
         found_map = False
         map_dict = {}
         self.__char_file.seek(0)
@@ -59,4 +59,3 @@ class GetCharMap:
             msg = 'no map found\nmap is "%s"\n'%(map,)
             raise self.__bug_handler(msg)
         return map_dict
-


@@ -31,11 +31,11 @@ class Paragraphs:
     In order to make paragraphs out of this limited info, the parser starts in the
     body of the documents and assumes it is not in a paragraph. It looks for clues
     to begin a paragraph. Text starts a paragraph; so does an inline field or
-    list-text. If an end of paragraph marker (\par) is found, then this indicates
+    list-text. If an end of paragraph marker (\\par) is found, then this indicates
     a blank paragraph.
     Once a paragraph is found, the state changes to 'paragraph.' In this state,
     clues are looked to for the end of a paragraph. The end of a paragraph marker
-    (\par) marks the end of a paragraph. So does the end of a footnote or heading;
+    (\\par) marks the end of a paragraph. So does the end of a footnote or heading;
     a paragraph definition; the end of a field-block; and the beginning of a
     section. (How about the end of a section or the end of a field-block?)
     """
@@ -224,7 +224,7 @@ class Paragraphs:
         Returns:
             nothing
         Logic:
-            if a \pard occurs in a paragraph, I want to ignore it. (I believe)
+            if a \\pard occurs in a paragraph, I want to ignore it. (I believe)
         """
         self.__write_obj.write('mi<mk<bogus-pard\n')
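
The docstring edits above are the same invalid-escape issue as the regex changes earlier: \p is not a recognised escape, so Python 3 warns about it even inside documentation strings. Doubling the backslash, or using a raw string, keeps the rendered text identical (the marker strings here are illustrative):

    doc_new = 'marker (\\par)'   # explicit backslash
    doc_raw = r'marker (\par)'   # raw string, same value
    print(doc_new == doc_raw)    # True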


@@ -272,17 +272,17 @@ class MyApplication(Gtk.Application):
         seen = seen or set()
         seen.add(group)
         print = self.print
-        print ('\nMenu description (Group %d)' % group)
+        print('\nMenu description (Group %d)' % group)
         for item in bus.call_blocking(self.bus_name, self.object_path, 'org.gtk.Menus', 'Start', 'au', ([group],)):
-            print ('Subscription group:', item[0])
-            print ('Menu number:', item[1])
+            print('Subscription group:', item[0])
+            print('Menu number:', item[1])
             for menu_item in item[2]:
                 menu_item = {unicode_type(k):convert(v) for k, v in iteritems(menu_item)}
                 if ':submenu' in menu_item:
                     groups.add(menu_item[':submenu'][0])
                 if ':section' in menu_item:
                     groups.add(menu_item[':section'][0])
-                print (pformat(menu_item))
+                print(pformat(menu_item))
         for other_group in sorted(groups - seen):
             self.print_menu_start(bus, other_group, seen)
@@ -303,8 +303,8 @@ class MyApplication(Gtk.Application):
         for name in sorted(adata):
             data = adata[name]
             d[name] = {'enabled':convert(data[0]), 'param type': convert(data[1]), 'state':convert(data[2])}
-            print ('Name:', name)
-            print (pformat(d[name]))
+            print('Name:', name)
+            print(pformat(d[name]))
     def do_startup(self):
         Gtk.Application.do_startup(self)
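
One detail worth noting in the hunk above: the line "print = self.print" only parses because print is an ordinary name once the print function is in effect; under the Python 2 print statement, assigning to it was a syntax error. A minimal sketch of that rebinding trick (log_to and the no-op sink are hypothetical):

    from __future__ import print_function

    def log_to(sink):
        # Shadow the builtin locally so following print(...) calls are routed.
        print = sink
        print('routed through sink')

    log_to(lambda *a, **k: None)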

Some files were not shown because too many files have changed in this diff.