diff --git a/recipes/al_monitor.recipe b/recipes/al_monitor.recipe index 0067f9ed2f..99b1ca2c76 100644 --- a/recipes/al_monitor.recipe +++ b/recipes/al_monitor.recipe @@ -1,6 +1,6 @@ #!/usr/bin/env python2 # -*- coding: utf-8 -*- -from __future__ import print_function +from __future__ import print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2014, spswerling' ''' @@ -162,7 +162,7 @@ class AlMonitor(BasicNewsRecipe): def scrape_article_date(self, soup): for span in soup.findAll('span'): txt = self.text(span) - rgx = re.compile(unicode(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*')) + rgx = re.compile(r'Posted ([a-zA-Z]+ \d\d?, \d\d\d\d).*') hit = rgx.match(txt) if hit: return self.date_from_string(txt) diff --git a/recipes/ambito_financiero.recipe b/recipes/ambito_financiero.recipe index 5718401e8f..e7605163b7 100644 --- a/recipes/ambito_financiero.recipe +++ b/recipes/ambito_financiero.recipe @@ -126,7 +126,7 @@ class Ambito_Financiero(BasicNewsRecipe): cfind = smallsoup.find('div', id="contenido_data") if cfind: p.append(cfind) - return unicode(soup) + return type(u'')(soup) return raw_html def cleanup(self): diff --git a/recipes/american_thinker.recipe b/recipes/american_thinker.recipe index ef022451c5..2917f2f322 100644 --- a/recipes/american_thinker.recipe +++ b/recipes/american_thinker.recipe @@ -34,7 +34,7 @@ class AmericanThinker(BasicNewsRecipe): namespaceHTMLElements=False) for x in root.xpath('''descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), ' article_body ') and (@class and contains(concat(' ', normalize-space(@class), ' '), ' bottom '))]'''): # noqa x.getparent().remove(x) - return etree.tostring(root, encoding=unicode) + return etree.tostring(root, encoding='unicode') feeds = [(u'http://feeds.feedburner.com/americanthinker'), (u'http://feeds.feedburner.com/AmericanThinkerBlog') diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe index ebf7c33330..eaf284ab0e 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -161,7 +161,7 @@ class AppleDaily(BasicNewsRecipe): article_titles.append(force_unicode(a.title, 'utf-8')) mi.comments = self.description - if not isinstance(mi.comments, unicode): + if not isinstance(mi.comments, type(u'')): mi.comments = mi.comments.decode('utf-8', 'replace') mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' + '\n\n'.join(article_titles)) @@ -272,7 +272,7 @@ class AppleDaily(BasicNewsRecipe): elem = BeautifulSoup(translatedTempl).find('div') body.insert(len(body.contents), elem) with open(last, 'wb') as fi: - fi.write(unicode(soup).encode('utf-8')) + fi.write(type(u'')(soup).encode('utf-8')) if len(feeds) == 0: raise Exception('All feeds are empty, aborting.') diff --git a/recipes/appledaily_tw.recipe b/recipes/appledaily_tw.recipe index 700ee4c7f1..6964ed0191 100644 --- a/recipes/appledaily_tw.recipe +++ b/recipes/appledaily_tw.recipe @@ -104,7 +104,7 @@ class AppledailyTW(BasicNewsRecipe): ] def preprocess_raw_html(self, raw_html, url): - raw_html = re.sub(unicode(r'
.*?<\/a>'), '', raw_html) raw_html = re.sub( - unicode(r'(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html) + (r'<title>(.*?)[\\s]+\|.*<\/title>', r'<title>\1<\/title>'), raw_html) return raw_html diff --git a/recipes/berlin_policy_journal.recipe b/recipes/berlin_policy_journal.recipe index 9e832c90a5..b820a9f00c 100644 --- a/recipes/berlin_policy_journal.recipe +++ b/recipes/berlin_policy_journal.recipe @@ -85,7 +85,7 @@ class BerlinPolicyJournal(BasicNewsRecipe): div.find('h3', {'class': 'entry-title'}).a) article_url = div.find( 'h3', {'class': 'entry-title'}).a['href'] - article_date = unicode(time.strftime( + article_date = type(u'')(time.strftime( ' [%a, %d %b %H:%M]', timestamp)) article_desc = self.tag_to_string( div.find('div', {'class': 'i-summary'}).p) diff --git a/recipes/big_oven.recipe b/recipes/big_oven.recipe index 4e1d1b48a3..cc0ebdb543 100644 --- a/recipes/big_oven.recipe +++ b/recipes/big_oven.recipe @@ -47,7 +47,7 @@ class BigOven(BasicNewsRecipe): preprocess_regexps = [ (re.compile(r'Want detailed nutrition information?', re.DOTALL), lambda match: ''), - (re.compile('\(You could win \$100 in our ', re.DOTALL), lambda match: ''), + (re.compile(r'\(You could win \$100 in our ', re.DOTALL), lambda match: ''), ] def preprocess_html(self, soup): diff --git a/recipes/birmingham_evening_mail.recipe b/recipes/birmingham_evening_mail.recipe index b7cef45085..98d4fb38cf 100644 --- a/recipes/birmingham_evening_mail.recipe +++ b/recipes/birmingham_evening_mail.recipe @@ -60,7 +60,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): 'http://images.icnetwork.co.uk/upl/birm')}) cov = str(cov) cov2 = re.findall( - 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) + r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) cov = str(cov2) cov = cov[2:len(cov) - 2] diff --git a/recipes/camera_di_commercio_di_bari.recipe b/recipes/camera_di_commercio_di_bari.recipe index 343291b749..f81bbea2a3 100644 --- a/recipes/camera_di_commercio_di_bari.recipe +++ b/recipes/camera_di_commercio_di_bari.recipe @@ -13,6 +13,7 @@ class AdvancedUserRecipe1331729727(BasicNewsRecipe): feeds = [(u'Camera di Commercio di Bari', u'http://feed43.com/4715147488845101.xml')] + __license__ = 'GPL v3' __copyright__ = '2012, faber1971' __version__ = 'v1.00' diff --git a/recipes/cdrinfo_pl.recipe b/recipes/cdrinfo_pl.recipe index 23b5178e7d..9fca8ce8af 100644 --- a/recipes/cdrinfo_pl.recipe +++ b/recipes/cdrinfo_pl.recipe @@ -22,7 +22,7 @@ class cdrinfo(BasicNewsRecipe): remove_empty_feeds = True remove_javascript = True remove_attributes = ['style', 'onmouseover'] - preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com</a>\.</p>', re.DOTALL), lambda match: ''), + preprocess_regexps = [(re.compile(u'<p[^>]*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\\.gravatar\\.com</a>\\.</p>', re.DOTALL), lambda match: ''), (re.compile(u'<p[^>]*?>.{,2}</p>', re.DOTALL), lambda match: '')] ignore_duplicate_articles = {'title', 'url'} diff --git a/recipes/cnetjapan.recipe b/recipes/cnetjapan.recipe index fa4494fc99..20d12fa56d 100644 --- a/recipes/cnetjapan.recipe +++ b/recipes/cnetjapan.recipe @@ -16,11 +16,11 @@ class CNetJapan(BasicNewsRecipe): remove_javascript = True preprocess_regexps = [ - (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE), + (re.compile(type(u'')(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), 
re.DOTALL | re.IGNORECASE | re.UNICODE), lambda match: '</body>'), (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE), lambda match: '</body>'), - (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE), + (re.compile(type(u'')(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE), lambda match: '<!-- removed -->'), ] diff --git a/recipes/cnetjapan_digital.recipe b/recipes/cnetjapan_digital.recipe index cb16741a5d..8a92d83369 100644 --- a/recipes/cnetjapan_digital.recipe +++ b/recipes/cnetjapan_digital.recipe @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import re from calibre.web.feeds.news import BasicNewsRecipe @@ -14,11 +15,11 @@ class CNetJapanDigital(BasicNewsRecipe): remove_javascript = True preprocess_regexps = [ - (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE), + (re.compile((r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE), lambda match: '</body>'), (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE), lambda match: '</body>'), - (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE), + (re.compile((r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE), lambda match: '<!-- removed -->'), ] diff --git a/recipes/cnetjapan_release.recipe b/recipes/cnetjapan_release.recipe index a21d69e43b..200749ed4e 100644 --- a/recipes/cnetjapan_release.recipe +++ b/recipes/cnetjapan_release.recipe @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import re from calibre.web.feeds.news import BasicNewsRecipe @@ -14,11 +15,11 @@ class CNetJapanRelease(BasicNewsRecipe): remove_javascript = True preprocess_regexps = [ - (re.compile(unicode(r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE), + (re.compile((r'<!--\u25B2contents_left END\u25B2-->.*</body>'), re.DOTALL | re.IGNORECASE | re.UNICODE), lambda match: '</body>'), (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE), lambda match: '</body>'), - (re.compile(unicode(r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE), + (re.compile((r'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->'), re.UNICODE), lambda match: '<!-- removed -->'), ] diff --git a/recipes/daily_mirror.recipe b/recipes/daily_mirror.recipe index a174d15652..812824cdf9 100644 --- a/recipes/daily_mirror.recipe +++ b/recipes/daily_mirror.recipe @@ -82,7 +82,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe): cov = soup.find(attrs={'id': 'large'}) cov = str(cov) cov2 = re.findall( - 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) + r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', cov) cov2 = str(cov2) cov2 = cov2[2:len(cov2) - 2] # cov2 now is pic url, now go back to original function diff --git a/recipes/democracy_journal.recipe b/recipes/democracy_journal.recipe index a40e0657a5..77d891124a 100644 --- a/recipes/democracy_journal.recipe +++ b/recipes/democracy_journal.recipe @@ -16,7 +16,7 @@ class AdvancedUserRecipe1361743898(BasicNewsRecipe): articles = [] feeds = [] soup = self.index_to_soup("http://www.democracyjournal.org") - for x in soup.findAll(href=re.compile("http://www\.democracyjournal\.org/\d*/.*php$")): + for x in 
soup.findAll(href=re.compile(r"http://www\.democracyjournal\.org/\d*/.*php$")): url = x.get('href') title = self.tag_to_string(x) articles.append({'title': title, 'url': url, diff --git a/recipes/detroit_news.recipe b/recipes/detroit_news.recipe index ed6803b6bf..5a15f7a14f 100644 --- a/recipes/detroit_news.recipe +++ b/recipes/detroit_news.recipe @@ -69,6 +69,6 @@ class AdvancedUserRecipe1297291961(BasicNewsRecipe): ] def print_version(self, url): - p = re.compile('(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*') + p = re.compile(r'(/\d{4}|/-1)/(rss|ENT|LIFESTYLE|OPINION|METRO)\d*') m = p.search(url) return url.replace(m.group(), '&template=printart') diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index c97d9c0e8f..9b24e27104 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -19,7 +19,7 @@ class Dobreprogramy_pl(BasicNewsRecipe): max_articles_per_feed = 100 remove_attrs = ['style', 'width', 'height'] preprocess_regexps = [(re.compile( - unicode(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')] + type(u'')(r'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>')), lambda match: '')] keep_only_tags = [dict(name='h1'), dict( attrs={'class': ['entry single']}), dict(id='phContent_divArticle')] remove_tags = [dict(attrs={'class': ['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master', 'social nested-grid grid-margin-px15-top clearfix no-mobile', 'page-info text-h4 font-heading grid-margin-px15-top color-annotation clearfix', 'series grid-margin-px30-top']}), dict(id='komentarze'), dict(id='phContent_ctl02_sBreadcrumb'), dict(name='iframe')] # noqa diff --git a/recipes/dunyahalleri_haftaninozeti.recipe b/recipes/dunyahalleri_haftaninozeti.recipe index 5aa9f662d4..6269e34924 100644 --- a/recipes/dunyahalleri_haftaninozeti.recipe +++ b/recipes/dunyahalleri_haftaninozeti.recipe @@ -92,7 +92,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): if raw: return _raw - if not isinstance(_raw, unicode) and self.encoding: + if not isinstance(_raw, type(u'')) and self.encoding: if callable(self.encoding): _raw = self.encoding(_raw) else: @@ -101,7 +101,7 @@ class DunyaHalleri_HaftaninOzeti(BasicNewsRecipe): from calibre.ebooks.chardet import strip_encoding_declarations, xml_to_unicode from calibre.utils.cleantext import clean_xml_chars - if isinstance(_raw, unicode): + if isinstance(_raw, type(u'')): _raw = strip_encoding_declarations(_raw) else: _raw = xml_to_unicode( diff --git a/recipes/ecogeek.recipe b/recipes/ecogeek.recipe index ce7b4448c8..64211ed483 100644 --- a/recipes/ecogeek.recipe +++ b/recipes/ecogeek.recipe @@ -26,7 +26,7 @@ class EcoGeek(BasicNewsRecipe): for i, article in enumerate(soup.findAll('div', attrs={'class': 'article'})): fname = os.path.join(tdir, '%d.html' % i) with open(fname, 'wb') as f: - f.write(unicode(article).encode('utf-8')) + f.write(type(u'')(article).encode('utf-8')) articles.append({ 'title': self.tag_to_string(article.find('h2')), 'url': 'file://' + fname.replace(os.sep, '/'), diff --git a/recipes/economist.recipe b/recipes/economist.recipe index 0dd9272f18..7fc026de77 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -154,7 +154,7 @@ class Economist(BasicNewsRecipe): p.remove(noscript[0]) for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'): x.getparent().remove(x) - raw = 
etree.tostring(root, encoding=unicode) + raw = etree.tostring(root, encoding='unicode') return raw def populate_article_metadata(self, article, soup, first): @@ -165,7 +165,7 @@ class Economist(BasicNewsRecipe): if el is not None and el.contents: for descendant in el.contents: if isinstance(descendant, NavigableString): - result.append(unicode(descendant)) + result.append(type(u'')(descendant)) article.summary = u'. '.join(result) + u'.' article.text_summary = clean_ascii_chars(article.summary) diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index 0dd9272f18..7fc026de77 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -154,7 +154,7 @@ class Economist(BasicNewsRecipe): p.remove(noscript[0]) for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'): x.getparent().remove(x) - raw = etree.tostring(root, encoding=unicode) + raw = etree.tostring(root, encoding='unicode') return raw def populate_article_metadata(self, article, soup, first): @@ -165,7 +165,7 @@ class Economist(BasicNewsRecipe): if el is not None and el.contents: for descendant in el.contents: if isinstance(descendant, NavigableString): - result.append(unicode(descendant)) + result.append(type(u'')(descendant)) article.summary = u'. '.join(result) + u'.' article.text_summary = clean_ascii_chars(article.summary) diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index effd4e92cd..ef71e548d2 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -23,7 +23,7 @@ class FilmWebPl(BasicNewsRecipe): 'ul.inline {padding:0px;} .vertical-align {display: inline-block;}') preprocess_regexps = [(re.compile(r'<body.+?</head>', re.DOTALL), lambda match: ''), # fix malformed HTML with 2 body tags... (re.compile(u'(?:<sup>)?\\(kliknij\\,\\ aby powiększyć\\)(?:</sup>)?', re.IGNORECASE), lambda m: ''), - (re.compile(unicode(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />') + (re.compile(type(u'')(r'(<br ?/?>\s*?<br ?/?>\s*?)+'), re.IGNORECASE), lambda m: '<br />') ] remove_tags = [dict(attrs={'class':['infoParent', 'likeBar', 'droptions-box pull-right', 'photoDesc', 'imageLicense', 'play big', 'shadow embed__icon--svg']})] diff --git a/recipes/first_things.recipe b/recipes/first_things.recipe index 963d4d1a8e..f3d8282339 100644 --- a/recipes/first_things.recipe +++ b/recipes/first_things.recipe @@ -33,7 +33,7 @@ class FirstThings(BasicNewsRecipe): ''' def preprocess_raw_html(self, raw, url): - return html.tostring(html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', encoding=unicode) + return html.tostring(html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', encoding='unicode') def parse_index(self): soup = self.index_to_soup(self.INDEX) diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe index a9b209938d..a530aa09e0 100644 --- a/recipes/gazeta_wyborcza.recipe +++ b/recipes/gazeta_wyborcza.recipe @@ -32,7 +32,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe): # rules for wyborcza.biz preprocess_regexps.append((re.compile( - u'(<br>)?(<br>)? Czytaj (także|też):.*?</a>\.?<br>', re.DOTALL), lambda m: '')) + u'(<br>)?(<br>)? 
Czytaj (także|też):.*?</a>\\.?<br>', re.DOTALL), lambda m: '')) feeds = [(u'Kraj', u'http://rss.feedsportal.com/c/32739/f/530266/index.rss'), (u'\u015awiat', u'http://rss.feedsportal.com/c/32739/f/530270/index.rss'), diff --git a/recipes/good_to_know.recipe b/recipes/good_to_know.recipe index 06b72a3b6d..cd35d03db3 100644 --- a/recipes/good_to_know.recipe +++ b/recipes/good_to_know.recipe @@ -11,7 +11,7 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe): __author__ = 'Anonymous' language = 'en_GB' remove_tags = [ - dict(name='div', attrs={'class': 'articles_footer', 'class': 'printoptions'})] + dict(name='div', attrs={'class': ['articles_footer', 'printoptions']})] def print_version(self, url): return url + '/print/1' diff --git a/recipes/granta.recipe b/recipes/granta.recipe index bd3d5d8b5c..0f3064d7b4 100644 --- a/recipes/granta.recipe +++ b/recipes/granta.recipe @@ -49,9 +49,9 @@ def solve_captcha(captcha): # Parse into parts pattern = re.compile( u'(?P<first_component>[0-9]+)?' - u'\s*(?P<operator>[+×−])\s*' + u'\\s*(?P<operator>[+×−])\\s*' u'(?P<second_component>[0-9]+)' - u'\s*(=)\s*' + u'\\s*(=)\\s*' u'(?P<result>[0-9]+)?', re.UNICODE) calculationParts = re.search(pattern, numeric_problem) @@ -230,7 +230,7 @@ class Granta(BasicNewsRecipe): if image is not None and image.attrs is not None: style = dict(image.attrs)['style'] if style is not None: - m = re.search('url\(([^\)]*)\)', style) + m = re.search(r'url\(([^\)]*)\)', style) if m.group(1) is not None: stripstyle(image) image.name = 'img' diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index e808908c92..972be4be1d 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -67,7 +67,7 @@ class Guardian(BasicNewsRecipe): def preprocess_raw_html(self, raw, url): import html5lib from lxml import html - return html.tostring(html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml'), encoding=unicode) + return html.tostring(html5lib.parse(raw, namespaceHTMLElements=False, treebuilder='lxml'), encoding='unicode') def preprocess_html(self, soup): for img in soup.findAll('img', srcset=True): diff --git a/recipes/heavy_metal_it.recipe b/recipes/heavy_metal_it.recipe index 89de6c998e..bd23923966 100644 --- a/recipes/heavy_metal_it.recipe +++ b/recipes/heavy_metal_it.recipe @@ -18,5 +18,6 @@ class AdvancedUserRecipe1336289226(BasicNewsRecipe): __author__ = 'faber1971' language = 'it' + __version__ = 'v1.0' __date__ = '6, May 2012' diff --git a/recipes/houston_chronicle.recipe b/recipes/houston_chronicle.recipe index 9808e9cd51..2d75395d0d 100644 --- a/recipes/houston_chronicle.recipe +++ b/recipes/houston_chronicle.recipe @@ -160,7 +160,7 @@ class HoustonChronicle(BasicNewsRecipe): result = [] for descendant in el.contents: if isinstance(descendant, NavigableString): - result.append(unicode(descendant).strip()) + result.append(type(u'')(descendant).strip()) all_text = u' '.join(result).encode('utf-8') if len(all_text) > 1: sentences = re.findall(sentence_regex, all_text) diff --git a/recipes/jazzpress.recipe b/recipes/jazzpress.recipe index 50f4b46793..9d89a8dd98 100644 --- a/recipes/jazzpress.recipe +++ b/recipes/jazzpress.recipe @@ -33,7 +33,7 @@ class jazzpress(BasicNewsRecipe): # find the link epublink = browser.find_link( - url_regex=re.compile('e_jazzpress\d\d\d\d\_epub')) + url_regex=re.compile(r'e_jazzpress\d\d\d\d\_epub')) # download ebook self.report_progress(0, _('Downloading ePUB')) diff --git a/recipes/juve_la_stampa.recipe b/recipes/juve_la_stampa.recipe index db455e3ef8..20a047c65f 
100644 --- a/recipes/juve_la_stampa.recipe +++ b/recipes/juve_la_stampa.recipe @@ -15,5 +15,6 @@ class AdvancedUserRecipe1336504510(BasicNewsRecipe): description = 'News about Juventus from La Stampa' __author__ = 'faber1971' + __version__ = 'v1.0' __date__ = '8, May 2012' diff --git a/recipes/lega_nerd.recipe b/recipes/lega_nerd.recipe index 345f4f6107..b6fadeb76c 100644 --- a/recipes/lega_nerd.recipe +++ b/recipes/lega_nerd.recipe @@ -11,5 +11,7 @@ class AdvancedUserRecipe1326135232(BasicNewsRecipe): feeds = [(u'Lega Nerd', u'http://feeds.feedburner.com/LegaNerd')] __author__ = 'faber1971' + + __version__ = 'v1.0' __date__ = '9, January 2011' diff --git a/recipes/list_apart.recipe b/recipes/list_apart.recipe index f5c36f762d..f78e87879e 100644 --- a/recipes/list_apart.recipe +++ b/recipes/list_apart.recipe @@ -31,7 +31,7 @@ class AListApart (BasicNewsRecipe): ] def image_url_processor(self, baseurl, url): - if re.findall('alistapart\.com', url): + if re.findall(r'alistapart\.com', url): return 'http:' + url else: return url diff --git a/recipes/ming_pao.recipe b/recipes/ming_pao.recipe index ecf8420200..fb6e402390 100644 --- a/recipes/ming_pao.recipe +++ b/recipes/ming_pao.recipe @@ -1147,7 +1147,7 @@ class MPRecipe(BasicNewsRecipe): doctype='xhtml').decode('utf-8')).find('div') body.insert(len(body.contents), elem) with open(last, 'wb') as fi: - fi.write(unicode(soup).encode('utf-8')) + fi.write(type(u'')(soup).encode('utf-8')) if len(feeds) == 0: raise Exception('All feeds are empty, aborting.') diff --git a/recipes/ming_pao_toronto.recipe b/recipes/ming_pao_toronto.recipe index fe0a794010..f73c2a72f2 100644 --- a/recipes/ming_pao_toronto.recipe +++ b/recipes/ming_pao_toronto.recipe @@ -985,7 +985,7 @@ class MPRecipe(BasicNewsRecipe): doctype='xhtml').decode('utf-8')).find('div') body.insert(len(body.contents), elem) with open(last, 'wb') as fi: - fi.write(unicode(soup).encode('utf-8')) + fi.write(type(u'')(soup).encode('utf-8')) if len(feeds) == 0: raise Exception('All feeds are empty, aborting.') diff --git a/recipes/ming_pao_vancouver.recipe b/recipes/ming_pao_vancouver.recipe index 8060b37024..b1c5c097f9 100644 --- a/recipes/ming_pao_vancouver.recipe +++ b/recipes/ming_pao_vancouver.recipe @@ -985,7 +985,7 @@ class MPRecipe(BasicNewsRecipe): doctype='xhtml').decode('utf-8')).find('div') body.insert(len(body.contents), elem) with open(last, 'wb') as fi: - fi.write(unicode(soup).encode('utf-8')) + fi.write(type(u'')(soup).encode('utf-8')) if len(feeds) == 0: raise Exception('All feeds are empty, aborting.') diff --git a/recipes/modoros.recipe b/recipes/modoros.recipe index 608afcef28..c95738f9b2 100644 --- a/recipes/modoros.recipe +++ b/recipes/modoros.recipe @@ -70,7 +70,7 @@ class ModorosBlogHu(BasicNewsRecipe): past_items = set() if os.path.exists(feed_fn): - with file(feed_fn) as f: + with open(feed_fn) as f: for h in f: past_items.add(h.strip()) @@ -87,7 +87,7 @@ class ModorosBlogHu(BasicNewsRecipe): cur_items.add(item_hash) if item_hash in past_items: feed.articles.remove(article) - with file(feed_fn, 'w') as f: + with open(feed_fn, 'w') as f: for h in cur_items: f.write(h + '\n') diff --git a/recipes/money_pl.recipe b/recipes/money_pl.recipe index fbb00d197a..bc0275ef8e 100644 --- a/recipes/money_pl.recipe +++ b/recipes/money_pl.recipe @@ -24,7 +24,7 @@ class FocusRecipe(BasicNewsRecipe): simultaneous_downloads = 2 r = re.compile( - '.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*') + 
r'.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*') keep_only_tags = [] keep_only_tags.append(dict(name='div', attrs={'class': 'artykul'})) remove_tags = [dict(name='ul', attrs={'class': 'socialStuff'})] diff --git a/recipes/navegalo.recipe b/recipes/navegalo.recipe index 0b95ee210c..c93731ac5f 100644 --- a/recipes/navegalo.recipe +++ b/recipes/navegalo.recipe @@ -7,6 +7,7 @@ class AdvancedUserRecipe1360354988(BasicNewsRecipe): max_articles_per_feed = 100 auto_cleanup = True + from calibre.web.feeds.news import BasicNewsRecipe diff --git a/recipes/newsweek_polska.recipe b/recipes/newsweek_polska.recipe index eda4146c4d..6834efe97b 100644 --- a/recipes/newsweek_polska.recipe +++ b/recipes/newsweek_polska.recipe @@ -93,7 +93,7 @@ class Newsweek(BasicNewsRecipe): strs.append("".join(str(content))) # return contents as a string - return unicode("".join(strs)) + return u"".join(strs) # # Articles can be divided into several pages, this method parses them recursevely @@ -113,7 +113,7 @@ class Newsweek(BasicNewsRecipe): if page == 0: title = main_section.find('h1') - html = html + unicode(title) + html = html + type(u'')(title) authors = '' authorBox = main_section.find('div', attrs={'class': 'AuthorBox'}) @@ -121,10 +121,10 @@ class Newsweek(BasicNewsRecipe): authorH4 = authorBox.find('h4') if authorH4 is not None: authors = self.tag_to_string(authorH4) - html = html + unicode(authors) + html = html + type(u'')(authors) info = main_section.find('p', attrs={'class': 'lead'}) - html = html + unicode(info) + html = html + type(u'')(info) html = html + self.get_article_divs( '3917dc34e07c9c7180df2ea9ef103361845c8af42b71f51b960059226090a1ac articleStart', main_section) diff --git a/recipes/non_leggerlo.recipe b/recipes/non_leggerlo.recipe index 9f512baef0..af23efb58a 100644 --- a/recipes/non_leggerlo.recipe +++ b/recipes/non_leggerlo.recipe @@ -14,5 +14,7 @@ class AdvancedUserRecipe1335362999(BasicNewsRecipe): description = 'An Italian satirical blog' language = 'it' __author__ = 'faber1971' + + __version__ = 'v1.0' __date__ = '24, April 2012' diff --git a/recipes/office_space.recipe b/recipes/office_space.recipe index 4a0977809e..ccaad95fc6 100644 --- a/recipes/office_space.recipe +++ b/recipes/office_space.recipe @@ -94,7 +94,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe): past_items = set() if os.path.exists(feed_fn): - with file(feed_fn) as f: + with open(feed_fn) as f: for h in f: past_items.add(h.strip()) @@ -111,7 +111,7 @@ class OfficeSpaceBlogHu(BasicNewsRecipe): cur_items.add(item_hash) if item_hash in past_items: feed.articles.remove(article) - with file(feed_fn, 'w') as f: + with open(feed_fn, 'w') as f: for h in cur_items: f.write(h + '\n') diff --git a/recipes/oreilly_premium.recipe b/recipes/oreilly_premium.recipe index 9649274fd0..cac531b20e 100644 --- a/recipes/oreilly_premium.recipe +++ b/recipes/oreilly_premium.recipe @@ -188,7 +188,7 @@ class OReillyPremium(BasicNewsRecipe): # feeds = self.parse_feeds() # Now add regular feeds. 
feedsRSS = self.parse_feeds() - print ("feedsRSS is type " + feedsRSS.__class__.__name__) + print("feedsRSS is type " + feedsRSS.__class__.__name__) for articles in feedsRSS: print("articles is type " + articles.__class__.__name__) diff --git a/recipes/ourdailybread.recipe b/recipes/ourdailybread.recipe index 762a2df1c3..e07cc03b8f 100644 --- a/recipes/ourdailybread.recipe +++ b/recipes/ourdailybread.recipe @@ -65,7 +65,7 @@ class OurDailyBread(BasicNewsRecipe): hr = div.makeelement('hr') div.insert(0, hr) # print html.tostring(div) - raw = html.tostring(root, encoding=unicode) + raw = html.tostring(root, encoding='unicode') return raw def preprocess_html(self, soup): diff --git a/recipes/pagina_12_print_ed.recipe b/recipes/pagina_12_print_ed.recipe index 0c1f38bc5b..628c1962fd 100644 --- a/recipes/pagina_12_print_ed.recipe +++ b/recipes/pagina_12_print_ed.recipe @@ -58,7 +58,7 @@ class Pagina12(BasicNewsRecipe): seen_titles = set([]) for section in soup.findAll('div', 'seccionx'): numero += 1 - print (numero) + print(numero) section_title = self.tag_to_string(section.find( 'div', 'desplegable_titulo on_principal right')) self.log('Found section:', section_title) diff --git a/recipes/pambianco.recipe b/recipes/pambianco.recipe index aa76c5cb46..6cd93d9a5a 100644 --- a/recipes/pambianco.recipe +++ b/recipes/pambianco.recipe @@ -11,5 +11,7 @@ class AdvancedUserRecipe1326135591(BasicNewsRecipe): feeds = [(u'Pambianco', u'http://feeds.feedburner.com/pambianconews/YGXu')] __author__ = 'faber1971' + + __version__ = 'v1.0' __date__ = '9, January 2011' diff --git a/recipes/popscience.recipe b/recipes/popscience.recipe index 817da2f6c3..5e05de66c6 100644 --- a/recipes/popscience.recipe +++ b/recipes/popscience.recipe @@ -28,7 +28,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): ('Eastern Arsenal', 'https://www.popsci.com/rss-eastern-arsenal.xml'), ] - pane_node_body = re.compile('pane-node-(?:\w+-){0,9}body') + pane_node_body = re.compile('pane-node-(?:\\w+-){0,9}body') keep_only_tags = [ dict(attrs={'class': lambda x: x and frozenset('pane-node-header'.split()).issubset(frozenset(x.split()))}), diff --git a/recipes/readitlater.recipe b/recipes/readitlater.recipe index c7462be7ff..ae3f608801 100644 --- a/recipes/readitlater.recipe +++ b/recipes/readitlater.recipe @@ -205,7 +205,7 @@ class Pocket(BasicNewsRecipe): """ try: from calibre.ebooks.covers import calibre_cover2 - title = self.title if isinstance(self.title, unicode) else \ + title = self.title if isinstance(self.title, type(u'')) else \ self.title.decode('utf-8', 'replace') date = strftime(self.timefmt) time = strftime('[%I:%M %p]') diff --git a/recipes/rebelion.recipe b/recipes/rebelion.recipe index 620b787f25..8a18c55b15 100644 --- a/recipes/rebelion.recipe +++ b/recipes/rebelion.recipe @@ -32,5 +32,5 @@ class RebelionRecipe (BasicNewsRecipe): # See http://www.mobileread.com/forums/showthread.php?t=174501 def print_version(self, url): - id = re.compile('\d*$').search(url).group() + id = re.compile(r'\d*$').search(url).group() return u'http://www.rebelion.org/noticia.php?id=%s' % id diff --git a/recipes/respekt_magazine.recipe b/recipes/respekt_magazine.recipe index d852472c0b..fcad26fc79 100644 --- a/recipes/respekt_magazine.recipe +++ b/recipes/respekt_magazine.recipe @@ -107,7 +107,7 @@ class respektRecipe(BasicNewsRecipe): self.browser.open('https://www.respekt.cz/?do=logout') def preprocess_html(self,soup): - raw = u''.join(unicode(a) for a in soup.contents) + raw = u''.join(type(u'')(a) for a in soup.contents) root = 
lxml.html.fromstring(raw) # Fix Letem světem if "Letem sv" in root.xpath("//title")[0].text: @@ -169,4 +169,4 @@ class respektRecipe(BasicNewsRecipe): o.getparent().replace(o,e) except: pass - return(BeautifulSoup(lxml.etree.tostring(root,encoding=unicode))) + return(BeautifulSoup(lxml.etree.tostring(root,encoding='unicode'))) diff --git a/recipes/revista_muy.recipe b/recipes/revista_muy.recipe index ed5b087861..f9936686b1 100644 --- a/recipes/revista_muy.recipe +++ b/recipes/revista_muy.recipe @@ -31,8 +31,8 @@ class RevistaMuyInteresante(BasicNewsRecipe): for img_tag in soup.findAll('img'): imagen = img_tag - new_tag = new_tag(soup, 'p') - img_tag.replaceWith(new_tag) + nt = new_tag(soup, 'p') + img_tag.replaceWith(nt) div = soup.find(attrs={'class': 'article_category'}) div.insert(0, imagen) break diff --git a/recipes/singtaohk.recipe b/recipes/singtaohk.recipe index aecfb06258..ef779fb81d 100644 --- a/recipes/singtaohk.recipe +++ b/recipes/singtaohk.recipe @@ -497,7 +497,7 @@ class STHKRecipe(BasicNewsRecipe): doctype='xhtml').decode('utf-8')).find('div') body.insert(len(body.contents), elem) with open(last, 'wb') as fi: - fi.write(unicode(soup).encode('utf-8')) + fi.write(type(u'')(soup).encode('utf-8')) if len(feeds) == 0: raise Exception('All feeds are empty, aborting.') diff --git a/recipes/sol_haber.recipe b/recipes/sol_haber.recipe index c06def6521..67bd040721 100644 --- a/recipes/sol_haber.recipe +++ b/recipes/sol_haber.recipe @@ -59,9 +59,9 @@ class SolHaberRecipe(BasicNewsRecipe): cover_margins = (20, 20, '#ffffff') - storybody_reg_exp = '^\s*(haber|kose)\s*$' + storybody_reg_exp = r'^\s*(haber|kose)\s*$' - comments_reg_exp = '^\s*makale-elestiri\s*$' + comments_reg_exp = r'^\s*makale-elestiri\s*$' remove_tags = [ dict(name='div', attrs={'class': re.compile(comments_reg_exp, re.IGNORECASE)})] diff --git a/recipes/tanuki.recipe b/recipes/tanuki.recipe index cf85fbdd83..c2ca568760 100644 --- a/recipes/tanuki.recipe +++ b/recipes/tanuki.recipe @@ -14,7 +14,7 @@ class tanuki(BasicNewsRecipe): autocleanup = True extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .kadr{float: left;} .dwazdania {float: right;}' preprocess_regexps = [(re.compile(u'<h3><a class="screen".*?</h3>', re.DOTALL), lambda match: ''), (re.compile( - unicode(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')] + type(u'')(r'<div><a href="/strony/((manga)|(anime))/[0-9]+?/oceny(\-redakcji){0,1}">Zobacz jak ocenili</a></div>'), re.DOTALL), lambda match: '')] remove_empty_feeds = True no_stylesheets = True keep_only_tags = [dict(attrs={'class': ['animename', 'storyname', 'nextarrow', 'sideinfov', 'sidelinfov', 'sideinfo', 'sidelinfo']}), dict(name='table', attrs={ 'summary': 'Technikalia'}), dict(attrs={'class': ['chaptername', 'copycat']}), dict(id='rightcolumn'), dict(attrs={'class': ['headn_tt', 'subtable']})] # noqa diff --git a/recipes/the_age.recipe b/recipes/the_age.recipe index 3728fd4d1a..c987274892 100644 --- a/recipes/the_age.recipe +++ b/recipes/the_age.recipe @@ -99,7 +99,7 @@ class TheAge(BasicNewsRecipe): # Collapse the paragraph by joining the non-tag contents - contents = [i for i in p.contents if isinstance(i, unicode)] + contents = [i for i in p.contents if isinstance(i, type(u''))] if len(contents): contents = ''.join(contents) diff --git a/recipes/thestar.recipe b/recipes/thestar.recipe index 64de75cde2..e08ca0b830 100644 --- a/recipes/thestar.recipe +++ b/recipes/thestar.recipe @@ -9,6 +9,7 @@ def 
classes(classes): q = frozenset(classes.split(' ')) return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}) + from calibre.web.feeds.news import BasicNewsRecipe diff --git a/recipes/times_online.recipe b/recipes/times_online.recipe index 720f5ac9dc..76bf09d467 100644 --- a/recipes/times_online.recipe +++ b/recipes/times_online.recipe @@ -108,7 +108,7 @@ class TimesOnline(BasicNewsRecipe): return html.tostring( html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', - encoding=unicode) + encoding='unicode') def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/vignette.recipe b/recipes/vignette.recipe index fb62d2b2d5..1b996cd1ea 100644 --- a/recipes/vignette.recipe +++ b/recipes/vignette.recipe @@ -17,5 +17,6 @@ class AdvancedUserRecipe1334935485(BasicNewsRecipe): language = 'it' __author__ = 'faber1971' + __version__ = 'v1.0' __date__ = '24, April 2012' diff --git a/recipes/zaobao.recipe b/recipes/zaobao.recipe index 4d674c4aaf..8ab6acbe4d 100644 --- a/recipes/zaobao.recipe +++ b/recipes/zaobao.recipe @@ -137,7 +137,7 @@ class ZAOBAO(BasicNewsRecipe): # workaorund a strange problem: Somethimes the xml encoding is not # apllied correctly by parse() weired_encoding_detected = False - if not isinstance(feed.description, unicode) and self.encoding and feed.description: + if not isinstance(feed.description, type(u'')) and self.encoding and feed.description: self.log( _('Feed %s is not encoded correctly, manually replace it') % (feed.title)) feed.description = feed.description.decode( @@ -150,14 +150,14 @@ class ZAOBAO(BasicNewsRecipe): weired_encoding_detected = True for a, article in enumerate(feed): - if not isinstance(article.title, unicode) and self.encoding: + if not isinstance(article.title, type(u'')) and self.encoding: article.title = article.title.decode( self.encoding, 'replace') - if not isinstance(article.summary, unicode) and self.encoding and article.summary: + if not isinstance(article.summary, type(u'')) and self.encoding and article.summary: article.summary = article.summary.decode( self.encoding, 'replace') article.text_summary = article.summary - if not isinstance(article.text_summary, unicode) and self.encoding and article.text_summary: + if not isinstance(article.text_summary, type(u'')) and self.encoding and article.text_summary: article.text_summary = article.text_summary.decode( self.encoding, 'replace') article.summary = article.text_summary diff --git a/setup/check.py b/setup/check.py index fe7cd845bb..96a0be5ae1 100644 --- a/setup/check.py +++ b/setup/check.py @@ -77,12 +77,14 @@ class Check(Command): def file_has_errors(self, f): ext = os.path.splitext(f)[1] if ext in {'.py', '.recipe'}: - p = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', f]) - return p.wait() != 0 - elif ext == '.pyj': + p1 = subprocess.Popen(['flake8-python2', '--filename', '*.py,*.recipe', f]) + p2 = subprocess.Popen(['flake8', '--filename', '*.py,*.recipe', f]) + codes = p1.wait(), p2.wait() + return codes != (0, 0) + if ext == '.pyj': p = subprocess.Popen(['rapydscript', 'lint', f]) return p.wait() != 0 - elif ext == '.yaml': + if ext == '.yaml': sys.path.insert(0, self.wn_path) import whats_new whats_new.render_changelog(self.j(self.d(self.SRC), 'Changelog.yaml')) diff --git a/src/calibre/customize/zipplugin.py b/src/calibre/customize/zipplugin.py index 91a567c43c..3a8b928346 100644 --- a/src/calibre/customize/zipplugin.py +++ b/src/calibre/customize/zipplugin.py @@ -16,7 +16,7 @@ 
from calibre.constants import ispy3 from calibre.customize import (Plugin, numeric_version, platform, InvalidPlugin, PluginNotFound) from polyglot.builtins import (itervalues, map, string_or_bytes, - unicode_type) + unicode_type, reload) # PEP 302 based plugin loading mechanism, works around the bug in zipimport in # python 2.x that prevents importing from zip files in locations whose paths diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py index f51fb88e90..ba4056e31e 100644 --- a/src/calibre/db/backend.py +++ b/src/calibre/db/backend.py @@ -17,7 +17,7 @@ from polyglot.builtins import (iteritems, itervalues, from calibre import isbytestring, force_unicode, prints, as_unicode from calibre.constants import (iswindows, filesystem_encoding, - preferred_encoding) + preferred_encoding, ispy3) from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile from calibre.db import SPOOL_SIZE from calibre.db.schema_upgrades import SchemaUpgrade @@ -209,9 +209,14 @@ def Concatenate(sep=','): ctxt.append(value) def finalize(ctxt): - if not ctxt: - return None - return sep.join(ctxt) + try: + if not ctxt: + return None + return sep.join(ctxt) + except Exception: + import traceback + traceback.print_exc() + raise return ([], step, finalize) @@ -224,9 +229,14 @@ def SortedConcatenate(sep=','): ctxt[ndx] = value def finalize(ctxt): - if len(ctxt) == 0: - return None - return sep.join(map(ctxt.get, sorted(ctxt))) + try: + if len(ctxt) == 0: + return None + return sep.join(map(ctxt.get, sorted(ctxt))) + except Exception: + import traceback + traceback.print_exc() + raise return ({}, step, finalize) @@ -238,7 +248,12 @@ def IdentifiersConcat(): ctxt.append(u'%s:%s'%(key, val)) def finalize(ctxt): - return ','.join(ctxt) + try: + return ','.join(ctxt) + except Exception: + import traceback + traceback.print_exc() + raise return ([], step, finalize) @@ -251,13 +266,18 @@ def AumSortedConcatenate(): ctxt[ndx] = ':::'.join((author, sort, link)) def finalize(ctxt): - keys = list(ctxt) - l = len(keys) - if l == 0: - return None - if l == 1: - return ctxt[keys[0]] - return ':#:'.join([ctxt[v] for v in sorted(keys)]) + try: + keys = list(ctxt) + l = len(keys) + if l == 0: + return None + if l == 1: + return ctxt[keys[0]] + return ':#:'.join([ctxt[v] for v in sorted(keys)]) + except Exception: + import traceback + traceback.print_exc() + raise return ({}, step, finalize) @@ -1724,8 +1744,13 @@ class DB(object): [(book_id, fmt.upper()) for book_id in book_ids]) def set_conversion_options(self, options, fmt): - options = [(book_id, fmt.upper(), buffer(pickle_binary_string(data.encode('utf-8') if isinstance(data, unicode_type) else data))) - for book_id, data in iteritems(options)] + def map_data(x): + x = x.encode('utf-8') if isinstance(x, unicode_type) else x + x = pickle_binary_string(x) + if not ispy3: + x = buffer(x) # noqa + return x + options = [(book_id, fmt.upper(), map_data(data)) for book_id, data in iteritems(options)] self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options) def get_top_level_move_items(self, all_paths): diff --git a/src/calibre/db/locking.py b/src/calibre/db/locking.py index 9cb615b7c9..72438095b8 100644 --- a/src/calibre/db/locking.py +++ b/src/calibre/db/locking.py @@ -237,20 +237,20 @@ class DebugRWLockWrapper(RWLockWrapper): RWLockWrapper.__init__(self, *args, **kwargs) def acquire(self): - print ('#' * 120, file=sys.stderr) - print ('acquire called: thread id:', current_thread(), 'shared:', self._is_shared, 
file=sys.stderr) + print('#' * 120, file=sys.stderr) + print('acquire called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr) traceback.print_stack() RWLockWrapper.acquire(self) - print ('acquire done: thread id:', current_thread(), file=sys.stderr) - print ('_' * 120, file=sys.stderr) + print('acquire done: thread id:', current_thread(), file=sys.stderr) + print('_' * 120, file=sys.stderr) def release(self, *args): - print ('*' * 120, file=sys.stderr) - print ('release called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr) + print('*' * 120, file=sys.stderr) + print('release called: thread id:', current_thread(), 'shared:', self._is_shared, file=sys.stderr) traceback.print_stack() RWLockWrapper.release(self) - print ('release done: thread id:', current_thread(), 'is_shared:', self._shlock.is_shared, 'is_exclusive:', self._shlock.is_exclusive, file=sys.stderr) - print ('_' * 120, file=sys.stderr) + print('release done: thread id:', current_thread(), 'is_shared:', self._shlock.is_shared, 'is_exclusive:', self._shlock.is_exclusive, file=sys.stderr) + print('_' * 120, file=sys.stderr) __enter__ = acquire __exit__ = release diff --git a/src/calibre/db/tests/main.py b/src/calibre/db/tests/main.py index c57c2c8b76..7f8c1a23d3 100644 --- a/src/calibre/db/tests/main.py +++ b/src/calibre/db/tests/main.py @@ -15,6 +15,7 @@ def find_tests(): base = os.path.dirname(os.path.abspath(__file__)) return find_tests_in_dir(base) + if __name__ == '__main__': try: import init_calibre # noqa diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index 5c0a138d61..2dffd0d48f 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -712,3 +712,14 @@ class ReadingTest(BaseTest): cache.set_last_read_position(1, 'EPUB', 'user', 'device') self.assertFalse(cache.get_last_read_positions(1, 'ePuB', 'user')) # }}} + + def test_storing_conversion_options(self): # {{{ + cache = self.init_cache(self.library_path) + opts = {1: b'binary', 2: 'unicode'} + cache.set_conversion_options(opts, 'PIPE') + for book_id, val in iteritems(opts): + got = cache.conversion_options(book_id, 'PIPE') + if not isinstance(val, bytes): + val = val.encode('utf-8') + self.assertEqual(got, val) + # }}} diff --git a/src/calibre/devices/hanlin/driver.py b/src/calibre/devices/hanlin/driver.py index a8fdfe480c..641514b7f5 100644 --- a/src/calibre/devices/hanlin/driver.py +++ b/src/calibre/devices/hanlin/driver.py @@ -45,11 +45,11 @@ class HANLINV3(USBMS): card = names.get('carda', None) try: - main_num = int(re.findall('\d+', main)[0]) if main else None + main_num = int(re.findall(r'\d+', main)[0]) if main else None except: main_num = None try: - card_num = int(re.findall('\d+', card)[0]) if card else None + card_num = int(re.findall(r'\d+', card)[0]) if card else None except: card_num = None diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index 70af802a59..ec3bf53620 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -3081,7 +3081,6 @@ class KOBOTOUCH(KOBO): update_values.append(newmi.isbn) set_clause += ', ISBN = ? 
' - library_language = normalize_languages(kobo_metadata.languages, newmi.languages) library_language = library_language[0] if library_language is not None and len(library_language) > 0 else None if not (library_language == kobo_metadata.language): diff --git a/src/calibre/devices/mtp/unix/driver.py b/src/calibre/devices/mtp/unix/driver.py index bff84a6320..332b589031 100644 --- a/src/calibre/devices/mtp/unix/driver.py +++ b/src/calibre/devices/mtp/unix/driver.py @@ -196,8 +196,8 @@ class MTP_DEVICE(MTPDeviceBase): p = plugins['libmtp'] self.libmtp = p[0] if self.libmtp is None: - print ('Failed to load libmtp, MTP device detection disabled') - print (p[1]) + print('Failed to load libmtp, MTP device detection disabled') + print(p[1]) else: self.known_devices = frozenset(self.libmtp.known_devices()) diff --git a/src/calibre/devices/prst1/driver.py b/src/calibre/devices/prst1/driver.py index 5b6995b88f..9e939d0ec0 100644 --- a/src/calibre/devices/prst1/driver.py +++ b/src/calibre/devices/prst1/driver.py @@ -143,7 +143,7 @@ class PRST1(USBMS): main, carda, cardb = self.find_device_nodes(detected_device=dev) if main is None and carda is None and cardb is None: if debug: - print ('\tPRS-T1: Appears to be in non data mode' + print('\tPRS-T1: Appears to be in non data mode' ' or was ejected, ignoring') return False return True diff --git a/src/calibre/devices/usbms/device.py b/src/calibre/devices/usbms/device.py index 3e2e57c933..5c5b28f2dc 100644 --- a/src/calibre/devices/usbms/device.py +++ b/src/calibre/devices/usbms/device.py @@ -701,7 +701,7 @@ class Device(DeviceConfig, DevicePlugin): except dbus.exceptions.DBusException as e: print(e) continue - except dbus.exceptions.DBusException as e: + except dbus.exceptions.DBusException: continue vols.sort(key=lambda x: x['node']) diff --git a/src/calibre/devices/winusb.py b/src/calibre/devices/winusb.py index 6b0809714c..a87914dfc1 100644 --- a/src/calibre/devices/winusb.py +++ b/src/calibre/devices/winusb.py @@ -773,7 +773,7 @@ def get_drive_letters_for_device_single(usbdev, storage_number_map, debug=False) if debug: try: devid = get_device_id(devinfo.DevInst)[0] - except Exception as err: + except Exception: devid = 'Unknown' try: storage_number = get_storage_number(devpath) diff --git a/src/calibre/ebooks/comic/__init__.py b/src/calibre/ebooks/comic/__init__.py index f47e177c26..b7738cb2dc 100644 --- a/src/calibre/ebooks/comic/__init__.py +++ b/src/calibre/ebooks/comic/__init__.py @@ -13,5 +13,6 @@ import sys def main(args=sys.argv): return 0 + if __name__ == '__main__': sys.exit(main()) diff --git a/src/calibre/ebooks/conversion/__init__.py b/src/calibre/ebooks/conversion/__init__.py index 86b581992f..b719d01766 100644 --- a/src/calibre/ebooks/conversion/__init__.py +++ b/src/calibre/ebooks/conversion/__init__.py @@ -23,6 +23,7 @@ class ConversionUserFeedBack(Exception): self.title, self.msg, self.det_msg = title, msg, det_msg self.level = level + # Ensure exception uses fully qualified name as this is used to detect it in # the GUI. 
ConversionUserFeedBack.__name__ = str('calibre.ebooks.conversion.ConversionUserFeedBack') diff --git a/src/calibre/ebooks/djvu/djvu.py b/src/calibre/ebooks/djvu/djvu.py index 03de80acde..bab1f9b147 100644 --- a/src/calibre/ebooks/djvu/djvu.py +++ b/src/calibre/ebooks/djvu/djvu.py @@ -46,10 +46,10 @@ class DjvuChunk(object): # self.headersize += 4 self.datastart = pos if verbose > 0: - print ('found', self.type, self.subtype, pos, self.size) + print('found', self.type, self.subtype, pos, self.size) if self.type in b'FORM'.split(): if verbose > 0: - print ('processing substuff %d %d (%x)' % (pos, self.dataend, + print('processing substuff %d %d (%x)' % (pos, self.dataend, self.dataend)) numchunks = 0 while pos < self.dataend: @@ -58,11 +58,11 @@ class DjvuChunk(object): self._subchunks.append(x) newpos = pos + x.size + x.headersize + (1 if (x.size % 2) else 0) if verbose > 0: - print ('newpos %d %d (%x, %x) %d' % (newpos, self.dataend, + print('newpos %d %d (%x, %x) %d' % (newpos, self.dataend, newpos, self.dataend, x.headersize)) pos = newpos if verbose > 0: - print (' end of chunk %d (%x)' % (pos, pos)) + print(' end of chunk %d (%x)' % (pos, pos)) def dump(self, verbose=0, indent=1, out=None, txtout=None, maxlevel=100): if out: @@ -89,7 +89,7 @@ class DjvuChunk(object): l <<= 8 l += ord(x) if verbose > 0 and out: - print (l, file=out) + print(l, file=out) txtout.write(res[3:3+l]) txtout.write(b'\037') if txtout and self.type == b'TXTa': @@ -99,7 +99,7 @@ class DjvuChunk(object): l <<= 8 l += ord(x) if verbose > 0 and out: - print (l, file=out) + print(l, file=out) txtout.write(res[3:3+l]) txtout.write(b'\037') if indent >= maxlevel: @@ -126,7 +126,8 @@ class DJVUFile(object): def main(): f = DJVUFile(open(sys.argv[-1], 'rb')) - print (f.get_text(sys.stdout)) + print(f.get_text(sys.stdout)) + if __name__ == '__main__': main() diff --git a/src/calibre/ebooks/djvu/djvubzzdec.py b/src/calibre/ebooks/djvu/djvubzzdec.py index 33538a30ad..df79e88789 100644 --- a/src/calibre/ebooks/djvu/djvubzzdec.py +++ b/src/calibre/ebooks/djvu/djvubzzdec.py @@ -735,9 +735,9 @@ class BZZDecoder(): def main(): import sys from calibre.constants import plugins - raw = file(sys.argv[1], "rb").read() + raw = open(sys.argv[1], "rb").read() d = plugins['bzzdec'][0] - print (d.decompress(raw)) + print(d.decompress(raw)) if __name__ == "__main__": diff --git a/src/calibre/ebooks/epub/cfi/parse.py b/src/calibre/ebooks/epub/cfi/parse.py index 25c4f688af..ce7450b4aa 100644 --- a/src/calibre/ebooks/epub/cfi/parse.py +++ b/src/calibre/ebooks/epub/cfi/parse.py @@ -197,7 +197,7 @@ def cfi_sort_key(cfi, only_path=True): return () if not pcfi: import sys - print ('Failed to parse CFI: %r' % pcfi, file=sys.stderr) + print('Failed to parse CFI: %r' % pcfi, file=sys.stderr) return () steps = get_steps(pcfi) step_nums = tuple(s.get('num', 0) for s in steps) @@ -217,7 +217,7 @@ def decode_cfi(root, cfi): return if not pcfi: import sys - print ('Failed to parse CFI: %r' % pcfi, file=sys.stderr) + print('Failed to parse CFI: %r' % pcfi, file=sys.stderr) return steps = get_steps(pcfi) ans = root diff --git a/src/calibre/ebooks/lit/maps/html.py b/src/calibre/ebooks/lit/maps/html.py index 6403ab2e4f..c144d55ea8 100644 --- a/src/calibre/ebooks/lit/maps/html.py +++ b/src/calibre/ebooks/lit/maps/html.py @@ -380,7 +380,7 @@ ATTRS35 = { 0x804a: "align", 0x8bbd: "palette", 0x8bbe: "pluginspage", - 0x8bbf: "codebase", + # 0x8bbf: "codebase", 0x8bbf: "src", 0x8bc1: "units", 0x8bc2: "type", @@ -640,7 +640,7 @@ ATTRS66 = { 0x03f5: "n", } ATTRS71 = 
{
-    0x8000: "border",
+    # 0x8000: "border",
     0x8000: "usemap",
     0x8001: "name",
     0x8006: "width",
@@ -682,8 +682,8 @@ ATTRS74 = {
     0x9399: "clear",
 }
 ATTRS75 = {
-    0x8000: "name",
-    0x8000: "value",
+    # 0x8000: "name",
+    # 0x8000: "value",
     0x8000: "type",
 }
 ATTRS76 = {
diff --git a/src/calibre/ebooks/lrf/html/color_map.py b/src/calibre/ebooks/lrf/html/color_map.py
index c68682aa7b..9c74c61018 100644
--- a/src/calibre/ebooks/lrf/html/color_map.py
+++ b/src/calibre/ebooks/lrf/html/color_map.py
@@ -96,8 +96,8 @@ NAME_MAP = {
     u'yellowgreen': u'#9ACD32'
 }
 
-hex_pat = re.compile('#(\d{2})(\d{2})(\d{2})')
-rgb_pat = re.compile('rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
+hex_pat = re.compile(r'#(\d{2})(\d{2})(\d{2})')
+rgb_pat = re.compile(r'rgb\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\)', re.IGNORECASE)
 
 
 def lrs_color(html_color):
@@ -111,5 +111,3 @@ def lrs_color(html_color):
     if hcol in NAME_MAP:
         return NAME_MAP[hcol].replace('#', '0x00')
     return '0x00000000'
-
-
diff --git a/src/calibre/ebooks/lrf/pylrs/pylrf.py b/src/calibre/ebooks/lrf/pylrs/pylrf.py
index d0e58f7f59..6c635d2095 100644
--- a/src/calibre/ebooks/lrf/pylrs/pylrf.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrf.py
@@ -490,7 +490,7 @@ class LrfFileStream(LrfStreamBase):
 
     def __init__(self, streamFlags, filename):
         LrfStreamBase.__init__(self, streamFlags)
-        f = file(filename, "rb")
+        f = open(filename, "rb")
         self.streamData = f.read()
         f.close()
 
@@ -686,7 +686,7 @@ class LrfWriter(object):
         self.tocObjId = obj.objId
 
     def setThumbnailFile(self, filename, encoding=None):
-        f = file(filename, "rb")
+        f = open(filename, "rb")
         self.thumbnailData = f.read()
         f.close()
diff --git a/src/calibre/ebooks/lrf/pylrs/pylrs.py b/src/calibre/ebooks/lrf/pylrs/pylrs.py
index f13e703a0b..ebe3aa6e37 100644
--- a/src/calibre/ebooks/lrf/pylrs/pylrs.py
+++ b/src/calibre/ebooks/lrf/pylrs/pylrs.py
@@ -2268,7 +2268,7 @@ class ImageStream(LrsObject, LrsContainer):
         self.encoding = encoding
 
     def toLrf(self, lrfWriter):
-        imageFile = file(self.filename, "rb")
+        imageFile = open(self.filename, "rb")
         imageData = imageFile.read()
         imageFile.close()
diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py
index ceba4b7708..bca0cd91ad 100644
--- a/src/calibre/ebooks/metadata/docx.py
+++ b/src/calibre/ebooks/metadata/docx.py
@@ -77,7 +77,8 @@ def set_metadata(stream, mi):
     stream.seek(0)
     safe_replace(stream, dp_name, BytesIO(xml2str(cp)), extra_replacements=replacements)
 
+
 if __name__ == '__main__':
     import sys
     with open(sys.argv[-1], 'rb') as stream:
-        print (get_metadata(stream))
+        print(get_metadata(stream))
diff --git a/src/calibre/ebooks/metadata/opf3_test.py b/src/calibre/ebooks/metadata/opf3_test.py
index 8a7ce4ffe0..2e9b913500 100644
--- a/src/calibre/ebooks/metadata/opf3_test.py
+++ b/src/calibre/ebooks/metadata/opf3_test.py
@@ -376,7 +376,7 @@ class TestOPF3(unittest.TestCase):
                 "value", "#value#": "<div><b><i>Testing</i></b> extra <font
-                color=\"#aa0000\">comments</font></div>",
+                color=\\"#aa0000\\">comments</font></div>",
                 "is_custom": true, "label": "commetns", "table": "custom_column_13",
                 "is_multiple": null, "is_category": false}"/>
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index bb0287b633..3bebaed774 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -353,7 +353,7 @@ class Worker(Thread):  # Get details {{{
             with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4())) + '_',
                                              suffix='.html', delete=False) as f:
                 f.write(raw)
-            print ('Downloaded html for', asin, 'saved in', f.name)
+            print('Downloaded html for', asin, 'saved in', f.name)
 
         try:
             title = self.parse_title(root)
@@ -1256,7 +1256,7 @@ class Amazon(Source):
             with tempfile.NamedTemporaryFile(prefix='amazon_results_', suffix='.html',
                                              delete=False) as f:
                 f.write(raw.encode('utf-8'))
-            print ('Downloaded html for results page saved in', f.name)
+            print('Downloaded html for results page saved in', f.name)
 
         matches = []
         found = '<title>404 - ' not in raw
diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py
index 61b250e1db..baa1cc720b 100644
--- a/src/calibre/ebooks/metadata/sources/cli.py
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@@ -82,7 +82,7 @@ def main(args=sys.argv):
                               allowed_plugins=allowed_plugins or None)
 
     if not results:
-        print (log, file=sys.stderr)
+        print(log, file=sys.stderr)
         prints('No results found', file=sys.stderr)
         raise SystemExit(1)
     result = results[0]
@@ -103,9 +103,9 @@ def main(args=sys.argv):
                 unicode_type(result).encode('utf-8'))
 
     if opts.verbose:
-        print (log, file=sys.stderr)
+        print(log, file=sys.stderr)
 
-    print (result)
+    print(result)
     if not opts.opf and opts.cover:
         prints('Cover :', cf)
diff --git a/src/calibre/ebooks/mobi/__init__.py b/src/calibre/ebooks/mobi/__init__.py
index 01bddd59f2..add994ba37 100644
--- a/src/calibre/ebooks/mobi/__init__.py
+++ b/src/calibre/ebooks/mobi/__init__.py
@@ -8,8 +8,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 class MobiError(Exception):
     pass
 
+
 # That might be a bit small on the PW, but Amazon/KG 2.5 still uses these values, even when delivered to a PW
 MAX_THUMB_SIZE = 16 * 1024
 MAX_THUMB_DIMEN = (180, 240)
-
-
diff --git a/src/calibre/ebooks/mobi/debug/mobi6.py b/src/calibre/ebooks/mobi/debug/mobi6.py
index 30d206efa1..5884fc1069 100644
--- a/src/calibre/ebooks/mobi/debug/mobi6.py
+++ b/src/calibre/ebooks/mobi/debug/mobi6.py
@@ -276,7 +276,7 @@ class Tag(object):  # {{{
         if tag_type in self.TAG_MAP:
             self.attr, self.desc = self.TAG_MAP[tag_type]
         else:
-            print ('Unknown tag value: %%s'%tag_type)
+            print('Unknown tag value: %%s'%tag_type)
             self.desc = '??Unknown (tag value: %d)'%tag_type
             self.attr = 'unknown'
 
@@ -461,7 +461,7 @@ class CNCX(object):  # {{{
             except:
                 byts = raw[pos:]
                 r = format_bytes(byts)
-                print ('CNCX entry at offset %d has unknown format %s'%(
+                print('CNCX entry at offset %d has unknown format %s'%(
                     pos+record_offset, r))
                 self.records[pos+record_offset] = r
                 pos = len(raw)
@@ -629,7 +629,7 @@ class TBSIndexing(object):  # {{{
                 import traceback
                 traceback.print_exc()
                 a = []
-                print ('Failed to decode TBS bytes for record: %d'%r.idx)
+                print('Failed to decode TBS bytes for record: %d'%r.idx)
             ans += a
             if byts:
                 sbyts = tuple(hex(b)[2:] for b in byts)
@@ -789,14 +789,14 @@ class MOBIFile(object):  # {{{
             self.index_record.indices, self.mobi_header.type_raw)
 
     def print_header(self, f=sys.stdout):
-        print (str(self.palmdb).encode('utf-8'), file=f)
-        print (file=f)
-        print ('Record headers:', file=f)
+        print(str(self.palmdb).encode('utf-8'), file=f)
+        print(file=f)
+        print('Record headers:', file=f)
         for i, r in enumerate(self.records):
-            print ('%6d. %s'%(i, r.header), file=f)
+            print('%6d. %s'%(i, r.header), file=f)
 
-        print (file=f)
-        print (str(self.mobi_header).encode('utf-8'), file=f)
+        print(file=f)
+        print(str(self.mobi_header).encode('utf-8'), file=f)
 # }}}
diff --git a/src/calibre/ebooks/mobi/langcodes.py b/src/calibre/ebooks/mobi/langcodes.py
index 7d5bef3ccb..16be630f18 100644
--- a/src/calibre/ebooks/mobi/langcodes.py
+++ b/src/calibre/ebooks/mobi/langcodes.py
@@ -23,7 +23,7 @@ main_language = {
     2 : "BULGARIAN",
     3 : "CATALAN",
     4 : "CHINESE",
-    26 : "CROATIAN",
+    # 26 : "CROATIAN",
     5 : "CZECH",
     6 : "DANISH",
     19 : "DUTCH",
@@ -91,55 +91,55 @@ main_language = {
 
 sub_language = {
     0 : "NEUTRAL",
-    1 : "ARABIC_SAUDI_ARABIA",
-    2 : "ARABIC_IRAQ",
-    3 : "ARABIC_EGYPT",
-    4 : "ARABIC_LIBYA",
-    5 : "ARABIC_ALGERIA",
-    6 : "ARABIC_MOROCCO",
-    7 : "ARABIC_TUNISIA",
-    8 : "ARABIC_OMAN",
-    9 : "ARABIC_YEMEN",
-    10 : "ARABIC_SYRIA",
-    11 : "ARABIC_JORDAN",
-    12 : "ARABIC_LEBANON",
-    13 : "ARABIC_KUWAIT",
-    14 : "ARABIC_UAE",
-    15 : "ARABIC_BAHRAIN",
-    16 : "ARABIC_QATAR",
-    1 : "AZERI_LATIN",
-    2 : "AZERI_CYRILLIC",
-    1 : "CHINESE_TRADITIONAL",
-    2 : "CHINESE_SIMPLIFIED",
-    3 : "CHINESE_HONGKONG",
-    4 : "CHINESE_SINGAPORE",
-    1 : "DUTCH",
-    2 : "DUTCH_BELGIAN",
-    1 : "FRENCH",
-    2 : "FRENCH_BELGIAN",
-    3 : "FRENCH_CANADIAN",
-    4 : "FRENCH_SWISS",
-    5 : "FRENCH_LUXEMBOURG",
-    6 : "FRENCH_MONACO",
-    1 : "GERMAN",
-    2 : "GERMAN_SWISS",
-    3 : "GERMAN_AUSTRIAN",
-    4 : "GERMAN_LUXEMBOURG",
-    5 : "GERMAN_LIECHTENSTEIN",
-    1 : "ITALIAN",
-    2 : "ITALIAN_SWISS",
-    1 : "KOREAN",
-    1 : "LITHUANIAN",
-    1 : "MALAY_MALAYSIA",
-    2 : "MALAY_BRUNEI_DARUSSALAM",
-    1 : "NORWEGIAN_BOKMAL",
-    2 : "NORWEGIAN_NYNORSK",
-    2 : "PORTUGUESE",
-    1 : "PORTUGUESE_BRAZILIAN",
-    2 : "SERBIAN_LATIN",
+    # 1 : "ARABIC_SAUDI_ARABIA",
+    # 2 : "ARABIC_IRAQ",
+    # 3 : "ARABIC_EGYPT",
+    # 4 : "ARABIC_LIBYA",
+    # 5 : "ARABIC_ALGERIA",
+    # 6 : "ARABIC_MOROCCO",
+    # 7 : "ARABIC_TUNISIA",
+    # 8 : "ARABIC_OMAN",
+    # 9 : "ARABIC_YEMEN",
+    # 10 : "ARABIC_SYRIA",
+    # 11 : "ARABIC_JORDAN",
+    # 12 : "ARABIC_LEBANON",
+    # 13 : "ARABIC_KUWAIT",
+    # 14 : "ARABIC_UAE",
+    # 15 : "ARABIC_BAHRAIN",
+    # 16 : "ARABIC_QATAR",
+    # 1 : "AZERI_LATIN",
+    # 2 : "AZERI_CYRILLIC",
+    # 1 : "CHINESE_TRADITIONAL",
+    # 2 : "CHINESE_SIMPLIFIED",
+    # 3 : "CHINESE_HONGKONG",
+    # 4 : "CHINESE_SINGAPORE",
+    # 1 : "DUTCH",
+    # 2 : "DUTCH_BELGIAN",
+    # 1 : "FRENCH",
+    # 2 : "FRENCH_BELGIAN",
+    # 3 : "FRENCH_CANADIAN",
+    # 4 : "FRENCH_SWISS",
+    # 5 : "FRENCH_LUXEMBOURG",
+    # 6 : "FRENCH_MONACO",
+    # 1 : "GERMAN",
+    # 2 : "GERMAN_SWISS",
+    # 3 : "GERMAN_AUSTRIAN",
+    # 4 : "GERMAN_LUXEMBOURG",
+    # 5 : "GERMAN_LIECHTENSTEIN",
+    # 1 : "ITALIAN",
+    # 2 : "ITALIAN_SWISS",
+    # 1 : "KOREAN",
+    # 1 : "LITHUANIAN",
+    # 1 : "MALAY_MALAYSIA",
+    # 2 : "MALAY_BRUNEI_DARUSSALAM",
+    # 1 : "NORWEGIAN_BOKMAL",
+    # 2 : "NORWEGIAN_NYNORSK",
+    # 2 : "PORTUGUESE",
+    # 1 : "PORTUGUESE_BRAZILIAN",
+    # 2 : "SERBIAN_LATIN",
     3 : "SERBIAN_CYRILLIC",
-    1 : "SPANISH",
-    2 : "SPANISH_MEXICAN",
+    # 1 : "SPANISH",
+    # 2 : "SPANISH_MEXICAN",
     4 : "SPANISH_GUATEMALA",
     5 : "SPANISH_COSTA_RICA",
     6 : "SPANISH_PANAMA",
@@ -157,8 +157,8 @@ sub_language = {
     18 : "SPANISH_HONDURAS",
     19 : "SPANISH_NICARAGUA",
     20 : "SPANISH_PUERTO_RICO",
-    1 : "SWEDISH",
-    2 : "SWEDISH_FINLAND",
+    # 1 : "SWEDISH",
+    # 2 : "SWEDISH_FINLAND",
     1 : "UZBEK_LATIN",
     2 : "UZBEK_CYRILLIC",
 }
diff --git a/src/calibre/ebooks/mobi/writer8/skeleton.py b/src/calibre/ebooks/mobi/writer8/skeleton.py
index 7b6cf31a1a..be36f5e54a 100644
--- a/src/calibre/ebooks/mobi/writer8/skeleton.py
+++ b/src/calibre/ebooks/mobi/writer8/skeleton.py
@@ -15,9 +15,8 @@ from xml.sax.saxutils import escape
 from lxml import etree
 
 from calibre.ebooks.oeb.base import XHTML_NS, extract
-from calibre.constants import ispy3
 from calibre.ebooks.mobi.utils import to_base
-from polyglot.builtins import iteritems, unicode_type
+from polyglot.builtins import iteritems, unicode_type, codepoint_to_chr as mychr
 
 CHUNK_SIZE = 8192
 
@@ -61,9 +60,6 @@ def node_from_path(root, path):
     return parent
 
 
-mychr = chr if ispy3 else unichr
-
-
 def tostring(raw, **kwargs):
     ''' lxml *sometimes* represents non-ascii characters as hex entities in
     attribute values. I can't figure out exactly what circumstances cause it.
diff --git a/src/calibre/ebooks/oeb/display/test-cfi/run.py b/src/calibre/ebooks/oeb/display/test-cfi/run.py
index a2bf830112..259cc10573 100644
--- a/src/calibre/ebooks/oeb/display/test-cfi/run.py
+++ b/src/calibre/ebooks/oeb/display/test-cfi/run.py
@@ -22,6 +22,6 @@ def run_devel_server():
     os.chdir(os.path.dirname(os.path.abspath(__file__)))
     serve(resources={'cfi.coffee':'../cfi.coffee', '/':'index.html'})
 
+
 if __name__ == '__main__':
     run_devel_server()
-
diff --git a/src/calibre/ebooks/oeb/polish/parsing.py b/src/calibre/ebooks/oeb/polish/parsing.py
index d92ebeb382..f02336249d 100644
--- a/src/calibre/ebooks/oeb/polish/parsing.py
+++ b/src/calibre/ebooks/oeb/polish/parsing.py
@@ -95,5 +95,5 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
 if __name__ == '__main__':
     from lxml import etree
     root = parse_html5('\n<html><head><title>a\n \nb', discard_namespaces=False)
-    print (etree.tostring(root, encoding='utf-8'))
+    print(etree.tostring(root, encoding='utf-8'))
     print()
diff --git a/src/calibre/ebooks/oeb/polish/tests/main.py b/src/calibre/ebooks/oeb/polish/tests/main.py
index f0948c1a05..5e30c441a3 100644
--- a/src/calibre/ebooks/oeb/polish/tests/main.py
+++ b/src/calibre/ebooks/oeb/polish/tests/main.py
@@ -14,6 +14,7 @@ def find_tests():
     base = os.path.dirname(os.path.abspath(__file__))
     return find_tests_in_dir(base)
 
+
 if __name__ == '__main__':
     try:
         import init_calibre  # noqa
diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py
index 378e4998c2..c950f866a5 100644
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@@ -8,6 +8,7 @@ __docformat__ = 'restructuredtext en'
 
 class PDBError(Exception):
     pass
 
+
 FORMAT_READERS = None
 
@@ -31,6 +32,7 @@ def _import_readers():
         'BOOKMTIU': haodoo_reader,
     }
 
+
 ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
 
 FORMAT_WRITERS = None
@@ -47,6 +49,7 @@ def _import_writers():
         'ereader': ereader_writer,
     }
 
+
 IDENTITY_TO_NAME = {
     'PNPdPPrs': 'eReader',
     'PNRdPPrs': 'eReader',
@@ -100,4 +103,3 @@ def get_writer(extension):
     if FORMAT_WRITERS is None:
         _import_writers()
     return FORMAT_WRITERS.get(extension, None)
-
diff --git a/src/calibre/ebooks/pdf/pageoptions.py b/src/calibre/ebooks/pdf/pageoptions.py
index d50378ccfe..4cc3effe86 100644
--- a/src/calibre/ebooks/pdf/pageoptions.py
+++ b/src/calibre/ebooks/pdf/pageoptions.py
@@ -19,6 +19,7 @@ UNITS = {
 def unit(unit):
     return UNITS.get(unit, QPrinter.Inch)
 
+
 PAPER_SIZES = {
     'a0' : QPrinter.A0,  # 841 x 1189 mm
     'a1' : QPrinter.A1,  # 594 x 841 mm
@@ -57,6 +58,7 @@ PAPER_SIZES = {
 def paper_size(size):
     return PAPER_SIZES.get(size, QPrinter.Letter)
 
+
 ORIENTATIONS = {
     'portrait' : QPrinter.Portrait,
     'landscape' : QPrinter.Landscape,
diff --git a/src/calibre/ebooks/readability/cleaners.py b/src/calibre/ebooks/readability/cleaners.py
index 4c98971346..057fcf17b3 100644
--- a/src/calibre/ebooks/readability/cleaners.py
+++ b/src/calibre/ebooks/readability/cleaners.py
@@ -28,6 +28,7 @@ def normalize_spaces(s):
     characters with a single space"""
     return ' '.join(s.split())
 
+
 html_cleaner = Cleaner(scripts=True, javascript=True, comments=True,
                        style=True, links=True, meta=False, add_nofollow=False,
                        page_structure=False, processing_instructions=True, embedded=False,
diff --git a/src/calibre/ebooks/readability/debug.py b/src/calibre/ebooks/readability/debug.py
index 77711270ae..103bb5f9f0 100644
--- a/src/calibre/ebooks/readability/debug.py
+++ b/src/calibre/ebooks/readability/debug.py
@@ -4,6 +4,7 @@ def save_to_file(text, filename):
     f.write(text.encode('utf-8'))
     f.close()
 
+
 uids = {}
diff --git a/src/calibre/ebooks/readability/readability.py b/src/calibre/ebooks/readability/readability.py
index 7c20851b41..b7cefa1ad9 100644
--- a/src/calibre/ebooks/readability/readability.py
+++ b/src/calibre/ebooks/readability/readability.py
@@ -504,7 +504,7 @@ def main():
     enc = sys.__stdout__.encoding or 'utf-8'
     if options.verbose:
         default_log.filter_level = default_log.DEBUG
-    print (Document(raw, default_log,
+    print(Document(raw, default_log,
             debug=options.verbose,
             keep_elements=options.keep_elements).summary().encode(enc, 'replace'))
diff --git a/src/calibre/ebooks/rtf/preprocess.py b/src/calibre/ebooks/rtf/preprocess.py
index ecbacddcb1..aae0a1502d 100644
--- a/src/calibre/ebooks/rtf/preprocess.py
+++ b/src/calibre/ebooks/rtf/preprocess.py
@@ -367,7 +367,7 @@ class RtfTokenizer():
 if __name__ == "__main__":
     import sys
     if len(sys.argv) < 2:
-        print ("Usage %prog rtfFileToConvert")
+        print("Usage %prog rtfFileToConvert")
         sys.exit()
     f = open(sys.argv[1], 'rb')
     data = f.read()
@@ -381,5 +381,3 @@ if __name__ == "__main__":
     f = open(sys.argv[1], 'w')
     f.write(data)
     f.close()
-
-
diff --git a/src/calibre/ebooks/rtf2xml/get_char_map.py b/src/calibre/ebooks/rtf2xml/get_char_map.py
index 07ba40b981..4ce42e37c8 100755
--- a/src/calibre/ebooks/rtf2xml/get_char_map.py
+++ b/src/calibre/ebooks/rtf2xml/get_char_map.py
@@ -36,7 +36,7 @@ class GetCharMap:
 
     def get_char_map(self, map):
         # if map == 'ansicpg10000':
-            # map = 'mac_roman'
+        # map = 'mac_roman'
         found_map = False
         map_dict = {}
         self.__char_file.seek(0)
@@ -59,4 +59,3 @@ class GetCharMap:
             msg = 'no map found\nmap is "%s"\n'%(map,)
             raise self.__bug_handler(msg)
         return map_dict
-
diff --git a/src/calibre/ebooks/rtf2xml/paragraphs.py b/src/calibre/ebooks/rtf2xml/paragraphs.py
index be4e2d8669..b7c4086a5f 100755
--- a/src/calibre/ebooks/rtf2xml/paragraphs.py
+++ b/src/calibre/ebooks/rtf2xml/paragraphs.py
@@ -31,11 +31,11 @@ class Paragraphs:
     In order to make paragraphs out of this limited info, the parser starts in the
     body of the documents and assumes it is not in a paragraph. It looks for clues
     to begin a paragraph. Text starts a paragraph; so does an inline field or
-    list-text. If an end of paragraph marker (\par) is found, then this indicates
+    list-text. If an end of paragraph marker (\\par) is found, then this indicates
     a blank paragraph.
     Once a paragraph is found, the state changes to 'paragraph.' In this state,
     clues are looked to for the end of a paragraph. The end of a paragraph marker
-    (\par) marks the end of a paragraph. So does the end of a footnote or heading;
+    (\\par) marks the end of a paragraph. So does the end of a footnote or heading;
     a paragraph definition; the end of a field-block; and the beginning of a
     section. (How about the end of a section or the end of a field-block?)
     """
@@ -224,7 +224,7 @@ class Paragraphs:
         Returns:
             nothing
         Logic:
-            if a \pard occurs in a paragraph, I want to ignore it. (I believe)
+            if a \\pard occurs in a paragraph, I want to ignore it. (I believe)
         """
         self.__write_obj.write('mi
 1:
     if len(args) < 4:
-        print ('You must specify the from address, to address and body text'
+        print('You must specify the from address, to address and body text'
               ' on the command line')
         return 1
     msg = compose_mail(args[1], args[2], args[3], subject=opts.subject,
diff --git a/src/calibre/utils/smtplib.py b/src/calibre/utils/smtplib.py
index 42ff37d3b0..34634fbffa 100755
--- a/src/calibre/utils/smtplib.py
+++ b/src/calibre/utils/smtplib.py
@@ -892,14 +892,14 @@ if __name__ == '__main__':
 
     fromaddr = prompt("From")
     toaddrs = prompt("To").split(',')
-    print ("Enter message, end with ^D:")
+    print("Enter message, end with ^D:")
     msg = ''
     while 1:
         line = sys.stdin.readline()
         if not line:
             break
         msg = msg + line
-    print ("Message length is %d" % len(msg))
+    print("Message length is %d" % len(msg))
 
     server = SMTP('localhost')
     server.set_debuglevel(1)
diff --git a/src/calibre/utils/socket_inheritance.py b/src/calibre/utils/socket_inheritance.py
index 4d73045feb..cc771f9120 100644
--- a/src/calibre/utils/socket_inheritance.py
+++ b/src/calibre/utils/socket_inheritance.py
@@ -75,7 +75,8 @@ def test():
     set_socket_inherit(s, orig ^ True)
     if orig == get_socket_inherit(s):
         raise RuntimeError('Failed to change socket inheritance status')
-    print ('OK!')
+    print('OK!')
+
 
 if __name__ == '__main__':
     test()
diff --git a/src/calibre/utils/wmf/parse.py b/src/calibre/utils/wmf/parse.py
index 83cae92e73..4573e71044 100644
--- a/src/calibre/utils/wmf/parse.py
+++ b/src/calibre/utils/wmf/parse.py
@@ -219,9 +219,9 @@ def wmf_unwrap(wmf_data, verbose=0):
         raise ValueError('No raster image found in the WMF')
     return w.to_png()
 
+
 if __name__ == '__main__':
     wmf = WMF(verbose=4)
     wmf(open(sys.argv[-1], 'rb'))
     open('/t/test.bmp', 'wb').write(wmf.bitmaps[0])
     open('/t/test.png', 'wb').write(wmf.to_png())
-
diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py
index b1c5bb5281..bf6e66bcec 100644
--- a/src/calibre/utils/zipfile.py
+++ b/src/calibre/utils/zipfile.py
@@ -1037,9 +1037,9 @@ class ZipFile:
             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 
         if fname != zinfo.orig_filename:
-            print (('WARNING: Header (%r) and directory (%r) filenames do not'
+            print(('WARNING: Header (%r) and directory (%r) filenames do not'
                 ' match inside ZipFile')%(fname, zinfo.orig_filename))
-            print ('Using directory filename %r'%zinfo.orig_filename)
+            print('Using directory filename %r'%zinfo.orig_filename)
             # raise BadZipfile, \
             #       'File name in directory "%r" and header "%r" differ.' % (
             #       zinfo.orig_filename, fname)
diff --git a/src/calibre/web/fetch/utils.py b/src/calibre/web/fetch/utils.py
index e57a0882e8..b1002e9508 100644
--- a/src/calibre/web/fetch/utils.py
+++ b/src/calibre/web/fetch/utils.py
@@ -47,6 +47,7 @@ def prepare_masthead_image(path_to_image, out_path, mi_width, mi_height):
     with lopen(out_path, 'wb') as f:
         f.write(image_to_data(img))
 
+
 if __name__ == '__main__':
     import sys
     data = sys.stdin.read()
diff --git a/src/polyglot/builtins.py b/src/polyglot/builtins.py
index 6616d5173c..40378719c2 100644
--- a/src/polyglot/builtins.py
+++ b/src/polyglot/builtins.py
@@ -69,6 +69,10 @@ if is_py3:
     def int_to_byte(x):
         return bytes((x,))
 
+    def reload(module):
+        import importlib
+        return importlib.reload(module)
+
 else:
     exec("""def reraise(tp, value, tb=None):
     try:
@@ -106,3 +110,6 @@ else:
     def int_to_byte(x):
         if isinstance(x, unicode_type):
             x = x.encode('utf-8')
         return x
+
+    def reload(module):
+        return builtins.reload(module)