# -*- mode: python -*-
# -*- coding: utf-8 -*-
# vi: set fenc=utf-8 ft=python :
# kate: encoding utf-8; syntax python;
#
# File: recipes/thenewcriterion.recipe (icon: recipes/icons/thenewcriterion.png)

__license__ = 'GPL v3'
__copyright__ = '2019, Darko Miletic '
'''
www.newcriterion.com
'''

# NOTE(review): the urllib2 module and urllib.urlencode exist only on
# Python 2; this recipe needs porting before it can run on Python 3.
import urllib
import urllib2
import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile


class TheNewCriterion(BasicNewsRecipe):
    """Recipe that builds one feed from the current month's issue page of
    www.newcriterion.com, optionally signing in first when credentials
    are supplied.
    """

    title = 'The New Criterion'
    __author__ = 'Darko Miletic'
    description = 'On the front lines of the battle for culture'
    publisher = 'The Foundation for Cultural Review'
    category = 'art, politics, USA, world'
    oldest_article = 40
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
    needs_subscription = 'optional'
    delay = 1
    simultaneous_downloads = 1
    timeout = 8
    ignore_duplicate_articles = {'url'}
    articles_are_obfuscated = True
    # Temporary files created by get_obfuscated_article() are recorded here.
    temp_files = []
    # Maximum number of download attempts per article.
    fetch_retries = 10
    auto_cleanup = True
    masthead_url = 'https://www.newcriterion.com/themes/thenewcriterion/assets/img/horizontal-logo.svg'
    extra_css = """
        body{font-family: Galliard, serif}
    """

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

    def get_browser(self):
        """Return a browser that has visited the home page and, when both a
        username and a password are configured, has posted the sign-in form.
        """
        br = BasicNewsRecipe.get_browser(self)
        # The home page is always opened once before any sign-in request.
        br.open('https://www.newcriterion.com/')
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({'login': self.username, 'password': self.password})
            # The sign-in is sent as an XMLHttpRequest-style POST to the
            # home page URL, with the handler named in a request header.
            header = {
                'X-OCTOBER-REQUEST-HANDLER': 'onSignin',
                'X-Requested-With': 'XMLHttpRequest',
                'DNT': '1',
                'X-OCTOBER-REQUEST-PARTIALS': '',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
            }
            request = urllib2.Request('https://www.newcriterion.com/', data, header)
            br.open(request)
        return br

    def parse_index(self):
        """Collect the articles linked from the current month's issue page.

        Returns a list holding a single ``(feed title, articles)`` tuple,
        where each article is a dict with the keys ``title``, ``date``,
        ``url`` and ``description``. The article list is empty when the
        page has no ``<div id="main">`` element.
        """
        # Issue pages live under /issues/<year>/<month>, month not zero-padded.
        part = strftime('/issues/%Y/') + str(int(strftime('%m')))
        partf = part + '/'
        current_issue_url = 'https://www.newcriterion.com' + part
        soup1 = self.index_to_soup(current_issue_url)
        self.log(current_issue_url)
        # Only links that point below the issue path are treated as articles.
        rsr = re.compile('^' + partf + '.+$')
        date = strftime(' %B %Y')
        articles = []
        subset = soup1.find('div', id='main')
        if subset is None:
            # Without the main container there is nothing to scan; report
            # it instead of failing with an AttributeError on None.
            self.log.warn('No <div id="main"> found at ' + current_issue_url)
            return [(self.title, articles)]
        for item in subset.findAll('a', href=True):
            relurl = str(item['href'])
            if not rsr.search(relurl):
                continue
            # Heading and teaser sit inside the link's first <div> when it
            # has one, otherwise directly inside the link.
            container = item.find('div') or item
            articles.append({
                'title': self.tag_to_string(container.h1).strip(),
                'date': date,
                'url': 'https://www.newcriterion.com' + relurl,
                'description': self.tag_to_string(container.p)
            })
        return [(self.title, articles)]

    def get_obfuscated_article(self, url):
        """Download ``url`` into a persistent temporary file.

        The download is attempted up to ``self.fetch_retries`` times.
        Returns the temporary file's path, or ``None`` when every attempt
        failed. An error while writing the temporary file is not retried
        and propagates to the caller.
        """
        for _attempt in range(self.fetch_retries):
            try:
                response = self.browser.open(url, timeout=self.timeout)
                html = response.read()
            except Exception:
                # Only download failures are retried; KeyboardInterrupt and
                # SystemExit are deliberately not caught here.
                self.log.warn('Retrying download...')
                continue
            tfile = PersistentTemporaryFile('_fa.html')
            try:
                tfile.write(html)
            finally:
                # Close the handle even when the write fails.
                tfile.close()
            self.temp_files.append(tfile)
            return tfile.name
        return None