# Recipe file: resources/recipes/motherjones.recipe
# (added together with the image resources/images/news/motherjones.png)
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
from calibre.ptempfile import PersistentTemporaryFile


class MotherJonesRecipe(BasicNewsRecipe):
    '''
    News recipe for Mother Jones (motherjones.com).

    Articles are listed from the site's RSS feeds, but the content that is
    actually converted is the "print" version of each article, which is
    fetched by get_obfuscated_article().
    '''

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
    version = 1

    title = u'Mother Jones'
    publisher = u'Mother Jones'
    category = u'News, Investigative journalism'
    description = u'Independent investigative, political, and social justice reporting. Takes no prisoners, cleaves to no dogma, and tells it like it is.'

    oldest_article = 14
    max_articles_per_feed = 100
    use_embedded_content = False

    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True
    simultaneous_downloads = 3

    # Only these parts of the print page are kept: headline, teaser ("dek"),
    # byline/date paragraph and the article body.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'class': 'dek'}),
        dict(name='p', attrs={'class': 'submitted'}),
        dict(name='div', attrs={'class': 'print-content'}),
    ]

    remove_tags = [
        dict(name='base'),
    ]

    remove_attributes = ['style']

    # Prefix used by preprocess_html() to turn site-relative image paths
    # into absolute URLs.
    _site_root = 'http://motherjones.com'

    # feeds from http://motherjones.com/about/rss
    feeds = [
        (u'Latest News', u'http://feeds.feedburner.com/motherjones/main?format=xml'),
        # NOTE(review): this URL uses '&format=xml' without a preceding '?',
        # unlike the other feeds -- left unchanged, confirm it is intended.
        (u'Politics & Current Affairs', u'http://motherjones.com/rss/sections/Politics/feed&format=xml'),
        (u'Environment & Health', u'http://motherjones.com/rss/sections/Environment/feed'),
        (u'Media & Culture', u'http://motherjones.com/rss/sections/Media/feed'),
        (u'Blog: Kevin Drum', u'http://motherjones.com/rss/blogs/Kevin+Drum/feed'),
        (u'Blog: MoJo Blog', u'http://motherjones.com/rss/blogs/mojo/feed'),
        (u'Blog: Blue Marble', u'http://motherjones.com/rss/blogs/Blue+Marble/feed'),
        (u'Blog: The Riff', u'http://motherjones.com/rss/blogs/Riff/feed'),
    ]

    # The div.mj_support rule styles the footer added in preprocess_html().
    # Its colour was '#0666666' (seven hex digits, not a valid CSS colour);
    # corrected to '#666666'.
    extra_css = '''
        body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
        img {float: left; margin-right: 0.5em;}
        div.dek {font-style: italic;}
        p.submitted {font-size: x-small; color: #696969;}
        div.mj_support {font-size: x-small; color: #666666; border: 1px solid black; padding: 0.5em}
        a, a[href] {text-decoration: none; color: blue;}
        '''

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Temporary files created by get_obfuscated_article() are collected here.
    temp_files = []
    # Presumably makes calibre fetch articles through
    # get_obfuscated_article() -- see the BasicNewsRecipe documentation.
    articles_are_obfuscated = True

    def get_obfuscated_article(self, url):
        '''
        Download the print version of the article at `url` into a temp file.

        The print version is sort of hard to get. I think they look at the
        referer header, and if it is not right they serve the original. This
        method works around that by opening the article page first and then
        following its first link matching /print/<number> with the same
        browser.

        Returns the name (path) of the temporary file holding the HTML.
        '''
        br = self.get_browser()
        br.open(url)

        response = br.follow_link(url_regex=r'/print/[0-9]+', nr=0)
        html = response.read()

        tmp = PersistentTemporaryFile('_motherjones.html')
        # Register the file before writing so it is tracked even if the
        # write below fails.
        self.temp_files.append(tmp)
        try:
            tmp.write(html)
        finally:
            # Always release the file handle, also when write() raises.
            tmp.close()

        return tmp.name

    def get_article_url(self, article):
        '''
        Return the URL to download for the feed entry `article`.

        Some of the feeds are served by feedburner (grr). Then the workaround
        to get their print version doesn't work anymore, so the original link
        (feedburner_origlink) is preferred whenever the entry has one;
        otherwise the entry's plain link is used.
        '''
        try:
            return article.feedburner_origlink
        except AttributeError:
            return article.link

    def preprocess_html(self, soup):
        '''
        Make image URLs absolute and append the Mother Jones support notice.

        Image sources that already start with http:// or https:// are left
        alone, protocol-relative sources (//host/...) get an explicit scheme,
        and every other source is resolved against the site root. The notice
        is appended to <body> when the document has one.

        Returns the (modified) soup.
        '''
        for img in soup.findAll('img', attrs={'src': True}):
            src = img['src']
            if src.startswith(('http://', 'https://')):
                continue
            if src.startswith('//'):
                img['src'] = 'http:' + src
            elif src.startswith('/'):
                img['src'] = self._site_root + src
            else:
                # Path without a leading slash: insert the separator so the
                # path is not glued onto the host name.
                img['src'] = self._site_root + '/' + src

        body = soup.body
        if body is not None:
            div = Tag(soup, 'div', [('class', 'mj_support')])
            div.append('''Your tax-deductible gifts help keep Mother Jones independent and uncompromised.
                To make a contribution, visit MotherJones.com or call 877-GIV-MOJO.
                ''')
            body.append(div)

        return soup