diff --git a/recipes/national_geographic_mag.recipe b/recipes/national_geographic_mag.recipe
index f571d117be..8e7cc7709e 100644
--- a/recipes/national_geographic_mag.recipe
+++ b/recipes/national_geographic_mag.recipe
@@ -1,46 +1,49 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
-class NatGeoMag(BasicNewsRecipe):
-    title = 'National Geographic Mag'
-    __author__ = 'Terminal Veracity'
-    description = 'The National Geographic Magazine'
-    publisher = 'National Geographic'
-    oldest_article = 31
-    max_articles_per_feed = 50
-    category = 'geography, magazine'
-    language = 'en'
-    publication_type = 'magazine'
-    cover_url = 'http://www.yourlogoresources.com/wp-content/uploads/2011/09/national-geographic-logo.jpg'
-    use_embedded_content = False
-    no_stylesheets = True
-    remove_javascript = True
-    recursions = 1
-    remove_empty_feeds = True
-    feeds = [('National Geographic Magazine', 'http://feeds.nationalgeographic.com/ng/NGM/NGM_Magazine')]
-    remove_tags = [dict(name='div', attrs={'class':['nextpage_continue', 'subscribe']})]
-    keep_only_tags = [dict(attrs={'class':'main_3narrow'})]
-    extra_css = """
-        h1 {font-size: large; font-weight: bold; margin: .5em 0; }
-        h2 {font-size: large; font-weight: bold; margin: .5em 0; }
-        h3 {font-size: medium; font-weight: bold; margin: 0 0; }
-        .article_credits_author {font-size: small; font-style: italic; }
-        .article_credits_photographer {font-size: small; font-style: italic; display: inline }
-    """
+class NGM(BasicNewsRecipe):
 
-    def parse_feeds(self):
-        feeds = BasicNewsRecipe.parse_feeds(self)
-        for feed in feeds:
-            for article in feed.articles[:]:
-                if 'Flashback' in article.title:
-                    feed.articles.remove(article)
-                elif 'Desktop Wallpaper' in article.title:
-                    feed.articles.remove(article)
-                elif 'Visions of Earth' in article.title:
-                    feed.articles.remove(article)
-                elif 'Your Shot' in article.title:
-                    feed.articles.remove(article)
-                elif 'MyShot' in article.title:
-                    feed.articles.remove(article)
-                elif 'Field Test' in article.title:
-                    feed.articles.remove(article)
-        return feeds
+    title = 'National Geographic Magazine'
+    __author__ = 'Krittika Goyal'
+    description = 'National Geographic Magazine'
+    timefmt = ' [%d %b, %Y]'
+
+    no_stylesheets = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//div[@class="featurepic"]'
+
+    def nejm_get_index(self):
+        return self.index_to_soup('http://ngm.nationalgeographic.com/2013/10/table-of-contents')
+
+    # To parse artice toc
+    def parse_index(self):
+        soup = self.nejm_get_index()
+        tocfull = soup.find('div', attrs={'class':'coltoc'})
+
+        toc = tocfull.find('div', attrs={'class':'more_section'})
+
+        articles = []
+        feeds = []
+        section_title = 'Features'
+        for x in toc.findAll(True):
+            if x.name == 'a':
+                # Article found
+                title = self.tag_to_string(x)
+                url = x.get('href', False)
+                if not url or not title:
+                    continue
+                url = 'http://ngm.nationalgeographic.com' + url
+                self.log('\t\tFound article:', title)
+                self.log('\t\t\t', url)
+                articles.append({'title': title, 'url':url,
+                                 'description':'', 'date':''})
+        feeds.append((section_title, articles))
+
+        art1 = tocfull.findAll('a')[1]
+        art1_title = self.tag_to_string(art1.find('div', attrs={'class': 'toched'}))
+        art1_url = art1.get('href', False)
+        art1_url = 'http://ngm.nationalgeographic.com' + art1_url
+        art1feed = {'title': art1_title, 'url':art1_url,
+                    'description':'', 'date':''}
+        feeds.append(('Cover Story', [art1feed]))
+
+        return feeds