Update National Geographic Magazine

2025-07-09 03:04:10 -04:00 · 2013-10-10 15:26:27 +05:30 · 2013-10-10 15:26:27 +05:30 · bfe6099a3d
commit bfe6099a3d
parent 7275d89cb5
1 changed files with 46 additions and 43 deletions
--- a/recipes/national_geographic_mag.recipe
+++ b/recipes/national_geographic_mag.recipe
@ -1,46 +1,49 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-class NatGeoMag(BasicNewsRecipe):
+class NGM(BasicNewsRecipe):
-    title                  = 'National Geographic Mag'
+
-    __author__             = 'Terminal Veracity'
+    title       = 'National Geographic Magazine'
-    description            = 'The National Geographic Magazine'
+    __author__  = 'Krittika Goyal'
-    publisher              = 'National Geographic'
+    description = 'National Geographic Magazine'
-    oldest_article         = 31
+    timefmt = ' [%d %b, %Y]'
-    max_articles_per_feed  = 50
+
-    category               = 'geography, magazine'
+    no_stylesheets = True
-    language               = 'en'
+    auto_cleanup = True
-    publication_type       = 'magazine'
+    auto_cleanup_keep = '//div[@class="featurepic"]'
-    cover_url              = 'http://www.yourlogoresources.com/wp-content/uploads/2011/09/national-geographic-logo.jpg'
+
-    use_embedded_content   = False
+    def nejm_get_index(self):
-    no_stylesheets         = True
+        return self.index_to_soup('http://ngm.nationalgeographic.com/2013/10/table-of-contents')
-    remove_javascript      = True
+
-    recursions             = 1
+    # To parse article toc
-    remove_empty_feeds     = True
+    def parse_index(self):
-    feeds                  = [('National Geographic Magazine', 'http://feeds.nationalgeographic.com/ng/NGM/NGM_Magazine')]
+            soup = self.nejm_get_index()
-    remove_tags            = [dict(name='div', attrs={'class':['nextpage_continue', 'subscribe']})]
+            tocfull = soup.find('div', attrs={'class':'coltoc'})
-    keep_only_tags         = [dict(attrs={'class':'main_3narrow'})]
+
-    extra_css              = """
+            toc = tocfull.find('div', attrs={'class':'more_section'})
-                                h1 {font-size: large; font-weight: bold; margin: .5em 0; }
+
-                                h2 {font-size: large; font-weight: bold; margin: .5em 0; }
+            articles = []
-                                h3 {font-size: medium; font-weight: bold; margin: 0 0; }
+            feeds = []
-                                .article_credits_author {font-size: small; font-style: italic; }
+            section_title = 'Features'
-                                .article_credits_photographer {font-size: small; font-style: italic; display: inline }
+            for x in toc.findAll(True):
-                             """
+                if x.name == 'a':
                    # Article found
                    title = self.tag_to_string(x)
                    url = x.get('href', False)
                    if not url or not title:
                        continue
                    url = 'http://ngm.nationalgeographic.com' + url
                    self.log('\t\tFound article:', title)
                    self.log('\t\t\t', url)
                    articles.append({'title': title, 'url':url,
                        'description':'', 'date':''})
            feeds.append((section_title, articles))
            art1 = tocfull.findAll('a')[1]
            art1_title = self.tag_to_string(art1.find('div', attrs={'class': 'toched'}))
            art1_url = art1.get('href', False)
            art1_url = 'http://ngm.nationalgeographic.com' + art1_url
            art1feed = {'title': art1_title, 'url':art1_url,
                        'description':'', 'date':''}
            feeds.append(('Cover Story', [art1feed]))
    def parse_feeds(self):
        """Return the base-class feeds with unwanted articles filtered out.

        Calls ``BasicNewsRecipe.parse_feeds`` and then drops, from every
        feed, any article whose title contains one of the markers listed
        below.

        Returns:
            The feeds object produced by ``BasicNewsRecipe.parse_feeds``,
            with each feed's ``articles`` list filtered in place.
        """
        # Substrings identifying articles that should not be included.
        unwanted_markers = (
            'Flashback',
            'Desktop Wallpaper',
            'Visions of Earth',
            'Your Shot',
            'MyShot',
            'Field Test',
        )
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Slice-assign so the feed keeps the same list object while the
            # unwanted entries are dropped (no mutation during iteration).
            feed.articles[:] = [
                article for article in feed.articles
                if not any(marker in article.title for marker in unwanted_markers)
            ]
        # Return only after *every* feed has been filtered; returning from
        # inside the loop would stop after the first feed and yield None
        # when there are no feeds at all.
        return feeds