Update National Geographic Magazine

2025-07-09 03:04:10 -04:00 · 2013-10-10 15:26:27 +05:30 · 2013-10-10 15:26:27 +05:30 · bfe6099a3d
commit bfe6099a3d
parent 7275d89cb5
1 changed file with 46 additions and 43 deletions
--- a/recipes/national_geographic_mag.recipe
+++ b/recipes/national_geographic_mag.recipe
@@ -1,46 +1,49 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe

-class NatGeoMag(BasicNewsRecipe):
-    title                  = 'National Geographic Mag'
-    __author__             = 'Terminal Veracity'
-    description            = 'The National Geographic Magazine'
-    publisher              = 'National Geographic'
-    oldest_article         = 31
-    max_articles_per_feed  = 50
-    category               = 'geography, magazine'
-    language               = 'en'
-    publication_type       = 'magazine'
-    cover_url              = 'http://www.yourlogoresources.com/wp-content/uploads/2011/09/national-geographic-logo.jpg'
-    use_embedded_content   = False
-    no_stylesheets         = True
-    remove_javascript      = True
-    recursions             = 1
-    remove_empty_feeds     = True
-    feeds                  = [('National Geographic Magazine', 'http://feeds.nationalgeographic.com/ng/NGM/NGM_Magazine')]
-    remove_tags            = [dict(name='div', attrs={'class':['nextpage_continue', 'subscribe']})]
-    keep_only_tags         = [dict(attrs={'class':'main_3narrow'})]
-    extra_css              = """
-                                h1 {font-size: large; font-weight: bold; margin: .5em 0; }
-                                h2 {font-size: large; font-weight: bold; margin: .5em 0; }
-                                h3 {font-size: medium; font-weight: bold; margin: 0 0; }
-                                .article_credits_author {font-size: small; font-style: italic; }
-                                .article_credits_photographer {font-size: small; font-style: italic; display: inline }
-                             """
+class NGM(BasicNewsRecipe):

-    def parse_feeds(self):
-        feeds = BasicNewsRecipe.parse_feeds(self)
-        for feed in feeds:
-            for article in feed.articles[:]:
-                if 'Flashback' in article.title:
-                    feed.articles.remove(article)
-                elif 'Desktop Wallpaper' in article.title:
-                    feed.articles.remove(article)
-                elif 'Visions of Earth' in article.title:
-                    feed.articles.remove(article)
-                elif 'Your Shot' in article.title:
-                    feed.articles.remove(article)
-                elif 'MyShot' in article.title:
-                    feed.articles.remove(article)
-                elif 'Field Test' in article.title:
-                    feed.articles.remove(article)
-        return feeds
+    title       = 'National Geographic Magazine'
+    __author__  = 'Krittika Goyal'
+    description = 'National Geographic Magazine'
+    timefmt = ' [%d %b, %Y]'
+
+    no_stylesheets = True
+    auto_cleanup = True
+    auto_cleanup_keep = '//div[@class="featurepic"]'
+
+    def nejm_get_index(self):
+        return self.index_to_soup('http://ngm.nationalgeographic.com/2013/10/table-of-contents')
+
+    # Parse the article table of contents
+    def parse_index(self):
+            soup = self.nejm_get_index()
+            tocfull = soup.find('div', attrs={'class':'coltoc'})
+
+            toc = tocfull.find('div', attrs={'class':'more_section'})
+
+            articles = []
+            feeds = []
+            section_title = 'Features'
+            for x in toc.findAll(True):
+                if x.name == 'a':
+                    # Article found
+                    title = self.tag_to_string(x)
+                    url = x.get('href', False)
+                    if not url or not title:
+                        continue
+                    url = 'http://ngm.nationalgeographic.com' + url
+                    self.log('\t\tFound article:', title)
+                    self.log('\t\t\t', url)
+                    articles.append({'title': title, 'url':url,
+                        'description':'', 'date':''})
+            feeds.append((section_title, articles))
+
+            art1 = tocfull.findAll('a')[1]
+            art1_title = self.tag_to_string(art1.find('div', attrs={'class': 'toched'}))
+            art1_url = art1.get('href', False)
+            art1_url = 'http://ngm.nationalgeographic.com' + art1_url
+            art1feed = {'title': art1_title, 'url':art1_url,
+                        'description':'', 'date':''}
+            feeds.append(('Cover Story', [art1feed]))
+
+            return feeds