Update National Post

2025-12-05 12:45:02 -05:00 · 2017-08-20 14:56:48 +05:30 · 2017-08-20 14:56:48 +05:30 · af6e96b7a5
commit af6e96b7a5
parent c90a726acc
1 changed files with 21 additions and 46 deletions
--- a/recipes/national_post.recipe
+++ b/recipes/national_post.recipe
@@ -1,57 +1,32 @@
 #!/usr/bin/env python2
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
 from calibre.web.feeds.recipes import BasicNewsRecipe
-class NYTimes(BasicNewsRecipe):
+def classes(classes):
    # Build a tag-matching spec that selects elements carrying at least one
    # of the class names listed in `classes` (a single-space separated string).
    # The wanted names are frozen once here so the predicate below reuses them.
    q = frozenset(classes.split(' '))
    # The predicate receives an element's class attribute value `x`. It is
    # falsy when `x` is empty/None; otherwise it yields the (possibly empty)
    # set of whitespace-separated names in `x` that also appear in `q`.
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
 class NationalPost(BasicNewsRecipe):
    title = 'National Post'
-    __author__ = 'Krittika Goyal'
+    __author__ = 'Kovid Goyal'
    description = 'Canadian national newspaper'
    timefmt = ' [%d %b, %Y]'
    language = 'en_CA'
    needs_subscription = False
    no_stylesheets = True
-    auto_cleanup = True
+    oldest_article = 1.5
-    auto_cleanup_keep = '//*[@class="npStoryPhoto npTxtPlain"]'
+    use_embedded_content = False
-    # TO GET ARTICLE TOC
+    keep_only_tags = [
-    def nejm_get_index(self):
+        dict(itemprop='headline'),
-        return self.index_to_soup('http://www.nationalpost.com/todays-paper/index.html')
+        classes('featured-image'),
        dict(itemprop='articleBody'),
    ]
-    # To parse artice toc
+    feeds = ['http://nationalpost.com/rss']
    def parse_index(self):
        """Build the section/article index from the table-of-contents page.

        Walks every tag inside the ``npContentMain`` container: each ``h4``
        starts a new section, and each ``h5`` seen after a section heading
        contributes one article (title from the heading text, URL from the
        first link inside it). Headings without a link, a URL or a title are
        skipped.

        Returns a list of ``(section_title, articles)`` tuples, where
        ``articles`` is a list of dicts with the keys ``title``, ``url``,
        ``description`` and ``date``. Sections without articles are omitted.
        An empty list is returned when the container is not on the page.
        """
        soup = self.nejm_get_index()
        div = soup.find(id='npContentMain')
        if div is None:
            # The page did not contain the expected container; report that
            # instead of failing with an AttributeError on None below.
            self.log('\tCould not find the npContentMain container')
            return []

        current_section = None
        current_articles = []
        feeds = []
        for x in div.findAll(True):
            if x.name == 'h4':
                # Section found: flush the previous one if it has articles.
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(x)
                current_articles = []
                self.log('\tFound section:', current_section)
            elif x.name == 'h5' and current_section is not None:
                # Article found
                title = self.tag_to_string(x)
                a = x.find('a', href=True)
                if a is None:
                    continue
                # URLs are used exactly as found; relative links are not
                # resolved here.
                url = a.get('href', False)
                if not url or not title:
                    continue
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({'title': title, 'url': url,
                                         'description': '', 'date': ''})

        # Flush the trailing section, which no later h4 will close.
        if current_articles and current_section:
            feeds.append((current_section, current_articles))
        return feeds