Update Fortune Magazine

This commit is contained in:
Kovid Goyal 2018-04-01 09:16:24 +05:30
parent 146b3373eb
commit 2d9a45db03
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -1,5 +1,10 @@
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict
def classes(classes):
    """Return a BeautifulSoup ``attrs`` matcher selecting tags whose
    ``class`` attribute shares at least one entry with *classes*
    (a space-separated string of CSS class names)."""
    wanted = frozenset(classes.split(' '))

    def has_any(value):
        # Truthy when the tag has a class attribute overlapping `wanted`.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': has_any})
class Fortune(BasicNewsRecipe): class Fortune(BasicNewsRecipe):
@ -11,15 +16,19 @@ class Fortune(BasicNewsRecipe):
language = 'en' language = 'en'
category = 'news' category = 'news'
encoding = 'UTF-8' encoding = 'UTF-8'
keep_only_tags = [dict(attrs={'id': ['storycontent']})] keep_only_tags = [
remove_tags = [ dict(name='h1', attrs={'class': lambda x: x and 'headline' in x}),
dict(attrs={'class': ['hed_side', 'socialMediaToolbarContainer']})] classes('lead-media author'),
dict(id='article-body'),
]
no_javascript = True no_javascript = True
no_stylesheets = True no_stylesheets = True
needs_subscription = True needs_subscription = 'optional'
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
if self.username and self.password:
br.open('http://fortune.com') br.open('http://fortune.com')
br.select_form(id='sign-in-form') br.select_form(id='sign-in-form')
br['username'] = self.username br['username'] = self.username
@ -32,48 +41,19 @@ class Fortune(BasicNewsRecipe):
# Go to the latest issue # Go to the latest issue
soup = self.index_to_soup('http://fortune.com/section/magazine/') soup = self.index_to_soup('http://fortune.com/section/magazine/')
# Find cover & date
cover_item = soup.find('div', attrs={'id': 'cover-story'})
cover = cover_item.find('img', src=True)
self.cover_url = cover['src']
date = self.tag_to_string(cover_item.find(
'div', attrs={'class': 'tocDate'})).strip()
self.timefmt = u' [%s]' % date
feeds = OrderedDict()
section_title = ''
# checkout the cover story
articles = []
coverstory = soup.find('div', attrs={'class': 'cnnHeadline'})
title = self.tag_to_string(coverstory.a).strip()
url = coverstory.a['href']
desc = self.tag_to_string(coverstory.findNext(
'p', attrs={'class': 'cnnBlurbTxt'}))
articles.append({'title': title, 'url': url,
'description': desc, 'date': ''})
feeds['Cover Story'] = []
feeds['Cover Story'] += articles
for post in soup.findAll('div', attrs={'class': 'cnnheader'}):
section_title = self.tag_to_string(post).strip()
articles = [] articles = []
ul = post.findNext('ul') for i, article in enumerate(soup.findAll('article', attrs={'class': lambda x: x and 'type-article' in x.split()})):
for link in ul.findAll('li'): div = article.find('div', attrs={'class': lambda x: x and 'article-info' in x.split()})
links = link.find('h2') a = div.find('a', href=True)
title = self.tag_to_string(links.a).strip() url = a['href']
url = links.a['href'] if url.startswith('/'):
desc = self.tag_to_string( url = 'http://fortune.com' + url
link.find('p', attrs={'class': 'cnnBlurbTxt'})) title = self.tag_to_string(a)
articles.append({'title': title, 'url': url, ai = div.find('div', attrs={'class': lambda x: x and 'article-info-extended' in x.split()})
'description': desc, 'date': ''}) desc = ''
if ai:
if articles: desc = self.tag_to_string(desc)
if section_title not in feeds: self.log('Article:', title, 'at', url)
feeds[section_title] = [] articles.append({'title': title, 'url': url, 'description': desc})
feeds[section_title] += articles return [('Articles', articles)]
ans = [(key, val) for key, val in feeds.iteritems()]
return ans