Update Esquire

2026-01-01 09:40:21 -05:00 · 2015-04-02 10:38:17 +05:30 · 2015-04-02 10:38:17 +05:30 · 6d34cf7dfe
commit 6d34cf7dfe
parent 57a8c0e8a5
1 changed file with 61 additions and 28 deletions
--- a/recipes/esquire.recipe
+++ b/recipes/esquire.recipe
@@ -1,47 +1,80 @@
 __license__   = 'GPL v3'
-__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'

 '''
 www.esquire.com
 '''
+from collections import defaultdict

 from calibre.web.feeds.news import BasicNewsRecipe
+from css_selectors import Select

 class Esquire(BasicNewsRecipe):
    title                 = 'Esquire'
-    __author__            = 'Darko Miletic'
+    __author__            = 'Kovid Goyal'
    description           = 'Esquire: Man at His Best'
    publisher             = 'Hearst Communications, Inc.'
-    category              = 'magazine, men, women we love, style, the guide, sex, screen'
-    oldest_article        = 30
-    max_articles_per_feed = 100
    no_stylesheets        = True
-    encoding              = 'cp1250'
-    use_embedded_content  = False
+    encoding              = 'utf-8'
    language              = 'en'
-    publication_type      = 'magazine'
-    masthead_url          = 'http://www.esquire.com/cm/shared/site_images/print_this/esquire_logo.gif'

-    conversion_options = {
-                          'comment'   : description
-                        , 'tags'      : category
-                        , 'publisher' : publisher
-                        , 'language'  : language
-                        }
+    keep_only_tags = [
+        dict(name='header', attrs={'class':['gallery-header', 'article-header']}),
+        dict(attrs={'class':['gallery-main-view', 'article-body--content']}),
+    ]

-    keep_only_tags    = [dict(name='div', attrs={'id':['article_header','article_content']})]
-    remove_tags       = [dict(name=['object','link','embed','iframe','base'])]
-    remove_attributes = ['width','height']
-
-    feeds = [
-               (u'Style'    , u'http://www.esquire.com/style/rss/'    )
-              ,(u'Women'    , u'http://www.esquire.com/women/rss/'    )
-              ,(u'Features' , u'http://www.esquire.com/features/rss/' )
-              ,(u'Fiction'  , u'http://www.esquire.com/fiction/rss/'  )
-              ,(u'Frontpage', u'http://www.esquire.com/rss/'          )
-            ]
+    remove_tags = [
+        dict(attrs={'class':'article-body--share-container'}),
+        dict(attrs={'class':lambda x: x and 'tags--top' in x}),
+        dict(attrs={'class':lambda x: x and 'image-share' in x}),
+        dict(attrs={'class':lambda x: x and 'share-gallery' in x}),
+        dict(attrs={'class':lambda x: x and 'embedded-image--expand' in x}),
+        dict(attrs={'class':lambda x: x and 'embedded-image--close' in x}),
+    ]

    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
+        for img in soup.findAll('img', attrs={'data-src':True}):
+            img['src'] = img['data-src']
        return soup
+
+    def parse_index(self):
+        url = 'http://www.esquire.com'
+        root = self.index_to_soup(url, as_tree=True)
+        select = Select(root)
+        feeds = defaultdict(list)
+
+        for a in select('.custom-promo--title a[href]'):
+            title = self.tag_to_string(a).strip()
+            url = a.get('href')
+            if url.startswith('/'):
+                url = 'http://www.esquire.com' + url
+            feeds['Cover Story'] = [{'title':title, 'url':url}]
+            break
+
+        for story in select('.landing-feed--story-container'):
+            for sec in select('.landing-feed--story-section-name', story):
+                section = self.tag_to_string(sec).strip()
+                break
+            else:
+                continue
+            articles = feeds[section]
+            for a in select('a.landing-feed--story-title[href]', story):
+                title = self.tag_to_string(a).strip()
+                url = a.get('href')
+                if url.startswith('/'):
+                    url = 'http://www.esquire.com' + url
+                break
+            else:
+                continue
+            for div in select('.landing-feed--story-abstract', story):
+                desc = self.tag_to_string(div).strip()
+                break
+            else:
+                desc = ''
+            articles.append({'title':title, 'url':url, 'description':desc})
+
+        ans = []
+        for sec in sorted(feeds, key=lambda x:(x != 'Cover Story', x)):
+            articles = feeds[sec]
+            if articles:
+                ans.append((sec, articles))
+        return ans