Update Esquire

2025-12-09 22:55:02 -05:00 · 2015-04-02 10:38:17 +05:30 · 2015-04-02 10:38:17 +05:30 · 6d34cf7dfe
commit 6d34cf7dfe
parent 57a8c0e8a5
1 changed file with 61 additions and 28 deletions
--- a/recipes/esquire.recipe
+++ b/recipes/esquire.recipe
@@ -1,47 +1,80 @@
 __license__   = 'GPL v3'
 __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.esquire.com
 '''
 from collections import defaultdict
 from calibre.web.feeds.news import BasicNewsRecipe
 from css_selectors import Select
 class Esquire(BasicNewsRecipe):
    # Recipe class for Esquire; it subclasses BasicNewsRecipe and is configured
    # through the class attributes below.
    # Diff view: a line starting with '-' is the removed text and a line
    # starting with '+' is the text that replaces it.
    title                 = 'Esquire'
-    __author__            = 'Darko Miletic'
+    __author__            = 'Kovid Goyal'
    description           = 'Esquire: Man at His Best'
    publisher             = 'Hearst Communications, Inc.'
    category              = 'magazine, men, women we love, style, the guide, sex, screen'
    oldest_article        = 30
    max_articles_per_feed = 100
    no_stylesheets        = True
    # The declared page encoding changes from cp1250 to utf-8.
-    encoding              = 'cp1250'
+    encoding              = 'utf-8'
    use_embedded_content  = False
    language              = 'en'
    publication_type      = 'magazine'
    masthead_url          = 'http://www.esquire.com/cm/shared/site_images/print_this/esquire_logo.gif'
    # NOTE(review): from here down to the closing ']' the removed ('-') and
    # added ('+') lines are interleaved one by one, so they do not read as
    # contiguous statements. The '+' lines form the new keep_only_tags and
    # remove_tags lists; the '-' lines are the old conversion_options dict,
    # the old keep_only_tags / remove_tags / remove_attributes one-liners and
    # the old feeds list. The unmarked fragments look like leftovers of the
    # old dict and feeds list -- confirm against the repository.
-    conversion_options = {
+    keep_only_tags = [
-                          'comment'   : description
+        dict(name='header', attrs={'class':['gallery-header', 'article-header']}),
-                        , 'tags'      : category
+        dict(attrs={'class':['gallery-main-view', 'article-body--content']}),
-                        , 'publisher' : publisher
+    ]
                        , 'language'  : language
                        }
-    keep_only_tags    = [dict(name='div', attrs={'id':['article_header','article_content']})]
+    remove_tags = [
-    remove_tags       = [dict(name=['object','link','embed','iframe','base'])]
+        dict(attrs={'class':'article-body--share-container'}),
-    remove_attributes = ['width','height']
+        dict(attrs={'class':lambda x: x and 'tags--top' in x}),
-
+        dict(attrs={'class':lambda x: x and 'image-share' in x}),
-    feeds = [
+        dict(attrs={'class':lambda x: x and 'share-gallery' in x}),
-               (u'Style'    , u'http://www.esquire.com/style/rss/'    )
+        dict(attrs={'class':lambda x: x and 'embedded-image--expand' in x}),
-              ,(u'Women'    , u'http://www.esquire.com/women/rss/'    )
+        dict(attrs={'class':lambda x: x and 'embedded-image--close' in x}),
              ,(u'Features' , u'http://www.esquire.com/features/rss/' )
              ,(u'Fiction'  , u'http://www.esquire.com/fiction/rss/'  )
              ,(u'Frontpage', u'http://www.esquire.com/rss/'          )
    ]
    def preprocess_html(self, soup):
        # Diff view of the loop: the '-' lines are the removed version, which
        # deleted the style attribute from every tag that had one; the '+'
        # lines are the replacement, which copies each <img> tag's data-src
        # attribute into its src attribute.
-        for item in soup.findAll(style=True):
+        for img in soup.findAll('img', attrs={'data-src':True}):
-            del item['style']
+            img['src'] = img['data-src']
        # The same soup object, modified in place, is returned.
        return soup
    def parse_index(self):
        '''
        Build the section/article index from the Esquire front page.

        The front page is loaded with self.index_to_soup(..., as_tree=True)
        and queried with CSS selectors. The result is a list of
        (section name, articles) tuples in which 'Cover Story' sorts first
        and the remaining sections follow alphabetically; sections that
        ended up without any article are left out. Every story article is a
        dict with 'title', 'url' and 'description' keys; the cover story
        entry carries only 'title' and 'url'.
        '''
        site = 'http://www.esquire.com'
        select = Select(self.index_to_soup(site, as_tree=True))

        def absolute(href):
            # Links that start with '/' are anchored to the site root
            if href.startswith('/'):
                return site + href
            return href

        def text_of(node):
            return self.tag_to_string(node).strip()

        sections = defaultdict(list)

        # Only the first promo link is used as the cover story
        cover = next(iter(select('.custom-promo--title a[href]')), None)
        if cover is not None:
            sections['Cover Story'] = [
                {'title':text_of(cover), 'url':absolute(cover.get('href'))}
            ]

        for story in select('.landing-feed--story-container'):
            # Compare against None explicitly: the matched nodes themselves
            # may evaluate as false
            heading = next(iter(select('.landing-feed--story-section-name', story)), None)
            if heading is None:
                continue
            # Looking the section up registers it even when no article is
            # added; empty sections are filtered out at the end
            bucket = sections[text_of(heading)]
            link = next(iter(select('a.landing-feed--story-title[href]', story)), None)
            if link is None:
                continue
            headline = text_of(link)
            target = absolute(link.get('href'))
            blurb = next(iter(select('.landing-feed--story-abstract', story)), None)
            summary = '' if blurb is None else text_of(blurb)
            bucket.append({'title':headline, 'url':target, 'description':summary})

        # (False, name) sorts ahead of (True, name), so the cover story leads
        ordered = sorted(sections, key=lambda name: (name != 'Cover Story', name))
        return [(name, sections[name]) for name in ordered if sections[name]]