From 75a0ac806e1d7e797f348e0c2b527784218f6d89 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 12 Aug 2010 11:25:54 -0600 Subject: [PATCH] Fix #6480 (Esquire receipe returning blank articles) --- resources/recipes/esquire.recipe | 39 ++++++++++---------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/resources/recipes/esquire.recipe b/resources/recipes/esquire.recipe index 96338d0b48..d9c189b473 100644 --- a/resources/recipes/esquire.recipe +++ b/resources/recipes/esquire.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' www.esquire.com @@ -9,7 +7,6 @@ www.esquire.com from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Esquire(BasicNewsRecipe): title = 'Esquire' @@ -22,23 +19,21 @@ class Esquire(BasicNewsRecipe): no_stylesheets = True encoding = 'cp1250' use_embedded_content = False - language = 'en' - - lang = 'en-US' - cover_url = strftime('http://www.esquire.com/cm/esquire/cover-images/%Y_') + strftime('%m').strip('0') + '.jpg' + language = 'en' + publication_type = 'magazine' + masthead_url = 'http://www.esquire.com/cm/shared/site_images/print_this/esquire_logo.gif' conversion_options = { - 'comment' : description - , 'tags' : category - , 'publisher' : publisher - , 'language' : lang - , 'pretty_print' : True + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language } - keep_only_tags = [dict(name='div', attrs={'id':'content'})] - - remove_tags = [dict(name=['object','link','embed','iframe'])] - + keep_only_tags = [dict(name='div', attrs={'id':['article_header','article_content']})] + remove_tags = [dict(name=['object','link','embed','iframe','base'])] + remove_attributes = ['width','height'] + feeds = [ (u'Style' , u'http://www.esquire.com/style/rss/' ) ,(u'Women' , u'http://www.esquire.com/women/rss/' ) @@ -47,17 +42,7 @@ class Esquire(BasicNewsRecipe): ,(u'Frontpage', u'http://www.esquire.com/rss/' ) ] - - def print_version(self, url): - rest = url.rpartition('?')[0] - article = rest.rpartition('/')[2] - return 'http://www.esquire.com/print-this/' + article - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - soup.head.insert(0,mlang) for item in soup.findAll(style=True): del item['style'] return soup