Fix #6480 (Esquire recipe returning blank articles)

This commit is contained in:
Kovid Goyal 2010-08-12 11:25:54 -06:00
parent 6eade64d80
commit 75a0ac806e

View File

@ -1,7 +1,5 @@
#!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
''' '''
www.esquire.com www.esquire.com
@ -9,7 +7,6 @@ www.esquire.com
from calibre import strftime from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
class Esquire(BasicNewsRecipe): class Esquire(BasicNewsRecipe):
title = 'Esquire' title = 'Esquire'
@ -23,21 +20,19 @@ class Esquire(BasicNewsRecipe):
encoding = 'cp1250' encoding = 'cp1250'
use_embedded_content = False use_embedded_content = False
language = 'en' language = 'en'
publication_type = 'magazine'
lang = 'en-US' masthead_url = 'http://www.esquire.com/cm/shared/site_images/print_this/esquire_logo.gif'
cover_url = strftime('http://www.esquire.com/cm/esquire/cover-images/%Y_') + strftime('%m').strip('0') + '.jpg'
conversion_options = { conversion_options = {
'comment' : description 'comment' : description
, 'tags' : category , 'tags' : category
, 'publisher' : publisher , 'publisher' : publisher
, 'language' : lang , 'language' : language
, 'pretty_print' : True
} }
keep_only_tags = [dict(name='div', attrs={'id':'content'})] keep_only_tags = [dict(name='div', attrs={'id':['article_header','article_content']})]
remove_tags = [dict(name=['object','link','embed','iframe','base'])]
remove_tags = [dict(name=['object','link','embed','iframe'])] remove_attributes = ['width','height']
feeds = [ feeds = [
(u'Style' , u'http://www.esquire.com/style/rss/' ) (u'Style' , u'http://www.esquire.com/style/rss/' )
@ -47,17 +42,7 @@ class Esquire(BasicNewsRecipe):
,(u'Frontpage', u'http://www.esquire.com/rss/' ) ,(u'Frontpage', u'http://www.esquire.com/rss/' )
] ]
def print_version(self, url):
rest = url.rpartition('?')[0]
article = rest.rpartition('/')[2]
return 'http://www.esquire.com/print-this/' + article
def preprocess_html(self, soup): def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
soup.head.insert(0,mlang)
for item in soup.findAll(style=True): for item in soup.findAll(style=True):
del item['style'] del item['style']
return soup return soup