Fix #1119685 (New Yorker magazine only has web links, no text)

This commit is contained in:
Kovid Goyal 2013-02-15 09:15:26 +05:30
parent 9089edfa68
commit 29a45f9670

View File

@@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
''' '''
newyorker.com newyorker.com
''' '''
@@ -44,20 +44,18 @@ class NewYorker(BasicNewsRecipe):
, 'language' : language , 'language' : language
} }
keep_only_tags = [ keep_only_tags = [dict(name='div', attrs={'id':'pagebody'})]
dict(name='div', attrs={'class':'headers'})
,dict(name='div', attrs={'id':['articleheads','items-container','articleRail','articletext','photocredits']})
]
remove_tags = [ remove_tags = [
dict(name=['meta','iframe','base','link','embed','object']) dict(name=['meta','iframe','base','link','embed','object'])
,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons'] }) ,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons','social-utils-top','entry-keywords','entry-categories','utilsPrintEmail'] })
,dict(attrs={'id':['show-header','show-footer'] }) ,dict(attrs={'id':['show-header','show-footer'] })
] ]
remove_tags_after = dict(attrs={'class':'entry-content'})
remove_attributes = ['lang'] remove_attributes = ['lang']
feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')] feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')]
def print_version(self, url): def print_version(self, url):
return url + '?printable=true' return url + '?printable=true&currentPage=all'
def image_url_processor(self, baseurl, url): def image_url_processor(self, baseurl, url):
return url.strip() return url.strip()