From 530876cc4732ddcad78f1be14ecf8f089ab2c0a2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 16 Apr 2015 08:37:12 +0530 Subject: [PATCH] Update NZZ Webpaper --- recipes/nzz_webpaper.recipe | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/recipes/nzz_webpaper.recipe b/recipes/nzz_webpaper.recipe index 9fbe1172f9..c398637713 100644 --- a/recipes/nzz_webpaper.recipe +++ b/recipes/nzz_webpaper.recipe @@ -1,8 +1,9 @@ import re from calibre import strftime +from calibre.ebooks.BeautifulSoup import Tag __license__ = 'GPL v3' -__copyright__ = '2012-14, Bernd Leinfelder ' +__copyright__ = '2012-15, Bernd Leinfelder ' ''' webpaper.nzz.ch @@ -14,7 +15,7 @@ from calibre.web.feeds.recipes import BasicNewsRecipe class Nzz(BasicNewsRecipe): title = 'NZZ Webpaper' __author__ = 'Bernd Leinfelder' - description = 'Neue Zuercher Zeitung Webpaper - Erfordert NZZ Digital Abonnement' + description = 'Neue Zuercher Zeitung Webpaper - Erfordert NZZ Digital Abonnement. v20140425' timefmt = ' [%a, %d %b, %Y]' publisher = 'NZZ AG' needs_subscription = True @@ -30,9 +31,9 @@ class Nzz(BasicNewsRecipe): conversion_options = { 'comments' : description - ,'tags' : category - ,'language' : language - ,'publisher' : publisher + ,'tags' : category + ,'language' : language + ,'publisher' : publisher } remove_tags = [dict(name='footer') , dict({'class' : ['sharebox' , 'fullarticle__related']})] @@ -52,7 +53,15 @@ class Nzz(BasicNewsRecipe): issue = soup.find("link",rel="prefetch") soup = self.index_to_soup(baseref+issue['href']) + + for span in soup.findAll('span', attrs={'data-src-640':True}): + imgSrc=span['data-src-640'] + # print "image source: "+ imgSrc + imgTag = Tag(soup,"img",[("src",imgSrc)]) + span.replaceWith(imgTag) + # print soup.prettify() + section = "" lastsection = "" pubdate = strftime('%a, %d %b') @@ -78,7 +87,7 @@ class Nzz(BasicNewsRecipe): articles[section].append( dict(title=caption.string,url='file://'+filename, date=pubdate, description='', content='')) - ans = [(key, articles[key]) for key in sections if articles.has_key(key)] + ans = [(key, articles[key]) for key in sections if key in articles] # pprint.pprint(ans)