diff --git a/recipes/nzz_webpaper.recipe b/recipes/nzz_webpaper.recipe
deleted file mode 100644
index 8847e26547..0000000000
--- a/recipes/nzz_webpaper.recipe
+++ /dev/null
@@ -1,112 +0,0 @@
-import re
-from calibre import strftime
-from calibre.ebooks.BeautifulSoup import Tag
-
-__license__ = 'GPL v3'
-__copyright__ = '2012-15, Bernd Leinfelder '
-
-'''
-webpaper.nzz.ch
-'''
-
-from calibre.ptempfile import PersistentTemporaryFile
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-
-def new_tag(soup, name, attrs=()):
-    impl = getattr(soup, 'new_tag', None)
-    if impl is not None:
-        return impl(name, attrs=dict(attrs))
-    return Tag(soup, name, attrs=attrs or None)
-
-
-class Nzz(BasicNewsRecipe):
-    title = 'NZZ Webpaper'
-    __author__ = 'Bernd Leinfelder'
-    description = 'Neue Zuercher Zeitung Webpaper - Erfordert NZZ Digital Abonnement. v20140425'
-    timefmt = ' [%a, %d %b, %Y]'
-    publisher = 'NZZ AG'
-    needs_subscription = True
-    category = 'news, politics, nachrichten, Switzerland'
-    oldest_article = 2
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    encoding = 'utf-8'
-    use_embedded_content = False
-    language = 'de'
-    temp_files = []
-    extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
-
-    conversion_options = {
-        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
-    }
-
-    remove_tags = [dict(name='footer'), dict(
-        {'class': ['sharebox', 'fullarticle__related']})]
-
-    remove_tags_before = dict(name='article')
-    remove_tags_after = dict(name='footer')
-
-    def parse_index(self):
-        baseref = 'https://webpaper.nzz.ch'
-        soup = self.index_to_soup(baseref)
-
-        # print soup.prettify()
-
-        articles = {}
-        sections = []
-        ans = []
-        issue = soup.find("link", rel="prefetch")
-
-        soup = self.index_to_soup(baseref + issue['href'])
-
-        for span in soup.findAll('span', attrs={'data-src-640': True}):
-            imgSrc = span['data-src-640']
-            # print "image source: "+ imgSrc
-            imgTag = new_tag(soup, "img", [("src", imgSrc)])
-            span.replaceWith(imgTag)
-
-        # print soup.prettify()
-
-        section = ""
-        lastsection = ""
-        pubdate = strftime('%a, %d %b')
-
-        articlesoup = soup.findAll(
-            "article", {"class": re.compile(".*fullarticle[ \"].*")})
-        for art in articlesoup:
-            # print art.prettify()
-            section = art['data-department']
-            # print "section is "+section
-
-            if section != lastsection:
-                sections.append(section)
-                articles[section] = []
-                lastsection = section
-
-            caption = art.find("h2")
-
-            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-            self.temp_files[-1].write("" +
-                                      art.prettify() + "")
-            self.temp_files[-1].close()
-            filename = self.temp_files[-1].name
-
-            articles[section].append(
-                dict(title=caption.string, url='file://' + filename, date=pubdate, description='', content=''))
-
-        ans = [(key, articles[key]) for key in sections if key in articles]
-
-        # pprint.pprint(ans)
-
-        return ans
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            br.open('https://webpaper.nzz.ch/login')
-            br.select_form(nr=0)
-            br['username'] = self.username
-            br['password'] = self.password
-            br.submit()
-        return br