# Provenance: recipe added by commit 2129993f9bc99b86f5ddefef56574dbb1711f905
# ("Suedeutsche Zeitung by Darko Miletic", committed by Kovid Goyal,
# 2010-02-11) as resources/recipes/sueddeutschezeitung.recipe. The same commit
# added the icon resources/images/news/sueddeutschezeitung.png.

__license__   = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
www.sueddeutsche.de/sz/
'''

import urllib
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class SueddeutcheZeitung(BasicNewsRecipe):
    '''
    Recipe for the daily print edition index at www.sueddeutsche.de/sz/.

    The section list in ``feeds`` is not a list of RSS feeds: every entry is
    an HTML index page for today's edition, scraped by ``parse_index``.
    Credentials, when supplied, are posted to the site's login form in
    ``get_browser`` before any page is fetched.
    '''
    # NOTE(review): 'Sueddeutche' (title, publisher, class name) looks like a
    # misspelling of 'Sueddeutsche'; left untouched because these are
    # user-visible / public identifiers.
    title                 = 'Sueddeutche Zeitung'
    __author__            = 'Darko Miletic'
    description           = 'News from Germany. Access to paid content.'
    publisher             = 'Sueddeutche Zeitung'
    category              = 'news, politics, Germany'
    no_stylesheets        = True
    oldest_article        = 2
    encoding              = 'cp1252'
    needs_subscription    = True
    remove_empty_feeds    = True
    PREFIX                = 'http://www.sueddeutsche.de'
    # Evaluated once, when the class body runs: the index URL is pinned to
    # the date on which the recipe was loaded.
    INDEX                 = PREFIX + strftime('/sz/%Y-%m-%d/')
    LOGIN                 = PREFIX + '/app/lbox/index.html'
    use_embedded_content  = False
    masthead_url          = 'http://pix.sueddeutsche.de/img/g_.gif'
    language              = 'de_DE'
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif} '

    conversion_options = {
        'comment'          : description,
        'tags'             : category,
        'publisher'        : publisher,
        'language'         : language,
        'linearize_tables' : True,
    }

    remove_attributes = ['height', 'width']

    def get_browser(self):
        '''
        Return a browser that has opened the index page and, when both a
        username and a password are set, posted them to the login form.
        '''
        # NOTE(review): called without an instance, exactly as in the
        # original. Whether this is valid depends on how the installed
        # calibre declares BasicNewsRecipe.get_browser -- confirm.
        br = BasicNewsRecipe.get_browser()
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({
                'login_name'     : self.username,
                'login_passwort' : self.password,
                'lboxaction'     : 'doLogin',
                'passtxt'        : 'Passwort',
                'referer'        : self.INDEX,
                'x'              : '22',
                'y'              : '7',
            })
            br.open(self.LOGIN, data)
        return br

    remove_tags = [
        dict(attrs={'class': 'hidePrint'}),
        dict(name=['link', 'object', 'embed', 'base', 'iframe']),
    ]
    remove_tags_before = dict(name='h2')
    remove_tags_after  = dict(attrs={'class': 'author'})

    feeds = [
        (u'Politik'      , INDEX + 'politik/'      ),
        (u'Seite drei'   , INDEX + 'seitedrei/'    ),
        (u'Meinungsseite', INDEX + 'meinungsseite/'),
        (u'Wissen'       , INDEX + 'wissen/'       ),
        (u'Panorama'     , INDEX + 'panorama/'     ),
        (u'Feuilleton'   , INDEX + 'feuilleton/'   ),
        (u'Medien'       , INDEX + 'medien/'       ),
        (u'Wirtschaft'   , INDEX + 'wirtschaft/'   ),
        (u'Sport'        , INDEX + 'sport/'        ),
        (u'Bayern'       , INDEX + 'bayern/'       ),
        (u'Muenchen'     , INDEX + 'muenchen/'     ),
        (u'jetzt.de'     , INDEX + 'jetzt.de/'     ),
    ]

    def parse_index(self):
        '''
        Scrape every section index page and collect its articles.

        Returns a list of ``(feedtitle, articles)`` tuples, one per entry
        returned by ``self.get_feeds()``. Each article is a dict with the
        keys ``title``, ``date``, ``url`` and ``description``.

        A section page without the expected listing, or a teaser cell
        without a usable title link, is skipped with a warning instead of
        aborting the whole download with an AttributeError/KeyError.
        '''
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            tbl = soup.find(attrs={'class':'szprintd'})
            if tbl is None:
                # find() returns None when nothing matches; keep the section
                # (with no articles) just as the original does for a listing
                # that contains no 'topthema' cells.
                self.log.warn('No article listing found at %s' % feedurl)
                items = []
            else:
                items = tbl.findAll(name='td', attrs={'class':'topthema'})
            for item in items:
                titletag = item.find(attrs={'class':'Titel'})
                atag = titletag.a if titletag is not None else None
                href = atag.get('href') if atag is not None else None
                if not href:
                    # Without a link there is nothing to download.
                    self.log.warn('Skipping entry without title link at %s' % feedurl)
                    continue
                ptag = item.find('p')
                if ptag is not None:
                    # Drop an inline <script> so its source does not end up
                    # in the description text.
                    stag = ptag.find('script')
                    if stag:
                        stag.extract()
                    description = self.tag_to_string(ptag)
                else:
                    description = ''
                articles.append({
                    'title'       : self.tag_to_string(atag),
                    'date'        : strftime(self.timefmt),
                    'url'         : self.PREFIX + href,
                    'description' : description,
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

    def print_version(self, url):
        '''Return the article URL with 'print.html' appended.'''
        return url + 'print.html'