Update Sueddeutsche Zeitung mobil

This commit is contained in:
Kovid Goyal 2014-10-07 09:23:29 +05:30
parent d35f3ce2de
commit 59fc8e518a

View File

@ -4,13 +4,15 @@ __copyright__ = '2012, 2013 Andreas Zeiser <andreas.zeiser@web.de>'
szmobil.sueddeutsche.de/ szmobil.sueddeutsche.de/
''' '''
# History # History
# 2014.10.06 Fixing Login URL and Article URL by lala-rob (web@lala-rob.de)
#
# 2013.01.09 Fixed bugs in article titles containing "strong" and # 2013.01.09 Fixed bugs in article titles containing "strong" and
# other small changes # other small changes
# 2012.08.04 Initial release # 2012.08.04 Initial release
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
import re import re
class SZmobil(BasicNewsRecipe): class SZmobil(BasicNewsRecipe):
title = u'Süddeutsche Zeitung mobil' title = u'Süddeutsche Zeitung mobil'
@ -33,7 +35,7 @@ class SZmobil(BasicNewsRecipe):
# timefmt = '' # timefmt = ''
timefmt = ' [%a, %d %b, %Y]' timefmt = ' [%a, %d %b, %Y]'
root_url ='http://szmobil.sueddeutsche.de/' root_url ='http://epaper.sueddeutsche.de/app/service/epaper-mobil/'
keep_only_tags = [dict(name='div', attrs={'class':'article'})] keep_only_tags = [dict(name='div', attrs={'class':'article'})]
def get_cover_url(self): def get_cover_url(self):
@ -45,22 +47,24 @@ class SZmobil(BasicNewsRecipe):
browser = BasicNewsRecipe.get_browser(self) browser = BasicNewsRecipe.get_browser(self)
# Login via fetching of Streiflicht -> Fill out login request # Login via fetching of Streiflicht -> Fill out login request
url = self.root_url + 'show.php?id=streif' url = 'https://id.sueddeutsche.de/login'
url2 = 'https://id.sueddeutsche.de/service/ticket?redirect_uri=http%3A%2F%2Fepaper.sueddeutsche.de%2Fapp%2Fservice%2Fepaper-mobil%2Flanding.php%3Fid%3Dstreif%26etag%3D1&service_id=epapermobile'
browser.open(url) browser.open(url)
browser.select_form(nr=0) # to select the first form browser.select_form(nr=0) # to select the first form
browser['username'] = self.username browser['login'] = self.username
browser['password'] = self.password browser['password'] = self.password
browser.submit() browser.submit()
browser.open(url2)
return browser return browser
def parse_index(self): def parse_index(self):
# find all sections # find all sections
src = self.index_to_soup('http://szmobil.sueddeutsche.de') src = self.index_to_soup('http://epaper.sueddeutsche.de/app/service/epaper-mobil/')
feeds = [] feeds = []
for itt in src.findAll('a',href=True): for itt in src.findAll('a',href=True):
if itt['href'].startswith('show.php?section'): if itt['href'].startswith('section.php?section'):
feeds.append( (itt.string[0:-2],itt['href']) ) feeds.append( (itt.string[0:-2],itt['href']) )
all_articles = [] all_articles = []
@ -74,7 +78,7 @@ class SZmobil(BasicNewsRecipe):
articles = [] articles = []
shorttitles = dict() shorttitles = dict()
for itt in src.findAll('a', href=True): for itt in src.findAll('a', href=True):
if itt['href'].startswith('show.php?id='): if itt['href'].startswith('article.php?id='):
article_url = itt['href'] article_url = itt['href']
article_id = int(re.search("id=(\d*)&etag=", itt['href']).group(1)) article_id = int(re.search("id=(\d*)&etag=", itt['href']).group(1))