Update mediapart.recipe

Three changes were made:
* The structure of the feed page read from "https://www.mediapart.fr/journal/fil-dactualites" has changed, so the div class "page-content bust" is replaced by "global-wrapper".
* The website's login page has changed, so the index of the login form passed to select_form is adjusted (nr=2 instead of nr=1).
* Changed the code that builds the article collections, which was previously not working: the dictionary lookup on the article type is replaced by explicit "if" checks (my change might not be very Pythonic, but it works).

Best regards,
Hervé
This commit is contained in:
Hervé M 2016-11-12 09:38:37 +01:00 committed by GitHub
parent 415fb9c317
commit d6e3a82b4f

View File

@ -54,7 +54,7 @@ class Mediapart(BasicNewsRecipe):
soup = self.index_to_soup( soup = self.index_to_soup(
'https://www.mediapart.fr/journal/fil-dactualites') 'https://www.mediapart.fr/journal/fil-dactualites')
page = soup.find('div', {'class': 'page-content bust'}) page = soup.find('div', {'class': 'global-wrapper'})
fils = page.find('ul', {'class': 'post-list universe-journal'}) fils = page.find('ul', {'class': 'post-list universe-journal'})
for article in fils.findAll('li'): for article in fils.findAll('li'):
@ -92,13 +92,14 @@ class Mediapart(BasicNewsRecipe):
summary = { summary = {
'title': self.tag_to_string(title).strip(), 'title': self.tag_to_string(title).strip(),
'author': ', '.join(authors), 'author': ', '.join(authors),
'url': url, 'url': 'https://www.mediapart.fr' + url
} }
{ if article_type == 'Lien':
"Brève": breves, liens.append(summary)
"Lien": liens, if article_type == 'Confidentiel':
"Confidentiel": confidentiels, confidentiels.append(summary)
}.get(article_type).append(summary) if article_type not in ['Lien', 'Confidentiel']:
breves.append(summary)
except: except:
pass pass
@ -149,7 +150,7 @@ class Mediapart(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self) br = BasicNewsRecipe.get_browser(self)
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:
br.open('https://www.mediapart.fr/login') br.open('https://www.mediapart.fr/login')
br.select_form(nr=1) br.select_form(nr=2)
br['name'] = self.username br['name'] = self.username
br['password'] = self.password br['password'] = self.password
br.submit() br.submit()