From 7d61da1ab65b3af29bd59c9f342161e24bc7b3ee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 17 Sep 2013 16:19:31 +0530 Subject: [PATCH] Update Liberation Fixes #1226391 [Private bug](https://bugs.launchpad.net/calibre/+bug/1226391) --- recipes/liberation.recipe | 60 +++++++++----------- recipes/liberation_sub.recipe | 103 ---------------------------------- 2 files changed, 25 insertions(+), 138 deletions(-) delete mode 100644 recipes/liberation_sub.recipe diff --git a/recipes/liberation.recipe b/recipes/liberation.recipe index 741e2e87d2..14cc23c1c2 100644 --- a/recipes/liberation.recipe +++ b/recipes/liberation.recipe @@ -21,42 +21,10 @@ class Liberation(BasicNewsRecipe): max_articles_per_feed = 15 no_stylesheets = True remove_empty_feeds = True - filterDuplicates = True + needs_subscription = 'optional' - extra_css = ''' - h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} - p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;} - h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} - .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} - .mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;} - ''' - - keep_only_tags = [ - dict(name='div', attrs={'class':'article'}) - ,dict(name='div', attrs={'class':'text-article m-bot-s1'}) - ,dict(name='div', attrs={'class':'entry'}) - ,dict(name='div', attrs={'class':'col_contenu'}) - ] - - remove_tags_after = [ - dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']}) - ,dict(name='p',attrs={'class':['chapo']}) - ,dict(id='_twitter_facebook') - ] - - remove_tags = [ - dict(name='iframe') - ,dict(name='a', attrs={'class':'lnk-comments'}) - ,dict(name='div', attrs={'class':'toolbox'}) - ,dict(name='ul', attrs={'class':'share-box'}) - ,dict(name='ul', attrs={'class':'tool-box'}) - ,dict(name='ul', attrs={'class':'rub'}) - ,dict(name='p',attrs={'class':['chapo']}) - ,dict(name='p',attrs={'class':['tag']}) - ,dict(name='div',attrs={'class':['blokLies']}) - ,dict(name='div',attrs={'class':['alire']}) - ,dict(id='_twitter_facebook') - ] + keep_only_tags = [dict(name='article')] + remove_tags = [dict(attrs={'class':['tool-bar']})] feeds = [ (u'La une', u'http://rss.liberation.fr/rss/9/') @@ -69,6 +37,16 @@ class Liberation(BasicNewsRecipe): ,(u'Sports', u'http://www.liberation.fr/rss/12/') ] + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + if self.username is not None and self.password is not None: + br.open('http://token.liberation.fr/accounts/login/') + br.select_form(nr=0) + br['email'] = self.username + br['password'] = self.password + br.submit() + return br + def get_masthead_url(self): masthead = 'http://s0.libe.com/libe/img/common/logo-liberation-150.png' br = BasicNewsRecipe.get_browser(self) @@ -78,3 +56,15 @@ class Liberation(BasicNewsRecipe): self.log("\nCover unavailable") masthead = None return masthead + + def get_article_url(self, article): + url = BasicNewsRecipe.get_article_url(self, article) + url = url.split('/')[-2] + encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&', + '0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S': + 'www.', '0I': '_'} + for k, v in encoding.iteritems(): + url = url.replace(k, v) + return url.partition('?')[0] + + diff --git a/recipes/liberation_sub.recipe b/recipes/liberation_sub.recipe deleted file mode 100644 index 60450341e4..0000000000 --- a/recipes/liberation_sub.recipe +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env python -# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai - -__license__ = 'GPL v3' -__copyright__ = '2012, Rémi Vanicat ' -''' -liberation.fr -''' -# The cleanning is from the Liberation recipe, by Darko Miletic - -from calibre.web.feeds.news import BasicNewsRecipe - -class Liberation(BasicNewsRecipe): - - title = u'Libération: Édition abonnés' - __author__ = 'Rémi Vanicat' - description = u'Actualités' - category = 'Actualités, France, Monde' - language = 'fr' - needs_subscription = True - - use_embedded_content = False - no_stylesheets = True - remove_empty_feeds = True - - extra_css = ''' - h1, h2, h3 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;} - p.subtitle {font-size:xx-small; font-family:Arial,Helvetica,sans-serif;} - h4, h5, h2.rubrique, {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} - .ref, .date, .author, .legende {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;} - .mna-body, entry-body {font-size:medium; font-family:Arial,Helvetica,sans-serif;} - ''' - - keep_only_tags = [ - dict(name='div', attrs={'class':'article'}) - ,dict(name='div', attrs={'class':'text-article m-bot-s1'}) - ,dict(name='div', attrs={'class':'entry'}) - ,dict(name='div', attrs={'class':'col_contenu'}) - ] - - remove_tags_after = [ - dict(name='div',attrs={'class':['object-content text text-item', 'object-content', 'entry-content', 'col01', 'bloc_article_01']}) - ,dict(name='p',attrs={'class':['chapo']}) - ,dict(id='_twitter_facebook') - ] - - remove_tags = [ - dict(name='iframe') - ,dict(name='a', attrs={'class':'lnk-comments'}) - ,dict(name='div', attrs={'class':'toolbox'}) - ,dict(name='ul', attrs={'class':'share-box'}) - ,dict(name='ul', attrs={'class':'tool-box'}) - ,dict(name='ul', attrs={'class':'rub'}) - ,dict(name='p',attrs={'class':['chapo']}) - ,dict(name='p',attrs={'class':['tag']}) - ,dict(name='div',attrs={'class':['blokLies']}) - ,dict(name='div',attrs={'class':['alire']}) - ,dict(id='_twitter_facebook') - ] - - index = 'http://www.liberation.fr/abonnes/' - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - if self.username is not None and self.password is not None: - br.open('http://www.liberation.fr/jogger/login/') - br.select_form(nr=0) - br['email'] = self.username - br['password'] = self.password - br.submit() - return br - - def parse_index(self): - soup=self.index_to_soup(self.index) - - content = soup.find('div', { 'class':'block-content' }) - - articles = [] - cat_articles = [] - - for tag in content.findAll(recursive=False): - if(tag['class']=='headrest headrest-basic-rounded'): - cat_articles = [] - articles.append((tag.find('h5').contents[0],cat_articles)) - else: - title = tag.find('h3').contents[0] - url = tag.find('a')['href'] - print(url) - descripion = tag.find('p',{ 'class':'subtitle' }).contents[0] - article = { - 'title': title, - 'url': url, - 'descripion': descripion, - 'content': '' - } - cat_articles.append(article) - return articles - - - -# Local Variables: -# mode: python -# End: