From b8240c99b9df56a5000201d5d880c8ff3cc8046d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 31 Oct 2010 13:00:46 -0600 Subject: [PATCH] Clic_RBS by arvoredo --- resources/recipes/clic_rbs.recipe | 50 +++++++++++++++++++++++++++ src/calibre/ebooks/metadata/amazon.py | 2 +- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 resources/recipes/clic_rbs.recipe diff --git a/resources/recipes/clic_rbs.recipe b/resources/recipes/clic_rbs.recipe new file mode 100644 index 0000000000..559dfa2000 --- /dev/null +++ b/resources/recipes/clic_rbs.recipe @@ -0,0 +1,50 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class ClicRBS(BasicNewsRecipe): + title = u'ClicRBS' + language = 'pt' + __author__ = 'arvoredo' + oldest_article = 3 + max_articles_per_feed = 9 + cover_url = 'http://www.publicidade.clicrbs.com.br/clicrbs/imgs/logo_clic.gif' + + remove_tags = [ + dict(name='div', attrs={'class':['clic-barra-inner', 'botao-versao-mobile ']}) + ] + + remove_tags_before = dict(name='div ', attrs={'class':'descricao'}) + remove_tags_before = dict(name='div', attrs={'id':'glb-corpo'}) + remove_tags_before = dict(name='div', attrs={'class':'descricao'}) + remove_tags_before = dict(name='div', attrs={'class':'coluna'}) + remove_tags_after = dict(name='div', attrs={'class':'extra'}) + remove_tags_after = dict(name='div', attrs={'id':'links-patrocinados'}) + remove_tags_after = dict(name='h4', attrs={'class':'tipo-c comente'}) + remove_tags_after = dict(name='ul', attrs={'class':'lista'}) + + feeds = [ + (u'zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=1&local=1&channel=13') + , (u'diariocatarinense.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?uf=2&local=18&channel=67') + , (u'Concursos e Emprego', u'http://g1.globo.com/Rss2/0,,AS0-9654,00.xml') + , (u'Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?channel=87&uf=1&local=1') + , (u'Economia, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=801&uf=1&local=1&channel=13') + , (u'Esportes, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=802&uf=1&local=1&channel=13') + , (u'Economia, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1180&channel=87&uf=1&local=1') + , (u'Política, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1185&channel=87&uf=1&local=1') + , (u'Mundo, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1184&channel=87&uf=1&local=1') + , (u'Catarinense, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=&theme=371&uf=2&channel=2') + , (u'Geral, Pioneiro.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1183&channel=87&uf=1&local=1') + , (u'Estilo de Vida, zerohora.com, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=805&uf=1&local=1&channel=13') + , (u'Corrida, Corrida, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1313&theme=15704&uf=1&channel=2') + , (u'Jornal de Santa Catarina, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?espid=159&uf=2&local=18') + , (u'Grêmio, Futebol, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=11&theme=65&uf=1&channel=2') + , (u'Velocidade, Esportes, clicRBS', u'http://www.clicrbs.com.br/jsp/rssfeed.jspx?sect_id=1314&theme=2655&uf=1&channel=2') + ] + + extra_css = ''' + cite{color:#007BB5; font-size:xx-small; font-style:italic;} + body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;} + h3{font-size:large; color:#082963; font-weight:bold;} + #ident{color:#0179B4; font-size:xx-small;} + p{color:#000000;font-weight:normal;} + .commentario p{color:#007BB5; font-style:italic;} + ''' diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py index 5d7d0358f0..9c89016e8b 100644 --- a/src/calibre/ebooks/metadata/amazon.py +++ b/src/calibre/ebooks/metadata/amazon.py @@ -114,7 +114,7 @@ def main(args=sys.argv): #print # Test sophisticated comment formatting - print get_social_metadata('Swan Thieves', None, None, '9781416580829') + print get_social_metadata('Angels & Demons', None, None, '9781416580829') print return