diff --git a/recipes/b365realitatea.recipe b/recipes/b365realitatea.recipe
new file mode 100644
index 0000000000..80a1ee225b
--- /dev/null
+++ b/recipes/b365realitatea.recipe
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+b365.realitatea.net
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class b365Realitatea(BasicNewsRecipe):  # news recipe for b365.realitatea.net
+    title = u'b365 Realitatea'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'b365 Realitatea'
+    description = u'b365 Realitatea'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Romania,Bucuresti'
+    encoding = 'utf-8'
+    cover_url = 'http://b365.realitatea.net/wp-content/themes/b/images/b365-logo.png'
+
+    conversion_options = {  # built from the class-level metadata defined above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'newsArticle'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':'date'})
+        , dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})  # was name='dic' (typo): no such HTML element, so the rule could not match
+        , dict(name='div', attrs={'class':'related_posts'})
+        , dict(name='div', attrs={'id':'RelevantiWidget'})
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'id':'RelevantiWidget'})
+    ]
+    feeds = [
+        (u'\u0218tiri', u'http://b365.realitatea.net/rss-full/')
+    ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)  # pass the parsed page through adeify_images and return its result
+
diff --git a/recipes/capital_gr.recipe b/recipes/capital_gr.recipe
new file mode 100644
index 0000000000..8dac48c95b
--- /dev/null
+++ b/recipes/capital_gr.recipe
@@ -0,0 +1,35 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class Capital(BasicNewsRecipe):  # recipe for the capital.gr RSS feeds listed below
+    title = 'Capital.gr'
+    __author__ ='Stelios'
+    description = 'Financial News from Greece'
+    #max_articles_per_feed = 100
+    oldest_article = 3
+    publisher = 'Capital.gr'
+    category = 'news, GR'
+    language = 'el'
+    encoding = 'windows-1253'
+    cover_url = 'http://files.capital.gr/images/caplogo.gif'
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_empty_feeds = True
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='p'),
+        dict(name='span', attrs={'id' : ["textbody"]})
+    ]
+
+# NOTE: 3 posts seemed to be UTF-8 encoded, unlike the windows-1253 declared above
+    feeds = [
+        (u'\u039F\u039B\u0395\u03A3 \u039F\u0399 \u0395\u0399\u0394\u0397\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-1'),
+        (u'\u0395\u03A0\u0399\u03A7\u0395\u0399\u03A1\u0397\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-2'),
+        (u'\u0391\u0393\u039F\u03A1\u0395\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-3'),
+        (u'\u039F\u0399\u039A\u039F\u039D\u039F\u039C\u0399\u0391', 'http://www.capital.gr/news/newsrss.asp?s=-4'),
+        (u'\u03A7\u03A1\u0397\u039C. \u0391\u039D\u0391\u039A\u039F\u0399\u039D\u03A9\u03A3\u0395\u0399\u03A3', 'http://www.capital.gr/news/newsrss.asp?s=-6'),
+        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u039C\u0395 \u0391\u03A0\u039F\u03A8\u0397', 'http://www.capital.gr/articles/articlesrss.asp?catid=4'),
+        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A3\u0399\u03A9\u03A0\u0397\u03A4\u0397\u03A1\u0399\u039F', 'http://www.capital.gr/articles/articlesrss.asp?catid=6'),
+        (u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A0\u0399\u03A3\u03A9 \u0391\u03A0\u039F \u03A4\u0399\u03A3 \u0393\u03A1\u0391\u039C\u039C\u0395\u03A3', 'http://www.capital.gr/articles/articlesrss.asp?catid=8'),
+        #(u'\u039C\u03CC\u03BD\u03B9\u03BC\u03B5\u03C2 \u03C3\u03C4\u03AE\u03BB\u03B5\u03C2: \u03A4\u0395\u03A7\u039D\u039F\u039B\u039F\u0393\u0399\u0391', 'http://www.capital.gr/news/newsrss.asp?s=-8') not working for now
+]
+
diff --git a/recipes/catavencii.recipe b/recipes/catavencii.recipe
new file mode 100644
index 0000000000..7dff212d74
--- /dev/null
+++ b/recipes/catavencii.recipe
@@ 
-0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL v3'
+__copyright__ = u'2011, Silviu Cotoar\u0103'
+'''
+catavencii.ro
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Catavencii(BasicNewsRecipe):  # news recipe for the catavencii.ro RSS feed
+    title = u'Ca\u0163avencii'
+    __author__ = u'Silviu Cotoar\u0103'
+    publisher = u'Ca\u0163avencii'
+    description = u'Ca\u0163avencii'
+    oldest_article = 5
+    language = 'ro'
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content = False
+    category = 'Ziare,Romania'
+    encoding = 'utf-8'
+    cover_url = 'http://www.simonatache.ro/wp-content/uploads/2011/06/catavencii-logo.png'
+
+    conversion_options = {  # built from the class-level metadata defined above
+        'comments' : description
+        ,'tags' : category
+        ,'language' : language
+        ,'publisher' : publisher
+    }
+
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'content'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'id':'breadcrumbs'})
+        , dict(name='span', attrs={'class':'info'})
+        , dict(name='div', attrs={'id':'social-media-article'})
+    ]
+
+    remove_tags_after = [
+        dict(name='div', attrs={'id':'social-media-article'})
+    ]
+    feeds = [
+        (u'\u0218tiri', u'http://www.catavencii.ro/rss')
+    ]
+
+    def preprocess_html(self, soup):
+        return self.adeify_images(soup)  # pass the parsed page through adeify_images and return its result
diff --git a/recipes/daily_mirror.recipe b/recipes/daily_mirror.recipe index 5d4dbe3f4b..f0d28c72e7 100644 --- a/recipes/daily_mirror.recipe +++ b/recipes/daily_mirror.recipe @@ -1,10 +1,11 @@ from calibre.web.feeds.news import BasicNewsRecipe - +import re class AdvancedUserRecipe1306061239(BasicNewsRecipe): title = u'The Daily Mirror' description = 'News as provide by The Daily Mirror -UK' __author__ = 'Dave Asbury' + # last updated 30/10/11 language = 'en_GB' cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg' @@ -12,26 +13,30 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe): masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif' - oldest_article = 1 - max_articles_per_feed = 100 + oldest_article = 2 + 
max_articles_per_feed = 30 remove_empty_feeds = True remove_javascript = True no_stylesheets = True + extra_css = ''' + body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;} + ''' keep_only_tags = [ - dict(name='h1'), - dict(attrs={'class':['article-attr']}), - dict(name='div', attrs={'class' : [ 'article-body', 'crosshead']}) + dict(name='div',attrs={'id' : 'body-content'}) + ] - - ] + remove_tags_after = [dict (name='div',attrs={'class' : 'related'})] remove_tags = [ - dict(name='div', attrs={'class' : ['caption', 'article-resize']}), - dict( attrs={'class':'append-html'}) - ] - + dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}), + dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}), + dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}), + dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'}) + ] + preprocess_regexps = [ + (re.compile(r'