# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# resources/recipes/adevarul.recipe
#   * keep_only_tags: the second entry now selects div class 'bb-tu first-t bb-article-body'
#     instead of div class 'bd'.
#   * remove_tags: additionally strips li.author, li.date, li.comments, div#mediatag, div#ft
#     and div#comment_wrapper.
#   * remove_tags_after: now anchored on div#comment_wrapper instead of form#bb-comment-create-form.
#
# resources/recipes/gsp.recipe
#   * Adds a file header (coding line, shebang, __license__, __copyright__, module docstring).
#   * Renames the class AdvancedUserRecipe1286351181 to GSP; title becomes u'Gazeta Sporturilor'.
#   * Adds description/publisher/category, no_stylesheets, use_embedded_content, encoding,
#     remove_javascript and conversion_options; oldest_article goes from 2 to 5.
#   * Replaces remove_tags/remove_tags_after with keep_only_tags, switches the feed to
#     http://www.gsp.ro/rss.xml, drops print_version and adds preprocess_html, which returns
#     self.adeify_images(soup).
#   * NOTE(review): the added '# -*- coding: utf-8 -*-' line precedes '#!/usr/bin/env python';
#     a shebang only takes effect as the very first line of a file, so as added it is just a
#     comment. Consider swapping the two lines (PEP 263 accepts the coding line on line 1 or 2).
#
# resources/recipes/nytimes.recipe
#   * In the headlinesOnly branch, needs_subscription changes from True to 'optional'.
#   * NOTE(review): the unchanged description in that branch still reads 'Needs a subscription
#     from http://www.nytimes.com' - confirm whether the wording should be updated to match.
#
# resources/template-functions.json
#   * Adds a "sublist" template function: splits val on sep and re-joins the slice
#     val[si:ei] (or val[si:] when end_index converts to 0); returns '' for an empty val.
#   * NOTE(review): the int() conversions appear before the 'try:', so a non-numeric index
#     would raise rather than return ''; the handler is also a bare 'except:'. This JSON looks
#     generated from Python source - TODO confirm and fix there rather than here.
#
# NOTE(review): line breaks and indentation in this copy of the patch appear to have been
# collapsed; it presumably will not apply as-is - regenerate from the commit before applying.
diff --git a/resources/recipes/adevarul.recipe b/resources/recipes/adevarul.recipe index ea0f2826ce..eec3ca771a 100644 --- a/resources/recipes/adevarul.recipe +++ b/resources/recipes/adevarul.recipe @@ -32,16 +32,25 @@ class Adevarul(BasicNewsRecipe): } keep_only_tags = [ dict(name='div', attrs={'class':'article_header'}) - ,dict(name='div', attrs={'class':'bd'}) + ,dict(name='div', attrs={'class':'bb-tu first-t bb-article-body'}) ] - remove_tags = [ dict(name='div', attrs={'class':'bb-wg-article_related_attachements'}) + remove_tags = [ + dict(name='li', attrs={'class':'author'}) + ,dict(name='li', attrs={'class':'date'}) + ,dict(name='li', attrs={'class':'comments'}) + ,dict(name='div', attrs={'class':'bb-wg-article_related_attachements'}) ,dict(name='div', attrs={'class':'bb-md bb-md-article_comments'}) - ,dict(name='form', attrs={'id':'bb-comment-create-form'}) - ] + ,dict(name='form', attrs={'id':'bb-comment-create-form'}) + ,dict(name='div', attrs={'id':'mediatag'}) + ,dict(name='div', attrs={'id':'ft'}) + ,dict(name='div', attrs={'id':'comment_wrapper'}) + ] - remove_tags_after = [ dict(name='form', attrs={'id':'bb-comment-create-form'}) ] + remove_tags_after = [ + dict(name='div', attrs={'id':'comment_wrapper'}), + ] feeds = [ (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest') ] diff --git a/resources/recipes/gsp.recipe b/resources/recipes/gsp.recipe index 90a8eecfe6..efc76ee71e 100644 --- a/resources/recipes/gsp.recipe +++ b/resources/recipes/gsp.recipe @@ -1,20 +1,43 @@ +# -*- coding: utf-8 -*- +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = u'2011, Silviu Cotoar\u0103' +''' +gsp.ro +''' + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1286351181(BasicNewsRecipe): - title = u'gsp.ro' - __author__ = 'bucsie' - oldest_article = 2 +class GSP(BasicNewsRecipe): + title = u'Gazeta Sporturilor' + language = 'ro' + __author__ = u'Silviu Cotoar\u0103' + description = u'Gazeta Sporturilor' + publisher = u'Gazeta 
Sporturilor' + category = 'Ziare,Sport,Stiri,Romania' + oldest_article = 5 max_articles_per_feed = 100 - language='ro' - cover_url ='http://www.gsp.ro/images/sigla_rosu.jpg' + no_stylesheets = True + use_embedded_content = False + encoding = 'utf-8' + remove_javascript = True + cover_url = 'http://www.gsp.ro/images/logo.jpg' - remove_tags = [ - dict(name='div', attrs={'class':['related_articles', 'articol_noteaza straight_line dotted_line_top', 'comentarii','mai_multe_articole']}), - dict(name='div', attrs={'id':'icons'}) - ] - remove_tags_after = dict(name='div', attrs={'id':'adoceanintactrovccmgpmnyt'}) + conversion_options = { + 'comments' : description + ,'tags' : category + ,'language' : language + ,'publisher' : publisher + } - feeds = [(u'toate stirile', u'http://www.gsp.ro/index.php?section=section&screen=rss')] + keep_only_tags = [ dict(name='h1', attrs={'class':'serif title_2'}) + ,dict(name='div', attrs={'id':'only_text'}) + ,dict(name='span', attrs={'class':'block poza_principala'}) + ] + + feeds = [ (u'\u0218tiri', u'http://www.gsp.ro/rss.xml') ] + + def preprocess_html(self, soup): + return self.adeify_images(soup) - def print_version(self, url): - return 'http://www1.gsp.ro/print/' + url[(url.rindex('/')+1):] diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe index b2043bb463..0a5c310af4 100644 --- a/resources/recipes/nytimes.recipe +++ b/resources/recipes/nytimes.recipe @@ -89,7 +89,7 @@ class NYTimes(BasicNewsRecipe): if headlinesOnly: title='New York Times Headlines' description = 'Headlines from the New York Times. 
Needs a subscription from http://www.nytimes.com' - needs_subscription = True + needs_subscription = 'optional' elif webEdition: title='New York Times (Web)' description = 'New York Times on the Web' diff --git a/resources/template-functions.json b/resources/template-functions.json index 332ce1ddea..5d9b6a11a3 100644 --- a/resources/template-functions.json +++ b/resources/template-functions.json @@ -15,6 +15,7 @@ "template": "def evaluate(self, formatter, kwargs, mi, locals, template):\n template = template.replace('[[', '{').replace(']]', '}')\n return formatter.__class__().safe_format(template, kwargs, 'TEMPLATE', mi)\n", "print": "def evaluate(self, formatter, kwargs, mi, locals, *args):\n print args\n return None\n", "titlecase": "def evaluate(self, formatter, kwargs, mi, locals, val):\n return titlecase(val)\n", + "sublist": "def evaluate(self, formatter, kwargs, mi, locals, val, start_index, end_index, sep):\n if not val:\n return ''\n si = int(start_index)\n ei = int(end_index)\n val = val.split(sep)\n try:\n if ei == 0:\n return sep.join(val[si:])\n else:\n return sep.join(val[si:ei])\n except:\n return ''\n", "test": "def evaluate(self, formatter, kwargs, mi, locals, val, value_if_set, value_not_set):\n if val:\n return value_if_set\n else:\n return value_not_set\n", "eval": "def evaluate(self, formatter, kwargs, mi, locals, template):\n from formatter import eval_formatter\n template = template.replace('[[', '{').replace(']]', '}')\n return eval_formatter.safe_format(template, locals, 'EVAL', None)\n", "multiply": "def evaluate(self, formatter, kwargs, mi, locals, x, y):\n x = float(x if x else 0)\n y = float(y if y else 0)\n return unicode(x * y)\n",