diff --git a/resources/recipes/ynet.recipe b/resources/recipes/ynet.recipe new file mode 100644 index 0000000000..b77e1a36a6 --- /dev/null +++ b/resources/recipes/ynet.recipe @@ -0,0 +1,72 @@ +import re + +from calibre.web.feeds.news import BasicNewsRecipe +import mechanize + +class AdvancedUserRecipe1283848012(BasicNewsRecipe): + description = 'This is a recipe of Ynet.co.il. The recipe opens the article page and clicks on an advertisement to not hurt the sites advertising income.' + cover_url = 'http://www.bneiakiva.net/uploads/images/ynet%282%29.jpg' + title = u'Ynet' + __author__ = 'marbs' + language = 'he' + extra_css='img {max-width:100%;direction: rtl;} #article{direction: rtl;} div{direction: rtl;} title{direction: rtl; } article_description{direction: rtl; } a.article{direction: rtl; } calibre_feed_description{direction: rtl; } body{direction: ltr;}' + remove_attributes = ['width'] + simultaneous_downloads = 5 + keep_only_tags =dict(name='div', attrs={'id':'articleContainer'}) + remove_javascript = True + timefmt = '[%a, %d %b, %Y]' + oldest_article = 1 + remove_tags = [dict(name='p', attrs={'text':[' ']})] + max_articles_per_feed = 100 + preprocess_regexps = [ + (re.compile(r'
', re.DOTALL|re.IGNORECASE), lambda match: '') + ] + + def preprocess_html(self, soup): + soup.html['dir'] = 'rtl' + soup.body['dir'] = 'rtl' + return soup + + feeds =[(u'\u05d7\u05d3\u05e9\u05d5\u05ea', + u'http://www.ynet.co.il/Integration/StoryRss2.xml'), + (u'\u05db\u05dc\u05db\u05dc\u05d4', + u'http://www.ynet.co.il/Integration/StoryRss6.xml'), + (u'\u05e6\u05e8\u05db\u05e0\u05d5\u05ea', + u'http://www.ynet.co.il/Integration/StoryRss437.xml'), + (u'\u05e1\u05e4\u05d5\u05e8\u05d8', + u'http://www.ynet.co.il/Integration/StoryRss3.xml'), + (u'\u05ea\u05e8\u05d1\u05d5\u05ea', + u'http://www.ynet.co.il/Integration/StoryRss538.xml'), + (u'\u05de\u05e2\u05d5\u05e8\u05d1\u05d5\u05ea \u05d5\u05d7\u05d1\u05e8\u05d4', + u'http://www.ynet.co.il/Integration/StoryRss3262.xml'), + (u'\u05d1\u05e8\u05d9\u05d0\u05d5\u05ea', + u'http://www.ynet.co.il/Integration/StoryRss1208.xml'), + (u'\u05d9\u05e8\u05d5\u05e7', + u'http://www.ynet.co.il/Integration/StoryRss4872.xml'), + (u'\u05de\u05d7\u05e9\u05d1\u05d9\u05dd', + u'http://www.ynet.co.il/Integration/StoryRss544.xml'), + (u'\u05e8\u05db\u05d1', u'http://www.ynet.co.il/Integration/StoryRss550.xml'), + (u'\u05ea\u05d9\u05d9\u05e8\u05d5\u05ea', + u'http://www.ynet.co.il/Integration/StoryRss598.xml'), + (u'\u05d4\u05d5\u05e8\u05d9\u05dd', + u'http://www.ynet.co.il/Integration/StoryRss3052.xml'), + (u'\u05d0\u05d5\u05db\u05dc', + u'http://www.ynet.co.il/Integration/StoryRss975.xml'), + (u'\u05d9\u05d4\u05d3\u05d5\u05ea', + u'http://www.ynet.co.il/Integration/StoryRss4403.xml'), + (u'\u05de\u05d3\u05e2 \u05d5\u05d8\u05d1\u05e2', + u'http://www.ynet.co.il/Integration/StoryRss2142.xml'), + (u'\u05d9\u05d7\u05e1\u05d9\u05dd', + u'http://www.ynet.co.il/Integration/StoryRss3925.xml'), + (u'\u05d3\u05e2\u05d5\u05ea', + u'http://www.ynet.co.il/Integration/StoryRss194.xml')] + + def print_version(self, url): +#remove from here + br = BasicNewsRecipe.get_browser() + br.open(url) + br.follow_link(mechanize.Link(base_url = '', url =url, text = '', tag = 'a', attrs = [{'id':'buzzerATop'}])) +#to here to stop supporting ynet... + split1 = url.split("-") + print_url = 'http://www.ynet.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1] + return print_url