mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Ynet by marbs
This commit is contained in:
parent
b0d4dd936f
commit
7026383ebf
72
resources/recipes/ynet.recipe
Normal file
72
resources/recipes/ynet.recipe
Normal file
@ -0,0 +1,72 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import mechanize
|
||||
|
||||
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
|
||||
description = 'This is a recipe of Ynet.co.il. The recipe opens the article page and clicks on an advertisement to not hurt the sites advertising income.'
|
||||
cover_url = 'http://www.bneiakiva.net/uploads/images/ynet%282%29.jpg'
|
||||
title = u'Ynet'
|
||||
__author__ = 'marbs'
|
||||
language = 'he'
|
||||
extra_css='img {max-width:100%;direction: rtl;} #article{direction: rtl;} div{direction: rtl;} title{direction: rtl; } article_description{direction: rtl; } a.article{direction: rtl; } calibre_feed_description{direction: rtl; } body{direction: ltr;}'
|
||||
remove_attributes = ['width']
|
||||
simultaneous_downloads = 5
|
||||
keep_only_tags =dict(name='div', attrs={'id':'articleContainer'})
|
||||
remove_javascript = True
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
oldest_article = 1
|
||||
remove_tags = [dict(name='p', attrs={'text':[' ']})]
|
||||
max_articles_per_feed = 100
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['dir'] = 'rtl'
|
||||
soup.body['dir'] = 'rtl'
|
||||
return soup
|
||||
|
||||
feeds =[(u'\u05d7\u05d3\u05e9\u05d5\u05ea',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss2.xml'),
|
||||
(u'\u05db\u05dc\u05db\u05dc\u05d4',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss6.xml'),
|
||||
(u'\u05e6\u05e8\u05db\u05e0\u05d5\u05ea',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss437.xml'),
|
||||
(u'\u05e1\u05e4\u05d5\u05e8\u05d8',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss3.xml'),
|
||||
(u'\u05ea\u05e8\u05d1\u05d5\u05ea',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss538.xml'),
|
||||
(u'\u05de\u05e2\u05d5\u05e8\u05d1\u05d5\u05ea \u05d5\u05d7\u05d1\u05e8\u05d4',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss3262.xml'),
|
||||
(u'\u05d1\u05e8\u05d9\u05d0\u05d5\u05ea',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss1208.xml'),
|
||||
(u'\u05d9\u05e8\u05d5\u05e7',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss4872.xml'),
|
||||
(u'\u05de\u05d7\u05e9\u05d1\u05d9\u05dd',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss544.xml'),
|
||||
(u'\u05e8\u05db\u05d1', u'http://www.ynet.co.il/Integration/StoryRss550.xml'),
|
||||
(u'\u05ea\u05d9\u05d9\u05e8\u05d5\u05ea',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss598.xml'),
|
||||
(u'\u05d4\u05d5\u05e8\u05d9\u05dd',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss3052.xml'),
|
||||
(u'\u05d0\u05d5\u05db\u05dc',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss975.xml'),
|
||||
(u'\u05d9\u05d4\u05d3\u05d5\u05ea',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss4403.xml'),
|
||||
(u'\u05de\u05d3\u05e2 \u05d5\u05d8\u05d1\u05e2',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss2142.xml'),
|
||||
(u'\u05d9\u05d7\u05e1\u05d9\u05dd',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss3925.xml'),
|
||||
(u'\u05d3\u05e2\u05d5\u05ea',
|
||||
u'http://www.ynet.co.il/Integration/StoryRss194.xml')]
|
||||
|
||||
def print_version(self, url):
|
||||
#remove from here
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br.open(url)
|
||||
br.follow_link(mechanize.Link(base_url = '', url =url, text = '', tag = 'a', attrs = [{'id':'buzzerATop'}]))
|
||||
#to here to stop supporting ynet...
|
||||
split1 = url.split("-")
|
||||
print_url = 'http://www.ynet.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
|
||||
return print_url
|
Loading…
x
Reference in New Issue
Block a user