__license__ = 'GPL v3'
__copyright__ = '2010-2015, Darko Miletic '
'''
www.haaretz.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

try:
    from urllib.parse import urlencode
except ImportError:
    # Python 2 keeps urlencode directly in the urllib module.
    from urllib import urlencode


class Haaretz_en(BasicNewsRecipe):
    """Recipe for the English edition of Haaretz (www.haaretz.com).

    Articles are collected from the section feeds listed in ``feeds`` and
    each one is fetched through the site's print page (see
    ``print_version``). When a username and password are configured they
    are submitted to the Haaretz single-sign-on endpoint before any
    download happens (see ``get_browser``).
    """

    # --- Recipe metadata -------------------------------------------------
    title = 'Haaretz'
    __author__ = 'Darko Miletic'
    description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
    publisher = 'Haaretz'
    language = 'en_IL'
    publication_type = 'newspaper'

    # --- Fetch behaviour -------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    needs_subscription = True
    remove_empty_feeds = True

    # --- Site locations --------------------------------------------------
    PREFIX = 'http://www.haaretz.com'
    masthead_url = PREFIX + '/images/logos/HaaretzLogo.gif'

    # --- Presentation ----------------------------------------------------
    # Assembled from adjacent literals; the resulting string is a single
    # line of CSS with one leading and one trailing space.
    extra_css = (
        ' body{font-family: Verdana,Arial,Helvetica,sans-serif }'
        ' h1, .articleBody {font-family: Georgia, serif}'
        ' .authorBar {font-size: small} '
    )

    conversion_options = dict(
        comment=description,
        publisher=publisher,
        language=language,
    )

    # --- Article clean-up ------------------------------------------------
    # Only the <div id="content"> element is kept from each page.
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'content'}}]
    remove_attributes = ['width', 'height']
    remove_tags = [
        {'name': ['iframe', 'link', 'object', 'embed']},
        {'name': 'div', 'attrs': {'class': ['rightcol', 'fblike']}},
        {'name': 'div', 'attrs': {'id': 'article_sso_form'}},
    ]

    # --- Section feeds: (section title, feed URL) ------------------------
    feeds = [
        (u'Headlines', 'http://www.haaretz.com/cmlink/1.263335'),
        (u'Opinion', 'http://www.haaretz.com/cmlink/1.628752'),
        (u'Defence and diplomacy', 'http://www.haaretz.com/cmlink/1.628763'),
        (u'National', 'http://www.haaretz.com/cmlink/1.628764'),
        (u'International', 'http://www.haaretz.com/cmlink/1.628765'),
        (u'Jewish World', 'http://www.haaretz.com/cmlink/1.628766'),
        (u'Business', 'http://www.haaretz.com/cmlink/1.628767'),
        (u'Real Estate', 'http://www.haaretz.com/cmlink/1.628768'),
        (u'Features', 'http://www.haaretz.com/cmlink/1.628769'),
        (u'Arts & Leisure', 'http://www.haaretz.com/cmlink/1.628771'),
        (u'Books', 'http://www.haaretz.com/cmlink/1.628772'),
        (u'Food & Wine', 'http://www.haaretz.com/cmlink/1.628773'),
        (u'Sports', 'http://www.haaretz.com/cmlink/1.628774'),
    ]

    def get_browser(self):
        """Return the browser used for downloads, signed in when possible.

        The base-class browser is created and the site front page
        (``PREFIX``) is opened first. If both ``self.username`` and
        ``self.password`` are set, the credentials are url-encoded and
        submitted to the Haaretz SSO sign-in URL; otherwise the browser is
        returned as is.
        """
        browser = BasicNewsRecipe.get_browser(self)
        # NOTE(review): presumably visited first to pick up session
        # cookies before signing in - confirm against the site.
        browser.open(self.PREFIX)

        if self.username is None or self.password is None:
            return browser

        # Field order is kept stable because it determines the encoded
        # request body.
        login_form = urlencode({
            'cb': 'parseEngReply',
            'newsso': 'true',
            'fromlogin': 'true',
            'layer': 'eng_login',
            'userName': self.username,
            'password': self.password,
        })
        browser.open('https://sso.haaretz.com/sso/sso/signIn', login_form)
        return browser

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        Everything after the last ``/`` in ``url`` is appended to the
        print-page base address.
        """
        _, _, article_id = url.rpartition('/')
        return 'http://www.haaretz.com/misc/article-print-page/' + article_id