# calibre/recipes/haaretz_en.recipe

__license__ = 'GPL v3'
__copyright__ = '2010-2015, Darko Miletic <darko.miletic at gmail.com>'
'''
www.haaretz.com
'''

from calibre.web.feeds.news import BasicNewsRecipe
try:
    from urllib.parse import urlencode
except ImportError:
    from urllib import urlencode


class Haaretz_en(BasicNewsRecipe):
    """Recipe for the English edition of Haaretz (www.haaretz.com).

    Articles are collected from the section feeds listed in ``feeds`` and
    each article URL is rewritten to its print-page URL before download.
    When a username and password are set, ``get_browser`` posts them to the
    Haaretz sign-in endpoint.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Haaretz'
    __author__ = 'Darko Miletic'
    description = "Haaretz.com is the world's leading English-language Website for real-time news and analysis of Israel and the Middle East."
    publisher = 'Haaretz'
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en_IL'
    needs_subscription = True
    remove_empty_feeds = True
    publication_type = 'newspaper'

    # Site root; also the first page opened by get_browser().
    PREFIX = 'http://www.haaretz.com'
    masthead_url = PREFIX + '/images/logos/HaaretzLogo.gif'

    # --- Styling and conversion -----------------------------------------
    extra_css = """
                                body{font-family: Verdana,Arial,Helvetica,sans-serif }
                                h1, .articleBody {font-family: Georgia, serif}
                                .authorBar {font-size: small}
                            """

    conversion_options = dict(
        comment=description,
        publisher=publisher,
        language=language,
    )

    # --- Page clean-up ---------------------------------------------------
    # Keep only the main content container of each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'}),
    ]
    remove_attributes = ['width', 'height']
    remove_tags = [
        dict(name=['iframe', 'link', 'object', 'embed']),
        dict(name='div', attrs={'class': ['rightcol', 'fblike']}),
        dict(name='div', attrs={'id': 'article_sso_form'}),
    ]

    # --- Section feeds: (title, feed URL) --------------------------------
    feeds = [
        (u'Headlines', 'http://www.haaretz.com/cmlink/1.263335'),
        (u'Opinion', 'http://www.haaretz.com/cmlink/1.628752'),
        (u'Defence and diplomacy', 'http://www.haaretz.com/cmlink/1.628763'),
        (u'National', 'http://www.haaretz.com/cmlink/1.628764'),
        (u'International', 'http://www.haaretz.com/cmlink/1.628765'),
        (u'Jewish World', 'http://www.haaretz.com/cmlink/1.628766'),
        (u'Business', 'http://www.haaretz.com/cmlink/1.628767'),
        (u'Real Estate', 'http://www.haaretz.com/cmlink/1.628768'),
        (u'Features', 'http://www.haaretz.com/cmlink/1.628769'),
        (u'Arts & Leisure', 'http://www.haaretz.com/cmlink/1.628771'),
        (u'Books', 'http://www.haaretz.com/cmlink/1.628772'),
        (u'Food & Wine', 'http://www.haaretz.com/cmlink/1.628773'),
        (u'Sports', 'http://www.haaretz.com/cmlink/1.628774'),
    ]

    def get_browser(self):
        """Return a browser that has opened the site and, if possible, signed in.

        The site root is always opened first. The sign-in request is sent
        only when both ``self.username`` and ``self.password`` are set.
        """
        browser = BasicNewsRecipe.get_browser(self)
        browser.open(self.PREFIX)

        # Without a full set of credentials there is nothing to submit.
        if self.username is None or self.password is None:
            return browser

        login_fields = {
            'cb': 'parseEngReply',
            'newsso': 'true',
            'fromlogin': 'true',
            'layer': 'eng_login',
            'userName': self.username,
            'password': self.password,
        }
        browser.open(
            'https://sso.haaretz.com/sso/sso/signIn',
            urlencode(login_fields),
        )
        return browser

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        The text after the last ``/`` of ``url`` is appended to the
        print-page base address.
        """
        article_id = url.rsplit('/', 1)[-1]
        return 'http://www.haaretz.com/misc/article-print-page/' + article_id