calibre/recipes/the_escapist.recipe

#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini and Tom Surace'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>, 2013 Tom Surace <tekhedd@byteheaven.net>'
description = 'The Escapist Magazine - v1.3 (2013, April 2013)'

#
# Based on 'the Escapist Magazine - v1.02 (09, January 2010)'

'''
http://www.escapistmagazine.com/
'''

from calibre.web.feeds.news import BasicNewsRecipe


class al(BasicNewsRecipe):
    """Recipe for The Escapist Magazine, driven by the site's RSS feeds.

    Each article URL from ``feeds`` is rewritten to the site's print page by
    ``print_version``. The remaining class attributes are settings consumed
    by the ``BasicNewsRecipe`` base class (see the calibre recipe API
    documentation for their exact semantics).
    """

    # Publication metadata.
    author = 'Lorenzo Vigentini and Tom Surace'
    description = 'The Escapist Magazine'
    cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
    title = u'The Escapist Magazine'
    publisher = 'Themis Media'
    category = 'Video games news, lifestyle, gaming culture'

    language = 'en'
    timefmt = '[%a, %d %b, %Y]'

    # Fetch limits and content source settings.
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    recursion = 10

    # Clean-up switches applied to the downloaded pages.
    remove_javascript = True
    no_stylesheets = True

    # (section title, RSS url) pairs.
    feeds = [
        (u'Daily News', u'http://www.escapistmagazine.com/rss/news/0.xml'),
        (u'Articles', u'http://www.escapistmagazine.com/rss/articles/0.xml')
    ]

    def print_version(self, url):
        """Return the print-page URL for an article URL taken from the feeds.

        Feed URLs are expected in the format::

            http://www.escapistmagazine.com/news/view/123198-article-name?utm_source=rss&utm_medium=rss&utm_campaign=news

        and are rewritten to::

            http://www.escapistmagazine.com/news/print/123198

        The section (``news`` above) is the first path segment after the
        host; the article number is whatever precedes the first hyphen in
        the last path segment.

        The fragment, the query string and any trailing slashes are removed
        before the URL is split, so tracking parameters can never end up in
        the article number (e.g. when the last segment has no hyphen) and a
        trailing ``/`` does not produce an empty article number.

        :param url: article URL as delivered by the RSS feed.
        :return: URL of the printer-friendly page for the same article.
        :raises IndexError: if ``url`` has no path segment after the host.
        """
        base_url = 'http://www.escapistmagazine.com'

        # Keep only scheme://host/path: cut the fragment first, then the
        # query string, then any trailing slashes.
        page_url = url.split('#', 1)[0].split('?', 1)[0].rstrip('/')

        # 'http://host/section/...' splits into
        # ['http:', '', 'host', 'section', ...], hence index 3.
        segments = page_url.split('/')
        section = segments[3]

        # The last segment is "<number>-<article-name>"; keep the number.
        article_number = segments[-1].split('-')[0]

        return base_url + '/' + section + '/print/' + article_number

    # Tag filters: keep the article container ...
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'})
    ]

    # ... and drop these div ids (ad banner, print notice, bottom panel).
    remove_tags = [
        dict(name='div', attrs={
             'id': ['ad_leaderboard', 'print_notice', 'bottom_panel_container']})
    ]