# calibre/recipes/orlando_sentinel.recipe

import urllib
import re
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1279258912(BasicNewsRecipe):
    """Recipe for the Orlando Sentinel, built from its FeedBurner RSS feeds."""

    title = u'Orlando Sentinel'
    oldest_article = 3
    max_articles_per_feed = 100

    # (section title, feed URL) pairs.
    feeds = [
        (u'News', u'http://feeds.feedburner.com/orlandosentinel/news'),
        (u'Opinion', u'http://feeds.feedburner.com/orlandosentinel/news/opinion'),
        (u'Business', u'http://feeds.feedburner.com/orlandosentinel/business'),
        (u'Technology', u'http://feeds.feedburner.com/orlandosentinel/technology'),
        (u'Space and Science', u'http://feeds.feedburner.com/orlandosentinel/news/space'),
        (u'Entertainment', u'http://feeds.feedburner.com/orlandosentinel/entertainment'),
        (u'Life and Family',
         u'http://feeds.feedburner.com/orlandosentinel/features/lifestyle'),
    ]
    __author__ = 'rty'
    publisher = 'OrlandoSentinel.com'
    description = 'Orlando, Florida, Newspaper'
    category = 'News, Orlando, Florida'

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}
    remove_empty_feeds = True

    auto_cleanup = True

    def get_article_url(self, article):
        """Return the article URL for a feed entry, or None if none is found.

        The entry summary is searched first for a ``bookmark.cfm`` share link;
        its URL-quoted ``link=`` parameter is the preferred result. Otherwise
        the entry's ``feedburner_origlink`` is used, and decoded when it is an
        escaped ``.../<escaped-url>/story01.htm`` link. Any ``?track=rss``
        marker is stripped from the returned URL.

        :param article: feed entry exposing ``summary`` as an attribute and
            supporting ``get('feedburner_origlink')``.
        :return: the URL string, or None when the entry offers no usable link.
        """
        # Imported locally so the method works on both Python 2 and Python 3
        # without depending on the module-level ``import urllib``.
        try:
            from urllib.parse import unquote
        except ImportError:  # Python 2
            from urllib import unquote

        ans = None

        # Best effort: a missing or non-text summary simply means there is no
        # share link to extract.
        try:
            match = re.search(
                r'href=".+?bookmark.cfm.+?link=(.+?)"', article.summary)
        except (AttributeError, KeyError, TypeError):
            match = None
        if match is not None:
            ans = unquote(match.group(1))

        if ans is None:
            link = article.get('feedburner_origlink', None)
            parts = link.split('/') if link else []
            if len(parts) > 1 and parts[-1] == "story01.htm":
                # The real URL is escaped into the path segment that precedes
                # "story01.htm", using two-character codes.
                escapes = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
                           '0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
                           '0S': '//', '0H': ','}
                # Decode in a single left-to-right pass so that the output of
                # one escape (e.g. '0A' -> '0') can never combine with the
                # following character and be mistaken for another escape.
                codes = re.compile('|'.join(re.escape(k) for k in escapes))
                ans = codes.sub(lambda m: escapes[m.group(0)], parts[-2])
            elif link:
                ans = link

        if ans is None:
            return None
        return ans.replace('?track=rss', '')