calibre/recipes/orlando_sentinel.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

58 lines
2.1 KiB
Plaintext

import urllib
import re
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1279258912(BasicNewsRecipe):
title = u'Orlando Sentinel'
oldest_article = 3
max_articles_per_feed = 100
feeds = [
(u'News', u'http://feeds.feedburner.com/orlandosentinel/news'),
(u'Opinion', u'http://feeds.feedburner.com/orlandosentinel/news/opinion'),
(u'Business', u'http://feeds.feedburner.com/orlandosentinel/business'),
(u'Technology', u'http://feeds.feedburner.com/orlandosentinel/technology'),
(u'Space and Science', u'http://feeds.feedburner.com/orlandosentinel/news/space'),
(u'Entertainment', u'http://feeds.feedburner.com/orlandosentinel/entertainment'),
(u'Life and Family',
u'http://feeds.feedburner.com/orlandosentinel/features/lifestyle'),
]
__author__ = 'rty'
pubisher = 'OrlandoSentinel.com'
description = 'Orlando, Florida, Newspaper'
category = 'News, Orlando, Florida'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'utf-8'
conversion_options = {'linearize_tables': True}
remove_empty_feeds = True
auto_cleanup = True
def get_article_url(self, article):
ans = None
try:
s = article.summary
ans = urllib.unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
if ans is None:
link = article.get('feedburner_origlink', None)
if link and link.split('/')[-1] == "story01.htm":
link = link.split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
'0S': '//', '0H': ','}
for k, v in encoding.iteritems():
link = link.replace(k, v)
ans = link
elif link:
ans = link
if ans is not None:
return ans.replace('?track=rss', '')