calibre/recipes/orlando_sentinel.recipe
2019-04-01 13:57:21 +05:30

61 lines
2.2 KiB
Plaintext

import re
try:
from urllib.parse import unquote
except ImportError:
from urllib import unquote
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1279258912(BasicNewsRecipe):
title = u'Orlando Sentinel'
oldest_article = 3
max_articles_per_feed = 100
feeds = [
(u'News', u'http://feeds.feedburner.com/orlandosentinel/news'),
(u'Opinion', u'http://feeds.feedburner.com/orlandosentinel/news/opinion'),
(u'Business', u'http://feeds.feedburner.com/orlandosentinel/business'),
(u'Technology', u'http://feeds.feedburner.com/orlandosentinel/technology'),
(u'Space and Science', u'http://feeds.feedburner.com/orlandosentinel/news/space'),
(u'Entertainment', u'http://feeds.feedburner.com/orlandosentinel/entertainment'),
(u'Life and Family',
u'http://feeds.feedburner.com/orlandosentinel/features/lifestyle'),
]
__author__ = 'rty'
pubisher = 'OrlandoSentinel.com'
description = 'Orlando, Florida, Newspaper'
category = 'News, Orlando, Florida'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
language = 'en'
encoding = 'utf-8'
conversion_options = {'linearize_tables': True}
remove_empty_feeds = True
auto_cleanup = True
def get_article_url(self, article):
ans = None
try:
s = article.summary
ans = unquote(
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
except:
pass
if ans is None:
link = article.get('feedburner_origlink', None)
if link and link.split('/')[-1] == "story01.htm":
link = link.split('/')[-2]
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http:',
'0S': '//', '0H': ','}
for k, v in encoding.items():
link = link.replace(k, v)
ans = link
elif link:
ans = link
if ans is not None:
return ans.replace('?track=rss', '')