mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
70 lines
2.6 KiB
Plaintext
70 lines
2.6 KiB
Plaintext
import random
|
|
|
|
from calibre import browser
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
|
|
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    '''
    Recipe that downloads articles from the RSS feeds of The Sun (UK).

    On top of the stock BasicNewsRecipe behaviour it drops a couple of
    recurring promotional/editorial items from every feed (see parse_feeds)
    and picks the cover image itself (see get_cover_url).
    '''

    title = u'The Sun UK'
    description = 'Articles from The Sun tabloid UK'
    __author__ = 'Dave Asbury'
    # last updated 5/5/13 better cover fetch
    language = 'en_GB'
    oldest_article = 1
    max_articles_per_feed = 15
    remove_empty_feeds = True

    masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
    encoding = 'UTF-8'
    remove_javascript = True
    no_stylesheets = True

    ignore_duplicate_articles = {'title', 'url'}
    compress_news_images = True

    # Only these parts of each article page are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'intro'}),
        dict(name='h3'),
        dict(name='div', attrs={'id': 'articlebody'}),
    ]
    remove_tags_after = [dict(id='bodyText')]
    remove_tags = [
        dict(name='li'),
        dict(attrs={'class': 'grid-4 right-hand-column'}),
    ]

    feeds = [
        (u'News', u'http://www.thesun.co.uk/sol/homepage/news/rss'),
        (u'Sport', u'http://www.thesun.co.uk/sol/homepage/sport/rss'),
        (u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
        (u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
    ]

    # starsons code
    def parse_feeds(self):
        '''
        Parse the feeds as BasicNewsRecipe does, then drop unwanted articles.

        An article is removed when its title contains one of the unwanted
        phrases (compared case-insensitively) or its URL contains the
        matching URL fragment.

        :return: The feeds returned by BasicNewsRecipe.parse_feeds, with the
                 unwanted articles removed from each feed's article list.
        '''
        # (title phrase, URL fragment) pairs. The title phrases are stored
        # upper-cased because they are matched against the upper-cased
        # title; a mixed-case phrase could never be found in it.
        unwanted = (
            ('TRY OUT THE SUN', 'Try-out-The-Suns'),
            ('WEB PORN HARMS KIDS', 'Sun-says-Web-porn'),
        )

        def is_unwanted(article):
            # Tolerate a missing title/url instead of raising on None.
            title = (article.title or '').upper()
            url = article.url or ''
            return any(phrase in title or fragment in url
                       for phrase, fragment in unwanted)

        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Slice assignment filters the existing list object in place and
            # removes each article at most once, even if it matches both
            # patterns.
            feed.articles[:] = [
                article for article in feed.articles
                if not is_unwanted(article)
            ]
        return feeds

    def get_cover_url(self):
        '''
        Return the URL of the image to use as the cover.

        The current front page hosted on thepaperboy.com is probed first.
        If that request raises, one of The Sun's error-page images is chosen
        at random instead.

        :return: The cover image URL as a string.
        '''
        br = browser()
        # Redirects are not followed -- presumably so that a redirected
        # (i.e. missing) front page counts as a failure; confirm against the
        # browser implementation.
        br.set_handle_redirect(False)
        cover_url = 'http://www.thepaperboy.com/frontpages/current/The_Sun_newspaper_front_page.jpg'

        try:
            br.open_novisit(cover_url)
        except Exception:
            # Best-effort fallback; Exception (rather than a bare except)
            # lets KeyboardInterrupt/SystemExit propagate.
            cover_url = random.choice([
                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage6_677961a_905507a.jpg',
                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage7_677962a_905505a.jpg',
                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage5_677960a_905512a.jpg',
                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage2_677957a_905502a.jpg',
                'http://img.thesun.co.uk/multimedia/archive/00905/errorpage3_677958a_905503a.jpg',
            ])

        return cover_url
|