mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
161 lines
6.7 KiB
Plaintext
161 lines
6.7 KiB
Plaintext
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
from calibre.ebooks.BeautifulSoup import Tag
|
|
|
|
class TheIndependentNew(BasicNewsRecipe):
    """Recipe that builds an e-book from The Independent's RSS feeds.

    The class only configures ``BasicNewsRecipe`` through class attributes
    and overrides ``preprocess_html`` to turn the site's gallery markup
    into plain ``<img>`` tags.
    """

    # --- Publication metadata -------------------------------------------
    title = u'The Independent'
    __author__ = 'Krittika Goyal'
    # Adjacent literals are used instead of backslash continuations inside
    # one literal so that source indentation can never leak into the text.
    description = (
        'The latest in UK News and World News from The '
        'Independent. Wide range of international and local news, sports '
        'news, commentary and opinion pieces.Independent News - Breaking news '
        'that matters. Your daily comprehensive news source - The '
        'Independent Newspaper'
    )
    publisher = 'The Independent'
    category = 'news, UK'
    language = 'en_GB'
    publication_type = 'newspaper'

    # --- Download / conversion options ----------------------------------
    oldest_article = 2.0
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    compress_news_images = True

    # --- Page clean-up rules --------------------------------------------
    # Elements matched here are the ones retained from each article page.
    keep_only_tags = [
        dict(itemprop=['articleBody', 'headline', 'contentUrl']),
        dict(attrs={'class': ['intro', 'author']}),
    ]
    # Elements matched here are dropped: anything whose class list contains
    # the token 'show-all', and anything carrying a data-scald-gallery
    # attribute.
    remove_tags = [
        dict(attrs={'class': lambda x: x and 'show-all' in x.split()}),
        dict(attrs={'data-scald-gallery': True}),
    ]
    remove_attributes = ['style']

    def preprocess_html(self, soup):
        """Replace gallery list items with inline images.

        For every element with class ``full-gallery``:

        1. each ``<li>`` that has both ``data-gallery-item`` and
           ``data-original`` is recorded (item id -> image URL) and removed
           from the tree;
        2. each ``<li>`` that has ``data-gallery-legend`` gets an ``<img>``
           appended whose ``src`` is the URL recorded for that id, if any.

        :param soup: parsed article page.
        :return: the same ``soup`` object, modified in place.
        """
        # Beautiful Soup 4 exposes soup.new_tag(); the positional
        # Tag(soup, name) constructor only works with the older API, so it
        # is kept purely as a fallback.
        make_tag = getattr(soup, 'new_tag', None)

        for gallery in soup.findAll(attrs={'class': 'full-gallery'}):
            image_urls = {}
            item_filter = {'data-gallery-item': True, 'data-original': True}
            for item in gallery.findAll('li', attrs=item_filter):
                image_urls[item['data-gallery-item']] = item['data-original']
                item.extract()

            for legend in gallery.findAll('li', attrs={'data-gallery-legend': True}):
                src = image_urls.get(legend['data-gallery-legend'])
                if src is None:
                    # No image was recorded for this legend; leave it as is.
                    continue
                if make_tag is not None:
                    img = make_tag('img')
                else:
                    img = Tag(soup, 'img')
                img['src'] = src
                # NOTE(review): 'style' is listed in remove_attributes, so
                # this inline style may be stripped later - confirm.
                img['style'] = 'display:block'
                legend.append(img)
        return soup

    # (feed title, feed URL) pairs.
    feeds = [
        (u'News - UK',
         u'http://www.independent.co.uk/news/uk/?service=rss'),
        (u'News - World',
         u'http://www.independent.co.uk/news/world/?service=rss'),
        (u'News - Business',
         u'http://www.independent.co.uk/news/business/?service=rss'),
        (u'News - People',
         u'http://www.independent.co.uk/news/people/?service=rss'),
        (u'News - Science',
         u'http://www.independent.co.uk/news/science/?service=rss'),
        (u'News - Media',
         u'http://www.independent.co.uk/news/media/?service=rss'),
        (u'News - Education',
         u'http://www.independent.co.uk/news/education/?service=rss'),
        (u'News - Obituaries',
         u'http://www.independent.co.uk/news/obituaries/?service=rss'),
        (u'News - Corrections',
         u'http://www.independent.co.uk/news/corrections/?service=rss'),
        (u'Opinion',
         u'http://www.independent.co.uk/opinion/?service=rss'),
        (u'Voices',
         u'http://www.independent.co.uk/voices/?service=rss'),
        (u'Environment',
         u'http://www.independent.co.uk/environment/?service=rss'),
        (u'Sport - Athletics',
         u'http://www.independent.co.uk/sport/general/athletics/?service=rss'),
        (u'Sport - Cricket',
         u'http://www.independent.co.uk/sport/cricket/?service=rss'),
        (u'Sport - Football',
         u'http://www.independent.co.uk/sport/football/?service=rss'),
        (u'Sport - Golf',
         u'http://www.independent.co.uk/sport/golf/?service=rss'),
        (u'Sport - Motor racing',
         u'http://www.independent.co.uk/sport/motor-racing/?service=rss'),
        (u'Sport - Olympics',
         u'http://www.independent.co.uk/sport/olympics/?service=rss'),
        (u'Sport - Racing',
         u'http://www.independent.co.uk/sport/racing/?service=rss'),
        (u'Sport - Rugby League',
         u'http://www.independent.co.uk/sport/general/rugby-league/?service=rss'),
        (u'Sport - Rugby Union',
         u'http://www.independent.co.uk/sport/rugby/rugby-union/?service=rss'),
        (u'Sport - Sailing',
         u'http://www.independent.co.uk/sport/general/sailing/?service=rss'),
        (u'Sport - Tennis',
         u'http://www.independent.co.uk/sport/tennis/?service=rss'),
        (u'Sport - Others',
         u'http://www.independent.co.uk/sport/general/others/?service=rss'),
        (u'Life & Style - Fashion',
         u'http://www.independent.co.uk/life-style/fashion/?service=rss'),
        (u'Life & Style -Food & Drink',
         u'http://www.independent.co.uk/life-style/food-and-drink/?service=rss'),
        (u'Life & Style - Health and Families',
         u'http://www.independent.co.uk/life-style/health-and-families/?service=rss'),
        # This entry previously lacked the '?service=rss' query that every
        # other feed URL carries; added so it matches the other entries.
        (u'Life & Style - House & Home',
         u'http://www.independent.co.uk/life-style/house-and-home/?service=rss'),
        (u'Life & Style - History',
         u'http://www.independent.co.uk/life-style/history/?service=rss'),
        (u'Life & Style - Gadgets & Tech',
         u'http://www.independent.co.uk/life-style/gadgets-and-tech/?service=rss'),
        (u'Life & Style - Motoring',
         u'http://www.independent.co.uk/life-style/motoring/?service=rss'),
        (u'Arts & Ents - Art',
         u'http://www.independent.co.uk/arts-entertainment/art/?service=rss'),
        (u'Arts & Ents - Architecture',
         u'http://www.independent.co.uk/arts-entertainment/architecture/?service=rss'),
        (u'Arts & Ents - Music',
         u'http://www.independent.co.uk/arts-entertainment/music/?service=rss'),
        (u'Arts & Ents - Classical',
         u'http://www.independent.co.uk/arts-entertainment/classical/?service=rss'),
        (u'Arts & Ents - Films',
         u'http://www.independent.co.uk/arts-entertainment/films/?service=rss'),
        (u'Arts & Ents - TV',
         u'http://www.independent.co.uk/arts-entertainment/tv/?service=rss'),
        (u'Arts & Ents - Theatre and Dance',
         u'http://www.independent.co.uk/arts-entertainment/theatre-dance/?service=rss'),
        (u'Arts & Ents - Comedy',
         u'http://www.independent.co.uk/arts-entertainment/comedy/?service=rss'),
        (u'Arts & Ents - Books',
         u'http://www.independent.co.uk/arts-entertainment/books/?service=rss'),
        (u'Travel',
         u'http://www.independent.co.uk/travel/?service=rss'),
        (u'Money',
         u'http://www.independent.co.uk/money/?service=rss'),
        (u'IndyBest',
         u'http://www.independent.co.uk/extras/indybest/?service=rss'),
    ]
|