mirror of
https://github.com/kovidgoyal/calibre.git
synced 2026-01-05 11:40:20 -05:00
59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
#!/usr/bin/env python
|
|
# vim:fileencoding=utf-8
|
|
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
def fix_title(title):
    """Turn a hyphen-separated title into a readable, sentence-cased one.

    Every ``-`` in *title* becomes a space, after which ``str.capitalize``
    upper-cases the first character and lower-cases all the others.
    """
    spaced = ' '.join(title.split('-'))
    return spaced.capitalize()
|
|
|
|
|
|
class TheWeek(BasicNewsRecipe):
    """calibre news recipe that assembles The Week from its RSS feeds."""

    title = u'The Week'
    language = 'en_IN'
    __author__ = 'Kovid Goyal'
    encoding = 'utf-8'
    oldest_article = 8  # days
    max_articles_per_feed = 25
    no_stylesheets = True
    # The article text is taken from the feed entries themselves.
    use_embedded_content = True
    # An article reachable from several feeds is only included once.
    ignore_duplicate_articles = {'url'}
    # Presentational attributes stripped from every tag.
    remove_attributes = ['style', 'align', 'border', 'hspace']

    # (section title, feed URL) pairs.
    feeds = [
        ('Cover Story', 'https://www.theweek.in/theweek/cover.rss'),
        ('Sports', 'https://www.theweek.in/theweek/sports.rss'),
        ('Current', 'https://www.theweek.in/theweek/current.rss'),
        ('Statescan', 'https://www.theweek.in/theweek/statescan.rss'),
        ('Leisure', 'https://www.theweek.in/theweek/leisure.rss'),
        ('Business', 'https://www.theweek.in/theweek/business.rss'),
        ('Specials', 'https://www.theweek.in/theweek/specials.rss'),
        ('More', 'https://www.theweek.in/theweek/more.rss'),
        ('Society', 'https://www.theweek.in/leisure/society.rss'),
    ]

    def get_cover_url(self):
        """Return the cover image URL found on the magazine's Magzter page.

        The page is searched for the first ``<meta>`` tag whose ``content``
        attribute ends with ``view/3.jpg``; that attribute value is returned.
        Returns ``None`` when no such tag exists.
        """
        soup = self.index_to_soup(
            'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
        )
        for citem in soup.findAll(
            'meta', content=lambda s: s and s.endswith('view/3.jpg')
        ):
            return citem['content']
        return None

    def preprocess_html(self, soup):
        """Clean up the HTML of one article and return the modified *soup*.

        * The first ``<a>`` element is turned into a ``<div>``.
        * The text of the first ``<h2>`` is normalised with ``fix_title``.
        * Paragraphs consisting solely of a non-breaking space are removed.
        """
        a = soup.find('a')
        if a is not None:
            # NOTE(review): presumably the feed wraps content in a link that
            # should not stay clickable -- confirm against a live feed entry.
            a.name = 'div'
        h2 = soup.find('h2')
        # Tag.string is None for an empty heading or one with several child
        # nodes; passing that to fix_title() would raise AttributeError, so
        # only rewrite headings that hold a single piece of text.
        if h2 is not None and h2.string:
            h2.string = fix_title(h2.string)
        for p in soup.findAll('p'):
            if p.string == '\xa0':
                p.decompose()
        return soup

    def populate_article_metadata(self, article, soup, first):
        """Replace *article*'s title with its ``fix_title``-normalised form.

        *soup* and *first* are accepted for interface compatibility and are
        not used.
        """
        article.title = fix_title(article.title)
|