mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update The Week
This commit is contained in:
parent
7944a8a022
commit
7f79f21f1d
@ -63,6 +63,11 @@ class FE_India(BasicNewsRecipe):
|
||||
('Money','https://www.financialexpress.com/money/feed'),
|
||||
]
|
||||
|
||||
def get_cover_url(self):
    """Return the cover image URL taken from the Magzter page, or ``None``.

    The Financial Express (Mumbai) page on Magzter is fetched through
    ``self.index_to_soup`` and the ``content`` attribute of the first
    ``<meta>`` tag whose content ends with ``view/3.jpg`` is returned.

    Returns:
        The matching ``content`` value, or ``None`` when no tag matches.
    """
    soup = self.index_to_soup('https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/')
    # ``s and ...`` skips falsy attribute values before ``endswith`` is called.
    for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
        # Only the first match is used.
        return citem['content']
    return None
|
||||
|
||||
def preprocess_html(self, soup, *a):
|
||||
for img in soup.findAll(attrs={'data-src': True}):
|
||||
img['src'] = img['data-src']
|
||||
|
@ -49,6 +49,11 @@ class HindustanTimes(BasicNewsRecipe):
|
||||
# ('Budget','https://www.hindustantimes.com/feeds/rss/budget/rssfeed.xml')
|
||||
]
|
||||
|
||||
def get_cover_url(self):
    """Return the cover image URL taken from the Magzter page, or ``None``.

    The Hindustan Times (Delhi) page on Magzter is fetched through
    ``self.index_to_soup`` and the ``content`` attribute of the first
    ``<meta>`` tag whose content ends with ``view/3.jpg`` is returned.

    Returns:
        The matching ``content`` value, or ``None`` when no tag matches.
    """
    soup = self.index_to_soup('https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Hindustan-Times-Delhi/Newspaper/')
    # ``s and ...`` skips falsy attribute values before ``endswith`` is called.
    for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
        # Only the first match is used.
        return citem['content']
    return None
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||
img['src'] = img['data-src']
|
||||
|
@ -35,6 +35,13 @@ class IndiaToday(BasicNewsRecipe):
|
||||
('Sports','https://www.indiatoday.in/rss/1206518'),
|
||||
]
|
||||
|
||||
extra_css = '[itemprop^="description"] {font-size: small; font-style: italic;}'
|
||||
|
||||
def get_cover_url(self):
    """Return the cover image URL taken from the Magzter page, or ``None``.

    The India Today page on Magzter is fetched through
    ``self.index_to_soup`` and the ``content`` attribute of the first
    ``<meta>`` tag whose content ends with ``view/3.jpg`` is returned.

    Returns:
        The matching ``content`` value, or ``None`` when no tag matches.
    """
    soup = self.index_to_soup('https://www.magzter.com/IN/India-Today-Group/India-Today/News/')
    # ``s and ...`` skips falsy attribute values before ``endswith`` is called.
    for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
        # Only the first match is used.
        return citem['content']
    return None
|
||||
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
soup = BeautifulSoup(raw_html)
|
||||
|
@ -2,7 +2,6 @@
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
@ -20,6 +19,7 @@ class TheWeek(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
ignore_duplicate_articles = {'url'}
|
||||
remove_attributes = ['style', 'align', 'border', 'hspace']
|
||||
|
||||
feeds = [
|
||||
('Cover Story', 'https://www.theweek.in/theweek/cover.rss'),
|
||||
@ -34,23 +34,24 @@ class TheWeek(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def get_cover_url(self):
    """Return the URL of the magazine cover image, or ``None``.

    Two pages are consulted in order, each fetched with
    ``self.index_to_soup``:

    1. ``https://www.theweek.in/theweek.html``: the first ``<img>`` whose
       ``data-src-web`` attribute contains ``/cover-magazine``. Everything
       from the last ``.image.`` marker onwards is cut off, and the rest is
       appended to ``https://img.theweek.in``.
    2. The Magzter page for THE WEEK: the ``content`` attribute of the
       first ``<meta>`` tag whose content ends with ``view/3.jpg``. This
       page is only fetched when step 1 finds no matching image.

    Returns:
        The cover URL string, or ``None`` when neither page yields a match.
    """
    soup = self.index_to_soup('https://www.theweek.in/theweek.html')
    for img in soup.findAll('img', attrs={'data-src-web': lambda x: x and '/cover-magazine' in x}):
        src = img['data-src-web']
        # str.rfind reports a miss as -1 instead of raising, so no
        # exception handling is needed around it.
        idx = src.rfind('.image.')
        if idx > -1:
            src = src[:idx]
        # Only the first matching image is used.
        return 'https://img.theweek.in' + src
    soup = self.index_to_soup(
        'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
    )
    for citem in soup.findAll(
        'meta', content=lambda s: s and s.endswith('view/3.jpg')
    ):
        return citem['content']
    return None
|
||||
|
||||
def preprocess_html(self, soup):
    """Clean up an article's parsed HTML and return it.

    * The first ``<a>`` element, if there is one, is renamed to ``div``.
    * The ``string`` of the first ``<h2>``, if there is one, is replaced by
      the result of ``fix_title`` on it.
    * Every ``<p>`` whose ``string`` is exactly a non-breaking space
      (``'\\xa0'``) is removed from the tree.

    Args:
        soup: the parsed document; it is modified in place.

    Returns:
        The same ``soup`` object that was passed in.
    """
    a = soup.find('a')
    # find() may return None, so only touch the element when it exists.
    if a:
        a.name = 'div'
    h2 = soup.find('h2')
    if h2:
        # Applied exactly once per document.
        h2.string = fix_title(h2.string)
    for p in soup.findAll('p'):
        if p.string == '\xa0':
            p.decompose()
    return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
|
Loading…
x
Reference in New Issue
Block a user