mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update The Week
This commit is contained in:
parent
7944a8a022
commit
7f79f21f1d
@ -63,6 +63,11 @@ class FE_India(BasicNewsRecipe):
|
|||||||
('Money','https://www.financialexpress.com/money/feed'),
|
('Money','https://www.financialexpress.com/money/feed'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/')
|
||||||
|
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
|
||||||
|
return citem['content']
|
||||||
|
|
||||||
def preprocess_html(self, soup, *a):
|
def preprocess_html(self, soup, *a):
|
||||||
for img in soup.findAll(attrs={'data-src': True}):
|
for img in soup.findAll(attrs={'data-src': True}):
|
||||||
img['src'] = img['data-src']
|
img['src'] = img['data-src']
|
||||||
|
@ -49,6 +49,11 @@ class HindustanTimes(BasicNewsRecipe):
|
|||||||
# ('Budget',''https://www.hindustantimes.com/feeds/rss/budget/rssfeed.xml')
|
# ('Budget',''https://www.hindustantimes.com/feeds/rss/budget/rssfeed.xml')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Hindustan-Times-Delhi/Newspaper/')
|
||||||
|
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
|
||||||
|
return citem['content']
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', attrs={'data-src': True}):
|
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||||
img['src'] = img['data-src']
|
img['src'] = img['data-src']
|
||||||
|
@ -35,6 +35,13 @@ class IndiaToday(BasicNewsRecipe):
|
|||||||
('Sports','https://www.indiatoday.in/rss/1206518'),
|
('Sports','https://www.indiatoday.in/rss/1206518'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
extra_css = '[itemprop^="description"] {font-size: small; font-style: italic;}'
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('https://www.magzter.com/IN/India-Today-Group/India-Today/News/')
|
||||||
|
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
|
||||||
|
return citem['content']
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw_html, url):
|
def preprocess_raw_html(self, raw_html, url):
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
soup = BeautifulSoup(raw_html)
|
soup = BeautifulSoup(raw_html)
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
# vim:fileencoding=utf-8
|
# vim:fileencoding=utf-8
|
||||||
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
@ -20,6 +19,7 @@ class TheWeek(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = True
|
use_embedded_content = True
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
|
remove_attributes = ['style', 'align', 'border', 'hspace']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Cover Story', 'https://www.theweek.in/theweek/cover.rss'),
|
('Cover Story', 'https://www.theweek.in/theweek/cover.rss'),
|
||||||
@ -34,23 +34,24 @@ class TheWeek(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
soup = self.index_to_soup('https://www.theweek.in/theweek.html')
|
soup = self.index_to_soup(
|
||||||
for img in soup.findAll('img', attrs={'data-src-web': lambda x: x and '/cover-magazine' in x}):
|
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
|
||||||
src = img['data-src-web']
|
)
|
||||||
try:
|
for citem in soup.findAll(
|
||||||
idx = src.rfind('.image.')
|
'meta', content=lambda s: s and s.endswith('view/3.jpg')
|
||||||
except Exception:
|
):
|
||||||
pass
|
return citem['content']
|
||||||
else:
|
|
||||||
if idx > -1:
|
|
||||||
src = src[:idx]
|
|
||||||
return 'https://img.theweek.in' + src
|
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
a = soup.find('a')
|
a = soup.find('a')
|
||||||
a.name = 'div'
|
if a:
|
||||||
|
a.name = 'div'
|
||||||
h2 = soup.find('h2')
|
h2 = soup.find('h2')
|
||||||
h2.string = fix_title(h2.string)
|
if h2:
|
||||||
|
h2.string = fix_title(h2.string)
|
||||||
|
for p in soup.findAll('p'):
|
||||||
|
if p.string == '\xa0':
|
||||||
|
p.decompose()
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def populate_article_metadata(self, article, soup, first):
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user