This commit is contained in:
Kovid Goyal 2025-06-27 11:14:19 +05:30
commit a5eef775dd
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
9 changed files with 9 additions and 28 deletions

View File

@@ -48,10 +48,7 @@ class BusinessStandardPrint(BasicNewsRecipe):
soup = self.index_to_soup( soup = self.index_to_soup(
'https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/' 'https://www.magzter.com/IN/Business-Standard-Private-Ltd/Business-Standard/Newspaper/'
) )
for citem in soup.findAll( return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
'meta', content=lambda s: s and s.endswith('view/3.jpg')
):
return citem['content']
def parse_index(self): def parse_index(self):
today = datetime.today().strftime('%d-%m-%Y') today = datetime.today().strftime('%d-%m-%Y')

View File

@@ -24,10 +24,7 @@ class BusinessLine(BasicNewsRecipe):
soup = self.index_to_soup( soup = self.index_to_soup(
'https://www.magzter.com/IN/THG-publishing-pvt-ltd/The-Hindu-Business-Line/Newspaper/' 'https://www.magzter.com/IN/THG-publishing-pvt-ltd/The-Hindu-Business-Line/Newspaper/'
) )
for citem in soup.findAll( return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
'meta', content=lambda s: s and s.endswith('view/3.jpg')
):
return citem['content']
keep_only_tags = [ keep_only_tags = [
classes( classes(

View File

@@ -44,5 +44,4 @@ class LiveHindustan(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Hindustan-Times-Hindi-New-Delhi/Newspaper/') soup = self.index_to_soup('https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Hindustan-Times-Hindi-New-Delhi/Newspaper/')
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
return citem['content']

View File

@@ -51,8 +51,7 @@ class HindustanTimes(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Hindustan-Times-Delhi/Newspaper/') soup = self.index_to_soup('https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Hindustan-Times-Delhi/Newspaper/')
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
return citem['content']
def preprocess_html(self, soup): def preprocess_html(self, soup):
for img in soup.findAll('img', attrs={'data-src': True}): for img in soup.findAll('img', attrs={'data-src': True}):

View File

@@ -22,10 +22,7 @@ class MuyInteresante(BasicNewsRecipe):
soup = self.index_to_soup( soup = self.index_to_soup(
'https://www.magzter.com/ES/Zinet-Media-Global/Muy-Interesante-Espa%C3%B1a/Science/1806044' 'https://www.magzter.com/ES/Zinet-Media-Global/Muy-Interesante-Espa%C3%B1a/Science/1806044'
) )
for citem in soup.findAll( return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
'meta', content=lambda s: s and s.endswith('view/3.jpg')
):
return citem['content']
extra_css = ''' extra_css = '''
.c-detail__bar, .c-detail__author, .c-detail__media__txt { font-size:small; } .c-detail__bar, .c-detail__author, .c-detail__media__txt { font-size:small; }

View File

@@ -94,5 +94,4 @@ class pcWorld(BasicNewsRecipe):
soup = self.index_to_soup( soup = self.index_to_soup(
'https://www.magzter.com/US/IDG-Consumer-and-SMB,-Inc./PCWorld/Computer-&-Mobile/' 'https://www.magzter.com/US/IDG-Consumer-and-SMB,-Inc./PCWorld/Computer-&-Mobile/'
) )
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
return citem['content']

View File

@@ -19,10 +19,7 @@ class RevistaMuyInteresante(BasicNewsRecipe):
soup = self.index_to_soup( soup = self.index_to_soup(
'https://www.magzter.com/ES/Zinet-Media-Global/Muy-Interesante-Espa%C3%B1a/Science/1806044' 'https://www.magzter.com/ES/Zinet-Media-Global/Muy-Interesante-Espa%C3%B1a/Science/1806044'
) )
for citem in soup.findAll( return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
'meta', content=lambda s: s and s.endswith('view/3.jpg')
):
return citem['content']
extra_css = ''' extra_css = '''
.c-detail__bar, .c-detail__author, .c-detail__media__txt { font-size:small; } .c-detail__bar, .c-detail__author, .c-detail__media__txt { font-size:small; }

View File

@@ -44,10 +44,7 @@ class TheWeek(BasicNewsRecipe):
soup = self.index_to_soup( soup = self.index_to_soup(
'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/' 'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
) )
for citem in soup.findAll( return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
'meta', content=lambda s: s and s.endswith('view/3.jpg')
):
return citem['content']
def parse_index(self): def parse_index(self):
issue = 'https://www.theweek.in/theweek.html' issue = 'https://www.theweek.in/theweek.html'

View File

@@ -69,8 +69,7 @@ class TheEconomicTimes(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
soup = self.index_to_soup('https://www.magzter.com/IN/Bennett-Coleman-and-Company-Limited/The-Times-of-India-Delhi/Newspaper/') soup = self.index_to_soup('https://www.magzter.com/IN/Bennett-Coleman-and-Company-Limited/The-Times-of-India-Delhi/Newspaper/')
for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')): return soup.find('img', id=lambda s: s and 'mgd__lhd__cover' in s.split())['src']
return citem['content']
def get_article_url(self, article): def get_article_url(self, article):
rurl = article.get('guid', None) rurl = article.get('guid', None)