This commit is contained in:
Kovid Goyal 2022-05-02 14:01:22 +05:30
parent 6eb871d823
commit 82027ad79c
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -11,8 +11,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
def classes(classes): def classes(classes):
q = frozenset(classes.split(' ')) q = frozenset(classes.split(' '))
return dict(attrs={ return dict(
'class': lambda x: x and frozenset(x.split()).intersection(q)}) attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
)
class CaravanMagazineHindi(BasicNewsRecipe): class CaravanMagazineHindi(BasicNewsRecipe):
@ -41,7 +42,11 @@ class CaravanMagazineHindi(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self, *args, **kw) br = BasicNewsRecipe.get_browser(self, *args, **kw)
if not self.username or not self.password: if not self.username or not self.password:
return br return br
data = json.dumps({'email': self.username, 'name': '', 'password': self.password}) data = json.dumps({
'email': self.username,
'name': '',
'password': self.password
})
if not isinstance(data, bytes): if not isinstance(data, bytes):
data = data.encode('utf-8') data = data.encode('utf-8')
rq = Request( rq = Request(
@ -70,16 +75,25 @@ class CaravanMagazineHindi(BasicNewsRecipe):
# find current issue cover # find current issue cover
feeds = [] feeds = []
sections = soup.find(attrs={'class': lambda x: x and 'current-magazine-issue' in x.split()}).find( sections = soup.find(
attrs={'class': lambda x: x and 'sections' in x.split()}) attrs={
for section in sections.findAll(attrs={'class': lambda x: x and 'section' in x.split()}): 'class': lambda x: x and 'current-magazine-issue' in x.split()
}
).find(attrs={'class': lambda x: x and 'sections' in x.split()})
for section in sections.findAll(
attrs={'class': lambda x: x and 'section' in x.split()}
):
a = section.find('a') a = section.find('a')
section_title = self.tag_to_string(a) section_title = self.tag_to_string(a)
self.log('\nSection:', section_title) self.log('\nSection:', section_title)
articles = [] articles = []
for article in section.findAll('article'): for article in section.findAll('article'):
details = article.find(attrs={'class': lambda x: x and 'details' in x.split()}) details = article.find(
pre = details.find(attrs={'class': lambda x: x and 'pre-heading' in x.split()}) attrs={'class': lambda x: x and 'details' in x.split()}
)
pre = details.find(
attrs={'class': lambda x: x and 'pre-heading' in x.split()}
)
if pre is not None: if pre is not None:
pre.extract() pre.extract()
a = details.find('a') a = details.find('a')
@ -98,6 +112,3 @@ class CaravanMagazineHindi(BasicNewsRecipe):
for img in div.findAll('img'): for img in div.findAll('img'):
img['src'] = div['content'] img['src'] = div['content']
return soup return soup
calibre_most_common_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'