mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Update hbr.recipe
This commit is contained in:
parent
3fe8bfd89a
commit
2025f05a1b
@ -1,3 +1,5 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
@ -7,8 +9,6 @@ from calibre import browser, random_user_agent
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||||||
from mechanize import Request
|
from mechanize import Request
|
||||||
|
|
||||||
_issue_url = "" # custom issue url
|
|
||||||
|
|
||||||
|
|
||||||
class HBR(BasicNewsRecipe):
|
class HBR(BasicNewsRecipe):
|
||||||
title = "Harvard Business Review"
|
title = "Harvard Business Review"
|
||||||
@ -129,15 +129,23 @@ class HBR(BasicNewsRecipe):
|
|||||||
content_ele.append(new_soup.body)
|
content_ele.append(new_soup.body)
|
||||||
return str(soup)
|
return str(soup)
|
||||||
|
|
||||||
|
recipe_specific_options = {
|
||||||
|
'issue': {
|
||||||
|
'short': 'Enter the Issue Number you want to download ',
|
||||||
|
'long': 'For example, 2403'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
if not _issue_url:
|
d = self.recipe_specific_options.get('issue')
|
||||||
|
if not (d and isinstance(d, str)):
|
||||||
soup = self.index_to_soup(f"{self.base_url}/magazine")
|
soup = self.index_to_soup(f"{self.base_url}/magazine")
|
||||||
a = soup.find("a", href=lambda x: x and x.startswith("/archive-toc/"))
|
a = soup.find("a", href=lambda x: x and x.startswith("/archive-toc/"))
|
||||||
cov_url = a.find("img", attrs={"src": True})["src"]
|
cov_url = a.find("img", attrs={"src": True})["src"]
|
||||||
self.cover_url = urljoin(self.base_url, cov_url)
|
self.cover_url = urljoin(self.base_url, cov_url)
|
||||||
issue_url = urljoin(self.base_url, a["href"])
|
issue_url = urljoin(self.base_url, a["href"])
|
||||||
else:
|
else:
|
||||||
issue_url = _issue_url
|
issue_url = 'https://hbr.org/archive-toc/BR' + d
|
||||||
mobj = re.search(r"archive-toc/(?P<issue>(BR)?\d+)\b", issue_url)
|
mobj = re.search(r"archive-toc/(?P<issue>(BR)?\d+)\b", issue_url)
|
||||||
if mobj:
|
if mobj:
|
||||||
self.cover_url = f'https://hbr.org/resources/images/covers/{mobj.group("issue")}_500.png'
|
self.cover_url = f'https://hbr.org/resources/images/covers/{mobj.group("issue")}_500.png'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user