From 2025f05a1bcb2044d4cb411ef033d581fed3ee6e Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 27 Jul 2024 13:21:43 +0530 Subject: [PATCH] Update hbr.recipe --- recipes/hbr.recipe | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/recipes/hbr.recipe b/recipes/hbr.recipe index 8f3081c48b..e3a441d814 100644 --- a/recipes/hbr.recipe +++ b/recipes/hbr.recipe @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 import json import re from collections import OrderedDict @@ -7,8 +9,6 @@ from calibre import browser, random_user_agent from calibre.web.feeds.news import BasicNewsRecipe, classes from mechanize import Request -_issue_url = "" # custom issue url - class HBR(BasicNewsRecipe): title = "Harvard Business Review" @@ -129,15 +129,23 @@ class HBR(BasicNewsRecipe): content_ele.append(new_soup.body) return str(soup) + recipe_specific_options = { + 'issue': { + 'short': 'Enter the Issue Number you want to download ', + 'long': 'For example, 2403' + } + } + def parse_index(self): - if not _issue_url: + d = self.recipe_specific_options.get('issue') + if not (d and isinstance(d, str)): soup = self.index_to_soup(f"{self.base_url}/magazine") a = soup.find("a", href=lambda x: x and x.startswith("/archive-toc/")) cov_url = a.find("img", attrs={"src": True})["src"] self.cover_url = urljoin(self.base_url, cov_url) issue_url = urljoin(self.base_url, a["href"]) else: - issue_url = _issue_url + issue_url = 'https://hbr.org/archive-toc/BR' + d mobj = re.search(r"archive-toc/(?P<issue>(BR)?\d+)\b", issue_url) if mobj: self.cover_url = f'https://hbr.org/resources/images/covers/{mobj.group("issue")}_500.png'