# recipes/bookforummagazine.recipe
# Taken from patch f83db42a8c4ce2a7e2677ff02b8f6fd3a78f2051 (ping, 2023-10-24):
# "New literature-related recipes (Bookforum, Kirkus Reviews, Poetry Magazine)".
from urllib.parse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe

# When non-empty, parse_index() loads the table of contents from this URL
# instead of https://www.bookforum.com/print.
_issue_url = ""


class BookforumMagazine(BasicNewsRecipe):
    """Recipe that builds a Bookforum issue from its table-of-contents page."""

    title = "Bookforum"
    description = (
        "Bookforum is an American book review magazine devoted to books and "
        "the discussion of literature. https://www.bookforum.com/print"
    )
    language = "en"
    __author__ = "ping"
    publication_type = "magazine"
    encoding = "utf-8"
    remove_javascript = True
    no_stylesheets = True
    auto_cleanup = False
    compress_news_images = True
    compress_news_images_auto_size = 8

    keep_only_tags = [dict(class_="blog-article")]
    remove_tags = [dict(name=["af-share-toggle", "af-related-articles"])]

    extra_css = """
    .blog-article__header { font-size: 1.8rem; margin-bottom: 0.4rem; }
    .blog-article__subtitle { font-size: 1.2rem; font-style: italic; margin-bottom: 1rem; }
    .blog-article__writer { font-size: 1rem; font-weight: bold; color: #444; }
    .blog-article__book-info { margin: 1rem 0; }
    .article-image-container img, .blog-article__publication-media img {
        display: block; max-width: 100%; height: auto;
    }
    .blog-article__caption { font-size: 0.8rem; display: block; margin-top: 0.2rem; }
    """

    def preprocess_html(self, soup):
        """Unwrap the links inside the article header and return the soup.

        The anchor tags are removed but their contents are kept in place.
        """
        # strip away links that aren't needed
        for ele in soup.select(".blog-article__header a"):
            ele.unwrap()
        return soup

    def parse_index(self):
        """Build the issue index from the table-of-contents page.

        Loads ``_issue_url`` when it is set, otherwise the ``/print`` page.
        As side effects, sets ``self.timefmt`` from the page's ``og:title``
        meta tag and ``self.cover_url`` from the issue cover image, when
        those elements are present.

        Returns:
            A list of ``(section_name, articles)`` tuples, where ``articles``
            is a list of dicts with ``title``, ``url`` and ``description``
            keys. Sections appear in page order.
        """
        base_url = "https://www.bookforum.com"
        soup = self.index_to_soup(
            _issue_url if _issue_url else "https://www.bookforum.com/print"
        )

        meta_ele = soup.find("meta", property="og:title")
        if meta_ele and meta_ele.get("content"):
            self.timefmt = f' [{meta_ele["content"]}]'

        # Reuse the element that was already located rather than searching
        # the document a second time.
        cover_ele = soup.find("img", class_="toc-issue__cover")
        if cover_ele and cover_ele.get("src"):
            self.cover_url = urljoin(base_url, cover_ele["src"])

        articles = {}
        for sect_ele in soup.find_all("div", class_="toc-articles__section"):
            section_name = self.tag_to_string(
                sect_ele.find("a", class_="toc__anchor-links__link")
            )
            for article_ele in sect_ele.find_all("article"):
                title = self.tag_to_string(article_ele.find("h1"))
                link_ele = article_ele.find("a", class_="toc-article__link")
                if not link_ele or not link_ele.get("href"):
                    # One malformed TOC entry should not abort the whole issue.
                    self.log("Skipping article without a link:", title)
                    continue
                sub_title_ele = article_ele.find(class_="toc-article__subtitle")
                articles.setdefault(section_name, []).append(
                    {
                        "title": title,
                        # hrefs may be site-relative; urljoin leaves absolute
                        # URLs untouched, matching how the cover is resolved.
                        "url": urljoin(base_url, link_ele["href"]),
                        "description": self.tag_to_string(sub_title_ele)
                        if sub_title_ele
                        else "",
                    }
                )
        return list(articles.items())
+ ) + for link in li.find_all("a", attrs={"class": "c-txt_abstract"}): + self.log("Found article:", self.tag_to_string(link)) + sectioned_feeds[tab_title].append( + { + "title": self.tag_to_string(link), + "url": link["href"], + "author": author, + "description": author, + } + ) + + return sectioned_feeds.items()