From 9136b9c1d7cbb85b8db03abf5c923ae7f49c8390 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 7 Jul 2024 09:59:40 +0530
Subject: [PATCH] Military History Magazine

---
NOTE(review): the icon's binary literal below looks truncated (it is far
shorter than the declared 339 bytes would encode) - regenerate it before
applying; confirm against the original commit.

 recipes/icons/military_history.png | Bin 0 -> 339 bytes
 recipes/military_history.recipe    | 126 +++++++++++++++++++++++++++++
 recipes/wsj_mag.recipe             |   2 +-
 3 files changed, 127 insertions(+), 1 deletion(-)
 create mode 100644 recipes/icons/military_history.png
 create mode 100644 recipes/military_history.recipe

diff --git a/recipes/icons/military_history.png b/recipes/icons/military_history.png
new file mode 100644
index 0000000000000000000000000000000000000000..26f8e2f282fce780f14d378a2067c942ad628245
GIT binary patch
literal 339
zcmV-Z0j&OsP)e#DuRzlrJxenDN#W1rPT@|6XJFgVdL$gviXrjqR))frVg#?tCfwVYs?>>}(9Q9>ezGy^cYc+*|}i?;3jx2!IKq?e8*ou^iW8vrN@vS)TmH
l6J{c2-m&h`|Hl2!e*yW52eBj&($fF{002ovPDHLkV1m9ipJxC7

literal 0
HcmV?d00001

diff --git a/recipes/military_history.recipe b/recipes/military_history.recipe
new file mode 100644
index 0000000000..46882e27f9
--- /dev/null
+++ b/recipes/military_history.recipe
@@ -0,0 +1,126 @@
+'''
+https://www.military-history.org/
+'''
+from calibre import browser
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class milthist(BasicNewsRecipe):
+    # Recipe for Military History Matters magazine. The issue index and
+    # article listings are scraped from the-past.com (see parse_index).
+    title = 'Military History Matters'
+    language = 'en'
+    __author__ = 'unkn0wn'
+    description = (
+        'Transport yourself to the thick of battle with the compelling narrative and authoritative detail in '
+        'Military History Matters – from the 20th-century to the ancient world.'
+    )
+    no_stylesheets = True
+    use_embedded_content = False
+    remove_attributes = ['style', 'height', 'width']
+    ignore_duplicate_articles = {'url'}
+    resolve_internal_links = True
+    masthead_url = 'https://i0.wp.com/www.military-history.org/wp-content/uploads/2018/08/MHMatters-masthead-web1.jpg?w=600&ssl=1'
+    simultaneous_downloads = 1
+
+    extra_css = '''
+        [class^="meta"] { font-size:small; }
+        .post-subtitle { font-style: italic; color:#202020; }
+        .wp-block-image { font-size:small; text-align:center; }
+    '''
+
+    # Keep only elements whose class attribute contains one of these fragments.
+    keep_only_tags = [
+        dict(attrs={'class':lambda x: x and '__header' in x}),
+        dict(attrs={'class':lambda x: x and '__background' in x}),
+        dict(attrs={'class':lambda x: x and '__body_area' in x}),
+    ]
+
+    remove_tags = [
+        dict(attrs={'class':'ad-break'}),
+        dict(attrs={'class':lambda x: x and 'avatar' in x.split()}),
+        dict(attrs={'class':lambda x: x and '--share' in x})
+    ]
+
+    def preprocess_html(self, soup):
+        # Rename the first post-subtitle element to <p>.
+        exp = soup.find(attrs={'class':lambda x: x and 'post-subtitle' in x.split()})
+        if exp:
+            exp.name = 'p'
+        return soup
+
+    def parse_index(self):
+        '''
+        Build the feed list for the newest issue on the magazine index page.
+
+        Returns a list of (section title, articles) tuples; each article is a
+        dict with 'title', 'description' and 'url' keys. Sections without
+        articles are omitted. May also set self.timefmt, self.description and
+        self.cover_url from what is found on the index and issue pages.
+
+        Raises ValueError when the index or issue page lacks the expected
+        markup, rather than failing with an AttributeError on None.
+        '''
+        soup = self.index_to_soup('https://the-past.com/category/magazines/mhm/')
+        art = soup.find('article', attrs={'class':lambda x: x and 'tag-magazines' in x.split()})
+        if not (art and art.h2 and art.h2.a):
+            raise ValueError('Could not find the latest issue on the magazine index page')
+        url = art.h2.a['href']
+
+        # for past editions, add url
+        # url = ''
+
+        issue = self.index_to_soup(url)
+        # The date is looked up on the index page (soup), not the issue page.
+        dt = soup.find(attrs={'class':lambda x: x and '__date' in x})
+        if dt:
+            self.timefmt = ' [' + self.tag_to_string(dt).strip() + ']'
+        edit = issue.find('h2', attrs={'id':'from-the-editor'})
+        # The editor's note is optional: without it, keep the default
+        # description and set no cover instead of crashing on None.
+        if edit:
+            para = edit.findNext('p')
+            if para:
+                self.description = self.tag_to_string(para)
+            fig = edit.findPrevious('figure')
+            if fig and fig.img:
+                self.cover_url = fig.img['src']
+        div = issue.find('div', attrs={'class':lambda x: x and 'entry-content' in x.split()})
+        if not div:
+            raise ValueError('Could not find the issue contents at ' + url)
+
+        feeds = []
+
+        # Headings and listings are paired up in document order.
+        h2 = div.findAll('h2', attrs={'class':lambda x: x and 'wp-block-heading' in x.split()})
+        lt = div.findAll(attrs={'class':'display-posts-listing'})
+        for x, y in zip(h2, lt):
+            section = self.tag_to_string(x).strip()
+            self.log(section)
+            articles = []
+            for a in y.findAll('a', href=True, attrs={'class':'title'}):
+                url = a['href']
+                title = self.tag_to_string(a).strip()
+                desc = ''
+                exp = a.findNext(attrs={'class':'excerpt'})
+                if exp:
+                    desc = self.tag_to_string(exp).strip()
+                self.log('\t', title, '\n\t', desc, '\n\t\t', url)
+                articles.append({'title': title, 'description':desc, 'url': url})
+            if articles:
+                feeds.append((section, articles))
+        return feeds
+
+    def get_browser(self, *args, **kwargs):
+        # The recipe object itself is returned as the browser; it provides
+        # open/open_novisit below.
+        return self
+
+    def clone_browser(self, *args, **kwargs):
+        return self.get_browser()
+
+    def open_novisit(self, *args, **kwargs):
+        # Every request is made with a newly created browser instance.
+        br = browser()
+        return br.open_novisit(*args, **kwargs)
+
+    open = open_novisit
diff --git a/recipes/wsj_mag.recipe b/recipes/wsj_mag.recipe
index c2f5db1b1d..29712902f1 100644
--- a/recipes/wsj_mag.recipe
+++ b/recipes/wsj_mag.recipe
@@ -111,7 +111,7 @@ class WSJ(BasicNewsRecipe):
                 date = itm['date']
                 key = itm['key']
                 manifest = itm['manifest']
-                self.title = itm['label']
+                self.title = 'WSJ. Magazine: ' + itm['label']
                 dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
                 dt = dt.strftime('%b, %Y')