calibre/recipes/military_history.recipe
Kovid Goyal 48ed5c63f5
pep8
2025-07-12 22:35:56 +05:30

127 lines
4.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
https://www.military-history.org/
'''
from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
class milthist(BasicNewsRecipe):
    '''
    Recipe for "Military History Matters" as published on the-past.com.

    By default the first issue listed on the magazine's category page is
    downloaded. The ``issue`` recipe-specific option selects an issue by
    number instead, and ``res`` sets the width requested for article images.
    '''

    title = 'The Past: Military History Matters'
    language = 'en'
    __author__ = 'unkn0wn'
    description = (
        'Transport yourself to the thick of battle with the compelling narrative and authoritative detail in '
        'Military History Matters from the 20th-century to the ancient world.'
    )
    no_stylesheets = True
    use_embedded_content = False
    remove_attributes = ['style', 'height', 'width']
    ignore_duplicate_articles = {'url'}
    resolve_internal_links = True
    masthead_url = 'https://i0.wp.com/www.military-history.org/wp-content/uploads/2018/08/MHMatters-masthead-web1.jpg?w=600&ssl=1'
    simultaneous_downloads = 1

    extra_css = '''
        [class^="meta"], [class~="__author__text"], [class~="__date"] { font-size:small; }
        .post-subtitle { font-style: italic; color:#202020; }
        .wp-block-image { font-size:small; text-align:center; }
    '''

    # Keep any element whose class attribute contains one of these fragments.
    keep_only_tags = [
        dict(attrs={'class': lambda x: x and any(tag in x for tag in [
            '__image', '__header', '__background',
            '__body_area', '__author__text', '__date'
        ])})
    ]

    remove_tags = [
        dict(attrs={'class': 'ad-break'}),
        # Whole-word class matches.
        dict(attrs={'class': lambda x: x and any(cls in x.split()
            for cls in ['avatar', 'what-mag-row'])}),
        # Substring match: any class containing '--share'.
        dict(attrs={'class': lambda x: x and '--share' in x})
    ]

    def preprocess_html(self, soup):
        '''
        Rewrite sized image URLs to the requested width and turn the
        subtitle element into a paragraph.

        :param soup: parsed article document
        :return: the same ``soup`` object, modified in place
        '''
        # The width suffix is identical for every image, so compute it once
        # instead of once per <img>.
        res = '?w=600'
        w = self.recipe_specific_options.get('res')
        if w and isinstance(w, str):
            res = '?w=' + w

        for img in soup.findAll('img', attrs={'src': True}):
            # Only images that already carry a width query are rewritten.
            if '?w=' in img['src']:
                img['src'] = img['src'].split('?')[0] + res

        exp = soup.find(attrs={'class': lambda x: x and 'post-subtitle' in x.split()})
        if exp:
            exp.name = 'p'
        return soup

    recipe_specific_options = {
        'issue': {
            'short': 'Enter the Issue Number you want to download ',
            'long': 'For example, 136'
        },
        'res': {
            'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
            'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
            'default': '600',
        },
    }

    def parse_index(self):
        '''
        Build the feed list for a single issue of the magazine.

        The issue page is the one named by the ``issue`` option when it is
        set, otherwise the first magazine entry on the category index page.
        As side effects the recipe's title, ``timefmt``, description and
        cover URL are updated from the pages when the matching elements are
        found.

        :return: list of ``(section title, [article dict, ...])`` tuples
        '''
        issue_no = self.recipe_specific_options.get('issue')
        issue_no = issue_no.strip() if isinstance(issue_no, str) else ''

        index = None
        if issue_no:
            # An explicit issue needs nothing from the category index.
            url = 'https://the-past.com/magazines/military-history-matters-' + issue_no + '/'
        else:
            index = self.index_to_soup('https://the-past.com/category/magazines/mhm/')
            art = index.find('article', attrs={'class': lambda x: x and 'tag-magazines' in x.split()})
            link = art.h2.a if art is not None and art.h2 is not None else None
            if link is None or not link.get('href'):
                # Fail with a readable message rather than an AttributeError.
                self.abort_recipe_processing(
                    'Could not find the latest issue on the magazine index page.')
            url = link['href']

        issue = self.index_to_soup(url)

        ti = issue.find('h1', attrs={'class': lambda x: x and 'post-title' in x.split()})
        if ti:
            self.title = self.tag_to_string(ti).strip()

        # For the default (latest) issue the date is read from the index
        # page, as before. For an explicitly chosen issue the index date
        # would belong to a different issue, so the issue page is searched
        # instead. NOTE(review): assumes the first '__date' element on the
        # issue page is that issue's date - confirm against the live markup.
        date_source = index if index is not None else issue
        dt = date_source.find(attrs={'class': lambda x: x and '__date' in x})
        if dt:
            self.timefmt = ' [' + self.tag_to_string(dt).strip() + ']'

        edit = issue.find('h2', attrs={'id': 'from-the-editor'})
        if edit:
            editorial = edit.findParent('div')
            if editorial:
                self.description = self.tag_to_string(editorial)

        cov = issue.find('figure', attrs={'class': lambda x: x and 'wp-block-image' in x.split()})
        if cov and cov.img is not None and cov.img.get('src'):
            self.cover_url = cov.img['src'].split('?')[0] + '?w=600'

        div = issue.find('div', attrs={'class': lambda x: x and 'entry-content' in x.split()})
        if div is None:
            self.abort_recipe_processing(
                'Could not find the list of articles on the issue page: ' + url)

        feeds = []
        h2 = div.findAll('h2', attrs={'class': lambda x: x and 'wp-block-heading' in x.split()})
        lt = div.findAll(attrs={'class': 'display-posts-listing'})
        # Section headings and article listings are paired purely by their
        # order of appearance within the content div.
        for x, y in zip(h2, lt):
            section = self.tag_to_string(x).strip()
            self.log(section)
            articles = []
            for a in y.findAll('a', href=True, attrs={'class': 'title'}):
                url = a['href']
                title = self.tag_to_string(a).strip()
                desc = ''
                # NOTE(review): findNext looks at everything after the link,
                # so an article with no excerpt may pick up a later one's.
                exp = a.findNext(attrs={'class': 'excerpt'})
                if exp:
                    desc = self.tag_to_string(exp).strip()
                self.log('\t', title, '\n\t', desc, '\n\t\t', url)
                articles.append({'title': title, 'description': desc, 'url': url})
            if articles:
                feeds.append((section, articles))
        return feeds

    # The recipe object stands in for the browser: get_browser() and
    # clone_browser() hand back the recipe itself, and its open()/
    # open_novisit() create a fresh calibre browser for every request.

    def get_browser(self, *args, **kwargs):
        '''Return the recipe itself, which provides open()/open_novisit().'''
        return self

    def clone_browser(self, *args, **kwargs):
        '''Return the same object as get_browser(); arguments are ignored.'''
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        '''Open the request with a newly created browser and return the response.'''
        br = browser()
        return br.open_novisit(*args, **kwargs)

    # open() behaves exactly like open_novisit().
    open = open_novisit