Update Ancient Egypt Magazine

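Added a new 'res' recipe option for selecting image resolution (default 600). Article image URLs that carry a '?w=' width parameter are rewritten to request the chosen width, while the cover image URL is always requested at '?w=600'. Extended the tag filtering logic: keep_only_tags now also matches the '__image', '__author__text' and '__date' classes, and remove_tags now also drops 'what-mag-row', improving content extraction accuracy.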
This commit is contained in:
unkn0w7n 2025-07-11 18:18:18 +05:30
parent e21256c483
commit 8acb5ddb37

View File

@ -25,24 +25,33 @@ class ancientegypt(BasicNewsRecipe):
simultaneous_downloads = 1 simultaneous_downloads = 1
extra_css = ''' extra_css = '''
[class^="meta"] { font-size:small; } [class^="meta"], [class~="__author__text"], [class~="__date"] { font-size:small; }
.post-subtitle { font-style: italic; color:#202020; } .post-subtitle { font-style: italic; color:#202020; }
.wp-block-image { font-size:small; text-align:center; } .wp-block-image { font-size:small; text-align:center; }
''' '''
keep_only_tags = [ keep_only_tags = [
dict(attrs={'class':lambda x: x and '__header' in x}), dict(attrs={'class': lambda x: x and any(tag in x for tag in [
dict(attrs={'class':lambda x: x and '__background' in x}), '__image', '__header', '__background',
dict(attrs={'class':lambda x: x and '__body_area' in x}), '__body_area', '__author__text', '__date'
])})
] ]
remove_tags = [ remove_tags = [
dict(attrs={'class':'ad-break'}), dict(attrs={'class':'ad-break'}),
dict(attrs={'class':lambda x: x and 'avatar' in x.split()}), dict(attrs={'class': lambda x: x and any(cls in x.split()
for cls in ['avatar', 'what-mag-row'])}),
dict(attrs={'class':lambda x: x and '--share' in x}) dict(attrs={'class':lambda x: x and '--share' in x})
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for img in soup.findAll('img', attrs={'src': True}):
if '?w=' in img['src']:
res = '?w=600'
w = self.recipe_specific_options.get('res')
if w and isinstance(w, str):
res = '?w=' + w
img['src'] = img['src'].split('?')[0] + res
exp = soup.find(attrs={'class':lambda x: x and 'post-subtitle' in x.split()}) exp = soup.find(attrs={'class':lambda x: x and 'post-subtitle' in x.split()})
if exp: if exp:
exp.name = 'p' exp.name = 'p'
@ -52,7 +61,12 @@ class ancientegypt(BasicNewsRecipe):
'issue': { 'issue': {
'short': 'Enter the Issue Number you want to download ', 'short': 'Enter the Issue Number you want to download ',
'long': 'For example, 136' 'long': 'For example, 136'
} },
'res': {
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
'default': '600',
},
} }
def parse_index(self): def parse_index(self):
@ -76,7 +90,7 @@ class ancientegypt(BasicNewsRecipe):
self.description = self.tag_to_string(edit.findParent('div')) self.description = self.tag_to_string(edit.findParent('div'))
cov = issue.find('figure', attrs={'class':lambda x: x and 'wp-block-image' in x.split()}) cov = issue.find('figure', attrs={'class':lambda x: x and 'wp-block-image' in x.split()})
if cov: if cov:
self.cover_url = cov.img['src'] self.cover_url = cov.img['src'].split('?')[0] + '?w=600'
div = issue.find('div', attrs={'class':lambda x: x and 'entry-content' in x.split()}) div = issue.find('div', attrs={'class':lambda x: x and 'entry-content' in x.split()})
feeds = [] feeds = []