Update Ancient Egypt Magazine

Added support for selecting the image resolution via a new 'res' recipe option: article image URLs are rewritten to request the specified width (default 600), while the cover image is always requested at a width of 600. Enhanced the tag filtering logic to keep and remove additional classes, improving content extraction accuracy.
2025-07-31 14:33:54 -04:00 · 2025-07-11 18:18:18 +05:30 · 2025-07-11 18:18:18 +05:30 · 8acb5ddb37
commit 8acb5ddb37
parent e21256c483
1 changed file with 21 additions and 7 deletions
--- a/recipes/ancient_egypt.recipe
+++ b/recipes/ancient_egypt.recipe
@@ -25,24 +25,33 @@ class ancientegypt(BasicNewsRecipe):
    simultaneous_downloads = 1
    extra_css = '''
-        [class^="meta"] { font-size:small; }
+        [class^="meta"], [class~="__author__text"], [class~="__date"] { font-size:small; }
        .post-subtitle { font-style: italic; color:#202020; }
        .wp-block-image { font-size:small; text-align:center; }
    '''
    keep_only_tags = [
-        dict(attrs={'class':lambda x: x and '__header' in x}),
+        dict(attrs={'class': lambda x: x and any(tag in x for tag in [
-        dict(attrs={'class':lambda x: x and '__background' in x}),
+            '__image', '__header', '__background', 
-        dict(attrs={'class':lambda x: x and '__body_area' in x}),
+            '__body_area', '__author__text', '__date'
        ])})
    ]
    remove_tags = [
        dict(attrs={'class':'ad-break'}),
-        dict(attrs={'class':lambda x: x and 'avatar' in x.split()}),
+        dict(attrs={'class': lambda x: x and any(cls in x.split()
        for cls in ['avatar', 'what-mag-row'])}),
        dict(attrs={'class':lambda x: x and '--share' in x})
    ]
    def preprocess_html(self, soup):
        for img in soup.findAll('img', attrs={'src': True}):
            if '?w=' in img['src']:
                res = '?w=600'
                w = self.recipe_specific_options.get('res')
                if w and isinstance(w, str):
                    res = '?w=' + w
                img['src'] = img['src'].split('?')[0] + res
        exp = soup.find(attrs={'class':lambda x: x and 'post-subtitle' in x.split()})
        if exp:
            exp.name = 'p'
@@ -52,7 +61,12 @@ class ancientegypt(BasicNewsRecipe):
        'issue': {
            'short': 'Enter the Issue Number you want to download ',
            'long': 'For example, 136'
-        }
+        },
        'res': {
            'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
            'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
            'default': '600',
        },
    }
    def parse_index(self):
@@ -76,7 +90,7 @@ class ancientegypt(BasicNewsRecipe):
            self.description = self.tag_to_string(edit.findParent('div'))
        cov = issue.find('figure', attrs={'class':lambda x: x and 'wp-block-image' in x.split()})
        if cov:
-            self.cover_url = cov.img['src']
+            self.cover_url = cov.img['src'].split('?')[0] + '?w=600'
        div = issue.find('div', attrs={'class':lambda x: x and 'entry-content' in x.split()})
        feeds = []