mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Update Ancient Egypt Magazine
Added support for selecting image resolution via a new 'res' option and updated image URLs to use the specified resolution. Enhanced tag filtering logic to include additional classes for keeping and removing tags, improving content extraction accuracy.
This commit is contained in:
parent
e21256c483
commit
8acb5ddb37
@ -25,24 +25,33 @@ class ancientegypt(BasicNewsRecipe):
|
||||
simultaneous_downloads = 1
|
||||
|
||||
extra_css = '''
|
||||
[class^="meta"] { font-size:small; }
|
||||
[class^="meta"], [class~="__author__text"], [class~="__date"] { font-size:small; }
|
||||
.post-subtitle { font-style: italic; color:#202020; }
|
||||
.wp-block-image { font-size:small; text-align:center; }
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'class':lambda x: x and '__header' in x}),
|
||||
dict(attrs={'class':lambda x: x and '__background' in x}),
|
||||
dict(attrs={'class':lambda x: x and '__body_area' in x}),
|
||||
dict(attrs={'class': lambda x: x and any(tag in x for tag in [
|
||||
'__image', '__header', '__background',
|
||||
'__body_area', '__author__text', '__date'
|
||||
])})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(attrs={'class':'ad-break'}),
|
||||
dict(attrs={'class':lambda x: x and 'avatar' in x.split()}),
|
||||
dict(attrs={'class': lambda x: x and any(cls in x.split()
|
||||
for cls in ['avatar', 'what-mag-row'])}),
|
||||
dict(attrs={'class':lambda x: x and '--share' in x})
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for img in soup.findAll('img', attrs={'src': True}):
|
||||
if '?w=' in img['src']:
|
||||
res = '?w=600'
|
||||
w = self.recipe_specific_options.get('res')
|
||||
if w and isinstance(w, str):
|
||||
res = '?w=' + w
|
||||
img['src'] = img['src'].split('?')[0] + res
|
||||
exp = soup.find(attrs={'class':lambda x: x and 'post-subtitle' in x.split()})
|
||||
if exp:
|
||||
exp.name = 'p'
|
||||
@ -52,7 +61,12 @@ class ancientegypt(BasicNewsRecipe):
|
||||
'issue': {
|
||||
'short': 'Enter the Issue Number you want to download ',
|
||||
'long': 'For example, 136'
|
||||
}
|
||||
},
|
||||
'res': {
|
||||
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||
'default': '600',
|
||||
},
|
||||
}
|
||||
|
||||
def parse_index(self):
|
||||
@ -76,7 +90,7 @@ class ancientegypt(BasicNewsRecipe):
|
||||
self.description = self.tag_to_string(edit.findParent('div'))
|
||||
cov = issue.find('figure', attrs={'class':lambda x: x and 'wp-block-image' in x.split()})
|
||||
if cov:
|
||||
self.cover_url = cov.img['src']
|
||||
self.cover_url = cov.img['src'].split('?')[0] + '?w=600'
|
||||
div = issue.find('div', attrs={'class':lambda x: x and 'entry-content' in x.split()})
|
||||
|
||||
feeds = []
|
||||
|
Loading…
x
Reference in New Issue
Block a user