From aae88fa07eb1b8336c3443607c37f28bd0ee3fe2 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Tue, 27 Feb 2024 23:12:27 +0530
Subject: [PATCH] ...

---
 recipes/new_yorker.recipe | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/recipes/new_yorker.recipe b/recipes/new_yorker.recipe
index a4eff56522..bfe5c68d00 100644
--- a/recipes/new_yorker.recipe
+++ b/recipes/new_yorker.recipe
@@ -34,6 +34,7 @@ class NewYorker(BasicNewsRecipe):
     timefmt = ' [%b %d]'
     encoding = 'utf-8'
     extra_css = '''
+        img { display:block; margin:0 auto; }
         .byline { font-size:smaller; font-weight: bold;}
         h3 { margin-bottom: 6px; }
         .caption { font-size: smaller; font-style: italic; font-weight: normal; }
@@ -72,6 +73,29 @@ class NewYorker(BasicNewsRecipe):
             self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold')
 
     def preprocess_html(self, soup):
+        # Point every <img> at one fixed-width URL taken from its own srcset or,
+        # failing that, from the preceding <source>; then drop all <source> tags.
+        w = '/w_320' # use '/w_640' for highres
+        for img in soup.findAll('img'):
+            if img.has_attr('srcset'):
+                for x in img['srcset'].split():
+                    if w in x:
+                        img['src'] = x
+                continue
+            # No srcset on the <img> itself: use the preceding sibling <source>.
+            source = img.find_previous_sibling('source', attrs={'srcset':True})
+            if source is None:
+                continue
+            urls = source['srcset'].split()
+            # Prefer the requested width; fall back to /w_560 only when it is
+            # absent, so the fallback never overwrites the preferred rendition.
+            matches = [x for x in urls if w in x]
+            if not matches:
+                matches = [x for x in urls if '/w_560' in x]
+            if matches:
+                img['src'] = matches[-1]
+        for src in soup.findAll('source'):
+            src.decompose()
         for noscript in soup.findAll('noscript'):
             noscript.name = 'div'
         return soup