From 1b91defcedd873aeee5e10663e261aa754b6b488 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 29 Sep 2024 20:21:32 +0530
Subject: [PATCH] Update tls_mag.recipe

reduce file size
---
Review notes (text in this section is not part of the commit message):

* The cover URL now appends '?w=600' rather than '?w600', so it uses the
  same key=value form as the article image URLs rewritten by the new
  preprocess_html() below. Presumably the server ignores the bare 'w600'
  key, which would leave the cover at full size; confirm against the site.
* This patch was recovered from a copy in which line breaks and runs of
  whitespace had been collapsed. Leading indentation of context lines is
  reconstructed, so check it against the tree (or apply with
  `git apply --ignore-whitespace`) before relying on it.
* The post-image blob id on the `index` line predates the cover URL change.

 recipes/tls_mag.recipe | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe
index 041df92226..c1210b4214 100644
--- a/recipes/tls_mag.recipe
+++ b/recipes/tls_mag.recipe
@@ -19,7 +19,7 @@ def get_id(url):
     return re.search('\?p=(\S+)>', str(rq.info())).group(1)
 
 
-class tls(BasicNewsRecipe):
+class TLS(BasicNewsRecipe):
     title = 'Times Literary Supplement'
     __author__ = 'unkn0wn'
     description = (
@@ -39,7 +39,7 @@ class tls(BasicNewsRecipe):
         .desc { font-style:italic; color:#202020; }
         .auth { font-size:small; }
         em, blockquote { color:#202020; }
-        .det { font-size:small; color:#202020; font-weight:bold; }
+        .det { font-size:small; color:#202020; }
     '''
 
     recipe_specific_options = {
@@ -60,7 +60,7 @@ class tls(BasicNewsRecipe):
         url = 'https://www.the-tls.co.uk/wp-json/tls/v2/contents-page/' + get_id(issue)
         raw = self.index_to_soup(url, raw=True)
         data = json.loads(raw)
-        self.cover_url = data['featuredimage']['full_image'] + '?w600'
+        self.cover_url = data['featuredimage']['full_image'].split('?')[0] + '?w=600'
         self.timefmt = ' [' + data['issuedateline']['issuedate'] + ']'
         if data['issuedateline']['issuenumber']:
             self.description = 'Issue ' + data['issuedateline']['issuenumber']
@@ -93,6 +93,11 @@ class tls(BasicNewsRecipe):
         self.log(' ', title, '\n\t', desc, '\n\t', url)
         return ({ 'title': title, 'description': desc, 'url': url })
 
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'src':True}):
+            img['src'] = img['src'].split('?')[0] + '?w=600'
+        return soup
+
     def preprocess_raw_html(self, raw, *a):
         pg = re.search(r'var tlsPageObject = ({.+)', raw).group(1)
         data = json.JSONDecoder().raw_decode(pg)[0]