From 253012d392218183943901e96819ec95e9d2dc68 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
' + yield '' for p in x.get('content', {}): yield from parse_body(p) yield '' @@ -36,7 +36,7 @@ def parse_body(x): yield from parse_body(p) elif x.get('type', '') in {'caption', 'credit'}: yield '' - for div in x.get('content', {}): + for div in x.get('content', {}): yield ''.join(parse_p(div)) yield '\n' elif x.get('type', '') != '': @@ -126,7 +126,7 @@ class CaravanMagazine(BasicNewsRecipe): # for past editions # inp = json.dumps({"0":{"json":{"month":6,"year":2023}}}) # api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='') - + raw = json.loads(self.index_to_soup(api, raw=True)) if isinstance(raw, list): data = raw[0]['result']['data']['json'] @@ -168,7 +168,7 @@ class CaravanMagazine(BasicNewsRecipe): art_id = cache_data['result']['data']['json']['articleId'] prim_data = cache_data['result']['data']['json']['data'] - cat = subhead = desc = lede = auth = '' + cat = desc = lede = auth = '' cat = '' + safe_dict(prim_data, 'printTitle') + '\n' title = '' + safe_dict(prim_data, 'title') + '
\n' @@ -179,8 +179,8 @@ class CaravanMagazine(BasicNewsRecipe): authors.append(safe_dict(q, 'name')) dt = '' if prim_data.get('writtenAt', '') != '': - from datetime import datetime, timedelta import time + from datetime import datetime, timedelta dt = datetime.fromisoformat(prim_data['writtenAt'][:-1]) + timedelta(seconds=time.timezone) dt = dt.strftime('%b %d, %Y, %I:%M %p') auth ='' + ', '.join(authors) + ' | ' + dt + '
\n' diff --git a/recipes/internazionale.recipe b/recipes/internazionale.recipe index eb57139733..d2c1a0bbfc 100644 --- a/recipes/internazionale.recipe +++ b/recipes/internazionale.recipe @@ -44,15 +44,18 @@ class Volkskrant(BasicNewsRecipe): url = self.home_url + url title_parts = [] tag = article.find('div', {'class': 'abstract-article__tag'}) - if tag: title_parts.append(self.tag_to_string(tag).upper()) + if tag: + title_parts.append(self.tag_to_string(tag).upper()) title_parts.append(self.tag_to_string(article.find('div', {'class': 'abstract-article__title'}))) article_title = ' \u2022 '.join(title_parts) pubdate='' description_parts = [] author = article.find('div', {'class': 'abstract-article__author'}) - if author: description_parts.append(self.tag_to_string(author)) + if author: + description_parts.append(self.tag_to_string(author)) summary = article.find('div', {'class': 'abstract-article__content'}) - if summary: description_parts.append(self.tag_to_string(summary)) + if summary: + description_parts.append(self.tag_to_string(summary)) description = ' \u2022 '.join(description_parts) return dict( title=article_title, diff --git a/recipes/parool.recipe b/recipes/parool.recipe index 3a6bfa408a..4f19e35d39 100644 --- a/recipes/parool.recipe +++ b/recipes/parool.recipe @@ -1,9 +1,11 @@ #!/usr/bin/env python -from calibre.web.feeds.recipes import BasicNewsRecipe -import uuid -from mechanize import Request -from contextlib import closing import json +import uuid +from contextlib import closing + +from calibre.web.feeds.recipes import BasicNewsRecipe +from mechanize import Request + class Parool(BasicNewsRecipe): title = 'Het Parool' @@ -18,7 +20,8 @@ class Parool(BasicNewsRecipe): remove_tags_before = dict(id='main-content') remove_tags_after = dict(id='main-content') remove_tags = [ - dict(attrs={'class':['article-footer__sharing', 'artstyle__editorial-tips', 'artstyle__advertisement','artstyle__container__icon','artstyle__disabled-embed','container__title__icon',]}), + dict(attrs={'class':['article-footer__sharing', 'artstyle__editorial-tips', 'artstyle__advertisement', + 'artstyle__container__icon','artstyle__disabled-embed','container__title__icon',]}), dict(attrs={'data-element-id': ['article-element-authors']}), dict(name=['script', 'noscript', 'style']), ] @@ -26,7 +29,7 @@ class Parool(BasicNewsRecipe): encoding = 'utf-8' no_stylesheets = True ignore_duplicate_articles = {'url'} - + def parse_index(self): soup = self.index_to_soup('https://www.parool.nl/privacy-wall/accept?redirectUri=%2Feditie%2Fvandaag%2F&authId=' + str(uuid.uuid4())) containers = soup.findAll('section', attrs={'class': 'section--horizontal'}) @@ -34,7 +37,7 @@ class Parool(BasicNewsRecipe): for container in containers: section_title = self.tag_to_string(container.find('h2')).strip() articles = [] - + for art in container.findAll('article'): a = art.find('a') url = a['href'] @@ -60,7 +63,7 @@ class Parool(BasicNewsRecipe): articles.append(dict(title=article_title, url=url, content='')) - + sections.append((section_title, articles)) return sections @@ -82,7 +85,7 @@ class Parool(BasicNewsRecipe): for node in soup.find('figure').find_next_siblings(): node.extract() return soup - + def get_cover_url(self): headers = { 'X-Requested-With': 'XMLHttpRequest', @@ -93,4 +96,4 @@ class Parool(BasicNewsRecipe): with closing(self.browser.open(Request(url, None, headers))) as r: folders = json.loads(r.read()) return folders["objects"][0]["teaser_medium"] - return None \ No newline at end of file + return None diff --git a/recipes/revista22.recipe b/recipes/revista22.recipe index 6f91d2bc00..7d2a55b2f1 100644 --- a/recipes/revista22.recipe +++ b/recipes/revista22.recipe @@ -1,6 +1,7 @@ #!/usr/bin/env python from calibre.web.feeds.recipes import BasicNewsRecipe + class Volkskrant(BasicNewsRecipe): title = 'Revista 22' __author__ = 'Cristi Ghera' @@ -71,4 +72,4 @@ class Volkskrant(BasicNewsRecipe): ) sections = [('Numărul curent', articles)] - return sections \ No newline at end of file + return sections diff --git a/recipes/volksrant.recipe b/recipes/volksrant.recipe index 6c80b890bd..da3b850ec4 100644 --- a/recipes/volksrant.recipe +++ b/recipes/volksrant.recipe @@ -1,9 +1,11 @@ #!/usr/bin/env python -from calibre.web.feeds.recipes import BasicNewsRecipe -import uuid -from mechanize import Request -from contextlib import closing import json +import uuid +from contextlib import closing + +from calibre.web.feeds.recipes import BasicNewsRecipe +from mechanize import Request + class Volkskrant(BasicNewsRecipe): title = 'Volkskrant' @@ -96,17 +98,17 @@ class Volkskrant(BasicNewsRecipe): if tag.name == 'img': if tag['src'][0] == '/': tag['src'] = 'https://www.volkskrant.nl' + tag['src'] - + for tag in soup(): if tag.name == "picture": tag.replaceWith(tag.find("img")) - + comic_articles = { "Bas van der Schot", "Poldermodellen", "Gummbah", "Sigmund" } if self.tag_to_string(soup.find('h1')).strip() in comic_articles: for node in soup.find('figure').find_next_siblings(): node.extract() return soup - + def get_cover_url(self): headers = { 'X-Requested-With': 'XMLHttpRequest', diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 12fd9112cf..25f8a8df9b 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -11,7 +11,7 @@ from functools import lru_cache from polyglot.builtins import environ_item, hasenv __appname__ = 'calibre' -numeric_version = (7, 9, 100) +numeric_version = (7, 10, 0) __version__ = '.'.join(map(str, numeric_version)) git_version = None __author__ = "Kovid Goyal"