This commit is contained in:
Kovid Goyal 2024-05-02 22:01:07 +05:30
parent 5205cc97b0
commit 253012d392
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 69 additions and 29 deletions

View File

@ -23,6 +23,37 @@
# - title by author
# }}}
{{{ 7.10.0 2024-05-03
:: new features
- Export of calibre data: Ensure individual part files in the exported data are no larger than one gigabyte even
if the library contains individual files larger than that size.
Note that this means exports created by this version of calibre, or any
later version, will not be importable by earlier versions. However, exports
from earlier versions should still be importable.
- Edit book: Spell check: Add options to exclude words that are in ALL CAPS, contain numbers, or are in camelCase/snake_case from the list of words
- Allow easily inverting the current search via the right-click menu of the search box
:: bug fixes
- [2064546] Kobo driver: Fix "database unsupported" error with the newest firmware
- [2063301] DOCX Input: Fix text elements containing only whitespace being incorrectly ignored
- Bulk metadata dialog: Do not fail when setting covers from ebook files and some of the files have invalid covers
:: improved recipes
- Economist
- The Week
- Caravan Magazine
- Financial Times
}}}
{{{ 7.9.0 2024-04-19
:: new features

View File

@ -1,7 +1,7 @@
import json
from urllib.parse import urlparse, quote
from urllib.parse import quote, urlparse
from calibre.web.feeds.news import BasicNewsRecipe, classes
from calibre.web.feeds.news import BasicNewsRecipe
from mechanize import Request
@ -26,7 +26,7 @@ def parse_body(x):
yield ''.join(parse_p(p))
yield '</p>\n'
elif x.get('type', '') in {'blockquote', 'pullquote'}:
yield '<blockquote>'
yield '<blockquote>'
for p in x.get('content', {}):
yield from parse_body(p)
yield '</blockquote>'
@ -36,7 +36,7 @@ def parse_body(x):
yield from parse_body(p)
elif x.get('type', '') in {'caption', 'credit'}:
yield '<div class="sub">'
for div in x.get('content', {}):
for div in x.get('content', {}):
yield ''.join(parse_p(div))
yield '</div>\n'
elif x.get('type', '') != '':
@ -126,7 +126,7 @@ class CaravanMagazine(BasicNewsRecipe):
# for past editions
# inp = json.dumps({"0":{"json":{"month":6,"year":2023}}})
# api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=' + quote(inp, safe='')
raw = json.loads(self.index_to_soup(api, raw=True))
if isinstance(raw, list):
data = raw[0]['result']['data']['json']
@ -168,7 +168,7 @@ class CaravanMagazine(BasicNewsRecipe):
art_id = cache_data['result']['data']['json']['articleId']
prim_data = cache_data['result']['data']['json']['data']
cat = subhead = desc = lede = auth = ''
cat = desc = lede = auth = ''
cat = '<div class="cat">' + safe_dict(prim_data, 'printTitle') + '</div>\n'
title = '<h1>' + safe_dict(prim_data, 'title') + '</h1>\n'
@ -179,8 +179,8 @@ class CaravanMagazine(BasicNewsRecipe):
authors.append(safe_dict(q, 'name'))
dt = ''
if prim_data.get('writtenAt', '') != '':
from datetime import datetime, timedelta
import time
from datetime import datetime, timedelta
dt = datetime.fromisoformat(prim_data['writtenAt'][:-1]) + timedelta(seconds=time.timezone)
dt = dt.strftime('%b %d, %Y, %I:%M %p')
auth ='<p class="auth">' + ', '.join(authors) + ' | ' + dt + '</p>\n'

View File

@ -44,15 +44,18 @@ class Volkskrant(BasicNewsRecipe):
url = self.home_url + url
title_parts = []
tag = article.find('div', {'class': 'abstract-article__tag'})
if tag: title_parts.append(self.tag_to_string(tag).upper())
if tag:
title_parts.append(self.tag_to_string(tag).upper())
title_parts.append(self.tag_to_string(article.find('div', {'class': 'abstract-article__title'})))
article_title = ' \u2022 '.join(title_parts)
pubdate=''
description_parts = []
author = article.find('div', {'class': 'abstract-article__author'})
if author: description_parts.append(self.tag_to_string(author))
if author:
description_parts.append(self.tag_to_string(author))
summary = article.find('div', {'class': 'abstract-article__content'})
if summary: description_parts.append(self.tag_to_string(summary))
if summary:
description_parts.append(self.tag_to_string(summary))
description = ' \u2022 '.join(description_parts)
return dict(
title=article_title,

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
import uuid
from mechanize import Request
from contextlib import closing
import json
import uuid
from contextlib import closing
from calibre.web.feeds.recipes import BasicNewsRecipe
from mechanize import Request
class Parool(BasicNewsRecipe):
title = 'Het Parool'
@ -18,7 +20,8 @@ class Parool(BasicNewsRecipe):
remove_tags_before = dict(id='main-content')
remove_tags_after = dict(id='main-content')
remove_tags = [
dict(attrs={'class':['article-footer__sharing', 'artstyle__editorial-tips', 'artstyle__advertisement','artstyle__container__icon','artstyle__disabled-embed','container__title__icon',]}),
dict(attrs={'class':['article-footer__sharing', 'artstyle__editorial-tips', 'artstyle__advertisement',
'artstyle__container__icon','artstyle__disabled-embed','container__title__icon',]}),
dict(attrs={'data-element-id': ['article-element-authors']}),
dict(name=['script', 'noscript', 'style']),
]
@ -26,7 +29,7 @@ class Parool(BasicNewsRecipe):
encoding = 'utf-8'
no_stylesheets = True
ignore_duplicate_articles = {'url'}
def parse_index(self):
soup = self.index_to_soup('https://www.parool.nl/privacy-wall/accept?redirectUri=%2Feditie%2Fvandaag%2F&authId=' + str(uuid.uuid4()))
containers = soup.findAll('section', attrs={'class': 'section--horizontal'})
@ -34,7 +37,7 @@ class Parool(BasicNewsRecipe):
for container in containers:
section_title = self.tag_to_string(container.find('h2')).strip()
articles = []
for art in container.findAll('article'):
a = art.find('a')
url = a['href']
@ -60,7 +63,7 @@ class Parool(BasicNewsRecipe):
articles.append(dict(title=article_title,
url=url,
content=''))
sections.append((section_title, articles))
return sections
@ -82,7 +85,7 @@ class Parool(BasicNewsRecipe):
for node in soup.find('figure').find_next_siblings():
node.extract()
return soup
def get_cover_url(self):
headers = {
'X-Requested-With': 'XMLHttpRequest',
@ -93,4 +96,4 @@ class Parool(BasicNewsRecipe):
with closing(self.browser.open(Request(url, None, headers))) as r:
folders = json.loads(r.read())
return folders["objects"][0]["teaser_medium"]
return None
return None

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
class Volkskrant(BasicNewsRecipe):
title = 'Revista 22'
__author__ = 'Cristi Ghera'
@ -71,4 +72,4 @@ class Volkskrant(BasicNewsRecipe):
)
sections = [('Numărul curent', articles)]
return sections
return sections

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
import uuid
from mechanize import Request
from contextlib import closing
import json
import uuid
from contextlib import closing
from calibre.web.feeds.recipes import BasicNewsRecipe
from mechanize import Request
class Volkskrant(BasicNewsRecipe):
title = 'Volkskrant'
@ -96,17 +98,17 @@ class Volkskrant(BasicNewsRecipe):
if tag.name == 'img':
if tag['src'][0] == '/':
tag['src'] = 'https://www.volkskrant.nl' + tag['src']
for tag in soup():
if tag.name == "picture":
tag.replaceWith(tag.find("img"))
comic_articles = { "Bas van der Schot", "Poldermodellen", "Gummbah", "Sigmund" }
if self.tag_to_string(soup.find('h1')).strip() in comic_articles:
for node in soup.find('figure').find_next_siblings():
node.extract()
return soup
def get_cover_url(self):
headers = {
'X-Requested-With': 'XMLHttpRequest',

View File

@ -11,7 +11,7 @@ from functools import lru_cache
from polyglot.builtins import environ_item, hasenv
__appname__ = 'calibre'
numeric_version = (7, 9, 100)
numeric_version = (7, 10, 0)
__version__ = '.'.join(map(str, numeric_version))
git_version = None
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"