From 5d9d21572eb4d1830c947425a27a7ff2cc570c5c Mon Sep 17 00:00:00 2001 From: David Li Date: Sun, 28 Jul 2024 16:00:00 +0900 Subject: [PATCH 1/5] Fix import of manga epubs --- src/calibre/ebooks/metadata/epub.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 1f3141b113..612ea0ec88 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -21,6 +21,9 @@ from calibre.utils.localunzip import LocalZipFile from calibre.utils.xml_parse import safe_xml_fromstring from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace +import PIL +from PIL import Image as PILImage + class EPubException(Exception): pass @@ -36,7 +39,7 @@ class ContainerException(OCFException): class Container(dict): - def __init__(self, stream=None): + def __init__(self, stream=None, archive=None): if not stream: return container = safe_xml_fromstring(stream.read()) @@ -49,6 +52,15 @@ class Container(dict): mt, fp = rootfile.get('media-type'), rootfile.get('full-path') if not mt or not fp: raise EPubException(" element malformed") + + if archive: + try: + archive.getinfo(fp) + except KeyError: + # Some Kobo epubs have multiple rootfile entries, but only + # one exists. Ignore the ones that don't exist. 
+ continue + self[mt] = fp @@ -95,7 +107,7 @@ class OCFReader(OCF): try: with closing(self.open(OCF.CONTAINER_PATH)) as f: - self.container = Container(f) + self.container = Container(f, self.archive) except KeyError: raise EPubException("missing OCF container.xml file") self.opf_path = self.container[OPF.MIMETYPE] @@ -192,6 +204,17 @@ def render_cover(cpage, zf, reader=None): cpage = os.path.join(tdir, cpage) if not os.path.exists(cpage): return + + # In the case of manga, the first spine item may be an image + # already, so treat it as a raster cover + try: + PILImage.open(cpage) + except PIL.UnidentifiedImageError: + pass + else: + with open(cpage, "rb") as source: + return source.read() + return render_html_svg_workaround(cpage, default_log, root=tdir) From f1827e40d137ae80da59135461c1ae81fbd17cf6 Mon Sep 17 00:00:00 2001 From: David Li Date: Sun, 4 Aug 2024 17:29:02 +0900 Subject: [PATCH 2/5] Feedback --- src/calibre/ebooks/metadata/epub.py | 48 +++++++++++++++++------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 612ea0ec88..2a7e0a3db6 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -17,14 +17,11 @@ from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf from calibre.ebooks.metadata.opf import set_metadata as set_metadata_opf from calibre.ebooks.metadata.opf2 import OPF from calibre.ptempfile import TemporaryDirectory +from calibre.utils.imghdr import what as what_image_type from calibre.utils.localunzip import LocalZipFile from calibre.utils.xml_parse import safe_xml_fromstring from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace -import PIL -from PIL import Image as PILImage - - class EPubException(Exception): pass @@ -39,7 +36,7 @@ class ContainerException(OCFException): class Container(dict): - def __init__(self, stream=None, archive=None): + def __init__(self, 
stream=None, file_exists=None): if not stream: return container = safe_xml_fromstring(stream.read()) @@ -53,13 +50,10 @@ class Container(dict): if not mt or not fp: raise EPubException(" element malformed") - if archive: - try: - archive.getinfo(fp) - except KeyError: - # Some Kobo epubs have multiple rootfile entries, but only - # one exists. Ignore the ones that don't exist. - continue + if file_exists and not file_exists(fp): + # Some Kobo epubs have multiple rootfile entries, but only one + # exists. Ignore the ones that don't exist. + continue self[mt] = fp @@ -107,7 +101,7 @@ class OCFReader(OCF): try: with closing(self.open(OCF.CONTAINER_PATH)) as f: - self.container = Container(f, self.archive) + self.container = Container(f, self.exists) except KeyError: raise EPubException("missing OCF container.xml file") self.opf_path = self.container[OPF.MIMETYPE] @@ -137,6 +131,14 @@ class OCFReader(OCF): def read_bytes(self, name): return self.open(name).read() + def exists(self, path): + try: + self.open(path) + return True + except OSError: + return False + + class OCFZipReader(OCFReader): @@ -165,6 +167,13 @@ class OCFZipReader(OCFReader): def read_bytes(self, name): return self.archive.read(name) + def exists(self, path): + try: + self.archive.getinfo(path) + return True + except KeyError: + return False + def get_zip_reader(stream, root=None): try: @@ -206,12 +215,13 @@ def render_cover(cpage, zf, reader=None): return # In the case of manga, the first spine item may be an image - # already, so treat it as a raster cover - try: - PILImage.open(cpage) - except PIL.UnidentifiedImageError: - pass - else: + # already, so treat it as a raster cover. + file_format = what_image_type(cpage) + if file_format == "jpeg": + # Only JPEG is allowed since elsewhere we assume raster covers + # are JPEG. In principle we could convert other image formats + # but this is already an out-of-spec case that happens to + # arise in books from some stores. 
with open(cpage, "rb") as source: return source.read() From 64cb4b3fc6e4340bfd5bf25d1a3848bb10407b93 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 4 Aug 2024 10:14:38 +0530 Subject: [PATCH 3/5] Bump Qt to version 6.7.2 Also fix some pep8 warnings in recipes --- bypy/sources.json | 42 ++++++++++++++++++------------------ bypy/windows/util.c | 2 ++ recipes/liberation.recipe | 2 +- recipes/novaya_gazeta.recipe | 1 + recipes/paperpaper.recipe | 14 ++++++------ setup/build.py | 5 ++++- src/calibre/gui2/__init__.py | 2 +- 7 files changed, 38 insertions(+), 30 deletions(-) diff --git a/bypy/sources.json b/bypy/sources.json index e0b2c6d05c..a0cf30b02c 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -508,51 +508,51 @@ { "name": "qt-base", - "version": "6.5.3", + "version": "6.7.2", "hashes": { - "unix": "md5:9b3b8e45d0923bc6928833d185b29e40" + "unix": "sha256:c5f22a5e10fb162895ded7de0963328e7307611c688487b5d152c9ee64767599" } }, { "name": "qt-svg", "hashes": { - "unix": "md5:6e29e50c706d1ef870d232f260cf05c2" + "unix": "sha256:fb0d1286a35be3583fee34aeb5843c94719e07193bdf1d4d8b0dc14009caef01" } }, { "name": "qt-shadertools", "hashes": { - "unix": "md5:e751ba53872f74546e01277b51e0218d" + "unix": "sha256:edfa34c0ac8c00fcaa949df1d8e7a77d89dadd6386e683ce6c3e3b117e2f7cc1" } }, { "name": "qt-declarative", "hashes": { - "unix": "md5:c6f3840960803d8c1654ba48f9742814" + "unix": "sha256:4c29cba1af8c42d425d8eb6e01bad24cb80f4b983d71eef566a0542dfdb9b999" } }, { "name": "qt-imageformats", "hashes": { - "unix": "md5:47838764e104e7cb2390038989ba91ea" + "unix": "sha256:e1a1d8785fae67d16ad0a443b01d5f32663a6b68d275f1806ebab257485ce5d6" } }, { "name": "qt-webchannel", "hashes": { - "unix": "md5:d92af561c15f5af61656eabcdbaf1d27" + "unix": "sha256:ac5d96607b10e7de546eaf93bb9f65c0fd631ef9b91ef8a794e26fd57db4501c" } }, { "name": "qt-positioning", "hashes": { - "unix": "md5:c1eb22c2cdd18a8228e433e4e5d1577a" + "unix": 
"sha256:020009f0aa8f6b1042963a45b6e19f2784cc937949cdb673f3b876153e087b1c" } }, @@ -560,21 +560,21 @@ "name": "qt-wayland", "os": "linux", "hashes": { - "unix": "md5:b73feecf0fef221ed748ea7c984f45be" + "unix": "sha256:a2a057e1dd644bd44abb9990fecc194b2e25c2e0f39e81aa9fee4c1e5e2a8a5b" } }, { "name": "qt-sensors", "hashes": { - "unix": "md5:c4a48b39a3aaba6534f7fa5fbc363b0f" + "unix": "sha256:ead1dec787c56ed4dc02ed810af54ff1e1f6875cb5cc2506c74eeb9554e6bb59" } }, { "name": "qt-webengine", "hashes": { - "unix": "md5:b418c1b4e333a06609dac451c4484e23" + "unix": "sha256:c7755875594d8be382b07bf3634d44fd77012805794d8b588891709a6405ffd1" } }, @@ -1039,8 +1039,8 @@ "name": "sip", "comment": "build time dependency", "unix": { - "filename": "sip-6.7.11.tar.gz", - "hash": "sha256:f0dc3287a0b172e5664931c87847750d47e4fdcda4fe362b514af8edd655b469", + "filename": "sip-6.8.6.tar.gz", + "hash": "sha256:7fc959e48e6ec5d5af8bd026f69f5e24d08b3cb8abb342176f5ab8030cc07d7a", "urls": ["pypi"] } }, @@ -1049,8 +1049,8 @@ "name": "pyqt-builder", "comment": "build time dependency", "unix": { - "filename": "PyQt_builder-1.15.2-py3-none-any.whl", - "hash": "sha256:9e5acbe6ddffc2b8e7a619c5053d1f7da53f5db0faff8ce2b573f0e6604ad981", + "filename": "PyQt_builder-1.16.4-py3-none-any.whl", + "hash": "sha256:fe6e75bcf8426feee3eb73ae7f6aaa4e38edc2f8d670d8cf28638b21d9924eb7", "urls": ["pypi"] } }, @@ -1059,8 +1059,8 @@ "name": "pyqt-sip", "comment": "runtime sip module for PyQt", "unix": { - "filename": "PyQt6_sip-13.5.2.tar.gz", - "hash": "sha256:ebf6264b6feda01ba37d3b60a4bb87493bdb87be70f7b2a5384a7acd4902d88d", + "filename": "PyQt6_sip-13.8.0.tar.gz", + "hash": "sha256:2f74cf3d6d9cab5152bd9f49d570b2dfb87553ebb5c4919abfde27f5b9fd69d4", "urls": ["pypi"] } }, @@ -1068,8 +1068,8 @@ { "name": "pyqt", "unix": { - "filename": "PyQt6-6.5.2.tar.gz", - "hash": "sha256:1487ee7350f9ffb66d60ab4176519252c2b371762cbe8f8340fd951f63801280", + "filename": "PyQt6-6.7.1.tar.gz", + "hash": 
"sha256:3672a82ccd3a62e99ab200a13903421e2928e399fda25ced98d140313ad59cb9", "urls": ["pypi"] } }, @@ -1077,8 +1077,8 @@ { "name": "pyqt-webengine", "unix": { - "filename": "PyQt6_WebEngine-6.5.0.tar.gz", - "hash": "sha256:8ba9db56c4c181a2a2fab1673ca35e5b63dc69113f085027ddc43c710b6d6ee9", + "filename": "PyQt6_WebEngine-6.7.0.tar.gz", + "hash": "sha256:68edc7adb6d9e275f5de956881e79cca0d71fad439abeaa10d823bff5ac55001", "urls": ["pypi"] } }, diff --git a/bypy/windows/util.c b/bypy/windows/util.c index db889dd0d2..7c49e68ee4 100644 --- a/bypy/windows/util.c +++ b/bypy/windows/util.c @@ -45,6 +45,8 @@ get_install_locations(void) { get_app_dirw(); _snwprintf_s(qt_prefix_dir, MAX_PATH-1, _TRUNCATE, L"%ls\\ossl-modules", interpreter_data.executables_path); _wputenv_s(L"OPENSSL_MODULES", qt_prefix_dir); + // Have to disable sandbox or Qt WebEngine Render Process crashes as of Qt 6.6 + _wputenv_s(L"QTWEBENGINE_DISABLE_SANDBOX", "1"); _snwprintf_s(qt_prefix_dir, MAX_PATH-1, _TRUNCATE, L"%ls\\app", interpreter_data.app_dir); _wputenv_s(L"CALIBRE_QT_PREFIX", qt_prefix_dir); } diff --git a/recipes/liberation.recipe b/recipes/liberation.recipe index 3765fabaad..dd905bb940 100644 --- a/recipes/liberation.recipe +++ b/recipes/liberation.recipe @@ -86,7 +86,7 @@ class Liberation(BasicNewsRecipe): oldest_article = 1.15 remove_empty_feeds = True articles_are_obfuscated = True - timefmt = '[ %s]' % datetime.now().strftime(m_fr[datetime.now().month] '%d, %Y') + timefmt = '[ %s]' % datetime.now().strftime(m_fr[datetime.now().month] + ' %d, %Y') ignore_duplicate_articles = {'title', 'url'} key = 'ZWplZVBlaW5nZWl0YWVnaG8zd2VlbmdlZXlvaHB1' masthead_url = 'https://journal.liberation.fr/img/logo.svg' diff --git a/recipes/novaya_gazeta.recipe b/recipes/novaya_gazeta.recipe index efd2d63d67..59139cd828 100644 --- a/recipes/novaya_gazeta.recipe +++ b/recipes/novaya_gazeta.recipe @@ -3,6 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe + class NovayaGazta(BasicNewsRecipe): title = 
u'Novaya Gazeta' __author__ = 'muwa (with fixes by bugmen00t)' diff --git a/recipes/paperpaper.recipe b/recipes/paperpaper.recipe index 75dd5fc49e..1c7d045c79 100644 --- a/recipes/paperpaper.recipe +++ b/recipes/paperpaper.recipe @@ -1,14 +1,16 @@ #!/usr/bin/env python # vim:fileencoding=utf-8 -from __future__ import unicode_literals, division, absolute_import, print_function +from __future__ import absolute_import, division, print_function, unicode_literals + from calibre.web.feeds.news import BasicNewsRecipe + class PaperPaper(BasicNewsRecipe): title = '\u0411\u0443\u043C\u0430\u0433\u0430' __author__ = 'bugmen00t' - description = '\u0418\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u043F\u0440\u0435\u043A\u0440\u0430\u0441\u043D\u043E\u043C \u0433\u043E\u0440\u043E\u0434\u0435 \u0421\u0430\u043D\u043A\u0442-\u041F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433\u0435, \u0432 \u043A\u043E\u0442\u043E\u0440\u043E\u043C, \u043A\u043E\u043D\u0435\u0447\u043D\u043E, \u0434\u0430\u043B\u0435\u043A\u043E \u043D\u0435 \u0432\u0441\u0451 \u0438\u0434\u0435\u0430\u043B\u044C\u043D\u043E, \u2014 \u0438 \u043F\u043E\u044D\u0442\u043E\u043C\u0443 \u043C\u044B \u0437\u0430\u043D\u0438\u043C\u0430\u0435\u043C\u0441\u044F \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u043E\u0439, \u0447\u0442\u043E\u0431\u044B \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u044C \u0432\u043D\u0438\u043C\u0430\u043D\u0438\u0435 \u043A \u0432\u0430\u0436\u043D\u044B\u043C \u0434\u043B\u044F \u0432\u0441\u0435\u0445 \u043F\u0440\u043E\u0431\u043B\u0435\u043C\u0430\u043C \u0438 \u0432\u043B\u0438\u044F\u0442\u044C \u043D\u0430 \u0438\u0445 \u0440\u0435\u0448\u0435\u043D\u0438\u0435.' 
- publisher = '\u041A\u0438\u0440\u0438\u043B\u043B \u0410\u0440\u0442\u0451\u043C\u0435\u043D\u043A\u043E, \u0422\u0430\u0442\u044C\u044F\u043D\u0430 \u0418\u0432\u0430\u043D\u043E\u0432\u0430' + description = '\u0418\u0437\u0434\u0430\u043D\u0438\u0435 \u043E \u043F\u0440\u0435\u043A\u0440\u0430\u0441\u043D\u043E\u043C \u0433\u043E\u0440\u043E\u0434\u0435 \u0421\u0430\u043D\u043A\u0442-\u041F\u0435\u0442\u0435\u0440\u0431\u0443\u0440\u0433\u0435, \u0432 \u043A\u043E\u0442\u043E\u0440\u043E\u043C, \u043A\u043E\u043D\u0435\u0447\u043D\u043E, \u0434\u0430\u043B\u0435\u043A\u043E \u043D\u0435 \u0432\u0441\u0451 \u0438\u0434\u0435\u0430\u043B\u044C\u043D\u043E, \u2014 \u0438 \u043F\u043E\u044D\u0442\u043E\u043C\u0443 \u043C\u044B \u0437\u0430\u043D\u0438\u043C\u0430\u0435\u043C\u0441\u044F \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u0438\u043A\u043E\u0439, \u0447\u0442\u043E\u0431\u044B \u043F\u0440\u0438\u0432\u043B\u0435\u043A\u0430\u0442\u044C \u0432\u043D\u0438\u043C\u0430\u043D\u0438\u0435 \u043A \u0432\u0430\u0436\u043D\u044B\u043C \u0434\u043B\u044F \u0432\u0441\u0435\u0445 \u043F\u0440\u043E\u0431\u043B\u0435\u043C\u0430\u043C \u0438 \u0432\u043B\u0438\u044F\u0442\u044C \u043D\u0430 \u0438\u0445 \u0440\u0435\u0448\u0435\u043D\u0438\u0435.' 
# noqa + publisher = '\u041A\u0438\u0440\u0438\u043B\u043B \u0410\u0440\u0442\u0451\u043C\u0435\u043D\u043A\u043E, \u0422\u0430\u0442\u044C\u044F\u043D\u0430 \u0418\u0432\u0430\u043D\u043E\u0432\u0430' # noqa category = 'newspaper' cover_url = u'https://upload.wikimedia.org/wikipedia/commons/1/1f/Paperpaper_logo.jpg' language = 'ru' @@ -19,13 +21,13 @@ class PaperPaper(BasicNewsRecipe): max_articles_per_feed = 50 remove_tags_before = dict(name='article') - + remove_tags_after = dict(name='article') remove_tags = [ dict(name='div', attrs={'class': 'bottom-block '}), dict(name='div', attrs={'class': 'bottom-block news'}) - ] + ] feeds = [ ('\u0412\u0441\u0435 \u043C\u0430\u0442\u0435\u0440\u0438\u0430\u043B\u044B', 'https://paperpaper.ru/feed/?service'), @@ -41,4 +43,4 @@ class PaperPaper(BasicNewsRecipe): ('\u0412\u043E\u0435\u043D\u043D\u044B\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044F \u0432 \u0423\u043A\u0440\u0430\u0438\u043D\u0435', 'https://paperpaper.ru/tag/obostrenie-vokrug-ukrainy-2022/feed/'), ('\u0423\u043A\u0440\u0430\u0438\u043D\u0430', 'https://paperpaper.ru/tag/ukraine/feed/'), ('\u041D\u0430\u0443\u043A\u0430', 'https://paperpaper.ru/tag/science/feed/'), - ] \ No newline at end of file + ] diff --git a/setup/build.py b/setup/build.py index d0a6891902..4a94a4c93f 100644 --- a/setup/build.py +++ b/setup/build.py @@ -732,7 +732,10 @@ sip-file = {os.path.basename(sipf)!r} cwd = os.getcwd() try: os.chdir(os.path.join(src_dir, 'build')) - self.check_call([self.env.make] + ([] if iswindows else ['-j%d'%(os.cpu_count() or 1)])) + env = os.environ.copy() + if is_macos_universal_build: + env['ARCHS'] = 'x86_64 arm64' + self.check_call([self.env.make] + ([] if iswindows else ['-j%d'%(os.cpu_count() or 1)]), env=env) e = 'pyd' if iswindows else 'so' m = glob.glob(f'{ext.name}/{ext.name}.*{e}') if not m: diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 10adcd1b6a..2aad89decd 100644 --- a/src/calibre/gui2/__init__.py +++ 
b/src/calibre/gui2/__init__.py @@ -1475,7 +1475,7 @@ def sanitize_env_vars(): 'LD_LIBRARY_PATH':'/lib', 'OPENSSL_MODULES': '/lib/ossl-modules', } elif iswindows: - env_vars = {'OPENSSL_MODULES': None} + env_vars = {'OPENSSL_MODULES': None, 'QTWEBENGINE_DISABLE_SANDBOX': None} elif ismacos: env_vars = {k:None for k in ( 'FONTCONFIG_FILE FONTCONFIG_PATH SSL_CERT_FILE OPENSSL_ENGINES OPENSSL_MODULES').split()} From 0ce8e9af3ca01432157e4de0dce501744269389c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 4 Aug 2024 14:42:46 +0530 Subject: [PATCH 4/5] Cleanup previous PR --- src/calibre/ebooks/metadata/epub.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py index 2a7e0a3db6..ef6368a1ce 100644 --- a/src/calibre/ebooks/metadata/epub.py +++ b/src/calibre/ebooks/metadata/epub.py @@ -10,7 +10,7 @@ __copyright__ = '2008, Kovid Goyal ' import io import os import posixpath -from contextlib import closing +from contextlib import closing, suppress from calibre import CurrentDir from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf @@ -22,6 +22,7 @@ from calibre.utils.localunzip import LocalZipFile from calibre.utils.xml_parse import safe_xml_fromstring from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace + class EPubException(Exception): pass @@ -133,7 +134,7 @@ class OCFReader(OCF): def exists(self, path): try: - self.open(path) + self.open(path).close() return True except OSError: return False @@ -214,16 +215,17 @@ def render_cover(cpage, zf, reader=None): if not os.path.exists(cpage): return - # In the case of manga, the first spine item may be an image - # already, so treat it as a raster cover. - file_format = what_image_type(cpage) - if file_format == "jpeg": - # Only JPEG is allowed since elsewhere we assume raster covers - # are JPEG. 
In principle we could convert other image formats - # but this is already an out-of-spec case that happens to - # arise in books from some stores. - with open(cpage, "rb") as source: - return source.read() + with suppress(Exception): + # In the case of manga, the first spine item may be an image + # already, so treat it as a raster cover. + file_format = what_image_type(cpage) + if file_format == "jpeg": + # Only JPEG is allowed since elsewhere we assume raster covers + # are JPEG. In principle we could convert other image formats + # but this is already an out-of-spec case that happens to + # arise in books from some stores. + with open(cpage, "rb") as source: + return source.read() return render_html_svg_workaround(cpage, default_log, root=tdir) From 2e24c895bdde2042a77b818b6705566d6eabe730 Mon Sep 17 00:00:00 2001 From: LAntoine Date: Sun, 4 Aug 2024 12:00:51 +0200 Subject: [PATCH 5/5] Fix Mediapart recipe Google news feed was not working again, so I removed it and added a custom feed parser to keep the different sections of the journal even though only one RSS feed is available. 
--- recipes/mediapart.recipe | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/recipes/mediapart.recipe b/recipes/mediapart.recipe index 38ed1ad1b4..11f5e17782 100644 --- a/recipes/mediapart.recipe +++ b/recipes/mediapart.recipe @@ -82,13 +82,25 @@ class Mediapart(BasicNewsRecipe): feeds = [] sections = [ - 'france', 'international', 'economie', 'culture-idees', 'politique', 'ecologie', 'fil-dactualites' + 'france', 'international', 'economie', 'culture-et-idees', 'politique', 'ecologie', 'fil-dactualites' ] + mediapart_feed = 'https://www.mediapart.fr/articles/feed' for sec in sections: - a = 'https://news.google.com/rss/search?q=when:27h+allinurl:mediapart.fr%2Fjournal{}&hl=fr-FR&gl=FR&ceid=FR:fr' - feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F'))) - feeds.append(('Autres', a.format(''))) + feeds.append((sec.capitalize(), mediapart_feed)) + feeds.append(('Autres', mediapart_feed)) + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for feed in feeds: + feed_name = feed.title.lower() + for article in feed.articles: + if feed_name != 'autres' and feed_name not in article.url: + feed.articles.remove(article) + if feed_name == 'autres' and any(section in article.url for section in self.sections): + feed.articles.remove(article) + + return feeds def get_browser(self): # -- Handle login @@ -99,11 +111,7 @@ class Mediapart(BasicNewsRecipe): br['email'] = self.username br['password'] = self.password br.submit() - - # -- Handle Google's cookies consent page - br.open('https://news.google.com') - br.select_form(action="https://consent.google.com/save") - br.submit() + return br def default_cover(self, cover_file):