diff --git a/recipes/mediapart.recipe b/recipes/mediapart.recipe
index 38ed1ad1b4..11f5e17782 100644
--- a/recipes/mediapart.recipe
+++ b/recipes/mediapart.recipe
@@ -82,13 +82,37 @@ class Mediapart(BasicNewsRecipe):
 
     feeds = []
     sections = [
-        'france', 'international', 'economie', 'culture-idees', 'politique', 'ecologie', 'fil-dactualites'
+        'france', 'international', 'economie', 'culture-et-idees', 'politique', 'ecologie', 'fil-dactualites'
     ]
 
+    mediapart_feed = 'https://www.mediapart.fr/articles/feed'
     for sec in sections:
-        a = 'https://news.google.com/rss/search?q=when:27h+allinurl:mediapart.fr%2Fjournal{}&hl=fr-FR&gl=FR&ceid=FR:fr'
-        feeds.append((sec.capitalize(), a.format('%2F' + sec + '%2F')))
-    feeds.append(('Autres', a.format('')))
+        feeds.append((sec.capitalize(), mediapart_feed))
+    feeds.append(('Autres', mediapart_feed))
+
+    def parse_feeds(self):
+        '''
+        Keep in each section feed only the articles whose URL contains the
+        section name; 'Autres' keeps the articles matching no section.
+        '''
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            feed_name = feed.title.lower()
+            # Filter with slice assignment instead of calling remove() while
+            # iterating: removing during iteration skips the article that
+            # follows each removed one, so unrelated articles were kept.
+            if feed_name == 'autres':
+                feed.articles[:] = [
+                    article for article in feed.articles
+                    if not any(section in article.url for section in self.sections)
+                ]
+            else:
+                feed.articles[:] = [
+                    article for article in feed.articles
+                    if feed_name in article.url
+                ]
+
+        return feeds
 
     def get_browser(self):
         # -- Handle login
@@ -99,11 +123,7 @@ class Mediapart(BasicNewsRecipe):
             br['email'] = self.username
             br['password'] = self.password
             br.submit()
-
-        # -- Handle Google's cookies consent page
-        br.open('https://news.google.com')
-        br.select_form(action="https://consent.google.com/save")
-        br.submit()
+
         return br
 
     def default_cover(self, cover_file):
diff --git a/setup/build.py b/setup/build.py
index d0a6891902..4a94a4c93f 100644
--- a/setup/build.py
+++ b/setup/build.py
@@ -732,7 +732,10 @@ sip-file = {os.path.basename(sipf)!r}
         cwd = os.getcwd()
         try:
             os.chdir(os.path.join(src_dir, 'build'))
-            self.check_call([self.env.make] + ([] if iswindows else ['-j%d'%(os.cpu_count() or 1)]))
+            env = os.environ.copy()
+            if is_macos_universal_build:
+                env['ARCHS'] = 'x86_64 arm64'
+            self.check_call([self.env.make] + ([] if iswindows else ['-j%d'%(os.cpu_count() or 1)]), env=env)
             e = 'pyd' if iswindows else 'so'
             m = glob.glob(f'{ext.name}/{ext.name}.*{e}')
             if not m:
diff --git a/src/calibre/ebooks/metadata/epub.py b/src/calibre/ebooks/metadata/epub.py
index 1f3141b113..ef6368a1ce 100644
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@@ -10,13 +10,14 @@ __copyright__ = '2008, Kovid Goyal '
 import io
 import os
 import posixpath
-from contextlib import closing
+from contextlib import closing, suppress
 
 from calibre import CurrentDir
 from calibre.ebooks.metadata.opf import get_metadata as get_metadata_from_opf
 from calibre.ebooks.metadata.opf import set_metadata as set_metadata_opf
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
+from calibre.utils.imghdr import what as what_image_type
 from calibre.utils.localunzip import LocalZipFile
 from calibre.utils.xml_parse import safe_xml_fromstring
 from calibre.utils.zipfile import BadZipfile, ZipFile, safe_replace
@@ -36,7 +37,7 @@ class ContainerException(OCFException):
 
 class Container(dict):
 
-    def __init__(self, stream=None):
+    def __init__(self, stream=None, file_exists=None):
         if not stream:
             return
         container = safe_xml_fromstring(stream.read())
@@ -49,6 +50,12 @@ class Container(dict):
             mt, fp = rootfile.get('media-type'), rootfile.get('full-path')
             if not mt or not fp:
                 raise EPubException(" element malformed")
+
+            if file_exists and not file_exists(fp):
+                # Some Kobo epubs have multiple rootfile entries, but only one
+                # exists. Ignore the ones that don't exist.
+                continue
+
             self[mt] = fp
 
 
@@ -95,7 +102,7 @@ class OCFReader(OCF):
 
         try:
             with closing(self.open(OCF.CONTAINER_PATH)) as f:
-                self.container = Container(f)
+                self.container = Container(f, self.exists)
         except KeyError:
             raise EPubException("missing OCF container.xml file")
         self.opf_path = self.container[OPF.MIMETYPE]
@@ -125,6 +132,14 @@ class OCFReader(OCF):
     def read_bytes(self, name):
         return self.open(name).read()
 
+    def exists(self, path):
+        try:
+            self.open(path).close()
+            return True
+        except OSError:
+            return False
+
+
 
 class OCFZipReader(OCFReader):
 
@@ -153,6 +168,13 @@ class OCFZipReader(OCFReader):
     def read_bytes(self, name):
         return self.archive.read(name)
 
+    def exists(self, path):
+        try:
+            self.archive.getinfo(path)
+            return True
+        except KeyError:
+            return False
+
 
 def get_zip_reader(stream, root=None):
     try:
@@ -192,6 +214,19 @@ def render_cover(cpage, zf, reader=None):
         cpage = os.path.join(tdir, cpage)
         if not os.path.exists(cpage):
             return
+
+        with suppress(Exception):
+            # In the case of manga, the first spine item may be an image
+            # already, so treat it as a raster cover.
+            file_format = what_image_type(cpage)
+            if file_format == "jpeg":
+                # Only JPEG is allowed since elsewhere we assume raster covers
+                # are JPEG. In principle we could convert other image formats
+                # but this is already an out-of-spec case that happens to
+                # arise in books from some stores.
+                with open(cpage, "rb") as source:
+                    return source.read()
+
         return render_html_svg_workaround(cpage, default_log, root=tdir)
 
 