From c2c1deda1c91ed1a90ddfed725fa4c598ed6fdab Mon Sep 17 00:00:00 2001 From: rga5321 Date: Tue, 12 Aug 2025 17:57:55 +0000 Subject: [PATCH] Update todoist.recipe - recipe_specific_options - black formatting --- recipes/todoist.recipe | 432 +++++++++++++++++++++++------------------ 1 file changed, 246 insertions(+), 186 deletions(-) diff --git a/recipes/todoist.recipe b/recipes/todoist.recipe index 313f606f18..17a6724ae3 100644 --- a/recipes/todoist.recipe +++ b/recipes/todoist.recipe @@ -1,15 +1,18 @@ #!/usr/bin/env python -# vim:fileencoding=utf-8 - +# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4 from __future__ import print_function -__version__ = '0.0.2' + +__version__ = "0.0.3" """ -recipe repository and docs: https://github.com/rga5321/todoist2ebook - -0.0.2: Calibre footer with the source URL. (adapted for calibre) +0.0.3: Parameters in recipe_specific_options +0.0.2: Calibre footer with the source URL. QR points to the article URL. 0.0.1: First working version +# Calibre parameters + +Input them in command line as this example: ebook-convert Todoist.recipe output.epub --recipe-specific-option=ARCHIVE_DOWNLOADED:False --recipe-specific-option=TODOIST_PROJECT_ID:YOUR_PROJECT_ID --recipe-specific-option=TODOIST_API_KEY:YOUR_API_KEY --recipe-specific-option=URL_KEYWORD_EXCEPTIONS:jotdown,elpais.com/gastronomia + **URL_KEYWORD_EXCEPTIONS** (list of keywords such as, if the URL of the article contains any keyword, then the plugin will ignore the article) @@ -24,13 +27,25 @@ recipe repository and docs: https://github.com/rga5321/todoist2ebook """ # CONFIGURATION ########################################################### -URL_KEYWORD_EXCEPTIONS = ['XXXX','YYYYY'] -ARCHIVE_DOWNLOADED = False -TODOIST_PROJECT_ID = 'XXXXXXX' -TODOIST_API_KEY = 'YYYYYY' +import ast -SITE_PACKAGE_PATH = '' + +# Aux funcion. String to boolean +def parse_env_bool(val): + return str(val).strip().lower() in ("true", "1", "yes") + + +# Aux funcion. comma separated String to List +def parse_env_list(val): + try: + return ast.literal_eval(val) + except Exception: + return [] + + +SITE_PACKAGE_PATH = "" ############################################################################# + from calibre.web.feeds.news import BasicNewsRecipe from collections import namedtuple from os import path @@ -39,222 +54,267 @@ from urllib.parse import urlparse ############################################################################# -SITE_PACKAGE_PATH = '' +SITE_PACKAGE_PATH = "" import json import mechanize import re from datetime import datetime -__license__ = 'GPL v3' -__copyright__ = '2025, ARG' +__license__ = "GPL v3" +__copyright__ = "2025, ARG" class Todoist2ebook(BasicNewsRecipe): - __author__ = 'ARG' - description = 'prueba' - publisher = 'Todoist.com' - category = 'info, custom, Todoist' + recipe_specific_options = { + "ARCHIVE_DOWNLOADED": { + "short": "Mark as read", + "long": "Mark as read", + "default": False, + }, + "TODOIST_PROJECT_ID": {"short": "Proyect ID", "long": "Proyect ID"}, + "TODOIST_API_KEY": {"short": "API key", "long": "API KEY"}, + "URL_KEYWORD_EXCEPTIONS": { + "short": "URL keyword exceptions", + "long": 'List of keywords to ignore articles, e.g. ["example.com", "ignoreme.com"]', + "default": [], + }, + } - # User-configurable settings ----------------------------------------------- - archive_downloaded = ARCHIVE_DOWNLOADED - series_name = 'Todoist' + __author__ = "ARG" + description = "prueba" + publisher = "Todoist.com" + category = "info, custom, Todoist" + # User-configurable settings ----------------------------------------------- - todoist_project_id =TODOIST_PROJECT_ID - todoist_api_key = TODOIST_API_KEY + series_name = "Todoist" + publication_type = "magazine" + title = "Todoist" + # timefmt = '' # uncomment to remove date from the filenames, if commented then you will get something like `Todoist [Wed, 13 May 2020]` + masthead_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-logo.png" + # will make square cover; this will replace text and cover of the default + cover_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-cover.png" + # -------------------------------------------------------------------------- - publication_type = 'magazine' - title = "Todoist" - # timefmt = '' # uncomment to remove date from the filenames, if commented then you will get something like `Todoist [Wed, 13 May 2020]` - masthead_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-logo.png" - # will make square cover; this will replace text and cover of the default - cover_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-cover.png" - # -------------------------------------------------------------------------- - - # Inherited developer settings - auto_cleanup = True - no_stylesheets = True - use_embedded_content = False + # Inherited developer settings + auto_cleanup = True + no_stylesheets = True + use_embedded_content = False - # Custom developer settings - to_archive = [] + # Custom developer settings + to_archive = [] - simultaneous_downloads = 10 - - extra_css = '.touchscreen_navbar {display: none;}' - extra_css = '.calibre_navbar { visibility: hidden; }' + simultaneous_downloads = 10 - def parse_index(self): + extra_css = ".touchscreen_navbar {display: none;}" + extra_css = ".calibre_navbar { visibility: hidden; }" - articles = [] - section_dict = {} #dictionary with the domains and its articles. + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) - url = f"https://api.todoist.com/rest/v2/tasks?project_id={self.todoist_project_id}" - headers = {"Authorization": f"Bearer {self.todoist_api_key}"} - request = mechanize.Request(url, headers=headers); - - response = self.browser.open(request) - if response.code != 200: - raise Exception("No se pudieron recuperar las tareas de Todoist") - data = response.read().decode("utf-8") - tasks = json.loads(data) - articles_todoist = [] - - url_regex = re.compile(r'\[([^\]]+)\]\(\s*(https?://[^\s\)]+)\s*\)') - for task in tasks: - match = url_regex.search(task['content']) - if match: - title = match.group(1).strip() - url = match.group(2).strip() - date_added = task.get('created_at', datetime.now().isoformat()) - articles_todoist.append({ - 'title': title or url, - 'url': url, - 'date_added': date_added, - 'item_id': task['id'] - }) + # Init optional configuration parameters + self.archive_downloaded = parse_env_bool( + self.recipe_specific_options["ARCHIVE_DOWNLOADED"] + ) + self.keyword_exceptions = parse_env_list( + self.recipe_specific_options["URL_KEYWORD_EXCEPTIONS"] + ) + # Init mandatory configuration parameters + if ( + "TODOIST_PROJECT_ID" in self.recipe_specific_options + and self.recipe_specific_options["TODOIST_PROJECT_ID"] + ): + self.todoist_project_id = self.recipe_specific_options["TODOIST_PROJECT_ID"] + else: + self.abort_recipe_processing( + "TODOIST_PROJECT_ID mandatory parameter missing" + ) - if not articles_todoist: - self.abort_recipe_processing('No unread articles in the Todoist project "{}"'.format(self.todoist_project_id)) + if ( + "TODOIST_API_KEY" in self.recipe_specific_options + and self.recipe_specific_options["TODOIST_API_KEY"] + ): + self.todoist_api_key = self.recipe_specific_options["TODOIST_API_KEY"] + else: + self.abort_recipe_processing("TODOIST_API_KEY mandatory parameter missing") + + def parse_index(self): + + articles = [] + section_dict = {} # dictionary with the domains and its articles. + + url = f"https://api.todoist.com/rest/v2/tasks?project_id={self.todoist_project_id}" + headers = {"Authorization": f"Bearer {self.todoist_api_key}"} + request = mechanize.Request(url, headers=headers) + + response = self.browser.open(request) + if response.code != 200: + raise Exception("No se pudieron recuperar las tareas de Todoist") + data = response.read().decode("utf-8") + tasks = json.loads(data) + articles_todoist = [] + + url_regex = re.compile(r"\[([^\]]+)\]\(\s*(https?://[^\s\)]+)\s*\)") + for task in tasks: + match = url_regex.search(task["content"]) + if match: + title = match.group(1).strip() + url = match.group(2).strip() + date_added = task.get("created_at", datetime.now().isoformat()) + articles_todoist.append( + { + "title": title or url, + "url": url, + "date_added": date_added, + "item_id": task["id"], + } + ) + + if not articles_todoist: + self.abort_recipe_processing( + 'No unread articles in the Todoist project "{}"'.format( + self.todoist_project_id + ) + ) + else: + for item in articles_todoist: + + # If the URL contains any URL_KEYWORD_EXCEPTIONS, ignore article + if any(pattern in item["url"] for pattern in self.keyword_exceptions): + print("Ignoring article due to keyword patterns:" + item["url"]) + del item else: - for item in articles_todoist: - - # If the URL contains any URL_KEYWORD_EXCEPTIONS, ignore article - if any(pattern in item['url'] for pattern in URL_KEYWORD_EXCEPTIONS): - print("Ignoring article due to keyword patterns:" + item['url']) - del item - else: - # Extract domain from the URL - domain = urlparse(item['url']).netloc.replace('www.', '') + # Extract domain from the URL + domain = urlparse(item["url"]).netloc.replace("www.", "") - url = item['url'] + url = item["url"] - # Add the article under its domain - if domain not in section_dict: - section_dict[domain] = [item] - else: - section_dict[domain].append(item) + # Add the article under its domain + if domain not in section_dict: + section_dict[domain] = [item] + else: + section_dict[domain].append(item) - print("Adding article: " + item['url'] + " to section: " + domain) + print("Adding article: " + item["url"] + " to section: " + domain) - ############ APPEND ARTS FOR EACH DOMAIN ############# - # At this point the section_dict is completed + ############ APPEND ARTS FOR EACH DOMAIN ############# + # At this point the section_dict is completed - for section in section_dict: - arts = [] - for item in section_dict.get(section): - try: - title = item['title'] - except KeyError: - title = 'error: title' - try: - url = item['url'] - except KeyError: - url = 'error: url' - - arts.append({ - 'title': title, - 'url': url, - 'date': item['date_added']}) + for section in section_dict: + arts = [] + for item in section_dict.get(section): + try: + title = item["title"] + except KeyError: + title = "error: title" + try: + url = item["url"] + except KeyError: + url = "error: url" - if ( - self.archive_downloaded - and item['item_id'] not in self.to_archive - ): - self.to_archive.append(item['item_id'] ) + arts.append( + {"title": title, "url": url, "date": item["date_added"]} + ) + if ( + self.archive_downloaded + and item["item_id"] not in self.to_archive + ): + self.to_archive.append(item["item_id"]) - if arts: - articles.append((section, arts)) + if arts: + articles.append((section, arts)) - if not articles: - self.abort_recipe_processing('No articles in the Todoist project account %s to download' % (self.todoist_project_id)) - return articles - + if not articles: + self.abort_recipe_processing( + "No articles in the Todoist project account %s to download" + % (self.todoist_project_id) + ) + return articles - def get_browser(self, *args, **kwargs): - self.browser = BasicNewsRecipe.get_browser(self) - return self.browser + def get_browser(self, *args, **kwargs): + self.browser = BasicNewsRecipe.get_browser(self) + return self.browser - def cleanup(self): - if not self.to_archive: - return - - for task_id in self.to_archive: - url = f"https://api.todoist.com/rest/v2/tasks/{task_id}/close" - req = mechanize.Request( - url, - headers={ - "Authorization": f"Bearer {self.todoist_api_key}", - "Content-Type": "application/json" - } - ) - req.get_method = lambda: "POST" + def cleanup(self): + if not self.to_archive: + return - try: - br = mechanize.Browser() - response = br.open(req) - if response.code == 204: - print(f"Task {task_id} corectly closed.") - else: - print(f"Error while closing task {task_id}: {response.code}") - except Exception as e: - print(f"Exception while closing task {task_id}: {e}") + for task_id in self.to_archive: + url = f"https://api.todoist.com/rest/v2/tasks/{task_id}/close" + req = mechanize.Request( + url, + headers={ + "Authorization": f"Bearer {self.todoist_api_key}", + "Content-Type": "application/json", + }, + ) + req.get_method = lambda: "POST" + try: + br = mechanize.Browser() + response = br.open(req) + if response.code == 204: + print(f"Task {task_id} corectly closed.") + else: + print(f"Error while closing task {task_id}: {response.code}") + except Exception as e: + print(f"Exception while closing task {task_id}: {e}") - # TODO: This works with EPUB, but not mobi/azw3 - # BUG: https://bugs.launchpad.net/calibre/+bug/1838486 - def postprocess_book(self, oeb, opts, log): - oeb.metadata.add('series', self.series_name) + # TODO: This works with EPUB, but not mobi/azw3 + # BUG: https://bugs.launchpad.net/calibre/+bug/1838486 + def postprocess_book(self, oeb, opts, log): + oeb.metadata.add("series", self.series_name) + def _postprocess_html(self, soup, first_fetch, job_info): + title = soup.find("title").text # get title - def _postprocess_html(self, soup, first_fetch, job_info): + h1s = soup.findAll("h1") # get all h1 headers + for h1 in h1s: + if title in h1.text: + h1 = h1.clear() # clean this tag, so the h1 will be there only - title = soup.find('title').text # get title + h2s = soup.findAll("h2") # get all h2 headers + for h2 in h2s: + if title in h2.text: + h2 = h2.clear() # clean this tag, so the h1 will be there only + body = soup.find("body") + new_tag = soup.new_tag("h1") + new_tag.append(title) + body.insert(0, new_tag) - h1s = soup.findAll('h1') # get all h1 headers - for h1 in h1s: - if title in h1.text: - h1 = h1.clear() # clean this tag, so the h1 will be there only + return soup - h2s = soup.findAll('h2') # get all h2 headers - for h2 in h2s: - if title in h2.text: - h2 = h2.clear() # clean this tag, so the h1 will be there only + def default_cover(self, cover_file): + """ + Create a generic cover for recipes that don't have a cover + This override adds time to the cover + """ + try: + from calibre.ebooks import calibre_cover - body = soup.find('body') - new_tag = soup.new_tag('h1') - new_tag.append(title) - body.insert(0, new_tag) - - return soup - - def default_cover(self, cover_file): - """ - Create a generic cover for recipes that don't have a cover - This override adds time to the cover - """ - try: - from calibre.ebooks import calibre_cover - # Python 2/3 compatibility for unicode - try: - unicode_type = unicode - except NameError: - unicode_type = str - title = self.title if isinstance(self.title, unicode_type) else \ - self.title.encode('utf-8', 'replace').decode('utf-8', 'replace') - # print('>> title', title, file=sys.stderr) - date = strftime(self.timefmt) - time = strftime('%a %d %b %Y %-H:%M') - img_data = calibre_cover(title, date, time) - cover_file.write(img_data) - cover_file.flush() - except: - self.log.exception('Failed to generate default cover') - return False - return True + # Python 2/3 compatibility for unicode + try: + unicode_type = unicode + except NameError: + unicode_type = str + title = ( + self.title + if isinstance(self.title, unicode_type) + else self.title.encode("utf-8", "replace").decode("utf-8", "replace") + ) + # print('>> title', title, file=sys.stderr) + date = strftime(self.timefmt) + time = strftime("%a %d %b %Y %-H:%M") + img_data = calibre_cover(title, date, time) + cover_file.write(img_data) + cover_file.flush() + except: + self.log.exception("Failed to generate default cover") + return False + return True