Update todoist.recipe

- recipe_specific_options
- black formatting
This commit is contained in:
rga5321 2025-08-12 17:57:55 +00:00 committed by GitHub
parent f2bc31d77f
commit c2c1deda1c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,15 +1,18 @@
#!/usr/bin/env python #!/usr/bin/env python
# vim:fileencoding=utf-8 # vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4
from __future__ import print_function from __future__ import print_function
__version__ = '0.0.2'
__version__ = "0.0.3"
""" """
recipe repository and docs: https://github.com/rga5321/todoist2ebook 0.0.3: Parameters in recipe_specific_options
0.0.2: Calibre footer with the source URL. QR points to the article URL.
0.0.2: Calibre footer with the source URL. (adapted for calibre)
0.0.1: First working version 0.0.1: First working version
# Calibre parameters
Input them in command line as this example: ebook-convert Todoist.recipe output.epub --recipe-specific-option=ARCHIVE_DOWNLOADED:False --recipe-specific-option=TODOIST_PROJECT_ID:YOUR_PROJECT_ID --recipe-specific-option=TODOIST_API_KEY:YOUR_API_KEY --recipe-specific-option=URL_KEYWORD_EXCEPTIONS:jotdown,elpais.com/gastronomia
**URL_KEYWORD_EXCEPTIONS** (list of keywords such as, if the URL of the article contains any keyword, then the plugin will ignore the article) **URL_KEYWORD_EXCEPTIONS** (list of keywords such as, if the URL of the article contains any keyword, then the plugin will ignore the article)
@ -24,13 +27,25 @@ recipe repository and docs: https://github.com/rga5321/todoist2ebook
""" """
# CONFIGURATION ########################################################### # CONFIGURATION ###########################################################
URL_KEYWORD_EXCEPTIONS = ['XXXX','YYYYY'] import ast
ARCHIVE_DOWNLOADED = False
TODOIST_PROJECT_ID = 'XXXXXXX'
TODOIST_API_KEY = 'YYYYYY'
SITE_PACKAGE_PATH = ''
def parse_env_bool(val):
    """Interpret *val* as a boolean recipe option.

    Any value is first coerced to a string, trimmed, and lower-cased;
    only "true", "1" and "yes" count as True. Real booleans therefore
    round-trip correctly (True -> "true" -> True).
    """
    normalized = str(val).strip().lower()
    return normalized in ("true", "1", "yes")
def parse_env_list(val):
    """Parse a recipe option that should be a list of strings.

    Accepts, in order of preference:
      * an actual list/tuple (the option's default, e.g. []),
      * a Python literal such as '["a", "b"]' (via ast.literal_eval),
      * a bare comma-separated string such as "jotdown,elpais.com/gastronomia"
        — the form shown in the module docs, which ast.literal_eval alone
        rejects (the original code silently returned [] for it).

    Always returns a list; unparseable/empty input yields [].
    """
    if val is None:
        return []
    if isinstance(val, (list, tuple)):
        return list(val)
    text = str(val).strip()
    if not text:
        return []
    try:
        parsed = ast.literal_eval(text)
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        if isinstance(parsed, str):
            return [parsed]
        # Any other literal (int, dict, ...) is not a keyword list;
        # fall through to the comma-separated interpretation.
    except (ValueError, SyntaxError):
        pass  # not a Python literal -> treat as comma-separated keywords
    return [part.strip() for part in text.split(",") if part.strip()]
# NOTE(review): the original file assigned SITE_PACKAGE_PATH = "" twice
# (before and after the calibre imports); the redundant second assignment
# is dropped here.
SITE_PACKAGE_PATH = ""
#############################################################################
from calibre.web.feeds.news import BasicNewsRecipe
from collections import namedtuple
from os import path
from urllib.parse import urlparse

import json
import mechanize
import re
from datetime import datetime

__license__ = "GPL v3"
__copyright__ = "2025, ARG"
class Todoist2ebook(BasicNewsRecipe):
    """Calibre news recipe that builds an ebook from the articles saved as
    tasks (markdown links) in a Todoist project, sectioned by domain.

    Configuration comes from calibre's ``recipe_specific_options``
    (``--recipe-specific-option=NAME:VALUE`` on the ebook-convert command line).
    """

    recipe_specific_options = {
        "ARCHIVE_DOWNLOADED": {
            "short": "Mark as read",
            "long": "Mark as read",
            "default": False,
        },
        "TODOIST_PROJECT_ID": {"short": "Project ID", "long": "Project ID"},
        "TODOIST_API_KEY": {"short": "API key", "long": "API KEY"},
        "URL_KEYWORD_EXCEPTIONS": {
            "short": "URL keyword exceptions",
            "long": 'List of keywords to ignore articles, e.g. ["example.com", "ignoreme.com"]',
            "default": [],
        },
    }

    __author__ = "ARG"
    description = "prueba"
    publisher = "Todoist.com"
    category = "info, custom, Todoist"

    # User-configurable settings -----------------------------------------------
    series_name = "Todoist"
    publication_type = "magazine"
    title = "Todoist"
    # timefmt = ''  # uncomment to remove the date from the filenames; if
    # commented you get something like `Todoist [Wed, 13 May 2020]`
    masthead_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-logo.png"
    # will make square cover; this will replace text and cover of the default
    cover_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-cover.png"
    # --------------------------------------------------------------------------

    # Inherited developer settings
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False

    # Custom developer settings
    to_archive = []  # task ids to close in cleanup(); reset per-instance in __init__
    simultaneous_downloads = 10

    # Hide calibre's navigation bars. NOTE(review): the original assigned
    # extra_css twice, so only the second rule was ever active; both rules
    # are merged here into a single stylesheet.
    extra_css = (
        ".touchscreen_navbar {display: none;} "
        ".calibre_navbar { visibility: hidden; }"
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance archive list (the class attribute would otherwise be
        # shared across instances).
        self.to_archive = []

        # Optional configuration parameters. Use .get() so a missing key
        # falls back to the documented default instead of raising KeyError.
        self.archive_downloaded = parse_env_bool(
            self.recipe_specific_options.get("ARCHIVE_DOWNLOADED", False)
        )
        self.keyword_exceptions = parse_env_list(
            self.recipe_specific_options.get("URL_KEYWORD_EXCEPTIONS", [])
        )

        # Mandatory configuration parameters: abort early with a clear
        # message when either is missing or empty.
        project_id = self.recipe_specific_options.get("TODOIST_PROJECT_ID")
        if project_id:
            self.todoist_project_id = project_id
        else:
            self.abort_recipe_processing(
                "TODOIST_PROJECT_ID mandatory parameter missing"
            )

        api_key = self.recipe_specific_options.get("TODOIST_API_KEY")
        if api_key:
            self.todoist_api_key = api_key
        else:
            self.abort_recipe_processing("TODOIST_API_KEY mandatory parameter missing")

    def parse_index(self):
        """Fetch the project's tasks from the Todoist REST API and build the
        calibre index: a list of (section, [article dicts]) tuples, one
        section per article domain."""
        articles = []
        section_dict = {}  # domain -> list of article dicts

        url = f"https://api.todoist.com/rest/v2/tasks?project_id={self.todoist_project_id}"
        headers = {"Authorization": f"Bearer {self.todoist_api_key}"}
        request = mechanize.Request(url, headers=headers)

        response = self.browser.open(request)
        if response.code != 200:
            raise Exception("No se pudieron recuperar las tareas de Todoist")
        tasks = json.loads(response.read().decode("utf-8"))

        # Tasks are expected to contain a markdown link: [title](url)
        articles_todoist = []
        url_regex = re.compile(r"\[([^\]]+)\]\(\s*(https?://[^\s\)]+)\s*\)")
        for task in tasks:
            match = url_regex.search(task["content"])
            if match:
                title = match.group(1).strip()
                url = match.group(2).strip()
                date_added = task.get("created_at", datetime.now().isoformat())
                articles_todoist.append(
                    {
                        "title": title or url,
                        "url": url,
                        "date_added": date_added,
                        "item_id": task["id"],
                    }
                )

        if not articles_todoist:
            self.abort_recipe_processing(
                'No unread articles in the Todoist project "{}"'.format(
                    self.todoist_project_id
                )
            )

        for item in articles_todoist:
            # If the URL contains any URL_KEYWORD_EXCEPTIONS, ignore article.
            # (The original used a no-op `del item` here; `continue` is the
            # equivalent skip.)
            if any(pattern in item["url"] for pattern in self.keyword_exceptions):
                print("Ignoring article due to keyword patterns:" + item["url"])
                continue
            # Extract domain from the URL and file the article under it.
            domain = urlparse(item["url"]).netloc.replace("www.", "")
            section_dict.setdefault(domain, []).append(item)
            print("Adding article: " + item["url"] + " to section: " + domain)

        ############ APPEND ARTS FOR EACH DOMAIN #############
        # At this point the section_dict is completed.
        for section in section_dict:
            arts = []
            for item in section_dict[section]:
                arts.append(
                    {
                        "title": item.get("title", "error: title"),
                        "url": item.get("url", "error: url"),
                        "date": item["date_added"],
                    }
                )
                # Remember which tasks to close once the download succeeds.
                if (
                    self.archive_downloaded
                    and item["item_id"] not in self.to_archive
                ):
                    self.to_archive.append(item["item_id"])
            if arts:
                articles.append((section, arts))

        if not articles:
            self.abort_recipe_processing(
                "No articles in the Todoist project account %s to download"
                % (self.todoist_project_id)
            )
        return articles

    def get_browser(self, *args, **kwargs):
        # Cache the default browser so parse_index can reuse it.
        self.browser = BasicNewsRecipe.get_browser(self)
        return self.browser

    def cleanup(self):
        """Close (archive) every downloaded task collected in to_archive."""
        if not self.to_archive:
            return
        for task_id in self.to_archive:
            url = f"https://api.todoist.com/rest/v2/tasks/{task_id}/close"
            req = mechanize.Request(
                url,
                headers={
                    "Authorization": f"Bearer {self.todoist_api_key}",
                    "Content-Type": "application/json",
                },
            )
            # mechanize has no native POST-without-body helper.
            req.get_method = lambda: "POST"

            try:
                br = mechanize.Browser()
                response = br.open(req)
                if response.code == 204:  # Todoist returns 204 on success
                    print(f"Task {task_id} correctly closed.")
                else:
                    print(f"Error while closing task {task_id}: {response.code}")
            except Exception as e:
                # Best-effort: a failed close must not break the conversion.
                print(f"Exception while closing task {task_id}: {e}")

    # TODO: This works with EPUB, but not mobi/azw3
    # BUG: https://bugs.launchpad.net/calibre/+bug/1838486
    def postprocess_book(self, oeb, opts, log):
        oeb.metadata.add("series", self.series_name)

    def _postprocess_html(self, soup, first_fetch, job_info):
        """Promote the document <title> to a single <h1> at the top of the
        body, clearing any existing h1/h2 headers that duplicate it."""
        title_tag = soup.find("title")
        # Guard: some pages lack a <title>; the original would crash here.
        title = title_tag.text if title_tag else ""

        for h1 in soup.findAll("h1"):
            if title and title in h1.text:
                h1.clear()  # clean this tag, so our h1 will be the only one
        for h2 in soup.findAll("h2"):
            if title and title in h2.text:
                h2.clear()

        body = soup.find("body")
        if body is not None:
            new_tag = soup.new_tag("h1")
            new_tag.append(title)
            body.insert(0, new_tag)
        return soup

    def default_cover(self, cover_file):
        """
        Create a generic cover for recipes that don't have a cover.
        This override adds the generation time to the cover.

        Returns True on success, False on failure (calibre convention).
        """
        try:
            # NOTE(review): strftime is not imported at module level in the
            # visible file; calibre provides it — imported locally here.
            from calibre import strftime
            from calibre.ebooks import calibre_cover

            # The Python-2 `unicode` shim from the original is dead code:
            # the file already uses f-strings, so it only runs on Python 3.
            title = (
                self.title
                if isinstance(self.title, str)
                else self.title.encode("utf-8", "replace").decode("utf-8", "replace")
            )
            date = strftime(self.timefmt)
            time = strftime("%a %d %b %Y %-H:%M")
            img_data = calibre_cover(title, date, time)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            self.log.exception("Failed to generate default cover")
            return False
        return True