mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Merge branch 'feature_add_todoist_recipe' of https://github.com/rga5321/calibre
This commit is contained in:
		
						commit
						f89a245641
					
				
							
								
								
									
										320
									
								
								recipes/todoist.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										320
									
								
								recipes/todoist.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,320 @@
 | 
			
		||||
#!/usr/bin/env python
# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4
from __future__ import print_function

__version__ = "0.0.3"

"""
Changelog:
0.0.3: Parameters in recipe_specific_options
0.0.2: Calibre footer with the source URL. QR points to the article URL.
0.0.1: First working version

# Calibre parameters

Input them on the command line as in this example: ebook-convert Todoist.recipe output.epub --recipe-specific-option=ARCHIVE_DOWNLOADED:False --recipe-specific-option=TODOIST_PROJECT_ID:YOUR_PROJECT_ID --recipe-specific-option=TODOIST_API_KEY:YOUR_API_KEY --recipe-specific-option=URL_KEYWORD_EXCEPTIONS:jotdown,elpais.com/gastronomia

**URL_KEYWORD_EXCEPTIONS** (list of keywords; if the URL of an article contains any keyword, the plugin will ignore the article)

**ARCHIVE_DOWNLOADED** (True or False) whether to archive articles after fetching

**TODOIST_PROJECT_ID** (string) your Todoist project ID; you can find it in the URL of your Todoist project, e.g. https://todoist.com/app/project/1234567890abcdef12345678

**TODOIST_API_KEY** (string) your Todoist API key; you can find it in your Todoist account settings under "Integrations" or "API tokens"
"""
 | 
			
		||||
# CONFIGURATION ###########################################################
 | 
			
		||||
 | 
			
		||||
import ast
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Aux function: interpret a loosely-typed option value as a boolean.
def parse_env_bool(val):
    """Return True when *val* reads as an affirmative string.

    Any value is stringified first, so booleans, numbers and None are
    accepted; only "true", "1" and "yes" (case/whitespace-insensitive)
    count as True.
    """
    normalized = str(val).strip().lower()
    return normalized in {"true", "1", "yes"}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Aux function: parse an option value into a list of strings.
def parse_env_list(val):
    """Return *val* as a list.

    Accepts, in order of preference:
      * an actual list/tuple (returned as a list, unchanged);
      * a Python list/tuple literal, e.g. '["a", "b"]';
      * a plain comma-separated string, e.g. "jotdown,elpais.com" — this is
        the form shown in the module docstring's ebook-convert example.  The
        original implementation fed such strings to ast.literal_eval, which
        raises, so the function silently returned [] and the keyword-exception
        feature never worked from the command line.

    Empty/None input yields [].
    """
    if val is None:
        return []
    if isinstance(val, (list, tuple)):
        return list(val)
    try:
        parsed = ast.literal_eval(val)
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
    except (ValueError, SyntaxError):
        # Not a Python literal: fall through to comma-splitting.
        pass
    # Comma-separated fallback; drop empty fragments and surrounding spaces.
    return [part.strip() for part in str(val).split(",") if part.strip()]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
SITE_PACKAGE_PATH = ""
 | 
			
		||||
#############################################################################
 | 
			
		||||
 | 
			
		||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
			
		||||
from collections import namedtuple
 | 
			
		||||
from os import path
 | 
			
		||||
from time import strftime
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
 | 
			
		||||
#############################################################################
 | 
			
		||||
 | 
			
		||||
SITE_PACKAGE_PATH = ""
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import mechanize
 | 
			
		||||
import re
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
 | 
			
		||||
__license__ = "GPL v3"
 | 
			
		||||
__copyright__ = "2025, ARG"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Todoist2ebook(BasicNewsRecipe):
    """Build an e-book from article links saved as tasks in a Todoist project.

    Each task whose content is a markdown link ``[title](url)`` becomes an
    article; articles are grouped into sections by the domain of their URL.
    Optionally, downloaded tasks are closed (archived) in Todoist afterwards.
    """

    # User-tunable options, supplied on the command line via
    # --recipe-specific-option=NAME:VALUE (see the module docstring).
    recipe_specific_options = {
        "ARCHIVE_DOWNLOADED": {
            "short": "Mark as read",
            "long": "Mark as read",
            "default": False,
        },
        # Typo fix: "Proyect" -> "Project" in the user-visible labels.
        "TODOIST_PROJECT_ID": {"short": "Project ID", "long": "Project ID"},
        "TODOIST_API_KEY": {"short": "API key", "long": "API KEY"},
        "URL_KEYWORD_EXCEPTIONS": {
            "short": "URL keyword exceptions",
            "long": 'List of keywords to ignore articles, e.g. ["example.com", "ignoreme.com"]',
            "default": [],
        },
    }

    __author__ = "ARG"
    # Replaced the placeholder description ("prueba" = Spanish for "test").
    description = "Fetches unread articles saved as tasks in a Todoist project"
    publisher = "Todoist.com"
    category = "info, custom, Todoist"

    # User-configurable settings -----------------------------------------------

    series_name = "Todoist"
    publication_type = "magazine"
    title = "Todoist"
    # timefmt = '' # uncomment to remove date from the filenames, if commented then you will get something like `Todoist [Wed, 13 May 2020]`
    masthead_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-logo.png"
    # will make square cover; this will replace text and cover of the default
    cover_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-cover.png"
    # --------------------------------------------------------------------------

    # Inherited developer settings
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False

    simultaneous_downloads = 10

    # BUGFIX: the original assigned extra_css twice, so the first rule
    # (.touchscreen_navbar) was silently discarded.  Merge both rules.
    extra_css = ".touchscreen_navbar {display: none;} .calibre_navbar { visibility: hidden; }"

    def __init__(self, *args, **kwargs):
        """Read recipe options; abort early when mandatory ones are missing."""
        super().__init__(*args, **kwargs)

        # Task ids queued for closing in cleanup().  Created per-instance
        # (the original used a mutable class attribute, which would be
        # shared across instances).
        self.to_archive = []

        opts = self.recipe_specific_options

        # Optional parameters.  Use .get() so a missing key falls back to
        # the documented default instead of raising KeyError.
        self.archive_downloaded = parse_env_bool(opts.get("ARCHIVE_DOWNLOADED", False))
        self.keyword_exceptions = parse_env_list(opts.get("URL_KEYWORD_EXCEPTIONS", []))

        # Mandatory parameters.
        self.todoist_project_id = opts.get("TODOIST_PROJECT_ID")
        if not self.todoist_project_id:
            self.abort_recipe_processing(
                "TODOIST_PROJECT_ID mandatory parameter missing"
            )

        self.todoist_api_key = opts.get("TODOIST_API_KEY")
        if not self.todoist_api_key:
            self.abort_recipe_processing("TODOIST_API_KEY mandatory parameter missing")

    def parse_index(self):
        """Fetch the project's tasks and build calibre's feed structure.

        Returns a list of ``(section_title, [article_dict, ...])`` tuples,
        where the section title is the domain of the article URL.
        """
        url = f"https://api.todoist.com/rest/v2/tasks?project_id={self.todoist_project_id}"
        headers = {"Authorization": f"Bearer {self.todoist_api_key}"}
        request = mechanize.Request(url, headers=headers)

        response = self.browser.open(request)
        if response.code != 200:
            raise Exception("No se pudieron recuperar las tareas de Todoist")
        tasks = json.loads(response.read().decode("utf-8"))

        # Each task's content is expected to be a markdown link: [title](url)
        url_regex = re.compile(r"\[([^\]]+)\]\(\s*(https?://[^\s\)]+)\s*\)")
        articles_todoist = []
        for task in tasks:
            match = url_regex.search(task["content"])
            if not match:
                continue
            title = match.group(1).strip()
            link = match.group(2).strip()
            articles_todoist.append(
                {
                    # Fall back to the URL when the link text is empty.
                    "title": title or link,
                    "url": link,
                    # Fall back to "now" when the API omits created_at.
                    "date_added": task.get("created_at", datetime.now().isoformat()),
                    "item_id": task["id"],
                }
            )

        if not articles_todoist:
            self.abort_recipe_processing(
                'No unread articles in the Todoist project "{}"'.format(
                    self.todoist_project_id
                )
            )

        # Group articles by domain.  (dictionary: domain -> list of articles)
        section_dict = {}
        for item in articles_todoist:
            # If the URL contains any URL_KEYWORD_EXCEPTIONS, ignore article.
            # (The original used a no-op `del item` here; `continue` is the
            # intended behavior and is equivalent in effect.)
            if any(pattern in item["url"] for pattern in self.keyword_exceptions):
                print("Ignoring article due to keyword patterns:" + item["url"])
                continue

            # Extract domain from the URL and file the article under it.
            domain = urlparse(item["url"]).netloc.replace("www.", "")
            section_dict.setdefault(domain, []).append(item)
            print("Adding article: " + item["url"] + " to section: " + domain)

        # Build the (section, articles) list calibre expects.
        articles = []
        for section, items in section_dict.items():
            arts = []
            for item in items:
                arts.append(
                    {
                        "title": item.get("title", "error: title"),
                        "url": item.get("url", "error: url"),
                        "date": item["date_added"],
                    }
                )
                # Queue the task for archiving in cleanup(), once per task.
                if self.archive_downloaded and item["item_id"] not in self.to_archive:
                    self.to_archive.append(item["item_id"])
            if arts:
                articles.append((section, arts))

        if not articles:
            self.abort_recipe_processing(
                "No articles in the Todoist project account %s to download"
                % (self.todoist_project_id)
            )
        return articles

    def get_browser(self, *args, **kwargs):
        # Cache the browser on the instance so parse_index() can reuse it.
        self.browser = BasicNewsRecipe.get_browser(self)
        return self.browser

    def cleanup(self):
        """Close (archive) in Todoist every task queued in self.to_archive."""
        if not self.to_archive:
            return

        for task_id in self.to_archive:
            url = f"https://api.todoist.com/rest/v2/tasks/{task_id}/close"
            req = mechanize.Request(
                url,
                headers={
                    "Authorization": f"Bearer {self.todoist_api_key}",
                    "Content-Type": "application/json",
                },
            )
            # The Todoist "close" endpoint requires a POST request.
            req.get_method = lambda: "POST"

            try:
                br = mechanize.Browser()
                response = br.open(req)
                # Todoist answers 204 No Content on success.
                if response.code == 204:
                    # Typo fix: "corectly" -> "correctly".
                    print(f"Task {task_id} correctly closed.")
                else:
                    print(f"Error while closing task {task_id}: {response.code}")
            except Exception as e:
                # Best-effort: a failed close must not abort the conversion.
                print(f"Exception while closing task {task_id}: {e}")

    # TODO: This works with EPUB, but not mobi/azw3
    # BUG: https://bugs.launchpad.net/calibre/+bug/1838486
    def postprocess_book(self, oeb, opts, log):
        # Tag every generated book with the same series name so readers
        # group the periodical issues together.
        oeb.metadata.add("series", self.series_name)

    def _postprocess_html(self, soup, first_fetch, job_info):
        """Deduplicate the title: clear matching h1/h2 tags, then insert a
        single h1 with the page title at the top of the body."""
        # NOTE(review): assumes the fetched page always has a <title> tag;
        # soup.find("title") would be None otherwise — confirm upstream.
        title = soup.find("title").text

        # Clear any heading that repeats the title, so it appears only once.
        for h1 in soup.findAll("h1"):
            if title in h1.text:
                h1.clear()
        for h2 in soup.findAll("h2"):
            if title in h2.text:
                h2.clear()

        body = soup.find("body")
        new_tag = soup.new_tag("h1")
        new_tag.append(title)
        body.insert(0, new_tag)

        return soup

    def default_cover(self, cover_file):
        """
        Create a generic cover for recipes that don't have a cover.

        This override adds the time to the cover.  Returns True on success,
        False on failure.
        """
        try:
            from calibre.ebooks import calibre_cover

            # Python 2/3 compatibility: `unicode` exists only on Python 2.
            try:
                unicode_type = unicode
            except NameError:
                unicode_type = str
            title = (
                self.title
                if isinstance(self.title, unicode_type)
                else self.title.encode("utf-8", "replace").decode("utf-8", "replace")
            )
            date = strftime(self.timefmt)
            # NOTE: %-H is a glibc extension; not portable to Windows.
            time = strftime("%a %d %b %Y %-H:%M")
            img_data = calibre_cover(title, date, time)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed.
            self.log.exception("Failed to generate default cover")
            return False
        return True
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user