mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Merge branch 'feature_add_todoist_recipe' of https://github.com/rga5321/calibre
This commit is contained in:
		
						commit
						f89a245641
					
				
							
								
								
									
										320
									
								
								recipes/todoist.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										320
									
								
								recipes/todoist.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,320 @@
 | 
			
		||||
#!/usr/bin/env python
# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4
from __future__ import print_function

__version__ = "0.0.3"

"""
Changelog:
0.0.3: Parameters in recipe_specific_options
0.0.2: Calibre footer with the source URL. QR points to the article URL.
0.0.1: First working version

# Calibre parameters

Input them on the command line as in this example: ebook-convert Todoist.recipe output.epub --recipe-specific-option=ARCHIVE_DOWNLOADED:False --recipe-specific-option=TODOIST_PROJECT_ID:YOUR_PROJECT_ID --recipe-specific-option=TODOIST_API_KEY:YOUR_API_KEY --recipe-specific-option=URL_KEYWORD_EXCEPTIONS:jotdown,elpais.com/gastronomia

**URL_KEYWORD_EXCEPTIONS** (list of keywords; if the URL of an article contains any keyword, the plugin will ignore the article)

**ARCHIVE_DOWNLOADED** (True or False) whether to archive articles after fetching

**TODOIST_PROJECT_ID** (string) your Todoist project ID; you can find it in the URL of your Todoist project, e.g. https://todoist.com/app/project/1234567890abcdef12345678

**TODOIST_API_KEY** (string) your Todoist API key; you can find it in your Todoist account settings under "Integrations" or "API tokens"
"""
 | 
			
		||||
# CONFIGURATION ###########################################################
 | 
			
		||||
 | 
			
		||||
import ast
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Aux function: interpret a loosely-typed option value as a boolean.
def parse_env_bool(val):
    """Return True when *val* reads as an affirmative string.

    Any value is stringified first, so booleans, numbers and None are
    accepted; only "true", "1" and "yes" (case/whitespace-insensitive)
    count as True.
    """
    normalized = str(val).strip().lower()
    return normalized in {"true", "1", "yes"}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Aux function: parse an option value into a list of strings.
def parse_env_list(val):
    """Return *val* as a list.

    Accepts, in order of preference:
      * an actual list/tuple (returned as a list, unchanged);
      * a Python list/tuple literal, e.g. '["a", "b"]';
      * a plain comma-separated string, e.g. "jotdown,elpais.com" — this is
        the form shown in the module docstring's ebook-convert example.  The
        original implementation fed such strings to ast.literal_eval, which
        raises, so the function silently returned [] and the keyword-exception
        feature never worked from the command line.

    Empty/None input yields [].
    """
    if val is None:
        return []
    if isinstance(val, (list, tuple)):
        return list(val)
    try:
        parsed = ast.literal_eval(val)
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
    except (ValueError, SyntaxError):
        # Not a Python literal: fall through to comma-splitting.
        pass
    # Comma-separated fallback; drop empty fragments and surrounding spaces.
    return [part.strip() for part in str(val).split(",") if part.strip()]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
SITE_PACKAGE_PATH = ""
 | 
			
		||||
#############################################################################
 | 
			
		||||
 | 
			
		||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
			
		||||
from collections import namedtuple
 | 
			
		||||
from os import path
 | 
			
		||||
from time import strftime
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
 | 
			
		||||
#############################################################################
 | 
			
		||||
 | 
			
		||||
SITE_PACKAGE_PATH = ""
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import mechanize
 | 
			
		||||
import re
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
 | 
			
		||||
__license__ = "GPL v3"
 | 
			
		||||
__copyright__ = "2025, ARG"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Todoist2ebook(BasicNewsRecipe):
    """Build an e-book from article links saved as tasks in a Todoist project.

    Each task whose content is a markdown link ``[title](url)`` becomes an
    article; articles are grouped into sections by the domain of their URL.
    Optionally, downloaded tasks are closed (archived) in Todoist afterwards.
    """

    # User-tunable options, supplied on the command line via
    # --recipe-specific-option=NAME:VALUE (see the module docstring).
    recipe_specific_options = {
        "ARCHIVE_DOWNLOADED": {
            "short": "Mark as read",
            "long": "Mark as read",
            "default": False,
        },
        # Typo fix: "Proyect" -> "Project" in the user-visible labels.
        "TODOIST_PROJECT_ID": {"short": "Project ID", "long": "Project ID"},
        "TODOIST_API_KEY": {"short": "API key", "long": "API KEY"},
        "URL_KEYWORD_EXCEPTIONS": {
            "short": "URL keyword exceptions",
            "long": 'List of keywords to ignore articles, e.g. ["example.com", "ignoreme.com"]',
            "default": [],
        },
    }

    __author__ = "ARG"
    # Replaced the placeholder description ("prueba" = Spanish for "test").
    description = "Fetches unread articles saved as tasks in a Todoist project"
    publisher = "Todoist.com"
    category = "info, custom, Todoist"

    # User-configurable settings -----------------------------------------------

    series_name = "Todoist"
    publication_type = "magazine"
    title = "Todoist"
    # timefmt = '' # uncomment to remove date from the filenames, if commented then you will get something like `Todoist [Wed, 13 May 2020]`
    masthead_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-logo.png"
    # will make square cover; this will replace text and cover of the default
    cover_url = "https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-cover.png"
    # --------------------------------------------------------------------------

    # Inherited developer settings
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False

    simultaneous_downloads = 10

    # BUGFIX: the original assigned extra_css twice, so the first rule
    # (.touchscreen_navbar) was silently discarded.  Merge both rules.
    extra_css = ".touchscreen_navbar {display: none;} .calibre_navbar { visibility: hidden; }"

    def __init__(self, *args, **kwargs):
        """Read recipe options; abort early when mandatory ones are missing."""
        super().__init__(*args, **kwargs)

        # Task ids queued for closing in cleanup().  Created per-instance
        # (the original used a mutable class attribute, which would be
        # shared across instances).
        self.to_archive = []

        opts = self.recipe_specific_options

        # Optional parameters.  Use .get() so a missing key falls back to
        # the documented default instead of raising KeyError.
        self.archive_downloaded = parse_env_bool(opts.get("ARCHIVE_DOWNLOADED", False))
        self.keyword_exceptions = parse_env_list(opts.get("URL_KEYWORD_EXCEPTIONS", []))

        # Mandatory parameters.
        self.todoist_project_id = opts.get("TODOIST_PROJECT_ID")
        if not self.todoist_project_id:
            self.abort_recipe_processing(
                "TODOIST_PROJECT_ID mandatory parameter missing"
            )

        self.todoist_api_key = opts.get("TODOIST_API_KEY")
        if not self.todoist_api_key:
            self.abort_recipe_processing("TODOIST_API_KEY mandatory parameter missing")

    def parse_index(self):
        """Fetch the project's tasks and build calibre's feed structure.

        Returns a list of ``(section_title, [article_dict, ...])`` tuples,
        where the section title is the domain of the article URL.
        """
        url = f"https://api.todoist.com/rest/v2/tasks?project_id={self.todoist_project_id}"
        headers = {"Authorization": f"Bearer {self.todoist_api_key}"}
        request = mechanize.Request(url, headers=headers)

        response = self.browser.open(request)
        if response.code != 200:
            raise Exception("No se pudieron recuperar las tareas de Todoist")
        tasks = json.loads(response.read().decode("utf-8"))

        # Each task's content is expected to be a markdown link: [title](url)
        url_regex = re.compile(r"\[([^\]]+)\]\(\s*(https?://[^\s\)]+)\s*\)")
        articles_todoist = []
        for task in tasks:
            match = url_regex.search(task["content"])
            if not match:
                continue
            title = match.group(1).strip()
            link = match.group(2).strip()
            articles_todoist.append(
                {
                    # Fall back to the URL when the link text is empty.
                    "title": title or link,
                    "url": link,
                    # Fall back to "now" when the API omits created_at.
                    "date_added": task.get("created_at", datetime.now().isoformat()),
                    "item_id": task["id"],
                }
            )

        if not articles_todoist:
            self.abort_recipe_processing(
                'No unread articles in the Todoist project "{}"'.format(
                    self.todoist_project_id
                )
            )

        # Group articles by domain.  (dictionary: domain -> list of articles)
        section_dict = {}
        for item in articles_todoist:
            # If the URL contains any URL_KEYWORD_EXCEPTIONS, ignore article.
            # (The original used a no-op `del item` here; `continue` is the
            # intended behavior and is equivalent in effect.)
            if any(pattern in item["url"] for pattern in self.keyword_exceptions):
                print("Ignoring article due to keyword patterns:" + item["url"])
                continue

            # Extract domain from the URL and file the article under it.
            domain = urlparse(item["url"]).netloc.replace("www.", "")
            section_dict.setdefault(domain, []).append(item)
            print("Adding article: " + item["url"] + " to section: " + domain)

        # Build the (section, articles) list calibre expects.
        articles = []
        for section, items in section_dict.items():
            arts = []
            for item in items:
                arts.append(
                    {
                        "title": item.get("title", "error: title"),
                        "url": item.get("url", "error: url"),
                        "date": item["date_added"],
                    }
                )
                # Queue the task for archiving in cleanup(), once per task.
                if self.archive_downloaded and item["item_id"] not in self.to_archive:
                    self.to_archive.append(item["item_id"])
            if arts:
                articles.append((section, arts))

        if not articles:
            self.abort_recipe_processing(
                "No articles in the Todoist project account %s to download"
                % (self.todoist_project_id)
            )
        return articles

    def get_browser(self, *args, **kwargs):
        # Cache the browser on the instance so parse_index() can reuse it.
        self.browser = BasicNewsRecipe.get_browser(self)
        return self.browser

    def cleanup(self):
        """Close (archive) in Todoist every task queued in self.to_archive."""
        if not self.to_archive:
            return

        for task_id in self.to_archive:
            url = f"https://api.todoist.com/rest/v2/tasks/{task_id}/close"
            req = mechanize.Request(
                url,
                headers={
                    "Authorization": f"Bearer {self.todoist_api_key}",
                    "Content-Type": "application/json",
                },
            )
            # The Todoist "close" endpoint requires a POST request.
            req.get_method = lambda: "POST"

            try:
                br = mechanize.Browser()
                response = br.open(req)
                # Todoist answers 204 No Content on success.
                if response.code == 204:
                    # Typo fix: "corectly" -> "correctly".
                    print(f"Task {task_id} correctly closed.")
                else:
                    print(f"Error while closing task {task_id}: {response.code}")
            except Exception as e:
                # Best-effort: a failed close must not abort the conversion.
                print(f"Exception while closing task {task_id}: {e}")

    # TODO: This works with EPUB, but not mobi/azw3
    # BUG: https://bugs.launchpad.net/calibre/+bug/1838486
    def postprocess_book(self, oeb, opts, log):
        # Tag every generated book with the same series name so readers
        # group the periodical issues together.
        oeb.metadata.add("series", self.series_name)

    def _postprocess_html(self, soup, first_fetch, job_info):
        """Deduplicate the title: clear matching h1/h2 tags, then insert a
        single h1 with the page title at the top of the body."""
        # NOTE(review): assumes the fetched page always has a <title> tag;
        # soup.find("title") would be None otherwise — confirm upstream.
        title = soup.find("title").text

        # Clear any heading that repeats the title, so it appears only once.
        for h1 in soup.findAll("h1"):
            if title in h1.text:
                h1.clear()
        for h2 in soup.findAll("h2"):
            if title in h2.text:
                h2.clear()

        body = soup.find("body")
        new_tag = soup.new_tag("h1")
        new_tag.append(title)
        body.insert(0, new_tag)

        return soup

    def default_cover(self, cover_file):
        """
        Create a generic cover for recipes that don't have a cover.

        This override adds the time to the cover.  Returns True on success,
        False on failure.
        """
        try:
            from calibre.ebooks import calibre_cover

            # Python 2/3 compatibility: `unicode` exists only on Python 2.
            try:
                unicode_type = unicode
            except NameError:
                unicode_type = str
            title = (
                self.title
                if isinstance(self.title, unicode_type)
                else self.title.encode("utf-8", "replace").decode("utf-8", "replace")
            )
            date = strftime(self.timefmt)
            # NOTE: %-H is a glibc extension; not portable to Windows.
            time = strftime("%a %d %b %Y %-H:%M")
            img_data = calibre_cover(title, date, time)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are not swallowed.
            self.log.exception("Failed to generate default cover")
            return False
        return True
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user