#!/usr/bin/env python
# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4

__license__ = 'GPL v3'
__copyright__ = '2025, ARG'

import ast  # hoisted from the CONFIGURATION section: imports belong at the top (PEP 8)
import json
import re
from datetime import datetime
from time import strftime
from urllib.parse import urlparse

import mechanize

from calibre.web.feeds.news import BasicNewsRecipe

__version__ = '0.0.3'

'''
0.0.3: Parameters in recipe_specific_options
0.0.2: Calibre footer with the source URL. QR points to the article URL.
0.0.1: First working version

# Calibre parameters

Input them in command line as this example::

    ebook-convert Todoist.recipe output.epub \
        --recipe-specific-option=ARCHIVE_DOWNLOADED:False \
        --recipe-specific-option=TODOIST_PROJECT_ID:YOUR_PROJECT_ID \
        --recipe-specific-option=TODOIST_API_KEY:YOUR_API_KEY \
        --recipe-specific-option=URL_KEYWORD_EXCEPTIONS:jotdown,elpais.com/gastronomia

**URL_KEYWORD_EXCEPTIONS** (list of keywords such as, if the URL of the article
contains any keyword, then the plugin will ignore the article)

**ARCHIVE_DOWNLOADED** (True or False) do you want to archive articles after fetching

**TODOIST_PROJECT_ID** (string) your Todoist project ID, you can find it in the URL
of your Todoist project, e.g. https://todoist.com/app/project/1234567890abcdef12345678

**TODOIST_API_KEY** (string) your Todoist API key, you can find it in your Todoist
account settings under "Integrations" or "API tokens"
'''

# CONFIGURATION ###########################################################


# Aux function. String to boolean.
def parse_env_bool(val):
    """Coerce a user-supplied option value to a boolean.

    'true', '1' and 'yes' (case-insensitive, surrounding whitespace
    ignored) count as True; any other value — including None and the
    spec's default False — counts as False.
    """
    return str(val).strip().lower() in ('true', '1', 'yes')
# Aux function. Comma separated string (or Python literal) to list.
def parse_env_list(val):
    """Coerce a user-supplied option value to a list of strings.

    Accepts an actual list/tuple, a Python literal such as
    '["a", "b"]', or the plain comma-separated form documented in the
    module docstring ('jotdown,elpais.com/gastronomia').  The original
    implementation only tried ast.literal_eval, so the documented
    comma-separated format silently became [] — fixed here with a
    split-on-comma fallback.  Returns [] for anything unusable.
    """
    if isinstance(val, (list, tuple)):
        return list(val)
    try:
        parsed = ast.literal_eval(val)
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
    except Exception:
        pass
    # Documented fallback: comma separated keywords.
    text = str(val).strip()
    if not text:
        return []
    return [part.strip() for part in text.split(',') if part.strip()]


#############################################################################


class Todoist2ebook(BasicNewsRecipe):
    """Build an ebook from the article links stored as tasks in a Todoist project.

    Each task whose content contains a markdown link ``[title](url)`` becomes
    an article; articles are grouped into sections by URL domain.  Optionally
    the tasks are closed (archived) after a successful download.
    """

    recipe_specific_options = {
        'ARCHIVE_DOWNLOADED': {
            'short': 'Mark as read',
            'long': 'Mark as read',
            'default': False,
        },
        # Typo fix in the UI labels: 'Proyect' -> 'Project'.
        'TODOIST_PROJECT_ID': {'short': 'Project ID', 'long': 'Project ID'},
        'TODOIST_API_KEY': {'short': 'API key', 'long': 'API KEY'},
        'URL_KEYWORD_EXCEPTIONS': {
            'short': 'URL keyword exceptions',
            'long': 'List of keywords to ignore articles, e.g. ["example.com", "ignoreme.com"]',
            'default': [],
        },
    }

    __author__ = 'ARG'
    description = 'prueba'
    publisher = 'Todoist.com'
    category = 'info, custom, Todoist'

    # User-configurable settings -----------------------------------------------
    series_name = 'Todoist'
    publication_type = 'magazine'
    title = 'Todoist'
    # timefmt = ''  # uncomment to remove date from the filenames, if commented
    # then you will get something like `Todoist [Wed, 13 May 2020]`
    masthead_url = 'https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-logo.png'
    # will make square cover; this will replace text and cover of the default
    cover_url = 'https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-cover.png'
    # --------------------------------------------------------------------------

    # Inherited developer settings
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False

    # Custom developer settings.  NOTE: to_archive is re-initialised per
    # instance in __init__ — the original shared this mutable list between
    # all instances of the class.
    to_archive = []
    simultaneous_downloads = 10
    extra_css = '.calibre_navbar { visibility: hidden; }'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance list of task ids to close in cleanup() (avoids the
        # shared-mutable-class-attribute pitfall).
        self.to_archive = []

        opts = self.recipe_specific_options
        # Optional parameters.  Use .get() with a default: indexing with []
        # raised KeyError when an option was not supplied on the command
        # line (calibre replaces the spec dict with the user values at
        # runtime — presumably omitting unset options; .get guards both
        # cases — TODO confirm against the calibre version in use).
        self.archive_downloaded = parse_env_bool(opts.get('ARCHIVE_DOWNLOADED', False))
        self.keyword_exceptions = parse_env_list(opts.get('URL_KEYWORD_EXCEPTIONS', []))

        # Mandatory parameters: abort early with a clear message.
        self.todoist_project_id = opts.get('TODOIST_PROJECT_ID')
        if not self.todoist_project_id:
            self.abort_recipe_processing(
                'TODOIST_PROJECT_ID mandatory parameter missing'
            )
        self.todoist_api_key = opts.get('TODOIST_API_KEY')
        if not self.todoist_api_key:
            self.abort_recipe_processing('TODOIST_API_KEY mandatory parameter missing')

    def parse_index(self):
        """Fetch the project's tasks from the Todoist REST API and return
        calibre's expected ``[(section, [article_dict, ...]), ...]`` index,
        one section per article domain.
        """
        url = f'https://api.todoist.com/rest/v2/tasks?project_id={self.todoist_project_id}'
        headers = {'Authorization': f'Bearer {self.todoist_api_key}'}
        request = mechanize.Request(url, headers=headers)
        response = self.browser.open(request)
        if response.code != 200:
            raise Exception('No se pudieron recuperar las tareas de Todoist')
        tasks = json.loads(response.read().decode('utf-8'))

        # Todoist stores the link as markdown: [title](url)
        url_regex = re.compile(r'\[([^\]]+)\]\(\s*(https?://[^\s\)]+)\s*\)')
        articles_todoist = []
        for task in tasks:
            match = url_regex.search(task['content'])
            if not match:
                continue
            task_title = match.group(1).strip()
            task_url = match.group(2).strip()
            articles_todoist.append(
                {
                    'title': task_title or task_url,
                    'url': task_url,
                    'date_added': task.get('created_at', datetime.now().isoformat()),
                    'item_id': task['id'],
                }
            )

        if not articles_todoist:
            self.abort_recipe_processing(
                'No unread articles in the Todoist project "{}"'.format(
                    self.todoist_project_id
                )
            )

        # Group the surviving articles by domain.
        section_dict = {}  # domain -> list of article dicts
        for item in articles_todoist:
            # If the URL contains any URL_KEYWORD_EXCEPTIONS, ignore article.
            # (The original used a no-op `del item` here, which only unbinds
            # the loop variable; `continue` is the intended behaviour.)
            if any(pattern in item['url'] for pattern in self.keyword_exceptions):
                print('Ignoring article due to keyword patterns:' + item['url'])
                continue
            domain = urlparse(item['url']).netloc.replace('www.', '')
            section_dict.setdefault(domain, []).append(item)
            print('Adding article: ' + item['url'] + ' to section: ' + domain)

        ############ APPEND ARTS FOR EACH DOMAIN #############
        articles = []
        for section, items in section_dict.items():
            arts = []
            for item in items:
                arts.append(
                    {
                        'title': item.get('title', 'error: title'),
                        'url': item.get('url', 'error: url'),
                        'date': item['date_added'],
                    }
                )
                # Remember the task so cleanup() can close it afterwards.
                if self.archive_downloaded and item['item_id'] not in self.to_archive:
                    self.to_archive.append(item['item_id'])
            if arts:
                articles.append((section, arts))

        if not articles:
            self.abort_recipe_processing(
                'No articles in the Todoist project account %s to download'
                % (self.todoist_project_id)
            )
        return articles

    def get_browser(self, *args, **kwargs):
        # Forward args/kwargs (the original dropped them) so calibre's
        # browser configuration still applies; keep the handle for
        # parse_index().
        self.browser = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        return self.browser

    def cleanup(self):
        """Close (archive) every downloaded task via the Todoist REST API."""
        if not self.to_archive:
            return
        for task_id in self.to_archive:
            url = f'https://api.todoist.com/rest/v2/tasks/{task_id}/close'
            req = mechanize.Request(
                url,
                headers={
                    'Authorization': f'Bearer {self.todoist_api_key}',
                    'Content-Type': 'application/json',
                },
            )
            req.get_method = lambda: 'POST'  # mechanize defaults to GET
            try:
                br = mechanize.Browser()
                response = br.open(req)
                if response.code == 204:  # Todoist returns 204 No Content on success
                    print(f'Task {task_id} correctly closed.')  # typo fix: 'corectly'
                else:
                    print(f'Error while closing task {task_id}: {response.code}')
            except Exception as e:
                # Best effort: a failed close must not abort the conversion.
                print(f'Exception while closing task {task_id}: {e}')

    # TODO: This works with EPUB, but not mobi/azw3
    # BUG: https://bugs.launchpad.net/calibre/+bug/1838486
    def postprocess_book(self, oeb, opts, log):
        oeb.metadata.add('series', self.series_name)

    def _postprocess_html(self, soup, first_fetch, job_info):
        """Promote the document <title> into a single leading <h1>.

        Any existing h1/h2 repeating the title text is cleared so the
        inserted heading is the only copy.  Guards against documents
        missing <title> or <body> (the original raised AttributeError).
        """
        title_tag = soup.find('title')
        if title_tag is None:
            return soup
        title = title_tag.text
        for tag_name in ('h1', 'h2'):
            for heading in soup.findAll(tag_name):
                if title in heading.text:
                    # clear() empties the tag in place (its return value is
                    # useless — the original pointlessly reassigned it).
                    heading.clear()
        body = soup.find('body')
        if body is not None:
            new_tag = soup.new_tag('h1')
            new_tag.append(title)
            body.insert(0, new_tag)
        return soup

    def default_cover(self, cover_file):
        """Create a generic cover for recipes that don't have a cover.

        This override adds the generation time to the cover.
        """
        try:
            from calibre.ebooks import calibre_cover

            date = strftime(self.timefmt)
            # NOTE(review): '%-H' is a glibc extension — may fail on
            # Windows/strict libc; confirm target platforms.
            time = strftime('%a %d %b %Y %-H:%M')
            img_data = calibre_cover(self.title, date, time)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            self.log.exception('Failed to generate default cover')
            return False
        return True