mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-10-19 21:10:30 -04:00
302 lines
10 KiB
Python
302 lines
10 KiB
Python
#!/usr/bin/env python
|
|
# vim:ft=python tabstop=8 expandtab shiftwidth=4 softtabstop=4
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2025, ARG'
|
|
|
|
|
|
import json
|
|
import re
|
|
from datetime import datetime
|
|
from time import strftime
|
|
from urllib.parse import urlparse
|
|
|
|
import mechanize
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
__version__ = '0.0.3'
|
|
|
|
'''
|
|
0.0.3: Parameters in recipe_specific_options
|
|
0.0.2: Calibre footer with the source URL. QR points to the article URL.
|
|
0.0.1: First working version
|
|
|
|
# Calibre parameters
|
|
|
|
Pass them on the command line as in this example::
|
|
ebook-convert Todoist.recipe output.epub --recipe-specific-option=ARCHIVE_DOWNLOADED:False \
|
|
--recipe-specific-option=TODOIST_PROJECT_ID:YOUR_PROJECT_ID \
|
|
--recipe-specific-option=TODOIST_API_KEY:YOUR_API_KEY \
|
|
--recipe-specific-option=URL_KEYWORD_EXCEPTIONS:jotdown,elpais.com/gastronomia
|
|
|
|
|
|
**URL_KEYWORD_EXCEPTIONS** (list of keywords; if the URL of an article contains any of them, the plugin will ignore that article)
|
|
|
|
**ARCHIVE_DOWNLOADED** (True or False) do you want to archive articles after fetching
|
|
|
|
**TODOIST_PROJECT_ID** (string) your Todoist project ID, you can find it in the URL of your Todoist project, e.g. https://todoist.com/app/project/1234567890abcdef12345678
|
|
|
|
**TODOIST_API_KEY** (string) your Todoist API key, you can find it in your Todoist account settings under "Integrations" or "API tokens"
|
|
'''
|
|
# CONFIGURATION ###########################################################
|
|
|
|
import ast
|
|
|
|
|
|
# Aux function: interpret a configuration value as a boolean.
def parse_env_bool(val):
    """Return True when *val* reads as 'true', '1' or 'yes' (case-insensitive)."""
    normalized = str(val).strip().lower()
    return normalized in ('true', '1', 'yes')
|
|
|
|
|
|
# Aux function: parse a configuration value into a list of strings.
def parse_env_list(val):
    """Parse *val* into a list of keyword strings.

    Accepts either a Python-literal list (e.g. ``'["a", "b"]'``) or the
    comma-separated form documented in the module docstring
    (e.g. ``'jotdown,elpais.com/gastronomia'``).

    Bug fix: the previous version only handled Python literals, so the
    documented comma-separated syntax silently produced ``[]`` and no
    URL was ever filtered.
    """
    if val is None:
        return []
    # Already a container (e.g. the declared default []) — normalize to list.
    if isinstance(val, (list, tuple, set)):
        return [str(x) for x in val]
    text = str(val).strip()
    if not text:
        return []
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple, set)):
        return [str(x) for x in parsed]
    # Fall back to the documented comma-separated form.
    return [part.strip() for part in text.split(',') if part.strip()]
|
|
|
|
|
|
#############################################################################
|
|
|
|
|
|
class Todoist2ebook(BasicNewsRecipe):
    '''
    Build an ebook from the articles saved as tasks in a Todoist project.

    Each task is expected to carry a markdown link ``[title](url)`` in its
    content; articles are grouped into sections by the link's domain and,
    when ARCHIVE_DOWNLOADED is set, the tasks are closed (archived) after
    the download finishes.
    '''

    recipe_specific_options = {
        'ARCHIVE_DOWNLOADED': {
            'short': 'Mark as read',
            'long': 'Mark as read',
            'default': False,
        },
        'TODOIST_PROJECT_ID': {'short': 'Proyect ID', 'long': 'Proyect ID'},
        'TODOIST_API_KEY': {'short': 'API key', 'long': 'API KEY'},
        'URL_KEYWORD_EXCEPTIONS': {
            'short': 'URL keyword exceptions',
            'long': 'List of keywords to ignore articles, e.g. ["example.com", "ignoreme.com"]',
            'default': [],
        },
    }

    __author__ = 'ARG'
    description = 'prueba'
    publisher = 'Todoist.com'
    category = 'info, custom, Todoist'

    # User-configurable settings -----------------------------------------------
    series_name = 'Todoist'
    publication_type = 'magazine'
    title = 'Todoist'
    # timefmt = '' # uncomment to remove date from the filenames, if commented then you will get something like `Todoist [Wed, 13 May 2020]`
    masthead_url = 'https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-logo.png'
    # will make square cover; this will replace text and cover of the default
    cover_url = 'https://raw.githubusercontent.com/rga5321/todoist2ebook/master/img/todoist-cover.png'
    # --------------------------------------------------------------------------

    # Inherited developer settings
    auto_cleanup = True
    no_stylesheets = True
    use_embedded_content = False

    # Custom developer settings
    to_archive = []  # task ids queued for closing in cleanup()
    simultaneous_downloads = 10

    extra_css = '.calibre_navbar { visibility: hidden; }'

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Per-instance archive queue: shadows the class-level list so two
        # recipe instances never share (and double-close) the same queue.
        self.to_archive = []

        # Optional configuration parameters. Use .get() with a default so a
        # missing key cannot raise KeyError (the original bracket access did
        # when the option was not supplied at runtime).
        self.archive_downloaded = parse_env_bool(
            self.recipe_specific_options.get('ARCHIVE_DOWNLOADED', False)
        )
        self.keyword_exceptions = parse_env_list(
            self.recipe_specific_options.get('URL_KEYWORD_EXCEPTIONS', [])
        )

        # Mandatory configuration parameters: abort with a clear message.
        project_id = self.recipe_specific_options.get('TODOIST_PROJECT_ID')
        if project_id:
            self.todoist_project_id = project_id
        else:
            self.abort_recipe_processing(
                'TODOIST_PROJECT_ID mandatory parameter missing'
            )

        api_key = self.recipe_specific_options.get('TODOIST_API_KEY')
        if api_key:
            self.todoist_api_key = api_key
        else:
            self.abort_recipe_processing('TODOIST_API_KEY mandatory parameter missing')

    def parse_index(self):
        """Query the Todoist REST API and return calibre's (section, articles) list.

        Sections are the article domains; tasks without a recognizable
        markdown link are skipped silently.
        """
        articles = []
        section_dict = {}  # dictionary with the domains and its articles.

        url = f'https://api.todoist.com/rest/v2/tasks?project_id={self.todoist_project_id}'
        headers = {'Authorization': f'Bearer {self.todoist_api_key}'}
        request = mechanize.Request(url, headers=headers)

        response = self.browser.open(request)
        if response.code != 200:
            raise Exception('No se pudieron recuperar las tareas de Todoist')
        tasks = json.loads(response.read().decode('utf-8'))

        # Extract the markdown link [title](url) from each task's content.
        url_regex = re.compile(r'\[([^\]]+)\]\(\s*(https?://[^\s\)]+)\s*\)')
        articles_todoist = []
        for task in tasks:
            match = url_regex.search(task['content'])
            if not match:
                continue
            title = match.group(1).strip()
            link = match.group(2).strip()
            articles_todoist.append(
                {
                    'title': title or link,
                    'url': link,
                    # created_at presumably ISO-8601 from the API; fall back to now
                    'date_added': task.get('created_at', datetime.now().isoformat()),
                    'item_id': task['id'],
                }
            )

        if not articles_todoist:
            self.abort_recipe_processing(
                'No unread articles in the Todoist project "{}"'.format(
                    self.todoist_project_id
                )
            )

        for item in articles_todoist:
            # If the URL contains any URL_KEYWORD_EXCEPTIONS, ignore article.
            # (The original branch ended in a no-op `del item`; a guard
            # `continue` expresses the intent directly.)
            if any(pattern in item['url'] for pattern in self.keyword_exceptions):
                print('Ignoring article due to keyword patterns:' + item['url'])
                continue

            # Extract domain from the URL and add the article under it.
            domain = urlparse(item['url']).netloc.replace('www.', '')
            section_dict.setdefault(domain, []).append(item)
            print('Adding article: ' + item['url'] + ' to section: ' + domain)

        # Build the calibre feed structure, one section per domain.
        for section, items in section_dict.items():
            arts = []
            for item in items:
                arts.append(
                    {
                        'title': item.get('title', 'error: title'),
                        'url': item.get('url', 'error: url'),
                        'date': item['date_added'],
                    }
                )
                if (
                    self.archive_downloaded
                    and item['item_id'] not in self.to_archive
                ):
                    self.to_archive.append(item['item_id'])

            if arts:
                articles.append((section, arts))

        if not articles:
            self.abort_recipe_processing(
                'No articles in the Todoist project account %s to download'
                % (self.todoist_project_id)
            )
        return articles

    def get_browser(self, *args, **kwargs):
        # Cache the browser so parse_index can reuse it for the API request.
        # Forward *args/**kwargs (the original dropped them).
        self.browser = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        return self.browser

    def cleanup(self):
        """Close (archive) every downloaded task via the Todoist API."""
        if not self.to_archive:
            return

        br = mechanize.Browser()  # hoisted: one browser for all requests
        for task_id in self.to_archive:
            url = f'https://api.todoist.com/rest/v2/tasks/{task_id}/close'
            req = mechanize.Request(
                url,
                headers={
                    'Authorization': f'Bearer {self.todoist_api_key}',
                    'Content-Type': 'application/json',
                },
            )
            req.get_method = lambda: 'POST'  # the close endpoint requires POST

            try:
                response = br.open(req)
                if response.code == 204:
                    print(f'Task {task_id} corectly closed.')
                else:
                    print(f'Error while closing task {task_id}: {response.code}')
            except Exception as e:
                # Best-effort: a failed close must not break the conversion.
                print(f'Exception while closing task {task_id}: {e}')

    # TODO: This works with EPUB, but not mobi/azw3
    # BUG: https://bugs.launchpad.net/calibre/+bug/1838486
    def postprocess_book(self, oeb, opts, log):
        oeb.metadata.add('series', self.series_name)

    def _postprocess_html(self, soup, first_fetch, job_info):
        """Deduplicate the page title and insert it as a single leading <h1>."""
        # Guard: some pages have no <title>; the original crashed here.
        title_tag = soup.find('title')
        title = title_tag.text if title_tag is not None else ''

        if title:
            # Clear headers that repeat the title so only our <h1> shows it.
            for h1 in soup.findAll('h1'):
                if title in h1.text:
                    h1.clear()
            for h2 in soup.findAll('h2'):
                if title in h2.text:
                    h2.clear()

        body = soup.find('body')
        if body is not None:  # guard: fragment pages may lack <body>
            new_tag = soup.new_tag('h1')
            new_tag.append(title)
            body.insert(0, new_tag)

        return soup

    def default_cover(self, cover_file):
        """
        Create a generic cover for recipes that don't have a cover

        This override adds time to the cover
        """
        try:
            from calibre.ebooks import calibre_cover

            title = self.title
            date = strftime(self.timefmt)
            # NOTE(review): '%-H' (unpadded hour) is a glibc extension and may
            # fail on other platforms; the except below would then hide it.
            time = strftime('%a %d %b %Y %-H:%M')
            img_data = calibre_cover(title, date, time)
            cover_file.write(img_data)
            cover_file.flush()
        except Exception:
            self.log.exception('Failed to generate default cover')
            return False
        return True