Fetch news: Allow recipes to specify custom options

Available via the Advanced tab for the recipe in the Fetch news scheduler dialog or via
the --recipe-specific-option flag to ebook-convert.

Fixes #2297 (Add support for passing custom configuration to recipes)
This commit is contained in:
Kovid Goyal 2024-07-13 10:49:23 +05:30
parent f1e57a86f1
commit 52714b6fd1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 134 additions and 43 deletions

View File

@ -95,7 +95,7 @@ def option_recommendation_to_cli_option(add_option, rec):
else: else:
if isinstance(rec.recommended_value, numbers.Integral): if isinstance(rec.recommended_value, numbers.Integral):
attrs['type'] = 'int' attrs['type'] = 'int'
if isinstance(rec.recommended_value, numbers.Real): elif isinstance(rec.recommended_value, numbers.Real):
attrs['type'] = 'float' attrs['type'] = 'float'
if opt.long_switch == 'verbose': if opt.long_switch == 'verbose':
@ -121,6 +121,14 @@ def option_recommendation_to_cli_option(add_option, rec):
' dialog. Once you create the rules, you can use the "Export" button' ' dialog. Once you create the rules, you can use the "Export" button'
' to save them to a file.' ' to save them to a file.'
) )
elif opt.name == 'recipe_specific_option':
attrs['action'] = 'append'
attrs['help'] = _(
'Recipe specific options. Syntax is option_name:value. For example:'
' {example}. Can be specified multiple'
' times to set different options. To see a list of all available options'
' for a recipe, use {list}.'
).format(example='--recipe-specific-option=date:2030-11-31', list='--recipe-specific-option=list')
if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True: if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
switches = ['--disable-'+opt.long_switch] switches = ['--disable-'+opt.long_switch]
add_option(Option(*switches, **attrs)) add_option(Option(*switches, **attrs))

View File

@ -47,6 +47,8 @@ class RecipeInput(InputFormatPlugin):
OptionRecommendation(name='password', recommended_value=None, OptionRecommendation(name='password', recommended_value=None,
help=_('Password for sites that require a login to access ' help=_('Password for sites that require a login to access '
'content.')), 'content.')),
OptionRecommendation(name='recipe_specific_option',
help=_('Recipe specific options.')),
OptionRecommendation(name='dont_download_recipe', OptionRecommendation(name='dont_download_recipe',
recommended_value=False, recommended_value=False,
help=_('Do not download latest version of builtin recipes from the calibre server')), help=_('Do not download latest version of builtin recipes from the calibre server')),
@ -56,6 +58,7 @@ class RecipeInput(InputFormatPlugin):
def convert(self, recipe_or_file, opts, file_ext, log, def convert(self, recipe_or_file, opts, file_ext, log,
accelerators): accelerators):
listing_recipe_specific_options = 'list' in (opts.recipe_specific_option or ())
from calibre.web.feeds.recipes import compile_recipe from calibre.web.feeds.recipes import compile_recipe
opts.output_profile.flow_size = 0 opts.output_profile.flow_size = 0
orig_no_inline_navbars = opts.no_inline_navbars orig_no_inline_navbars = opts.no_inline_navbars
@ -80,7 +83,7 @@ class RecipeInput(InputFormatPlugin):
if rtype == 'custom': if rtype == 'custom':
self.recipe_source = get_custom_recipe(recipe_id) self.recipe_source = get_custom_recipe(recipe_id)
else: else:
self.recipe_source = get_builtin_recipe_by_id(urn, log=log, download_recipe=True) self.recipe_source = get_builtin_recipe_by_id(urn, log=log, download_recipe=not listing_recipe_specific_options)
if not self.recipe_source: if not self.recipe_source:
raise ValueError('Could not find recipe with urn: ' + urn) raise ValueError('Could not find recipe with urn: ' + urn)
if not isinstance(self.recipe_source, bytes): if not isinstance(self.recipe_source, bytes):
@ -101,7 +104,7 @@ class RecipeInput(InputFormatPlugin):
title = title.rpartition('.')[0] title = title.rpartition('.')[0]
raw = get_builtin_recipe_by_title(title, log=log, raw = get_builtin_recipe_by_title(title, log=log,
download_recipe=not opts.dont_download_recipe) download_recipe=not opts.dont_download_recipe and not listing_recipe_specific_options)
builtin = False builtin = False
try: try:
recipe = compile_recipe(raw) recipe = compile_recipe(raw)
@ -133,6 +136,20 @@ class RecipeInput(InputFormatPlugin):
disabled = getattr(recipe, 'recipe_disabled', None) disabled = getattr(recipe, 'recipe_disabled', None)
if disabled is not None: if disabled is not None:
raise RecipeDisabled(disabled) raise RecipeDisabled(disabled)
if listing_recipe_specific_options:
rso = (getattr(recipe, 'recipe_specific_options', None) or {})
if rso:
log(recipe.title, _('specific options:'))
name_maxlen = max(map(len, rso))
for name, meta in rso.items():
log(' ', name.ljust(name_maxlen), '-', meta.get('short'))
if 'long' in meta:
from textwrap import wrap
for line in wrap(meta['long'], 70 - name_maxlen + 5):
log(' '*(name_maxlen + 4), line)
else:
log(recipe.title, _('has no recipe specific options'))
raise SystemExit(0)
try: try:
ro = recipe(opts, log, self.report_progress) ro = recipe(opts, log, self.report_progress)
ro.download() ro.download()

View File

@ -18,6 +18,7 @@ from qt.core import (
QDialog, QDialog,
QDialogButtonBox, QDialogButtonBox,
QDoubleSpinBox, QDoubleSpinBox,
QFormLayout,
QFrame, QFrame,
QGridLayout, QGridLayout,
QGroupBox, QGroupBox,
@ -297,7 +298,6 @@ class SchedulerDialog(QDialog):
self.tab = QWidget() self.tab = QWidget()
self.detail_box.addTab(self.tab, _("&Schedule")) self.detail_box.addTab(self.tab, _("&Schedule"))
self.tab.v = vt = QVBoxLayout(self.tab) self.tab.v = vt = QVBoxLayout(self.tab)
vt.setContentsMargins(0, 0, 0, 0)
self.blurb = la = QLabel('blurb') self.blurb = la = QLabel('blurb')
la.setWordWrap(True), la.setOpenExternalLinks(True) la.setWordWrap(True), la.setOpenExternalLinks(True)
vt.addWidget(la) vt.addWidget(la)
@ -351,19 +351,15 @@ class SchedulerDialog(QDialog):
# Second tab (advanced settings) # Second tab (advanced settings)
self.tab2 = t2 = QWidget() self.tab2 = t2 = QWidget()
self.detail_box.addTab(self.tab2, _("&Advanced")) self.detail_box.addTab(self.tab2, _("&Advanced"))
self.tab2.g = g = QGridLayout(t2) self.tab2.g = g = QFormLayout(t2)
g.setContentsMargins(0, 0, 0, 0) g.setFieldGrowthPolicy(QFormLayout.FieldGrowthPolicy.AllNonFixedFieldsGrow)
self.add_title_tag = tt = QCheckBox(_("Add &title as tag"), t2) self.add_title_tag = tt = QCheckBox(_("Add &title as tag"), t2)
g.addWidget(tt, 0, 0, 1, 2) g.addRow(tt)
t2.la = la = QLabel(_("&Extra tags:"))
self.custom_tags = ct = QLineEdit(self) self.custom_tags = ct = QLineEdit(self)
la.setBuddy(ct) g.addRow(_("&Extra tags:"), ct)
g.addWidget(la), g.addWidget(ct, 1, 1)
t2.la2 = la = QLabel(_("&Keep at most:"))
la.setToolTip(_("Maximum number of copies (issues) of this recipe to keep. Set to 0 to keep all (disable)."))
self.keep_issues = ki = QSpinBox(t2) self.keep_issues = ki = QSpinBox(t2)
tt.toggled['bool'].connect(self.keep_issues.setEnabled) tt.toggled['bool'].connect(self.keep_issues.setEnabled)
ki.setMaximum(100000), la.setBuddy(ki) ki.setMaximum(100000)
ki.setToolTip(_( ki.setToolTip(_(
"<p>When set, this option will cause calibre to keep, at most, the specified number of issues" "<p>When set, this option will cause calibre to keep, at most, the specified number of issues"
" of this periodical. Every time a new issue is downloaded, the oldest one is deleted, if the" " of this periodical. Every time a new issue is downloaded, the oldest one is deleted, if the"
@ -371,9 +367,8 @@ class SchedulerDialog(QDialog):
" option to add the title as tag checked, above.\n<p>Also, the setting for deleting periodicals" " option to add the title as tag checked, above.\n<p>Also, the setting for deleting periodicals"
" older than a number of days, below, takes priority over this setting.")) " older than a number of days, below, takes priority over this setting."))
ki.setSpecialValueText(_("all issues")), ki.setSuffix(_(" issues")) ki.setSpecialValueText(_("all issues")), ki.setSuffix(_(" issues"))
g.addWidget(la), g.addWidget(ki, 2, 1) g.addRow(_("&Keep at most:"), ki)
si = QSpacerItem(20, 40, QSizePolicy.Policy.Minimum, QSizePolicy.Policy.Expanding) self.recipe_specific_widgets = {}
g.addItem(si, 3, 1, 1, 1)
# Bottom area # Bottom area
self.hb = h = QHBoxLayout() self.hb = h = QHBoxLayout()
@ -506,7 +501,11 @@ class SchedulerDialog(QDialog):
keep_issues = str(self.keep_issues.value()) keep_issues = str(self.keep_issues.value())
custom_tags = str(self.custom_tags.text()).strip() custom_tags = str(self.custom_tags.text()).strip()
custom_tags = [x.strip() for x in custom_tags.split(',')] custom_tags = [x.strip() for x in custom_tags.split(',')]
self.recipe_model.customize_recipe(urn, add_title_tag, custom_tags, keep_issues) from calibre.web.feeds.recipes.collection import RecipeCustomization
recipe_specific_options = None
if self.recipe_specific_widgets:
recipe_specific_options = {name: w.text().strip() for name, w in self.recipe_specific_widgets.items() if w.text().strip()}
self.recipe_model.customize_recipe(urn, RecipeCustomization(add_title_tag, custom_tags, keep_issues, recipe_specific_options))
return True return True
def initialize_detail_box(self, urn): def initialize_detail_box(self, urn):
@ -578,16 +577,30 @@ class SchedulerDialog(QDialog):
rb.setChecked(True) rb.setChecked(True)
self.schedule_stack.setCurrentIndex(sch_widget) self.schedule_stack.setCurrentIndex(sch_widget)
self.schedule_stack.currentWidget().initialize(typ, sch) self.schedule_stack.currentWidget().initialize(typ, sch)
add_title_tag, custom_tags, keep_issues = customize_info self.add_title_tag.setChecked(customize_info.add_title_tag)
self.add_title_tag.setChecked(add_title_tag) self.custom_tags.setText(', '.join(customize_info.custom_tags))
self.custom_tags.setText(', '.join(custom_tags))
self.last_downloaded.setText(_('Last downloaded:') + ' ' + ld_text) self.last_downloaded.setText(_('Last downloaded:') + ' ' + ld_text)
try: self.keep_issues.setValue(customize_info.keep_issues)
keep_issues = int(keep_issues)
except:
keep_issues = 0
self.keep_issues.setValue(keep_issues)
self.keep_issues.setEnabled(self.add_title_tag.isChecked()) self.keep_issues.setEnabled(self.add_title_tag.isChecked())
g = self.tab2.layout()
for x in self.recipe_specific_widgets.values():
g.removeRow(x)
self.recipe_specific_widgets = {}
raw = recipe.get('options')
if raw:
import json
rsom = json.loads(raw)
rso = customize_info.recipe_specific_options
for name, metadata in rsom.items():
w = QLineEdit(self)
if 'default' in metadata:
w.setPlaceholderText(_('Default if unspecified: {}').format(metadata['default']))
w.setClearButtonEnabled(True)
w.setText(str(rso.get(name, '')).strip())
w.setToolTip(str(metadata.get('long', '')))
title = '&' + str(metadata.get('short') or name).replace('&', '&&') + ':'
g.addRow(title, w)
self.recipe_specific_widgets[name] = w
class Scheduler(QObject): class Scheduler(QObject):
@ -687,15 +700,15 @@ class Scheduler(QObject):
un = pw = None un = pw = None
if account_info is not None: if account_info is not None:
un, pw = account_info un, pw = account_info
add_title_tag, custom_tags, keep_issues = customize_info
arg = { arg = {
'username': un, 'username': un,
'password': pw, 'password': pw,
'add_title_tag':add_title_tag, 'add_title_tag':customize_info.add_title_tag,
'custom_tags':custom_tags, 'custom_tags':customize_info.custom_tags,
'title':recipe.get('title',''), 'title':recipe.get('title',''),
'urn':urn, 'urn':urn,
'keep_issues':keep_issues 'keep_issues':str(customize_info.keep_issues),
'recipe_specific_options': customize_info.recipe_specific_options,
} }
self.download_queue.add(urn) self.download_queue.add(urn)
self.start_recipe_fetch.emit(arg) self.start_recipe_fetch.emit(arg)

View File

@ -320,6 +320,11 @@ def fetch_scheduled_recipe(arg): # {{{
recs.append(('username', arg['username'], OptionRecommendation.HIGH)) recs.append(('username', arg['username'], OptionRecommendation.HIGH))
if arg['password'] is not None: if arg['password'] is not None:
recs.append(('password', arg['password'], OptionRecommendation.HIGH)) recs.append(('password', arg['password'], OptionRecommendation.HIGH))
if arg.get('recipe_specific_options', None):
serialized = []
for name, val in arg['recipe_specific_options'].items():
serialized.append(f'{name}:{val}')
recs.append(('recipe_specific_option', serialized, OptionRecommendation.HIGH))
return 'gui_convert_recipe', args, _('Fetch news from %s')%arg['title'], fmt.upper(), [pt] return 'gui_convert_recipe', args, _('Fetch news from %s')%arg['title'], fmt.upper(), [pt]

View File

@ -410,6 +410,23 @@ class BasicNewsRecipe(Recipe):
#: with the URL scheme of your particular website. #: with the URL scheme of your particular website.
resolve_internal_links = False resolve_internal_links = False
#: Specify options specific to this recipe. These will be available for the user to customize
#: in the Advanced tab of the Fetch News dialog or at the ebook-convert command line. The options
#: are specified as a dictionary mapping option name to metadata about the option. For example::
#:
#: recipe_specific_options = {
#: 'edition_date': {
#: 'short': 'The issue date to download',
#: 'long': 'Specify a date in the format YYYY-mm-dd to download the issue corresponding to that date',
#: 'default': 'current',
#: }
#: }
#:
#: When the recipe is run, self.recipe_specific_options will be a dict mapping option name to the option value
#: specified by the user. An option the user did not specify takes the value given by its 'default' key; if the
#: option has no 'default' either, it is absent from the dict.
recipe_specific_options = None
#: Set to False if you do not want to use gzipped transfers. Note that some old servers flake out with gzip #: Set to False if you do not want to use gzipped transfers. Note that some old servers flake out with gzip
handle_gzip = True handle_gzip = True
@ -988,6 +1005,19 @@ class BasicNewsRecipe(Recipe):
self.failed_downloads = [] self.failed_downloads = []
self.partial_failures = [] self.partial_failures = []
self.aborted_articles = [] self.aborted_articles = []
self.recipe_specific_options_metadata = rso = self.recipe_specific_options or {}
self.recipe_specific_options = {k: rso[k]['default'] for k in rso if 'default' in rso[k]}
for x in options.recipe_specific_option:
k, sep, v = x.partition(':')
if not sep:
raise ValueError(f'{x} is not a valid recipe specific option')
if k not in rso:
raise KeyError(f'{k} is not an option supported by: {self.title}')
self.recipe_specific_options[k] = v
if self.recipe_specific_options:
log('Recipe specific options:')
for k, v in self.recipe_specific_options.items():
log(' ', f'{k} = {v}')
def _postprocess_html(self, soup, first_fetch, job_info): def _postprocess_html(self, soup, first_fetch, job_info):
if self.no_stylesheets: if self.no_stylesheets:

View File

@ -6,10 +6,12 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import calendar import calendar
import json
import os import os
import zipfile import zipfile
from datetime import timedelta from datetime import timedelta
from threading import RLock from threading import RLock
from typing import Dict, NamedTuple, Optional, Sequence
from lxml import etree from lxml import etree
from lxml.builder import ElementMaker from lxml.builder import ElementMaker
@ -68,14 +70,19 @@ def serialize_recipe(urn, recipe_class):
ns = 'no' ns = 'no'
if ns is True: if ns is True:
ns = 'yes' ns = 'yes'
options = ''
rso = getattr(recipe_class, 'recipe_specific_options', None)
if rso:
options = f' options={quoteattr(json.dumps(rso))}'
return (' <recipe id={id} title={title} author={author} language={language}' return (' <recipe id={id} title={title} author={author} language={language}'
' needs_subscription={needs_subscription} description={description}/>').format(**{ ' needs_subscription={needs_subscription} description={description}{options}/>').format(**{
'id' : quoteattr(str(urn)), 'id' : quoteattr(str(urn)),
'title' : attr('title', _('Unknown')), 'title' : attr('title', _('Unknown')),
'author' : attr('__author__', default_author), 'author' : attr('__author__', default_author),
'language' : attr('language', 'und', normalize_language), 'language' : attr('language', 'und', normalize_language),
'needs_subscription' : quoteattr(ns), 'needs_subscription' : quoteattr(ns),
'description' : attr('description', '') 'description' : attr('description', ''),
'options' : options,
}) })
@ -287,6 +294,13 @@ def get_builtin_recipe_by_id(id_, log=None, download_recipe=False):
return get_builtin_recipe(urn) return get_builtin_recipe(urn)
class RecipeCustomization(NamedTuple):
add_title_tag: bool = False
custom_tags: Sequence[str] = ()
keep_issues: int = 0
recipe_specific_options: Optional[Dict[str, str]] = None
class SchedulerConfig: class SchedulerConfig:
def __init__(self): def __init__(self):
@ -345,16 +359,17 @@ class SchedulerConfig:
self.write_scheduler_file() self.write_scheduler_file()
# 'keep_issues' argument for recipe-specific number of copies to keep # 'keep_issues' argument for recipe-specific number of copies to keep
def customize_recipe(self, urn, add_title_tag, custom_tags, keep_issues): def customize_recipe(self, urn, val: RecipeCustomization):
with self.lock: with self.lock:
for x in list(self.iter_customization()): for x in list(self.iter_customization()):
if x.get('id') == urn: if x.get('id') == urn:
self.root.remove(x) self.root.remove(x)
cs = E.recipe_customization({ cs = E.recipe_customization({
'keep_issues' : keep_issues, 'keep_issues' : str(val.keep_issues),
'id' : urn, 'id' : urn,
'add_title_tag' : 'yes' if add_title_tag else 'no', 'add_title_tag' : 'yes' if val.add_title_tag else 'no',
'custom_tags' : ','.join(custom_tags), 'custom_tags' : ','.join(val.custom_tags),
'recipe_specific_options': json.dumps(val.recipe_specific_options or {}),
}) })
self.root.append(cs) self.root.append(cs)
self.write_scheduler_file() self.write_scheduler_file()
@ -525,16 +540,17 @@ class SchedulerConfig:
def get_customize_info(self, urn): def get_customize_info(self, urn):
keep_issues = 0 keep_issues = 0
add_title_tag = True add_title_tag = True
custom_tags = [] custom_tags = ()
recipe_specific_options = {}
with self.lock: with self.lock:
for x in self.iter_customization(): for x in self.iter_customization():
if x.get('id', False) == urn: if x.get('id', False) == urn:
keep_issues = x.get('keep_issues', '0') keep_issues = int(x.get('keep_issues', '0'))
add_title_tag = x.get('add_title_tag', 'yes') == 'yes' add_title_tag = x.get('add_title_tag', 'yes') == 'yes'
custom_tags = [i.strip() for i in x.get('custom_tags', custom_tags = tuple(i.strip() for i in x.get('custom_tags', '').split(','))
'').split(',')] recipe_specific_options = json.loads(x.get('recipe_specific_options', '{}'))
break break
return add_title_tag, custom_tags, keep_issues return RecipeCustomization(add_title_tag, custom_tags, keep_issues, recipe_specific_options)
def get_schedule_info(self, urn): def get_schedule_info(self, urn):
with self.lock: with self.lock:

View File

@ -309,6 +309,9 @@ class RecipeModel(QAbstractItemModel, AdaptSQP):
def get_customize_info(self, urn): def get_customize_info(self, urn):
return self.scheduler_config.get_customize_info(urn) return self.scheduler_config.get_customize_info(urn)
def get_recipe_specific_option_metadata(self, urn):
return self.scheduler_config.get_recipe_specific_option_metadata(urn)
def get_matches(self, location, query): def get_matches(self, location, query):
query = query.strip().lower() query = query.strip().lower()
if not query: if not query:
@ -424,9 +427,8 @@ class RecipeModel(QAbstractItemModel, AdaptSQP):
self.scheduler_config.schedule_recipe(self.recipe_from_urn(urn), self.scheduler_config.schedule_recipe(self.recipe_from_urn(urn),
sched_type, schedule) sched_type, schedule)
def customize_recipe(self, urn, add_title_tag, custom_tags, keep_issues): def customize_recipe(self, urn, val):
self.scheduler_config.customize_recipe(urn, add_title_tag, self.scheduler_config.customize_recipe(urn, val)
custom_tags, keep_issues)
def get_to_be_downloaded_recipes(self): def get_to_be_downloaded_recipes(self):
ans = self.scheduler_config.get_to_be_downloaded_recipes() ans = self.scheduler_config.get_to_be_downloaded_recipes()