Allow recipes to specify overrides for conversion options

This commit is contained in:
Kovid Goyal 2009-05-13 19:20:47 -07:00
parent 9b170e6c95
commit 2e0ad5d1e0
5 changed files with 74 additions and 68 deletions

View File

@ -54,6 +54,8 @@ Customizing e-book download
.. automember:: BasicNewsRecipe.timefmt
.. automember:: BasicNewsRecipe.conversion_options
.. automember:: BasicNewsRecipe.feeds
.. automember:: BasicNewsRecipe.no_stylesheets

View File

@ -57,6 +57,8 @@ class RecipeInput(InputFormatPlugin):
ro = recipe(opts, log, self.report_progress)
ro.download()
for key, val in recipe.conversion_options.items():
setattr(opts, key, val)
opts.output_profile.flow_size = 0

View File

@ -156,13 +156,16 @@ class BasicNewsRecipe(Recipe):
#: :attr:`BasicNewsRecipe.filter_regexps` should be defined.
filter_regexps = []
#: List of options to pass to html2lrf, to customize generation of LRF ebooks.
html2lrf_options = []
#: Options to pass to html2epub to customize generation of EPUB ebooks.
html2epub_options = ''
#: Options to pass to oeb2mobi to customize generation of MOBI ebooks.
oeb2mobi_options = ''
#: Recipe-specific options to control the conversion of the downloaded
#: content into an e-book. These will override any user- or plugin-specified
#: values, so only use if absolutely necessary. For example::
#:
#:   conversion_options = {
#:     'base_font_size'   : 16,
#:     'tags'             : 'mytag1,mytag2',
#:     'title'            : 'My Title',
#:     'linearize_tables' : True,
#:   }
conversion_options = {}
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
#: A tag is specified as a dictionary of the form::

View File

@ -22,7 +22,7 @@ class Barrons(BasicNewsRecipe):
use_embedded_content = False
no_stylesheets = False
match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
html2lrf_options = [('--ignore-tables'),('--base-font-size=10')]
conversion_options = {'linearize_tables': True}
##delay = 1
## Don't grab articles more than 7 days old

View File

@ -13,8 +13,7 @@ class Winsupersite(BasicNewsRecipe):
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
html2lrf_options = ['--ignore-tables']
html2epub_options = 'linearize_tables = True'
conversion_options = {'linearize_tables' : True}
remove_tags_before = dict(name='h1')
preprocess_regexps = [
(re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),