mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Allow recipes to specify overrides for conversion options
This commit is contained in:
parent
9b170e6c95
commit
2e0ad5d1e0
@ -54,6 +54,8 @@ Customizing e-book download
|
|||||||
|
|
||||||
.. automember:: BasicNewsRecipe.timefmt
|
.. automember:: BasicNewsRecipe.timefmt
|
||||||
|
|
||||||
|
.. automember:: BasicNewsRecipe.conversion_options
|
||||||
|
|
||||||
.. automember:: BasicNewsRecipe.feeds
|
.. automember:: BasicNewsRecipe.feeds
|
||||||
|
|
||||||
.. automember:: BasicNewsRecipe.no_stylesheets
|
.. automember:: BasicNewsRecipe.no_stylesheets
|
||||||
|
@ -57,6 +57,8 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
|
|
||||||
ro = recipe(opts, log, self.report_progress)
|
ro = recipe(opts, log, self.report_progress)
|
||||||
ro.download()
|
ro.download()
|
||||||
|
for key, val in recipe.conversion_options.items():
|
||||||
|
setattr(opts, key, val)
|
||||||
|
|
||||||
opts.output_profile.flow_size = 0
|
opts.output_profile.flow_size = 0
|
||||||
|
|
||||||
|
@ -156,13 +156,16 @@ class BasicNewsRecipe(Recipe):
|
|||||||
#: :attr:`BasicNewsRecipe.filter_regexps` should be defined.
|
#: :attr:`BasicNewsRecipe.filter_regexps` should be defined.
|
||||||
filter_regexps = []
|
filter_regexps = []
|
||||||
|
|
||||||
#: List of options to pass to html2lrf, to customize generation of LRF ebooks.
|
#: Recipe specific options to control the conversion of the downloaded
|
||||||
html2lrf_options = []
|
#: content into an e-book. These will override any user or plugin specified
|
||||||
|
#: values, so only use them if absolutely necessary. For example:
|
||||||
#: Options to pass to html2epub to customize generation of EPUB ebooks.
|
#: conversion_options = {
|
||||||
html2epub_options = ''
|
#: 'base_font_size' : 16,
|
||||||
#: Options to pass to oeb2mobi to customize generation of MOBI ebooks.
|
#: 'tags' : 'mytag1,mytag2',
|
||||||
oeb2mobi_options = ''
|
#: 'title' : 'My Title',
|
||||||
|
#: 'linearize_tables' : True,
|
||||||
|
#: }
|
||||||
|
conversion_options = {}
|
||||||
|
|
||||||
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
|
#: List of tags to be removed. Specified tags are removed from downloaded HTML.
|
||||||
#: A tag is specified as a dictionary of the form::
|
#: A tag is specified as a dictionary of the form::
|
||||||
|
@ -1,76 +1,76 @@
|
|||||||
##
|
##
|
||||||
## web2lrf profile to download articles from Barrons.com
|
## web2lrf profile to download articles from Barrons.com
|
||||||
## can download subscriber-only content if username and
|
## can download subscriber-only content if username and
|
||||||
## password are supplied.
|
## password are supplied.
|
||||||
##
|
##
|
||||||
'''
|
'''
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Barrons(BasicNewsRecipe):
|
class Barrons(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Barron\'s'
|
title = 'Barron\'s'
|
||||||
max_articles_per_feed = 50
|
max_articles_per_feed = 50
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
language = _('English')
|
language = _('English')
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
description = 'Weekly publication for investors from the publisher of the Wall Street Journal'
|
description = 'Weekly publication for investors from the publisher of the Wall Street Journal'
|
||||||
timefmt = ' [%a, %b %d, %Y]'
|
timefmt = ' [%a, %b %d, %Y]'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
no_stylesheets = False
|
no_stylesheets = False
|
||||||
match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
|
match_regexps = ['http://online.barrons.com/.*?html\?mod=.*?|file:.*']
|
||||||
html2lrf_options = [('--ignore-tables'),('--base-font-size=10')]
|
conversion_options = {'linearize_tables': True}
|
||||||
##delay = 1
|
##delay = 1
|
||||||
|
|
||||||
## Don't grab articles more than 7 days old
|
## Don't grab articles more than 7 days old
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
|
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||||
[
|
[
|
||||||
## Remove anything before the body of the article.
|
## Remove anything before the body of the article.
|
||||||
(r'<body.*?<!-- article start', lambda match: '<body><!-- article start'),
|
(r'<body.*?<!-- article start', lambda match: '<body><!-- article start'),
|
||||||
|
|
||||||
## Remove any insets from the body of the article.
|
|
||||||
(r'<div id="inset".*?</div>.?</div>.?<p', lambda match : '<p'),
|
|
||||||
|
|
||||||
## Remove any reprint info from the body of the article.
|
## Remove any insets from the body of the article.
|
||||||
(r'<hr size.*?<p', lambda match : '<p'),
|
(r'<div id="inset".*?</div>.?</div>.?<p', lambda match : '<p'),
|
||||||
|
|
||||||
## Remove anything after the end of the article.
|
## Remove any reprint info from the body of the article.
|
||||||
(r'<!-- article end.*?</body>', lambda match : '</body>'),
|
(r'<hr size.*?<p', lambda match : '<p'),
|
||||||
]
|
|
||||||
]
|
## Remove anything after the end of the article.
|
||||||
|
(r'<!-- article end.*?</body>', lambda match : '</body>'),
|
||||||
def get_browser(self):
|
]
|
||||||
br = BasicNewsRecipe.get_browser()
|
]
|
||||||
if self.username is not None and self.password is not None:
|
|
||||||
br.open('http://commerce.barrons.com/auth/login')
|
def get_browser(self):
|
||||||
br.select_form(name='login_form')
|
br = BasicNewsRecipe.get_browser()
|
||||||
br['user'] = self.username
|
if self.username is not None and self.password is not None:
|
||||||
br['password'] = self.password
|
br.open('http://commerce.barrons.com/auth/login')
|
||||||
br.submit()
|
br.select_form(name='login_form')
|
||||||
return br
|
br['user'] = self.username
|
||||||
|
br['password'] = self.password
|
||||||
## Use the print version of a page when available.
|
br.submit()
|
||||||
|
return br
|
||||||
def print_version(self, url):
|
|
||||||
return url.replace('/article/', '/article_print/')
|
## Use the print version of a page when available.
|
||||||
|
|
||||||
## Comment out the feeds you don't want retrieved.
|
def print_version(self, url):
|
||||||
## Because these feeds are sorted alphabetically when converted to LRF, you may want to number them to put them in the order you desire
|
return url.replace('/article/', '/article_print/')
|
||||||
|
|
||||||
def get_feeds(self):
|
## Comment out the feeds you don't want retrieved.
|
||||||
return [
|
## Because these feeds are sorted alphabetically when converted to LRF, you may want to number them to put them in the order you desire
|
||||||
('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
|
|
||||||
('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
|
def get_feeds(self):
|
||||||
('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
|
return [
|
||||||
('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
|
('This Week\'s Magazine', 'http://online.barrons.com/xml/rss/3_7510.xml'),
|
||||||
('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
|
('Online Exclusives', 'http://online.barrons.com/xml/rss/3_7515.xml'),
|
||||||
('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
|
('Companies', 'http://online.barrons.com/xml/rss/3_7516.xml'),
|
||||||
|
('Markets', 'http://online.barrons.com/xml/rss/3_7517.xml'),
|
||||||
|
('Technology', 'http://online.barrons.com/xml/rss/3_7518.xml'),
|
||||||
|
('Funds/Q&A', 'http://online.barrons.com/xml/rss/3_7519.xml'),
|
||||||
]
|
]
|
||||||
|
|
||||||
## Logout of website
|
## Logout of website
|
||||||
|
@ -13,8 +13,7 @@ class Winsupersite(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
html2lrf_options = ['--ignore-tables']
|
conversion_options = {'linearize_tables' : True}
|
||||||
html2epub_options = 'linearize_tables = True'
|
|
||||||
remove_tags_before = dict(name='h1')
|
remove_tags_before = dict(name='h1')
|
||||||
preprocess_regexps = [
|
preprocess_regexps = [
|
||||||
(re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),
|
(re.compile(r'<p>--Paul Thurrott.*</body>', re.DOTALL|re.IGNORECASE),
|
||||||
@ -24,5 +23,5 @@ class Winsupersite(BasicNewsRecipe):
|
|||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
br.open('http://www.winsupersite.com')
|
br.open('http://www.winsupersite.com')
|
||||||
return br
|
return br
|
||||||
|
|
||||||
feeds = [(u'Supersite for Windows', u'http://www.winsupersite.com/supersite.xml')]
|
feeds = [(u'Supersite for Windows', u'http://www.winsupersite.com/supersite.xml')]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user