Input plugin for recipes

This commit is contained in:
Kovid Goyal 2009-04-27 15:41:10 -07:00
parent 996dda3ffe
commit 2da5589964
8 changed files with 108 additions and 31 deletions

View File

@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.comic.input import ComicInput
from calibre.web.feeds.input import RecipeInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.txt.output import TXTOutput
@ -296,7 +297,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -52,7 +52,7 @@ def check_command_line_options(parser, args, log):
raise SystemExit(1)
input = os.path.abspath(args[1])
if not os.access(input, os.R_OK):
if not input.endswith('.recipe') and not os.access(input, os.R_OK):
log.error('Cannot read from', input)
raise SystemExit(1)
@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber):
if rec.level < rec.HIGH:
option_recommendation_to_cli_option(add_option, rec)
option_recommendation_to_cli_option(parser.add_option,
plumber.get_option_by_name('list_recipes'))
def option_parser():
    '''
    Create the command line option parser, initialised with the
    module-level ``USAGE`` text.
    '''
    parser = OptionParser(usage=USAGE)
    return parser

View File

@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer',
OptionRecommendation(name='language',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the language.')),
OptionRecommendation(name='list_recipes',
recommended_value=False, help=_('List available recipes.')),
]
input_fmt = os.path.splitext(self.input)[1]
@ -525,6 +529,13 @@ OptionRecommendation(name='language',
self.setup_options()
if self.opts.verbose:
self.log.filter_level = self.log.DEBUG
if self.opts.list_recipes:
from calibre.web.feeds.recipes import titles
self.log('Available recipes:')
for title in sorted(titles):
self.log('\t'+title)
self.log('%d recipes available'%len(titles))
raise SystemExit(0)
# Run any preprocess plugins
from calibre.customize.ui import run_plugins_on_preprocess
@ -535,8 +546,13 @@ OptionRecommendation(name='language',
accelerators = {}
tdir = PersistentTemporaryDirectory('_plumber')
stream = self.input if self.input_fmt == 'recipe' else \
open(self.input, 'rb')
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
self.opts.lrf = True
self.oeb = self.input_plugin(stream, self.opts,
self.input_fmt, self.log,
accelerators, tdir)
if self.opts.debug_input is not None:

View File

@ -1578,15 +1578,15 @@ class OEBBook(object):
return data.decode('utf-16')
except UnicodeDecodeError:
pass
try:
return data.decode('utf-8')
except UnicodeDecodeError:
pass
if self.encoding is not None:
try:
return data.decode(self.encoding)
except UnicodeDecodeError:
pass
try:
return data.decode('utf-8')
except UnicodeDecodeError:
pass
data, _ = xml_to_unicode(data)
data = data.replace('\r\n', '\n')
data = data.replace('\r', '\n')

View File

@ -59,6 +59,7 @@ class Split(object):
self.fix_links()
def split_item(self, item):
page_breaks, page_break_ids = [], []
if self.split_on_page_breaks:
page_breaks, page_break_ids = self.find_page_breaks(item)

View File

@ -2,5 +2,6 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
class Recipe(object):
    '''
    Empty marker base class for recipes. It defines no attributes or
    behaviour of its own.
    '''

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
class RecipeInput(InputFormatPlugin):
    '''
    Input plugin for the ``recipe`` file type. Instead of parsing an
    existing document it runs a recipe (either a recipe file on disk or a
    builtin recipe looked up by title), lets it download its content and
    hands back the OPF file the download produced.
    '''

    name        = 'Recipe Input'
    author      = 'Kovid Goyal'
    description = _('Download periodical content from the internet')
    file_types  = set(['recipe'])

    # Conversion settings recommended at HIGH level whenever the input is
    # a recipe.
    recommendations = set([
        ('chapter_mark', 'none', OptionRecommendation.HIGH),
        ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ])

    # Options specific to this plugin.
    options = set([
        OptionRecommendation(name='test', recommended_value=False,
            help=_('Useful for recipe development. Forces '
                'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
        OptionRecommendation(name='username', recommended_value=None,
            help=_('Username for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='password', recommended_value=None,
            help=_('Password for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='lrf', recommended_value=False,
            help='Optimize fetching for subsequent conversion to LRF.'),
        ])

    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators, progress=lambda x, y: x):
        '''
        Run a recipe and return the path to the OPF file it generated.

        :param recipe_or_file: Path to a recipe. If it is readable, its
            contents are compiled as a recipe. Otherwise its base name
            without the extension is used as the title of a builtin recipe.
        :param opts: Conversion options. They are passed to the recipe and
            ``opts.output_profile.flow_size`` is set to 0 after the download.
        :param file_ext: Not used by this plugin.
        :param log: Log object, passed to the recipe.
        :param accelerators: Not used by this plugin.
        :param progress: Progress callable, passed to the recipe.
        :return: Absolute path of the first entry in the current working
            directory whose name ends with ``.opf``, or ``None`` if there is
            no such entry.
        :raises ValueError: If no recipe could be obtained from
            ``recipe_or_file``.
        '''
        from calibre.web.feeds.recipes import \
                get_builtin_recipe, compile_recipe

        if os.access(recipe_or_file, os.R_OK):
            # Read through a context manager so the file handle is closed
            # deterministically instead of being leaked until garbage
            # collection.
            with open(recipe_or_file, 'rb') as recipe_file:
                source = recipe_file.read()
            recipe = compile_recipe(source)
        else:
            title = os.path.basename(recipe_or_file).rpartition('.')[0]
            recipe = get_builtin_recipe(title)

        if recipe is None:
            raise ValueError('%s is not a valid recipe file or builtin recipe' %
                    recipe_or_file)

        ro = recipe(opts, log, progress)
        ro.download()

        # NOTE(review): presumably turns off size based flow splitting for
        # downloaded content -- confirm against the output profile users.
        opts.output_profile.flow_size = 0

        # The OPF is looked for in the current working directory.
        for entry in os.listdir('.'):
            if entry.endswith('.opf'):
                return os.path.abspath(entry)

        # No OPF was found; callers get None, as before.
        return None

View File

@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.lrf import entity_to_unicode
from calibre.web import Recipe
from calibre.ebooks import render_html
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
from calibre.web.fetch.simple import RecursiveFetcher
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
from calibre.utils.logging import Log
from calibre.ptempfile import PersistentTemporaryFile, \
PersistentTemporaryDirectory
class BasicNewsRecipe(object):
class BasicNewsRecipe(Recipe):
'''
Abstract base class that contains logic needed in all feed fetchers.
'''
@ -443,40 +443,34 @@ class BasicNewsRecipe(object):
'''
raise NotImplementedError
def __init__(self, options, parser, progress_reporter):
def __init__(self, options, log, progress_reporter):
'''
Initialize the recipe.
:param options: Parsed commandline options
:param parser: Command line option parser. Used to intelligently merge options.
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
'''
self.log = Log()
if options.verbose:
self.log.filter_level = self.log.DEBUG
self.log = log
if not isinstance(self.title, unicode):
self.title = unicode(self.title, 'utf-8', 'replace')
for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
setattr(self, attr, getattr(options, attr))
self.debug = options.verbose > 1
self.output_dir = os.getcwd()
self.verbose = options.verbose
self.test = options.test
self.username = options.username
self.password = options.password
self.lrf = options.lrf
self.output_dir = os.path.abspath(self.output_dir)
if options.test:
self.max_articles_per_feed = 2
self.simultaneous_downloads = min(4, self.simultaneous_downloads)
if self.debug:
self.verbose = True
self.report_progress = progress_reporter
self.username = self.password = None
#: If True optimize downloading for eventual conversion to LRF
self.lrf = False
defaults = parser.get_default_values()
for opt in options.__dict__.keys():
if getattr(options, opt) != getattr(defaults, opt, None):
setattr(self, opt, getattr(options, opt))
if isinstance(self.feeds, basestring):
self.feeds = eval(self.feeds)
if isinstance(self.feeds, basestring):
@ -493,7 +487,6 @@ class BasicNewsRecipe(object):
'--timeout', str(self.timeout),
'--max-recursions', str(self.recursions),
'--delay', str(self.delay),
'--timeout', str(self.timeout),
]
if self.encoding is not None:
web2disk_cmdline.extend(['--encoding', self.encoding])
@ -520,9 +513,6 @@ class BasicNewsRecipe(object):
self.simultaneous_downloads = 1
self.navbar = templates.NavBarTemplate()
self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
if '--base-font-size' not in self.html2lrf_options:
self.html2lrf_options.extend(['--base-font-size', '12'])
self.failed_downloads = []
self.partial_failures = []
@ -557,7 +547,7 @@ class BasicNewsRecipe(object):
return self.postprocess_html(soup, first_fetch)
def download(self, for_lrf=False):
def download(self):
'''
Download and pre-process all articles from the feeds in this recipe.
This method should be called only once on a particular Recipe instance.