mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Input plugin for recipes
This commit is contained in:
parent
996dda3ffe
commit
2da5589964
@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput
|
||||
from calibre.ebooks.rtf.input import RTFInput
|
||||
from calibre.ebooks.html.input import HTMLInput
|
||||
from calibre.ebooks.comic.input import ComicInput
|
||||
from calibre.web.feeds.input import RecipeInput
|
||||
from calibre.ebooks.oeb.output import OEBOutput
|
||||
from calibre.ebooks.epub.output import EPUBOutput
|
||||
from calibre.ebooks.txt.output import TXTOutput
|
||||
@ -296,7 +297,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
|
||||
|
||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
||||
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
|
||||
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
x.__name__.endswith('MetadataReader')]
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||
|
@ -52,7 +52,7 @@ def check_command_line_options(parser, args, log):
|
||||
raise SystemExit(1)
|
||||
|
||||
input = os.path.abspath(args[1])
|
||||
if not os.access(input, os.R_OK):
|
||||
if not input.endswith('.recipe') and not os.access(input, os.R_OK):
|
||||
log.error('Cannot read from', input)
|
||||
raise SystemExit(1)
|
||||
|
||||
@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber):
|
||||
if rec.level < rec.HIGH:
|
||||
option_recommendation_to_cli_option(add_option, rec)
|
||||
|
||||
option_recommendation_to_cli_option(parser.add_option,
|
||||
plumber.get_option_by_name('list_recipes'))
|
||||
|
||||
def option_parser():
|
||||
return OptionParser(usage=USAGE)
|
||||
|
||||
|
@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer',
|
||||
OptionRecommendation(name='language',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the language.')),
|
||||
|
||||
OptionRecommendation(name='list_recipes',
|
||||
recommended_value=False, help=_('List available recipes.')),
|
||||
|
||||
]
|
||||
|
||||
input_fmt = os.path.splitext(self.input)[1]
|
||||
@ -525,6 +529,13 @@ OptionRecommendation(name='language',
|
||||
self.setup_options()
|
||||
if self.opts.verbose:
|
||||
self.log.filter_level = self.log.DEBUG
|
||||
if self.opts.list_recipes:
|
||||
from calibre.web.feeds.recipes import titles
|
||||
self.log('Available recipes:')
|
||||
for title in sorted(titles):
|
||||
self.log('\t'+title)
|
||||
self.log('%d recipes available'%len(titles))
|
||||
raise SystemExit(0)
|
||||
|
||||
# Run any preprocess plugins
|
||||
from calibre.customize.ui import run_plugins_on_preprocess
|
||||
@ -535,8 +546,13 @@ OptionRecommendation(name='language',
|
||||
accelerators = {}
|
||||
|
||||
tdir = PersistentTemporaryDirectory('_plumber')
|
||||
stream = self.input if self.input_fmt == 'recipe' else \
|
||||
open(self.input, 'rb')
|
||||
|
||||
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
||||
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
|
||||
self.opts.lrf = True
|
||||
|
||||
self.oeb = self.input_plugin(stream, self.opts,
|
||||
self.input_fmt, self.log,
|
||||
accelerators, tdir)
|
||||
if self.opts.debug_input is not None:
|
||||
|
@ -1578,15 +1578,15 @@ class OEBBook(object):
|
||||
return data.decode('utf-16')
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
try:
|
||||
return data.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
if self.encoding is not None:
|
||||
try:
|
||||
return data.decode(self.encoding)
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
try:
|
||||
return data.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
data, _ = xml_to_unicode(data)
|
||||
data = data.replace('\r\n', '\n')
|
||||
data = data.replace('\r', '\n')
|
||||
|
@ -59,6 +59,7 @@ class Split(object):
|
||||
self.fix_links()
|
||||
|
||||
def split_item(self, item):
|
||||
page_breaks, page_break_ids = [], []
|
||||
if self.split_on_page_breaks:
|
||||
page_breaks, page_break_ids = self.find_page_breaks(item)
|
||||
|
||||
|
@ -2,5 +2,6 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
||||
class Recipe(object):
|
||||
pass
|
||||
|
||||
|
65
src/calibre/web/feeds/input.py
Normal file
65
src/calibre/web/feeds/input.py
Normal file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os
|
||||
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
|
||||
class RecipeInput(InputFormatPlugin):
    '''
    Input plugin that treats a ``.recipe`` path (or the name of a builtin
    recipe) as conversion input: the recipe is instantiated, its content is
    downloaded, and the path of the generated OPF file is handed back.
    '''

    name        = 'Recipe Input'
    author      = 'Kovid Goyal'
    description = _('Download periodical content from the internet')
    file_types  = set(['recipe'])

    # Values forced at HIGH level whenever this plugin is the input plugin.
    recommendations = set([
        ('chapter_mark', 'none', OptionRecommendation.HIGH),
        ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ])

    # Options contributed by this plugin.
    options = set([
        OptionRecommendation(name='test', recommended_value=False,
            help=_('Useful for recipe development. Forces '
                'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
        OptionRecommendation(name='username', recommended_value=None,
            help=_('Username for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='password', recommended_value=None,
            help=_('Password for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='lrf', recommended_value=False,
            help='Optimize fetching for subsequent conversion to LRF.'),
        ])

    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators, progress=lambda x, y: x):
        '''
        Run a recipe and return the path to the OPF file it produced.

        :param recipe_or_file: Path to a readable recipe file. If the path is
            not readable, its basename with the extension stripped is looked
            up as the title of a builtin recipe instead.
        :param opts: Conversion options; passed to the recipe constructor.
        :param file_ext: Unused by this method.
        :param log: Log object; passed to the recipe constructor.
        :param accelerators: Unused by this method.
        :param progress: Progress callable; passed to the recipe constructor.
        :return: Absolute path of the first ``.opf`` file found in the current
            working directory after the download, or ``None`` if there is
            none.
        :raises ValueError: If no recipe could be obtained from
            ``recipe_or_file``.
        '''
        from calibre.web.feeds.recipes import \
            get_builtin_recipe, compile_recipe

        if os.access(recipe_or_file, os.R_OK):
            # Read via a context manager so the handle is closed promptly
            # instead of being left for the garbage collector.
            with open(recipe_or_file, 'rb') as recipe_file:
                recipe_source = recipe_file.read()
            recipe = compile_recipe(recipe_source)
        else:
            title = os.path.basename(recipe_or_file).rpartition('.')[0]
            recipe = get_builtin_recipe(title)

        if recipe is None:
            raise ValueError('%s is not a valid recipe file or builtin recipe' %
                    recipe_or_file)

        ro = recipe(opts, log, progress)
        ro.download()

        # NOTE(review): presumably turns off flow-size based splitting for
        # downloaded content -- confirm against the output profile consumers.
        opts.output_profile.flow_size = 0

        # The OPF is searched for in the current working directory.
        for f in os.listdir('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)

        # No OPF was found; callers receive None.
        return None
|
||||
|
||||
|
||||
|
||||
|
@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.lrf import entity_to_unicode
|
||||
from calibre.web import Recipe
|
||||
from calibre.ebooks import render_html
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
|
||||
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
|
||||
from calibre.web.fetch.simple import RecursiveFetcher
|
||||
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.ptempfile import PersistentTemporaryFile, \
|
||||
PersistentTemporaryDirectory
|
||||
|
||||
|
||||
class BasicNewsRecipe(object):
|
||||
class BasicNewsRecipe(Recipe):
|
||||
'''
|
||||
Abstract base class that contains logic needed in all feed fetchers.
|
||||
'''
|
||||
@ -443,40 +443,34 @@ class BasicNewsRecipe(object):
|
||||
'''
|
||||
raise NotImplementedError
|
||||
|
||||
def __init__(self, options, parser, progress_reporter):
|
||||
def __init__(self, options, log, progress_reporter):
|
||||
'''
|
||||
Initialize the recipe.
|
||||
:param options: Parsed commandline options
|
||||
:param parser: Command line option parser. Used to intelligently merge options.
|
||||
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
|
||||
'''
|
||||
self.log = Log()
|
||||
if options.verbose:
|
||||
self.log.filter_level = self.log.DEBUG
|
||||
self.log = log
|
||||
if not isinstance(self.title, unicode):
|
||||
self.title = unicode(self.title, 'utf-8', 'replace')
|
||||
|
||||
for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
|
||||
setattr(self, attr, getattr(options, attr))
|
||||
self.debug = options.verbose > 1
|
||||
self.output_dir = os.getcwd()
|
||||
self.verbose = options.verbose
|
||||
self.test = options.test
|
||||
self.username = options.username
|
||||
self.password = options.password
|
||||
self.lrf = options.lrf
|
||||
|
||||
self.output_dir = os.path.abspath(self.output_dir)
|
||||
if options.test:
|
||||
self.max_articles_per_feed = 2
|
||||
self.simultaneous_downloads = min(4, self.simultaneous_downloads)
|
||||
|
||||
|
||||
if self.debug:
|
||||
self.verbose = True
|
||||
self.report_progress = progress_reporter
|
||||
|
||||
self.username = self.password = None
|
||||
#: If True optimize downloading for eventual conversion to LRF
|
||||
self.lrf = False
|
||||
defaults = parser.get_default_values()
|
||||
|
||||
for opt in options.__dict__.keys():
|
||||
if getattr(options, opt) != getattr(defaults, opt, None):
|
||||
setattr(self, opt, getattr(options, opt))
|
||||
|
||||
if isinstance(self.feeds, basestring):
|
||||
self.feeds = eval(self.feeds)
|
||||
if isinstance(self.feeds, basestring):
|
||||
@ -493,7 +487,6 @@ class BasicNewsRecipe(object):
|
||||
'--timeout', str(self.timeout),
|
||||
'--max-recursions', str(self.recursions),
|
||||
'--delay', str(self.delay),
|
||||
'--timeout', str(self.timeout),
|
||||
]
|
||||
if self.encoding is not None:
|
||||
web2disk_cmdline.extend(['--encoding', self.encoding])
|
||||
@ -520,9 +513,6 @@ class BasicNewsRecipe(object):
|
||||
self.simultaneous_downloads = 1
|
||||
|
||||
self.navbar = templates.NavBarTemplate()
|
||||
self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
|
||||
if '--base-font-size' not in self.html2lrf_options:
|
||||
self.html2lrf_options.extend(['--base-font-size', '12'])
|
||||
self.failed_downloads = []
|
||||
self.partial_failures = []
|
||||
|
||||
@ -557,7 +547,7 @@ class BasicNewsRecipe(object):
|
||||
return self.postprocess_html(soup, first_fetch)
|
||||
|
||||
|
||||
def download(self, for_lrf=False):
|
||||
def download(self):
|
||||
'''
|
||||
Download and pre-process all articles from the feeds in this recipe.
|
||||
This method should be called only once on a particular Recipe instance.
|
||||
|
Loading…
x
Reference in New Issue
Block a user