mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Input plugin for recipes
This commit is contained in:
parent
996dda3ffe
commit
2da5589964
@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput
|
|||||||
from calibre.ebooks.rtf.input import RTFInput
|
from calibre.ebooks.rtf.input import RTFInput
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
from calibre.ebooks.comic.input import ComicInput
|
from calibre.ebooks.comic.input import ComicInput
|
||||||
|
from calibre.web.feeds.input import RecipeInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
from calibre.ebooks.epub.output import EPUBOutput
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
@ -296,7 +297,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
|
|||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
|
||||||
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
|
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -52,7 +52,7 @@ def check_command_line_options(parser, args, log):
|
|||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
input = os.path.abspath(args[1])
|
input = os.path.abspath(args[1])
|
||||||
if not os.access(input, os.R_OK):
|
if not input.endswith('.recipe') and not os.access(input, os.R_OK):
|
||||||
log.error('Cannot read from', input)
|
log.error('Cannot read from', input)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber):
|
|||||||
if rec.level < rec.HIGH:
|
if rec.level < rec.HIGH:
|
||||||
option_recommendation_to_cli_option(add_option, rec)
|
option_recommendation_to_cli_option(add_option, rec)
|
||||||
|
|
||||||
|
option_recommendation_to_cli_option(parser.add_option,
|
||||||
|
plumber.get_option_by_name('list_recipes'))
|
||||||
|
|
||||||
def option_parser():
|
def option_parser():
|
||||||
return OptionParser(usage=USAGE)
|
return OptionParser(usage=USAGE)
|
||||||
|
|
||||||
|
@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer',
|
|||||||
OptionRecommendation(name='language',
|
OptionRecommendation(name='language',
|
||||||
recommended_value=None, level=OptionRecommendation.LOW,
|
recommended_value=None, level=OptionRecommendation.LOW,
|
||||||
help=_('Set the language.')),
|
help=_('Set the language.')),
|
||||||
|
|
||||||
|
OptionRecommendation(name='list_recipes',
|
||||||
|
recommended_value=False, help=_('List available recipes.')),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
input_fmt = os.path.splitext(self.input)[1]
|
input_fmt = os.path.splitext(self.input)[1]
|
||||||
@ -525,6 +529,13 @@ OptionRecommendation(name='language',
|
|||||||
self.setup_options()
|
self.setup_options()
|
||||||
if self.opts.verbose:
|
if self.opts.verbose:
|
||||||
self.log.filter_level = self.log.DEBUG
|
self.log.filter_level = self.log.DEBUG
|
||||||
|
if self.opts.list_recipes:
|
||||||
|
from calibre.web.feeds.recipes import titles
|
||||||
|
self.log('Available recipes:')
|
||||||
|
for title in sorted(titles):
|
||||||
|
self.log('\t'+title)
|
||||||
|
self.log('%d recipes available'%len(titles))
|
||||||
|
raise SystemExit(0)
|
||||||
|
|
||||||
# Run any preprocess plugins
|
# Run any preprocess plugins
|
||||||
from calibre.customize.ui import run_plugins_on_preprocess
|
from calibre.customize.ui import run_plugins_on_preprocess
|
||||||
@ -535,8 +546,13 @@ OptionRecommendation(name='language',
|
|||||||
accelerators = {}
|
accelerators = {}
|
||||||
|
|
||||||
tdir = PersistentTemporaryDirectory('_plumber')
|
tdir = PersistentTemporaryDirectory('_plumber')
|
||||||
|
stream = self.input if self.input_fmt == 'recipe' else \
|
||||||
|
open(self.input, 'rb')
|
||||||
|
|
||||||
self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
|
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
|
||||||
|
self.opts.lrf = True
|
||||||
|
|
||||||
|
self.oeb = self.input_plugin(stream, self.opts,
|
||||||
self.input_fmt, self.log,
|
self.input_fmt, self.log,
|
||||||
accelerators, tdir)
|
accelerators, tdir)
|
||||||
if self.opts.debug_input is not None:
|
if self.opts.debug_input is not None:
|
||||||
|
@ -1578,15 +1578,15 @@ class OEBBook(object):
|
|||||||
return data.decode('utf-16')
|
return data.decode('utf-16')
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
try:
|
|
||||||
return data.decode('utf-8')
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
pass
|
|
||||||
if self.encoding is not None:
|
if self.encoding is not None:
|
||||||
try:
|
try:
|
||||||
return data.decode(self.encoding)
|
return data.decode(self.encoding)
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
|
try:
|
||||||
|
return data.decode('utf-8')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
pass
|
||||||
data, _ = xml_to_unicode(data)
|
data, _ = xml_to_unicode(data)
|
||||||
data = data.replace('\r\n', '\n')
|
data = data.replace('\r\n', '\n')
|
||||||
data = data.replace('\r', '\n')
|
data = data.replace('\r', '\n')
|
||||||
|
@ -59,6 +59,7 @@ class Split(object):
|
|||||||
self.fix_links()
|
self.fix_links()
|
||||||
|
|
||||||
def split_item(self, item):
|
def split_item(self, item):
|
||||||
|
page_breaks, page_break_ids = [], []
|
||||||
if self.split_on_page_breaks:
|
if self.split_on_page_breaks:
|
||||||
page_breaks, page_break_ids = self.find_page_breaks(item)
|
page_breaks, page_break_ids = self.find_page_breaks(item)
|
||||||
|
|
||||||
|
@ -2,5 +2,6 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
|
||||||
|
class Recipe(object):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
65
src/calibre/web/feeds/input.py
Normal file
65
src/calibre/web/feeds/input.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
|
|
||||||
|
class RecipeInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'Recipe Input'
|
||||||
|
author = 'Kovid Goyal'
|
||||||
|
description = _('Download periodical content from the internet')
|
||||||
|
file_types = set(['recipe'])
|
||||||
|
|
||||||
|
recommendations = set([
|
||||||
|
('chapter_mark', 'none', OptionRecommendation.HIGH),
|
||||||
|
('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
|
||||||
|
('use_auto_toc', False, OptionRecommendation.HIGH),
|
||||||
|
])
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
OptionRecommendation(name='test', recommended_value=False,
|
||||||
|
help=_('Useful for recipe development. Forces '
|
||||||
|
'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
|
||||||
|
OptionRecommendation(name='username', recommended_value=None,
|
||||||
|
help=_('Username for sites that require a login to access '
|
||||||
|
'content.')),
|
||||||
|
OptionRecommendation(name='password', recommended_value=None,
|
||||||
|
help=_('Password for sites that require a login to access '
|
||||||
|
'content.')),
|
||||||
|
OptionRecommendation(name='lrf', recommended_value=False,
|
||||||
|
help='Optimize fetching for subsequent conversion to LRF.'),
|
||||||
|
])
|
||||||
|
|
||||||
|
def convert(self, recipe_or_file, opts, file_ext, log,
|
||||||
|
accelerators, progress=lambda x, y: x):
|
||||||
|
from calibre.web.feeds.recipes import \
|
||||||
|
get_builtin_recipe, compile_recipe
|
||||||
|
if os.access(recipe_or_file, os.R_OK):
|
||||||
|
recipe = compile_recipe(open(recipe_or_file, 'rb').read())
|
||||||
|
else:
|
||||||
|
title = os.path.basename(recipe_or_file).rpartition('.')[0]
|
||||||
|
recipe = get_builtin_recipe(title)
|
||||||
|
|
||||||
|
if recipe is None:
|
||||||
|
raise ValueError('%s is not a valid recipe file or builtin recipe' %
|
||||||
|
recipe_or_file)
|
||||||
|
|
||||||
|
ro = recipe(opts, log, progress)
|
||||||
|
ro.download()
|
||||||
|
|
||||||
|
opts.output_profile.flow_size = 0
|
||||||
|
|
||||||
|
for f in os.listdir('.'):
|
||||||
|
if f.endswith('.opf'):
|
||||||
|
return os.path.abspath(f)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \
|
|||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.lrf import entity_to_unicode
|
from calibre.ebooks.lrf import entity_to_unicode
|
||||||
|
from calibre.web import Recipe
|
||||||
from calibre.ebooks import render_html
|
from calibre.ebooks import render_html
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
|
|||||||
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
|
from calibre.web.fetch.simple import option_parser as web2disk_option_parser
|
||||||
from calibre.web.fetch.simple import RecursiveFetcher
|
from calibre.web.fetch.simple import RecursiveFetcher
|
||||||
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
|
||||||
from calibre.utils.logging import Log
|
|
||||||
from calibre.ptempfile import PersistentTemporaryFile, \
|
from calibre.ptempfile import PersistentTemporaryFile, \
|
||||||
PersistentTemporaryDirectory
|
PersistentTemporaryDirectory
|
||||||
|
|
||||||
|
|
||||||
class BasicNewsRecipe(object):
|
class BasicNewsRecipe(Recipe):
|
||||||
'''
|
'''
|
||||||
Abstract base class that contains logic needed in all feed fetchers.
|
Abstract base class that contains logic needed in all feed fetchers.
|
||||||
'''
|
'''
|
||||||
@ -443,40 +443,34 @@ class BasicNewsRecipe(object):
|
|||||||
'''
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def __init__(self, options, parser, progress_reporter):
|
def __init__(self, options, log, progress_reporter):
|
||||||
'''
|
'''
|
||||||
Initialize the recipe.
|
Initialize the recipe.
|
||||||
:param options: Parsed commandline options
|
:param options: Parsed commandline options
|
||||||
:param parser: Command line option parser. Used to intelligently merge options.
|
:param log: Log object the recipe uses for its messages (stored as ``self.log``).
|
||||||
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
|
:param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
|
||||||
'''
|
'''
|
||||||
self.log = Log()
|
self.log = log
|
||||||
if options.verbose:
|
|
||||||
self.log.filter_level = self.log.DEBUG
|
|
||||||
if not isinstance(self.title, unicode):
|
if not isinstance(self.title, unicode):
|
||||||
self.title = unicode(self.title, 'utf-8', 'replace')
|
self.title = unicode(self.title, 'utf-8', 'replace')
|
||||||
|
|
||||||
for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
|
self.debug = options.verbose > 1
|
||||||
setattr(self, attr, getattr(options, attr))
|
self.output_dir = os.getcwd()
|
||||||
|
self.verbose = options.verbose
|
||||||
|
self.test = options.test
|
||||||
|
self.username = options.username
|
||||||
|
self.password = options.password
|
||||||
|
self.lrf = options.lrf
|
||||||
|
|
||||||
self.output_dir = os.path.abspath(self.output_dir)
|
self.output_dir = os.path.abspath(self.output_dir)
|
||||||
if options.test:
|
if options.test:
|
||||||
self.max_articles_per_feed = 2
|
self.max_articles_per_feed = 2
|
||||||
self.simultaneous_downloads = min(4, self.simultaneous_downloads)
|
self.simultaneous_downloads = min(4, self.simultaneous_downloads)
|
||||||
|
|
||||||
|
|
||||||
if self.debug:
|
if self.debug:
|
||||||
self.verbose = True
|
self.verbose = True
|
||||||
self.report_progress = progress_reporter
|
self.report_progress = progress_reporter
|
||||||
|
|
||||||
self.username = self.password = None
|
|
||||||
#: If True optimize downloading for eventual conversion to LRF
|
|
||||||
self.lrf = False
|
|
||||||
defaults = parser.get_default_values()
|
|
||||||
|
|
||||||
for opt in options.__dict__.keys():
|
|
||||||
if getattr(options, opt) != getattr(defaults, opt, None):
|
|
||||||
setattr(self, opt, getattr(options, opt))
|
|
||||||
|
|
||||||
if isinstance(self.feeds, basestring):
|
if isinstance(self.feeds, basestring):
|
||||||
self.feeds = eval(self.feeds)
|
self.feeds = eval(self.feeds)
|
||||||
if isinstance(self.feeds, basestring):
|
if isinstance(self.feeds, basestring):
|
||||||
@ -493,7 +487,6 @@ class BasicNewsRecipe(object):
|
|||||||
'--timeout', str(self.timeout),
|
'--timeout', str(self.timeout),
|
||||||
'--max-recursions', str(self.recursions),
|
'--max-recursions', str(self.recursions),
|
||||||
'--delay', str(self.delay),
|
'--delay', str(self.delay),
|
||||||
'--timeout', str(self.timeout),
|
|
||||||
]
|
]
|
||||||
if self.encoding is not None:
|
if self.encoding is not None:
|
||||||
web2disk_cmdline.extend(['--encoding', self.encoding])
|
web2disk_cmdline.extend(['--encoding', self.encoding])
|
||||||
@ -520,9 +513,6 @@ class BasicNewsRecipe(object):
|
|||||||
self.simultaneous_downloads = 1
|
self.simultaneous_downloads = 1
|
||||||
|
|
||||||
self.navbar = templates.NavBarTemplate()
|
self.navbar = templates.NavBarTemplate()
|
||||||
self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
|
|
||||||
if '--base-font-size' not in self.html2lrf_options:
|
|
||||||
self.html2lrf_options.extend(['--base-font-size', '12'])
|
|
||||||
self.failed_downloads = []
|
self.failed_downloads = []
|
||||||
self.partial_failures = []
|
self.partial_failures = []
|
||||||
|
|
||||||
@ -557,7 +547,7 @@ class BasicNewsRecipe(object):
|
|||||||
return self.postprocess_html(soup, first_fetch)
|
return self.postprocess_html(soup, first_fetch)
|
||||||
|
|
||||||
|
|
||||||
def download(self, for_lrf=False):
|
def download(self):
|
||||||
'''
|
'''
|
||||||
Download and pre-process all articles from the feeds in this recipe.
|
Download and pre-process all articles from the feeds in this recipe.
|
||||||
This method should be called only once on a particular Recipe instance.
|
This method should be called only once on a particular Recipe instance.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user