Input plugin for recipes

2025-12-06 05:05:03 -05:00 · 2009-04-27 15:41:10 -07:00 · 2009-04-27 15:41:10 -07:00 · 2da5589964
commit 2da5589964
parent 996dda3ffe
8 changed files with 108 additions and 31 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput
 from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.comic.input import ComicInput
 from calibre.web.feeds.input import RecipeInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.txt.output import TXTOutput
@ -296,7 +297,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
-        FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
+        FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput, RecipeInput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -52,7 +52,7 @@ def check_command_line_options(parser, args, log):
        raise SystemExit(1)
    input = os.path.abspath(args[1])
-    if not os.access(input, os.R_OK):
+    if not input.endswith('.recipe') and not os.access(input, os.R_OK):
        log.error('Cannot read from', input)
        raise SystemExit(1)
@ -169,6 +169,9 @@ def add_pipeline_options(parser, plumber):
            if rec.level < rec.HIGH:
                option_recommendation_to_cli_option(add_option, rec)
    option_recommendation_to_cli_option(parser.add_option,
            plumber.get_option_by_name('list_recipes'))
 def option_parser():
    return OptionParser(usage=USAGE)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -360,6 +360,10 @@ OptionRecommendation(name='book_producer',
 OptionRecommendation(name='language',
    recommended_value=None, level=OptionRecommendation.LOW,
    help=_('Set the language.')),
 OptionRecommendation(name='list_recipes',
    recommended_value=False, help=_('List available recipes.')),
 ]
        input_fmt = os.path.splitext(self.input)[1]
@ -525,6 +529,13 @@ OptionRecommendation(name='language',
        self.setup_options()
        if self.opts.verbose:
            self.log.filter_level = self.log.DEBUG
        if self.opts.list_recipes:
            from calibre.web.feeds.recipes import titles
            self.log('Available recipes:')
            for title in sorted(titles):
                self.log('\t'+title)
            self.log('%d recipes available'%len(titles))
            raise SystemExit(0)
        # Run any preprocess plugins
        from calibre.customize.ui import run_plugins_on_preprocess
@ -535,8 +546,13 @@ OptionRecommendation(name='language',
        accelerators = {}
        tdir = PersistentTemporaryDirectory('_plumber')
        stream = self.input if self.input_fmt == 'recipe' else \
                open(self.input, 'rb')
-        self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
+        if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
            self.opts.lrf = True
        self.oeb = self.input_plugin(stream, self.opts,
                                    self.input_fmt, self.log,
                                    accelerators, tdir)
        if self.opts.debug_input is not None:
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -1578,15 +1578,15 @@ class OEBBook(object):
                return data.decode('utf-16')
            except UnicodeDecodeError:
                pass
        try:
            return data.decode('utf-8')
        except UnicodeDecodeError:
            pass
        if self.encoding is not None:
            try:
                return data.decode(self.encoding)
            except UnicodeDecodeError:
                pass
        try:
            return data.decode('utf-8')
        except UnicodeDecodeError:
            pass
        data, _ = xml_to_unicode(data)
        data = data.replace('\r\n', '\n')
        data = data.replace('\r', '\n')
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -59,6 +59,7 @@ class Split(object):
        self.fix_links()
    def split_item(self, item):
        page_breaks, page_break_ids = [], []
        if self.split_on_page_breaks:
            page_breaks, page_break_ids = self.find_page_breaks(item)
--- a/src/calibre/web/init.py
+++ b/src/calibre/web/init.py
@ -2,5 +2,6 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 class Recipe(object):
    # Common base class for recipes. It defines no behaviour of its own;
    # BasicNewsRecipe (calibre.web.feeds.news) derives from it.
    pass
--- a/src/calibre/web/feeds/input.py
+++ b/src/calibre/web/feeds/input.py
@ -0,0 +1,65 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 class RecipeInput(InputFormatPlugin):
    '''
    Input plugin for ``.recipe`` inputs: runs a recipe to download its
    content and hands the resulting OPF file to the conversion pipeline.
    '''

    name        = 'Recipe Input'
    author      = 'Kovid Goyal'
    description = _('Download periodical content from the internet')
    file_types  = set(['recipe'])

    # Pipeline option values recommended (at HIGH level) for recipe input.
    recommendations = set([
        ('chapter_mark', 'none', OptionRecommendation.HIGH),
        ('dont_split_on_page_breaks', True, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ])

    # Options specific to this plugin.
    options = set([
        OptionRecommendation(name='test', recommended_value=False,
            help=_('Useful for recipe development. Forces '
            'max_articles_per_feed to 2 and downloads at most 2 feeds.')),
        OptionRecommendation(name='username', recommended_value=None,
            help=_('Username for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='password', recommended_value=None,
            help=_('Password for sites that require a login to access '
                'content.')),
        OptionRecommendation(name='lrf', recommended_value=False,
            help='Optimize fetching for subsequent conversion to LRF.'),
        ])

    def convert(self, recipe_or_file, opts, file_ext, log,
            accelerators, progress=lambda x, y: x):
        '''
        Run a recipe and return the path to the OPF file it produced.

        :param recipe_or_file: Either the path to a readable recipe file, or
            a name of the form ``<title>.recipe`` whose title identifies a
            builtin recipe.
        :param opts: Conversion options. Passed to the recipe; its
            ``output_profile.flow_size`` is set to 0 after the download.
        :param file_ext: Not used by this method.
        :param log: Log object, passed to the recipe.
        :param accelerators: Not used by this method.
        :param progress: Progress callback, passed to the recipe.
        :return: Absolute path to the first ``.opf`` file found in the
            current working directory, or ``None`` if there is none.
        :raises ValueError: If no recipe could be obtained from
            ``recipe_or_file``.
        '''
        from calibre.web.feeds.recipes import \
                get_builtin_recipe, compile_recipe
        if os.access(recipe_or_file, os.R_OK):
            # Read via a context manager so the file handle is closed
            # promptly instead of being left to the garbage collector.
            with open(recipe_or_file, 'rb') as recipe_file:
                source = recipe_file.read()
            recipe = compile_recipe(source)
        else:
            # Not a readable file: treat the base name (minus its
            # extension) as the title of a builtin recipe.
            title = os.path.basename(recipe_or_file).rpartition('.')[0]
            recipe = get_builtin_recipe(title)

        if recipe is None:
            raise ValueError('%s is not a valid recipe file or builtin recipe' %
                    recipe_or_file)

        ro = recipe(opts, log, progress)
        ro.download()

        opts.output_profile.flow_size = 0

        # The recipe uses os.getcwd() as its output directory, so look for
        # the generated OPF in the current working directory.
        for f in os.listdir('.'):
            if f.endswith('.opf'):
                return os.path.abspath(f)
        # No OPF was produced.
        return None
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -20,6 +20,7 @@ from calibre import browser, __appname__, iswindows, \
 from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.lrf import entity_to_unicode
 from calibre.web import Recipe
 from calibre.ebooks import render_html
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata import MetaInformation
@ -27,12 +28,11 @@ from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
 from calibre.web.fetch.simple import option_parser as web2disk_option_parser
 from calibre.web.fetch.simple import RecursiveFetcher
 from calibre.utils.threadpool import WorkRequest, ThreadPool, NoResultsPending
 from calibre.utils.logging import Log
 from calibre.ptempfile import PersistentTemporaryFile, \
                              PersistentTemporaryDirectory
-class BasicNewsRecipe(object):
+class BasicNewsRecipe(Recipe):
    '''
    Abstract base class that contains logic needed in all feed fetchers.
    '''
@ -443,40 +443,34 @@ class BasicNewsRecipe(object):
        '''
        raise NotImplementedError
-    def __init__(self, options, parser, progress_reporter):
+    def __init__(self, options, log, progress_reporter):
        '''
        Initialize the recipe.
        :param options: Parsed commandline options
        :param log:     Log object used for all output; stored as ``self.log``.
        :param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
        '''
-        self.log = Log()
+        self.log = log
        if options.verbose:
            self.log.filter_level = self.log.DEBUG
        if not isinstance(self.title, unicode):
            self.title = unicode(self.title, 'utf-8', 'replace')
-        for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
+        self.debug = options.verbose > 1
-            setattr(self, attr, getattr(options, attr))
+        self.output_dir = os.getcwd()
        self.verbose = options.verbose
        self.test = options.test
        self.username = options.username
        self.password = options.password
        self.lrf = options.lrf
        self.output_dir = os.path.abspath(self.output_dir)
        if options.test:
            self.max_articles_per_feed = 2
            self.simultaneous_downloads = min(4, self.simultaneous_downloads)
        if self.debug:
            self.verbose = True
        self.report_progress = progress_reporter
        self.username = self.password = None
        #: If True optimize downloading for eventual conversion to LRF
        self.lrf = False
        defaults = parser.get_default_values()
        for opt in options.__dict__.keys():
            if getattr(options, opt) != getattr(defaults, opt, None):
                setattr(self, opt, getattr(options, opt))
        if isinstance(self.feeds, basestring):
            self.feeds = eval(self.feeds)
            if isinstance(self.feeds, basestring):
@ -493,7 +487,6 @@ class BasicNewsRecipe(object):
            '--timeout', str(self.timeout),
            '--max-recursions', str(self.recursions),
            '--delay', str(self.delay),
            '--timeout', str(self.timeout),
            ]
        if self.encoding is not None:
            web2disk_cmdline.extend(['--encoding', self.encoding])
@ -520,9 +513,6 @@ class BasicNewsRecipe(object):
            self.simultaneous_downloads = 1
        self.navbar = templates.NavBarTemplate()
        self.html2lrf_options.extend(['--page-break-before', '$', '--use-spine', '--header', '--encoding', 'utf-8'])
        if '--base-font-size' not in self.html2lrf_options:
            self.html2lrf_options.extend(['--base-font-size', '12'])
        self.failed_downloads = []
        self.partial_failures = []
@ -557,7 +547,7 @@ class BasicNewsRecipe(object):
        return self.postprocess_html(soup, first_fetch)
-    def download(self, for_lrf=False):
+    def download(self):
        '''
        Download and pre-process all articles from the feeds in this recipe.
        This method should be called only once on a particular Recipe instance.